# personal-data/personal_data/fetchers/jellyfin_watch_history.py
# (web-view header: 74 lines, 2.6 KiB, Python; revision dated 2024-09-08 18:20:09 +00:00)
import dataclasses
import datetime
import logging
import re
import bs4
from typing import Any
from collections.abc import Iterator
from jellyfin_apiclient_python import JellyfinClient
from ..data import DeduplicateMode, Scraper
from .. import secrets, parse_util, html_util, _version
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# NOTE(review): the constants below point at Steam Community rather than
# Jellyfin, and nothing visible in this module reads them -- presumably
# left over from another fetcher. Confirm before relying on or removing them.
URL_SITE_ROOT: str = 'https://steamcommunity.com/'
URL_GAME_ACHIVEMENTS: str = (
    URL_SITE_ROOT + 'id/{username}/stats/appid/{appid}'
)

# NOTE(review): under strftime/strptime rules this is '%Y' followed by three
# literal 'Y' characters; '%d/%m/%Y' was probably intended. Value kept as-is.
FORMAT_DATE_HEADER: str = '%d/%m/%YYYY'
def iterate_series(
    client,
    parent_id: str = 'a656b907eb3a73532e40e44b968d0225',
    user_id: str = 'dd95c1085c1b4e83ba8e8853fbc644ab',
) -> Iterator[Any]:
    """Yield every item of type ``Series`` listed under a parent item.

    Args:
        client: Object exposing ``client.jellyfin.user_items(params=...)``;
            the scraper in this module passes a ``JellyfinClient``.
        parent_id: Value sent as ``parentId``. Defaults to the id that was
            previously hard-coded here, so existing callers are unaffected.
        user_id: Value sent as ``userId``. Defaults to the id that was
            previously hard-coded here.

    Yields:
        Each entry of the response's ``'Items'`` list, unchanged.

    Raises:
        KeyError: If the response has no ``'Items'`` key.
    """
    query = {
        'includeItemTypes': 'Series',
        'parentId': parent_id,
        'userId': user_id,
    }
    response = client.jellyfin.user_items(params=query)
    yield from response['Items']
def iterate_watched_episodes_of_series(
    client,
    series_id: str,
    user_id: str = 'dd95c1085c1b4e83ba8e8853fbc644ab',
) -> Iterator[Any]:
    """Yield the played episodes found (recursively) under one series.

    Args:
        client: Object exposing ``client.jellyfin.user_items(params=...)``;
            the scraper in this module passes a ``JellyfinClient``.
        series_id: Value sent as ``parentId``, i.e. the series to search.
        user_id: Value sent as ``userId``. Defaults to the id that was
            previously hard-coded here, so existing callers are unaffected.

    Yields:
        Each entry of the response's ``'Items'`` list, unchanged.

    Raises:
        KeyError: If the response has no ``'Items'`` key.
    """
    query = {
        'filters': 'IsPlayed',
        'recursive': True,
        'includeItemTypes': 'Episode',
        'parentId': series_id,
        'userId': user_id,
        'fields': 'AirTime',
    }
    response = client.jellyfin.user_items(params=query)
    yield from response['Items']
@dataclasses.dataclass(frozen=True)
class JellyfinWatchHistoryScraper(Scraper):
    """Scraper that emits one row per watched episode from a Jellyfin server.

    The server address and credentials are read from
    ``secrets.JELLYFIN_URL``, ``secrets.JELLYFIN_USERNAME`` and
    ``secrets.JELLYFIN_PASSWORD``.
    """

    dataset_name = 'show_episodes_watched'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[dict[str, Any]]:
        """Log in to Jellyfin and yield a row for each watched episode.

        Every series returned by ``iterate_series`` is visited, and each of
        its episodes returned by ``iterate_watched_episodes_of_series``
        becomes one row. Episodes without an ``'IndexNumber'`` are skipped.

        Yields:
            Dicts with the keys ``series.name``, ``season.name``,
            ``episode.index``, ``episode.name``, ``me.last_played_time``,
            ``episode.duration_seconds`` and ``episode.premiere_date``
            (the last is ``None`` when the item has no ``'PremiereDate'``).

        Raises:
            KeyError: If an episode item lacks one of the required keys
                (``SeriesName``, ``SeasonName``, ``Name``, ``UserData``,
                ``LastPlayedDate``, ``RunTimeTicks``).
        """
        client = JellyfinClient()
        client.config.app(
            'personal_data',
            _version.__version__,
            'test_machine',
            'unique_id_1',
        )
        # NOTE(review): sets the client's "auth.ssl" option to False,
        # presumably turning off TLS certificate verification -- confirm
        # this is intended for the target server.
        client.config.data["auth.ssl"] = False
        client.auth.connect_to_address(secrets.JELLYFIN_URL)
        client.auth.login(
            secrets.JELLYFIN_URL,
            secrets.JELLYFIN_USERNAME,
            secrets.JELLYFIN_PASSWORD,
        )

        # Divisor that turns 'RunTimeTicks' into the seconds stored under
        # 'episode.duration_seconds'.
        ticks_per_second = 10_000_000

        # The loop variables are deliberately not `del`-ed: a `del` placed
        # after a loop raises UnboundLocalError when the loop body never ran
        # (e.g. a series with no watched episodes), and one placed inside
        # the loop body has no effect.
        for series_data in iterate_series(client):
            watched_episodes = iterate_watched_episodes_of_series(
                client, series_data['Id'],
            )
            for episode_data in watched_episodes:
                episode_index = episode_data.get('IndexNumber')
                if episode_index is None:
                    continue
                yield {
                    'series.name': episode_data['SeriesName'],
                    'season.name': episode_data['SeasonName'],
                    'episode.index': int(episode_index),
                    'episode.name': episode_data['Name'],
                    'me.last_played_time': episode_data['UserData']['LastPlayedDate'],
                    'episode.duration_seconds': episode_data['RunTimeTicks'] / ticks_per_second,
                    'episode.premiere_date': episode_data.get('PremiereDate'),
                }