diff --git a/personal_data/fetchers/youtube.py b/personal_data/fetchers/youtube.py index acd22a1..e067f64 100644 --- a/personal_data/fetchers/youtube.py +++ b/personal_data/fetchers/youtube.py @@ -3,6 +3,7 @@ import json import logging import subprocess from dataclasses import dataclass +from typing import ClassVar from personal_data.data import DeduplicateMode, Scraper from personal_data.secrets import YOUTUBE_AUTH @@ -34,7 +35,6 @@ def scrape(watch_history: bool) -> list[dict[str, str]]: url, ] - print(ytdlp_args) result = subprocess.run( ytdlp_args, capture_output=True, @@ -42,22 +42,20 @@ def scrape(watch_history: bool) -> list[dict[str, str]]: ) if result.returncode != 0: - raise RuntimeError( - f'Non-zero returncode in command: {result.returncode}\n\n{result.stderr}', - ) + message = 'Non-zero returncode in command: ' + str(result.returncode) + "\n\n" + result.stderr + raise RuntimeError(message) - print(result.stderr) - print(result.stdout) output = [] for line in result.stdout.splitlines(): data = json.loads(line) if watch_history: - if 'thumbnails' in data and data['thumbnails']: + if data.get('thumbnails'): data['thumbnail'] = data['thumbnails'][-1]['url'] - if 'timestamp' in data: + if data.get('timestamp'): data['watch_datetime'] = datetime.datetime.fromtimestamp( int(data['timestamp']), + tz=datetime.timezone.utc ).isoformat() else: data['thumbnail'] = data['thumbnails'][-1]['url'] @@ -70,7 +68,7 @@ def scrape(watch_history: bool) -> list[dict[str, str]]: class YoutubeFavoritesScraper(Scraper): dataset_name: str = 'youtube_favorites' deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS - deduplicate_ignore_columns = [] + deduplicate_ignore_columns: ClassVar[list[str]] = [] def scrape(self) -> list[dict]: yield from scrape(watch_history=False) @@ -80,7 +78,7 @@ class YoutubeFavoritesScraper(Scraper): class YoutubeWatchHistoryScraper(Scraper): dataset_name: str = 'youtube_watch_history' deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS - deduplicate_ignore_columns = [] + deduplicate_ignore_columns: ClassVar[list[str]] = [] def scrape(self) -> list[dict]: yield from scrape(watch_history=True)