1
0

fix: Remove print statements and improve error handling in YouTube fetcher

This commit is contained in:
Jon Michael Aanes (aider) 2025-03-15 22:35:06 +01:00
parent c4291f0b60
commit 638a3ae842

View File

@ -3,6 +3,7 @@ import json
import logging import logging
import subprocess import subprocess
from dataclasses import dataclass from dataclasses import dataclass
from typing import ClassVar
from personal_data.data import DeduplicateMode, Scraper from personal_data.data import DeduplicateMode, Scraper
from personal_data.secrets import YOUTUBE_AUTH from personal_data.secrets import YOUTUBE_AUTH
@ -34,7 +35,6 @@ def scrape(watch_history: bool) -> list[dict[str, str]]:
url, url,
] ]
print(ytdlp_args)
result = subprocess.run( result = subprocess.run(
ytdlp_args, ytdlp_args,
capture_output=True, capture_output=True,
@ -42,22 +42,20 @@ def scrape(watch_history: bool) -> list[dict[str, str]]:
) )
if result.returncode != 0: if result.returncode != 0:
raise RuntimeError( message = 'Non-zero returncode in command: ' + str(result.returncode) + "\n\n" + result.stderr
f'Non-zero returncode in command: {result.returncode}\n\n{result.stderr}', raise RuntimeError(message)
)
print(result.stderr)
print(result.stdout)
output = [] output = []
for line in result.stdout.splitlines(): for line in result.stdout.splitlines():
data = json.loads(line) data = json.loads(line)
if watch_history: if watch_history:
if 'thumbnails' in data and data['thumbnails']: if data.get('thumbnails'):
data['thumbnail'] = data['thumbnails'][-1]['url'] data['thumbnail'] = data['thumbnails'][-1]['url']
if 'timestamp' in data: if data.get('timestamp'):
data['watch_datetime'] = datetime.datetime.fromtimestamp( data['watch_datetime'] = datetime.datetime.fromtimestamp(
int(data['timestamp']), int(data['timestamp']),
tz=datetime.timezone.utc
).isoformat() ).isoformat()
else: else:
data['thumbnail'] = data['thumbnails'][-1]['url'] data['thumbnail'] = data['thumbnails'][-1]['url']
@ -70,7 +68,7 @@ def scrape(watch_history: bool) -> list[dict[str, str]]:
class YoutubeFavoritesScraper(Scraper): class YoutubeFavoritesScraper(Scraper):
dataset_name: str = 'youtube_favorites' dataset_name: str = 'youtube_favorites'
deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
deduplicate_ignore_columns = [] deduplicate_ignore_columns: ClassVar[list[str]] = []
def scrape(self) -> list[dict]: def scrape(self) -> list[dict]:
yield from scrape(watch_history=False) yield from scrape(watch_history=False)
@ -80,7 +78,7 @@ class YoutubeFavoritesScraper(Scraper):
class YoutubeWatchHistoryScraper(Scraper): class YoutubeWatchHistoryScraper(Scraper):
dataset_name: str = 'youtube_watch_history' dataset_name: str = 'youtube_watch_history'
deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
deduplicate_ignore_columns = [] deduplicate_ignore_columns: ClassVar[list[str]] = []
def scrape(self) -> list[dict]: def scrape(self) -> list[dict]:
yield from scrape(watch_history=True) yield from scrape(watch_history=True)