fix: Remove print statements and improve error handling in YouTube fetcher
This commit is contained in:
parent
c4291f0b60
commit
638a3ae842
|
@ -3,6 +3,7 @@ import json
|
||||||
import logging
|
import logging
|
||||||
import subprocess
|
import subprocess
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from typing import ClassVar
|
||||||
|
|
||||||
from personal_data.data import DeduplicateMode, Scraper
|
from personal_data.data import DeduplicateMode, Scraper
|
||||||
from personal_data.secrets import YOUTUBE_AUTH
|
from personal_data.secrets import YOUTUBE_AUTH
|
||||||
|
@ -34,7 +35,6 @@ def scrape(watch_history: bool) -> list[dict[str, str]]:
|
||||||
url,
|
url,
|
||||||
]
|
]
|
||||||
|
|
||||||
print(ytdlp_args)
|
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
ytdlp_args,
|
ytdlp_args,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
|
@ -42,22 +42,20 @@ def scrape(watch_history: bool) -> list[dict[str, str]]:
|
||||||
)
|
)
|
||||||
|
|
||||||
if result.returncode != 0:
|
if result.returncode != 0:
|
||||||
raise RuntimeError(
|
message = 'Non-zero returncode in command: ' + str(result.returncode) + "\n\n" + result.stderr
|
||||||
f'Non-zero returncode in command: {result.returncode}\n\n{result.stderr}',
|
raise RuntimeError(message)
|
||||||
)
|
|
||||||
|
|
||||||
print(result.stderr)
|
|
||||||
print(result.stdout)
|
|
||||||
|
|
||||||
output = []
|
output = []
|
||||||
for line in result.stdout.splitlines():
|
for line in result.stdout.splitlines():
|
||||||
data = json.loads(line)
|
data = json.loads(line)
|
||||||
if watch_history:
|
if watch_history:
|
||||||
if 'thumbnails' in data and data['thumbnails']:
|
if data.get('thumbnails'):
|
||||||
data['thumbnail'] = data['thumbnails'][-1]['url']
|
data['thumbnail'] = data['thumbnails'][-1]['url']
|
||||||
if 'timestamp' in data:
|
if data.get('timestamp'):
|
||||||
data['watch_datetime'] = datetime.datetime.fromtimestamp(
|
data['watch_datetime'] = datetime.datetime.fromtimestamp(
|
||||||
int(data['timestamp']),
|
int(data['timestamp']),
|
||||||
|
tz=datetime.timezone.utc
|
||||||
).isoformat()
|
).isoformat()
|
||||||
else:
|
else:
|
||||||
data['thumbnail'] = data['thumbnails'][-1]['url']
|
data['thumbnail'] = data['thumbnails'][-1]['url']
|
||||||
|
@ -70,7 +68,7 @@ def scrape(watch_history: bool) -> list[dict[str, str]]:
|
||||||
class YoutubeFavoritesScraper(Scraper):
|
class YoutubeFavoritesScraper(Scraper):
|
||||||
dataset_name: str = 'youtube_favorites'
|
dataset_name: str = 'youtube_favorites'
|
||||||
deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
|
deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
|
||||||
deduplicate_ignore_columns = []
|
deduplicate_ignore_columns: ClassVar[list[str]] = []
|
||||||
|
|
||||||
def scrape(self) -> list[dict]:
|
def scrape(self) -> list[dict]:
|
||||||
yield from scrape(watch_history=False)
|
yield from scrape(watch_history=False)
|
||||||
|
@ -80,7 +78,7 @@ class YoutubeFavoritesScraper(Scraper):
|
||||||
class YoutubeWatchHistoryScraper(Scraper):
|
class YoutubeWatchHistoryScraper(Scraper):
|
||||||
dataset_name: str = 'youtube_watch_history'
|
dataset_name: str = 'youtube_watch_history'
|
||||||
deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
|
deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
|
||||||
deduplicate_ignore_columns = []
|
deduplicate_ignore_columns: ClassVar[list[str]] = []
|
||||||
|
|
||||||
def scrape(self) -> list[dict]:
|
def scrape(self) -> list[dict]:
|
||||||
yield from scrape(watch_history=True)
|
yield from scrape(watch_history=True)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user