1
0
personal-data/personal_data/fetchers/youtube.py
Jon Michael Aanes 9058279b4e
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 36s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 32s
YouTube fixed
2025-03-15 22:18:30 +01:00

42 lines
1.3 KiB
Python

import csv
import json
import logging
import subprocess
from collections.abc import Iterator
from dataclasses import dataclass

from personal_data.data import DeduplicateMode, Scraper

from ..util import safe_del

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# ID of the YouTube playlist to scrape; it is interpolated into the playlist
# URL that is handed to yt-dlp.
PLAYLIST_ID='PLAfDVJvDKCvOMvfoTL7eW8GkWNJwd90eV'
# Alternative playlist ID, currently disabled. NOTE(review): 'LL' is presumably
# YouTube's built-in "Liked videos" playlist -- confirm before re-enabling.
#PLAYLIST_ID='LL'
@dataclass(frozen=True)
class YoutubeFavoritesScraper(Scraper):
    """Scraper that lists the entries of a YouTube playlist via ``yt-dlp``.

    The playlist is selected by the module-level ``PLAYLIST_ID`` constant.
    """

    dataset_name: str = 'youtube_favorites'
    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
    # Not annotated, so @dataclass does not turn this into a field of this class.
    deduplicate_ignore_columns = []

    def scrape(self) -> Iterator[dict]:
        """Yield one dict per playlist entry reported by ``yt-dlp``.

        Runs ``yt-dlp --flat-playlist --dump-json`` on the playlist URL and
        parses every non-blank line of its stdout as one JSON object. Each
        object gets a ``thumbnail`` key holding the URL of the last item in
        its ``thumbnails`` list (``None`` when the entry has no thumbnails),
        after which the ``_type``, ``_version`` and ``thumbnails`` keys are
        passed to ``safe_del`` for removal.

        Because this is a generator, the command is not executed until the
        result is first iterated.

        Yields:
            The parsed and trimmed JSON object of each playlist entry.

        Raises:
            RuntimeError: If ``yt-dlp`` exits with a non-zero return code.
            json.JSONDecodeError: If a non-blank output line is not valid JSON.
        """
        result = subprocess.run(
            [
                'yt-dlp',
                '--flat-playlist',
                '--dump-json',
                f'https://www.youtube.com/playlist?list={PLAYLIST_ID}',
            ],
            capture_output=True,
            text=True,
            # The return code is checked manually below so that stderr can be
            # included in the error message.
            check=False,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f'Non-zero returncode in command: {result.returncode}'
                f'\n\n{result.stderr}'
            )

        for line in result.stdout.splitlines():
            # Tolerate stray blank lines instead of failing in json.loads.
            if not line.strip():
                continue
            data = json.loads(line)
            # Entries without any thumbnails (missing key, null or empty list)
            # get ``None`` rather than aborting the whole scrape.
            thumbnails = data.get('thumbnails') or []
            data['thumbnail'] = thumbnails[-1]['url'] if thumbnails else None
            # NOTE(review): safe_del presumably ignores keys that are absent --
            # confirm against ..util.
            safe_del(data, '_type', '_version', 'thumbnails')
            yield data