1
0

YouTube fixed
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 36s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 32s

This commit is contained in:
Jon Michael Aanes 2025-03-15 22:18:30 +01:00
parent 3d9c694fe8
commit 9058279b4e
3 changed files with 29 additions and 47 deletions

View File

@ -4,18 +4,13 @@ from collections.abc import Iterator, Mapping
from typing import Any from typing import Any
from personal_data.data import DeduplicateMode, Scraper from personal_data.data import DeduplicateMode, Scraper
from ..util import safe_del
from .. import secrets from .. import secrets
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def safe_del(d: dict, *keys: str) -> None:
    """Remove each of ``keys`` from ``d`` in place, skipping keys that are absent.

    Args:
        d: Dictionary to mutate.
        *keys: Keys to remove; a key that is not present is ignored.
    """
    for key in keys:
        # pop() with a default never raises and needs only one lookup per key.
        d.pop(key, None)
def to_data_point(p: dict[str, Any]) -> Mapping[str, Any]: def to_data_point(p: dict[str, Any]) -> Mapping[str, Any]:
p['owner'] = p['owner']['login'] p['owner'] = p['owner']['login']
safe_del(p, 'permissions', 'internal_tracker') safe_del(p, 'permissions', 'internal_tracker')

View File

@ -5,57 +5,37 @@ import subprocess
from collections.abc import Iterator
from dataclasses import dataclass

from personal_data.data import DeduplicateMode, Scraper

from ..util import safe_del
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Id of the playlist whose URL is handed to yt-dlp by the scraper in this file.
PLAYLIST_ID='PLAfDVJvDKCvOMvfoTL7eW8GkWNJwd90eV'
# Disabled alternative id. NOTE(review): presumably YouTube's built-in
# "Liked videos" list -- confirm before enabling.
#PLAYLIST_ID='LL'
@dataclass(frozen=True)
class YoutubeFavoritesScraper(Scraper):
    """Scraper that lists the videos of a YouTube playlist through yt-dlp."""

    dataset_name: str = 'youtube_favorites'
    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
    # Unannotated, so ``dataclass`` leaves this as a plain class attribute
    # rather than turning it into a field.
    deduplicate_ignore_columns = []

    def scrape(self) -> Iterator[dict]:
        """Yield one dict per video in the playlist named by ``PLAYLIST_ID``.

        Runs ``yt-dlp --flat-playlist --dump-json`` on the playlist URL and
        parses every non-blank line of its standard output as a JSON object.
        Each object gets a ``thumbnail`` key holding the URL of the last
        entry in its ``thumbnails`` list (``None`` when there is none); the
        ``_type``, ``_version`` and ``thumbnails`` keys are then dropped.

        This is a generator: yt-dlp is only invoked once iteration starts.

        Raises:
            RuntimeError: If yt-dlp exits with a non-zero return code. The
                message carries the return code and the captured stderr.
            json.JSONDecodeError: If a non-blank output line is not valid JSON.
        """
        result = subprocess.run(
            [
                'yt-dlp',
                '--flat-playlist',
                '--dump-json',
                f'https://www.youtube.com/playlist?list={PLAYLIST_ID}',
            ],
            capture_output=True,
            text=True,
        )

        if result.returncode != 0:
            raise RuntimeError(f'Non-zero returncode in command: {result.returncode}\n\n{result.stderr}')

        for line in result.stdout.splitlines():
            if not line.strip():
                # Tolerate blank lines instead of failing in json.loads.
                continue
            data = json.loads(line)
            # An entry may lack thumbnails; keep the column and store None
            # rather than raising KeyError/IndexError for the whole scrape.
            thumbnails = data.get('thumbnails') or []
            data['thumbnail'] = thumbnails[-1]['url'] if thumbnails else None
            safe_del(data, '_type', '_version', 'thumbnails')
            yield data

View File

@ -14,6 +14,13 @@ from . import csv_import, data
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def safe_del(d: dict, *keys: str) -> None:
    """Remove each of ``keys`` from ``d`` in place, skipping keys that are absent.

    Args:
        d: Dictionary to mutate.
        *keys: Keys to remove; a key that is not present is ignored.
    """
    for key in keys:
        # pop() with a default never raises and needs only one lookup per key.
        d.pop(key, None)
def equals_without_fields( def equals_without_fields(
a: Mapping[str, Any], a: Mapping[str, Any],
b: Mapping[str, Any], b: Mapping[str, Any],