import csv
import json
import logging
import subprocess
from dataclasses import dataclass
from io import StringIO

from personal_data.data import DeduplicateMode, Scraper

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class YoutubeFavoritesScraper(Scraper):
    """Fetch the entries of a YouTube playlist with yt-dlp and export them as CSV."""

    # Name under which this scraper's dataset is identified.
    dataset_name: str = 'youtube_favorites'
    # Deduplication strategy declared for this dataset.
    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS

    def fetch_data(self) -> list[dict]:
        """Use yt-dlp to fetch the list of favorited videos.

        Runs ``yt-dlp --flat-playlist --dump-json`` on the playlist URL and
        parses every non-blank line of its standard output as one JSON
        document. The playlist ID in the URL is a placeholder and has to be
        replaced before this returns real data.

        Returns:
            One dict per JSON line printed by yt-dlp.

        Raises:
            subprocess.CalledProcessError: yt-dlp exited with a non-zero status.
            OSError: The yt-dlp executable could not be started.
            json.JSONDecodeError: A non-blank output line was not valid JSON.
        """
        try:
            # Replace 'YOUR_FAVORITES_ID' with your actual favorites playlist ID.
            result = subprocess.run(
                [
                    'yt-dlp',
                    '--flat-playlist',
                    '--dump-json',
                    'https://www.youtube.com/playlist?list=YOUR_FAVORITES_ID',
                ],
                capture_output=True,
                check=True,
                text=True,
            )
            # Skip blank lines: json.loads('') raises, and an empty line
            # carries no entry anyway.
            return [
                json.loads(line)
                for line in result.stdout.splitlines()
                if line.strip()
            ]
        except Exception:
            # Log with traceback for diagnosis, then let the caller decide.
            logger.exception('Failed to fetch YouTube favorites')
            raise

    def to_csv(self, videos: list[dict]) -> str:
        """Convert the list of videos to CSV format.

        Args:
            videos: Dicts to serialise; keys missing from a dict produce an
                empty cell in its row.

        Returns:
            CSV text with a header row followed by one row per video, using
            the ``csv`` module's default dialect.
        """
        headers = ['id', 'title', 'url', 'upload_date']
        buffer = StringIO()
        writer = csv.writer(buffer)
        writer.writerow(headers)
        # Build each row from the header list so columns and header cannot drift.
        writer.writerows([video.get(column) for column in headers] for video in videos)
        return buffer.getvalue()

    def run(self) -> None:
        """Fetch the videos, convert them to CSV and write ``youtube_favorites.csv``.

        The file is created in (or overwritten in) the current working
        directory.
        """
        videos = self.fetch_data()
        csv_data = self.to_csv(videos)
        logger.info('Fetched and converted %d videos to CSV', len(videos))
        # newline='' stops the text layer from translating the '\r\n' row
        # terminators that csv.writer already emitted (which would otherwise
        # become '\r\r\n' on Windows).
        with open('youtube_favorites.csv', 'w', encoding='utf-8', newline='') as f:
            f.write(csv_data)
        logger.info('CSV file written to youtube_favorites.csv')