import csv
import json
import logging
import subprocess
from dataclasses import dataclass
from io import StringIO

from personal_data.data import DeduplicateMode, Scraper

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class YoutubeFavoritesScraper(Scraper):
    """Fetch a YouTube playlist through yt-dlp and export it as CSV."""

    dataset_name: str = 'youtube_favorites'
    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS

    def fetch_data(self) -> list[dict]:
        """
        Use yt-dlp to fetch the list of favorited videos.

        This is a placeholder for invoking yt-dlp and parsing its output.

        Returns:
            One decoded JSON object per non-blank line that yt-dlp printed
            to stdout.

        Raises:
            subprocess.CalledProcessError: yt-dlp exited with a non-zero
                status (``check=True``).
            json.JSONDecodeError: a non-blank output line is not valid JSON.

        Any failure is logged and then re-raised unchanged.
        """
        try:
            # Replace 'YOUR_FAVORITES_ID' with your actual favorites playlist ID.
            result = subprocess.run(
                [
                    'yt-dlp',
                    '--flat-playlist',
                    '--dump-json',
                    'https://www.youtube.com/playlist?list=YOUR_FAVORITES_ID',
                ],
                capture_output=True,
                check=True,
                text=True,
            )
            # Skip blank/whitespace-only lines: json.loads('') raises, and a
            # stray empty line should not abort the whole fetch.
            return [
                json.loads(line)
                for line in result.stdout.splitlines()
                if line.strip()
            ]
        except Exception as e:
            logger.error('Failed to fetch YouTube favorites: %s', e)
            raise

    def to_csv(self, videos: list[dict]) -> str:
        """
        Convert the list of videos to CSV format.

        Args:
            videos: Dicts as returned by ``fetch_data``. Only the keys
                ``id``, ``title``, ``url`` and ``upload_date`` are read;
                a missing key yields an empty cell.

        Returns:
            CSV text consisting of a header row followed by one row per
            video, using the csv module's default dialect.
        """
        headers = ['id', 'title', 'url', 'upload_date']

        sio = StringIO()
        csv_writer = csv.writer(sio)
        csv_writer.writerow(headers)
        # Derive each row from the header list so columns cannot drift apart.
        csv_writer.writerows(
            [video.get(column) for column in headers] for video in videos
        )
        return sio.getvalue()

    def run(self) -> None:
        """Fetch the videos and write them to ``youtube_favorites.csv``."""
        videos = self.fetch_data()
        csv_data = self.to_csv(videos)
        logger.info('Fetched and converted %d videos to CSV', len(videos))

        # newline='' stops the text layer from translating the '\r\n' row
        # terminators the csv module already emitted (else '\r\r\n' on
        # platforms that translate '\n' on write).
        with open(
            'youtube_favorites.csv', 'w', encoding='utf-8', newline='',
        ) as f:
            f.write(csv_data)
        logger.info('CSV file written to youtube_favorites.csv')