import csv
import io
import json
import logging
import subprocess
from dataclasses import dataclass

from personal_data.data import DeduplicateMode, Scraper

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class YoutubeFavoritesScraper(Scraper):
    """Export a YouTube favorites playlist to CSV via the ``yt-dlp`` CLI.

    NOTE(review): how the ``Scraper`` base class consumes ``dataset_name``
    and ``deduplicate_mode`` is not visible from this file -- confirm
    against ``personal_data.data``.
    """

    dataset_name: str = "youtube_favorites"
    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS

    def fetch_data(self) -> list[dict]:
        """Fetch the favorited videos by invoking ``yt-dlp``.

        The playlist URL still contains the ``YOUR_FAVORITES_ID``
        placeholder, which must be replaced with a real playlist ID.

        Returns:
            One decoded JSON object per non-blank line of ``yt-dlp`` output.

        Raises:
            Exception: Any failure while running ``yt-dlp`` or decoding its
                output (e.g. ``subprocess.CalledProcessError``,
                ``FileNotFoundError``, ``json.JSONDecodeError``) is logged
                and re-raised unchanged.
        """
        try:
            # Replace 'YOUR_FAVORITES_ID' with your actual favorites playlist ID.
            result = subprocess.run(
                [
                    "yt-dlp",
                    "--flat-playlist",
                    "--dump-json",
                    "https://www.youtube.com/playlist?list=YOUR_FAVORITES_ID",
                ],
                capture_output=True,
                check=True,
                text=True,
            )
            # Skip blank lines so stray empty output does not abort the
            # whole fetch with a JSONDecodeError.
            videos = [
                json.loads(line)
                for line in result.stdout.splitlines()
                if line.strip()
            ]
        except Exception as e:
            logger.error("Failed to fetch YouTube favorites: %s", e)
            raise
        else:
            return videos

    def to_csv(self, videos: list[dict]) -> str:
        """Convert the list of videos to CSV text.

        The output has a header row followed by one row per video with the
        columns ``id``, ``title``, ``url`` and ``upload_date``. Keys missing
        from a video become empty cells.

        Args:
            videos: Video metadata dictionaries.

        Returns:
            The CSV document as a string, using the ``csv`` module's default
            dialect (``\\r\\n`` line terminators).
        """
        headers = ["id", "title", "url", "upload_date"]
        buffer = io.StringIO()
        writer = csv.writer(buffer)
        writer.writerow(headers)
        writer.writerows(
            [video.get(column) for column in headers] for video in videos
        )
        return buffer.getvalue()

    def run(self) -> None:
        """Fetch the favorites and write them to ``youtube_favorites.csv``.

        The file is created in (or overwritten in) the current working
        directory.
        """
        videos = self.fetch_data()
        csv_data = self.to_csv(videos)
        logger.info("Fetched and converted %d videos to CSV", len(videos))
        # newline="" stops the text layer from translating the "\r\n" row
        # terminators already produced by the csv writer (which would yield
        # "\r\r\n" on platforms that translate newlines).
        with open(
            "youtube_favorites.csv", "w", encoding="utf-8", newline=""
        ) as f:
            f.write(csv_data)
        logger.info("CSV file written to youtube_favorites.csv")