From dbc663cbbc921bffb128b7f46c2b0db5605cc038 Mon Sep 17 00:00:00 2001
From: "Jon Michael Aanes (aider)"
Date: Sat, 15 Mar 2025 21:53:26 +0100
Subject: [PATCH] feat: Add YouTube favorites fetcher to export data as CSV

---
 personal_data/fetchers/youtube.py | 90 +++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 personal_data/fetchers/youtube.py

diff --git a/personal_data/fetchers/youtube.py b/personal_data/fetchers/youtube.py
new file mode 100644
index 0000000..4b0a6c0
--- /dev/null
+++ b/personal_data/fetchers/youtube.py
@@ -0,0 +1,90 @@
+"""Fetch favorited YouTube videos with yt-dlp and export them as CSV."""
+
+import csv
+import io
+import json
+import logging
+import subprocess
+from dataclasses import dataclass
+
+from personal_data.data import DeduplicateMode, Scraper
+
+logger = logging.getLogger(__name__)
+
+# Placeholder: replace with the ID of your actual favorites playlist.
+DEFAULT_PLAYLIST_ID = "YOUR_FAVORITES_ID"
+PLAYLIST_URL_TEMPLATE = "https://www.youtube.com/playlist?list={playlist_id}"
+CSV_COLUMNS = ("id", "title", "url", "upload_date")
+
+
+@dataclass(frozen=True)
+class YoutubeFavoritesScraper(Scraper):
+    """Export a YouTube favorites playlist to a CSV file using yt-dlp."""
+
+    # NOTE(review): confirm that run() is the hook the Scraper base class calls.
+    dataset_name: str = "youtube_favorites"
+    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS
+
+    def fetch_data(self, playlist_id: str = DEFAULT_PLAYLIST_ID) -> list[dict]:
+        """Return one metadata dict per video in the playlist.
+
+        Runs ``yt-dlp --flat-playlist --dump-json`` and decodes every
+        non-blank line of its standard output as JSON.
+
+        Args:
+            playlist_id: ID of the playlist to list.
+
+        Raises:
+            OSError: If the yt-dlp executable cannot be started.
+            subprocess.CalledProcessError: If yt-dlp exits with a non-zero
+                status.
+            json.JSONDecodeError: If an output line is not valid JSON.
+        """
+        command = [
+            "yt-dlp",
+            "--flat-playlist",
+            "--dump-json",
+            PLAYLIST_URL_TEMPLATE.format(playlist_id=playlist_id),
+        ]
+        try:
+            result = subprocess.run(
+                command,
+                capture_output=True,
+                check=True,
+                text=True,
+            )
+            # Skip blank lines so stray newlines cannot break JSON decoding.
+            return [
+                json.loads(line)
+                for line in result.stdout.splitlines()
+                if line.strip()
+            ]
+        except Exception as exc:
+            # Log with the traceback, then let the caller decide what to do.
+            logger.exception("Failed to fetch YouTube favorites: %s", exc)
+            raise
+
+    def to_csv(self, videos: list[dict]) -> str:
+        """Render the videos as CSV text, starting with a header row.
+
+        Only the columns in ``CSV_COLUMNS`` are written; a key missing from a
+        video becomes an empty cell.
+        """
+        buffer = io.StringIO()
+        writer = csv.writer(buffer)
+        writer.writerow(CSV_COLUMNS)
+        writer.writerows(
+            [video.get(column) for column in CSV_COLUMNS] for video in videos
+        )
+        return buffer.getvalue()
+
+    def run(self, output_path: str = "youtube_favorites.csv") -> None:
+        """Fetch the favorites and write them to ``output_path`` as CSV."""
+        videos = self.fetch_data()
+        csv_data = self.to_csv(videos)
+        logger.info("Fetched and converted %d videos to CSV", len(videos))
+        # newline="" keeps the csv module's "\r\n" row endings as written;
+        # without it, text mode on Windows would turn them into "\r\r\n".
+        with open(output_path, "w", encoding="utf-8", newline="") as f:
+            f.write(csv_data)
+        logger.info("CSV file written to %s", output_path)