feat: Add YouTube favorites fetcher to export data as CSV
This commit is contained in:
parent
4dfbde77ec
commit
dbc663cbbc
60
personal_data/fetchers/youtube.py
Normal file
60
personal_data/fetchers/youtube.py
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import subprocess
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from personal_data.data import DeduplicateMode, Scraper
|
||||||
|
|
||||||
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class YoutubeFavoritesScraper(Scraper):
    """Fetch a YouTube playlist with yt-dlp and export its entries as CSV.

    ``run`` ties the steps together: ``fetch_data`` lists the playlist,
    ``to_csv`` renders the entries, and the result is written to
    ``youtube_favorites.csv`` (a relative path, so it lands in the current
    working directory).
    """

    # NOTE(review): both fields are presumably consumed by the ``Scraper``
    # base class, which is not visible from this file -- confirm there.
    dataset_name: str = "youtube_favorites"
    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS

    def fetch_data(self) -> list[dict]:
        """Use yt-dlp to fetch the list of favorited videos.

        Runs ``yt-dlp --flat-playlist --dump-json`` against the playlist URL
        and parses each non-blank line of its standard output as one JSON
        document.

        Returns:
            The parsed JSON documents, in the order yt-dlp printed them.

        Raises:
            subprocess.CalledProcessError: yt-dlp exited with a non-zero
                status (``check=True``).
            OSError: the ``yt-dlp`` executable could not be started.
            json.JSONDecodeError: a non-blank output line was not valid JSON.

        Every failure is logged at ERROR level and then re-raised unchanged.
        """
        # Replace 'YOUR_FAVORITES_ID' with your actual favorites playlist ID.
        playlist_url = "https://www.youtube.com/playlist?list=YOUR_FAVORITES_ID"
        try:
            result = subprocess.run(
                ["yt-dlp", "--flat-playlist", "--dump-json", playlist_url],
                capture_output=True,
                check=True,
                text=True,
            )
            # Skip blank lines: json.loads("") raises, and an empty line
            # carries no video entry anyway.
            return [
                json.loads(line)
                for line in result.stdout.splitlines()
                if line.strip()
            ]
        except Exception as e:
            logger.error("Failed to fetch YouTube favorites: %s", e)
            raise

    def to_csv(self, videos: list[dict]) -> str:
        """Convert the list of videos to CSV format.

        Args:
            videos: Video entries as dictionaries; only the ``id``,
                ``title``, ``url`` and ``upload_date`` keys are read.

        Returns:
            CSV text consisting of a header row followed by one row per
            video. A key that is missing from an entry (or is ``None``)
            yields an empty field.
        """
        # Function-scope import: the module's import block does not pull in
        # ``io``, so the dependency stays local to this method.
        from io import StringIO

        headers = ["id", "title", "url", "upload_date"]
        buffer = StringIO()
        writer = csv.writer(buffer)
        writer.writerow(headers)
        # Stream rows straight into the writer instead of materialising an
        # intermediate list of lists first.
        writer.writerows(
            [video.get(column) for column in headers] for video in videos
        )
        return buffer.getvalue()

    def run(self) -> None:
        """Fetch the favorites and write them to ``youtube_favorites.csv``.

        Raises:
            Whatever ``fetch_data`` raises, plus ``OSError`` if the output
            file cannot be opened or written.
        """
        videos = self.fetch_data()
        csv_data = self.to_csv(videos)
        logger.info("Fetched and converted %d videos to CSV", len(videos))
        # newline="" stops the text layer from translating the line endings
        # the csv writer already produced ("\r\n"); without it, platforms that
        # translate "\n" on write would emit "\r\r\n".
        with open("youtube_favorites.csv", "w", encoding="utf-8", newline="") as f:
            f.write(csv_data)
        logger.info("CSV file written to youtube_favorites.csv")
|
Loading…
Reference in New Issue
Block a user