62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
import csv
|
|
import json
|
|
import logging
|
|
import subprocess
|
|
from dataclasses import dataclass
|
|
|
|
from personal_data.data import DeduplicateMode, Scraper
|
|
|
|
# Module-level logger; records are emitted under this module's import name.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass(frozen=True)
class YoutubeFavoritesScraper(Scraper):
    """Export a YouTube favorites playlist to CSV by shelling out to yt-dlp.

    The playlist URL is still a placeholder ('YOUR_FAVORITES_ID') and must be
    replaced with a real playlist ID before this scraper can return data.
    """

    # Dataset identifier and de-duplication strategy.
    # NOTE(review): presumably consumed by the Scraper base class -- confirm.
    dataset_name: str = 'youtube_favorites'
    deduplicate_mode: DeduplicateMode = DeduplicateMode.BY_ALL_COLUMNS

    def fetch_data(self) -> list[dict]:
        """Run yt-dlp on the favorites playlist and parse its JSON-lines output.

        Returns:
            One dict per non-blank line that yt-dlp printed to stdout.

        Raises:
            subprocess.CalledProcessError: yt-dlp exited with a non-zero status.
            OSError: the yt-dlp executable could not be started.
            json.JSONDecodeError: a non-blank output line was not valid JSON.

        Any failure is logged with its traceback and then re-raised unchanged.
        """
        # Replace 'YOUR_FAVORITES_ID' with your actual favorites playlist ID.
        command = [
            'yt-dlp',
            '--flat-playlist',
            '--dump-json',
            'https://www.youtube.com/playlist?list=YOUR_FAVORITES_ID',
        ]
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                check=True,
                text=True,
            )
            # Skip blank / whitespace-only lines: json.loads('') would raise
            # and abort the whole fetch over a harmless empty line.
            return [
                json.loads(line)
                for line in result.stdout.splitlines()
                if line.strip()
            ]
        except Exception:
            logger.exception('Failed to fetch YouTube favorites')
            raise

    def to_csv(self, videos: list[dict]) -> str:
        """Convert the list of videos to CSV text.

        Args:
            videos: Dicts as returned by ``fetch_data``. Only the keys named
                in the header row are read; a missing key yields an empty cell.

        Returns:
            CSV text whose first row is the header ``id,title,url,upload_date``.
            Rows end with ``\\r\\n``, the csv module's default line terminator.
        """
        # Kept function-local, as in the original, so this block does not
        # depend on a change to the file-level import list.
        from io import StringIO

        headers = ['id', 'title', 'url', 'upload_date']

        # newline='' leaves the writer's own line terminators untouched.
        buffer = StringIO(newline='')
        writer = csv.writer(buffer)
        writer.writerow(headers)
        # Derive every row from `headers` so columns cannot drift out of sync
        # with the header row.
        writer.writerows([video.get(column) for column in headers] for video in videos)
        return buffer.getvalue()

    def run(self) -> None:
        """Fetch the favorites, convert them to CSV and write the file to disk.

        Writes ``youtube_favorites.csv`` relative to the current working
        directory, overwriting any existing file. Exceptions raised by
        ``fetch_data`` or by the file write propagate to the caller.
        """
        videos = self.fetch_data()
        csv_data = self.to_csv(videos)
        logger.info('Fetched and converted %d videos to CSV', len(videos))

        # newline='' is required for csv output: the CSV text already carries
        # '\r\n' terminators, and default text-mode translation on Windows
        # would turn each of them into '\r\r\n' (seen as blank rows).
        with open('youtube_favorites.csv', 'w', encoding='utf-8', newline='') as f:
            f.write(csv_data)
        logger.info('CSV file written to youtube_favorites.csv')
|