diff --git a/personal_data/fetchers/gitea.py b/personal_data/fetchers/gitea.py
new file mode 100644
index 0000000..246ac3b
--- /dev/null
+++ b/personal_data/fetchers/gitea.py
@@ -0,0 +1,75 @@
+import dataclasses
+import datetime
+import logging
+from collections.abc import Iterator, Mapping
+from decimal import Decimal
+from typing import Any
+
+from personal_data.data import DeduplicateMode, Scraper
+
+from .. import secrets
+
+logger = logging.getLogger(__name__)
+
+
+def safe_del(d: dict, *keys: str) -> None:
+    """Remove each of ``keys`` from ``d`` in place; missing keys are ignored."""
+    for key in keys:
+        d.pop(key, None)
+
+
+def to_data_point(p: dict[str, Any]) -> Mapping[str, Any]:
+    """Flatten one repository object from the search response.
+
+    The nested ``owner`` object is replaced by its ``login`` value and the
+    ``permissions`` and ``internal_tracker`` keys are dropped when present.
+    A shallow copy is returned, so ``p`` itself is left unmodified.
+
+    Raises:
+        KeyError: If ``p`` lacks ``owner`` or the owner lacks ``login``.
+    """
+    point = dict(p)
+    point['owner'] = p['owner']['login']
+    safe_del(point, 'permissions', 'internal_tracker')
+    return point
+
+
+@dataclasses.dataclass(frozen=True)
+class Gitea(Scraper):
+    """Scraper that yields one data point per repository search result."""
+
+    dataset_name = 'gitea_repos'
+    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
+    deduplicate_ignore_columns = []
+
+    @staticmethod
+    def requires_cfscrape() -> bool:
+        return False
+
+    def scrape(self) -> Iterator[Mapping[str, Any]]:
+        """Query the repository search endpoint and yield flattened results.
+
+        A single request is made; NOTE(review): if the endpoint paginates,
+        only the first page is consumed -- confirm this is intended.
+        """
+        token = secrets.gitea_access_token()
+        # Pass the token in a header instead of the query string so the secret
+        # does not become part of the request URL. Skip the header when no
+        # token is configured rather than sending a bogus credential.
+        headers = {'Authorization': f'token {token}'} if token else {}
+        response = self.session.get(
+            'https://gitfub.space/api/v1/repos/search',
+            params={
+                # 'uid': 21,
+                'private': True,
+                'sort': 'updated',
+                'order': 'desc',
+            },
+            headers=headers,
+        )
+        response.raise_for_status()
+
+        repos = response.json()['data']
+        logger.info('Got %d results from Gitea', len(repos))
+        for repo in repos:
+            yield to_data_point(repo)
diff --git a/personal_data/secrets.py b/personal_data/secrets.py
index ed63101..3eb9c13 100644
--- a/personal_data/secrets.py
+++ b/personal_data/secrets.py
@@ -23,6 +23,10 @@ def pbc_account_address():
 def steam_username():
     return secrets.load_or_fail('STEAM_USERNAME')
 
+# Gitea
+def gitea_access_token():
+    return secrets.load('GITEA_ACCESS_TOKEN')
+
 # Kucoin
 def kucoin_key():