48 lines
1.3 KiB
Python
48 lines
1.3 KiB
Python
|
import dataclasses
|
||
|
import datetime
|
||
|
import logging
|
||
|
from collections.abc import Iterator, Mapping
|
||
|
from decimal import Decimal
|
||
|
from typing import Any
|
||
|
|
||
|
from personal_data.data import DeduplicateMode, Scraper
|
||
|
|
||
|
from .. import secrets
|
||
|
|
||
|
logger = logging.getLogger(__name__)
|
||
|
|
||
|
def safe_del(d: dict, *keys: str):
    """Remove the given keys from ``d`` in place, ignoring absent ones.

    Args:
        d: Dictionary to prune; it is modified in place.
        *keys: Keys to drop. A key that is not present in ``d`` is skipped
            instead of raising ``KeyError``.
    """
    for unwanted in keys:
        # pop() with a default is a no-op when the key is missing.
        d.pop(unwanted, None)
|
||
|
|
||
|
def to_data_point(p: dict[str, Any]) -> Mapping[str, Any]:
    """Flatten one repository record into a data point.

    The nested ``owner`` object is collapsed to its ``login`` value, and the
    ``permissions`` and ``internal_tracker`` entries are left out when
    present. A new dictionary is returned and ``p`` is not modified, so the
    same record can safely be converted more than once.

    Args:
        p: Repository record. Must contain an ``owner`` mapping that has a
            ``login`` entry.

    Returns:
        A new mapping with the remaining keys of ``p`` in their original
        order and ``owner`` replaced by the owner's login.

    Raises:
        KeyError: If ``p`` has no ``owner`` key, or the owner has no
            ``login`` key.
    """
    # Resolve the login first so a malformed record fails before any work.
    owner_login = p['owner']['login']
    dropped = ('permissions', 'internal_tracker')
    point = {key: value for key, value in p.items() if key not in dropped}
    # Assigning to an existing key keeps its position in the dict.
    point['owner'] = owner_login
    return point
|
||
|
|
||
|
@dataclasses.dataclass(frozen=True)
class Gitea(Scraper):
    """Scraper that lists repositories from the Gitea API at gitfub.space.

    Each repository returned by the ``repos/search`` endpoint is emitted as
    one data point in the ``gitea_repos`` dataset.
    """

    dataset_name = 'gitea_repos'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    deduplicate_ignore_columns = []

    @staticmethod
    def requires_cfscrape() -> bool:
        """Report that this scraper does not require cfscrape."""
        return False

    def scrape(self) -> Iterator[Mapping[str, Any]]:
        """Query the repository search endpoint and yield one row per repo.

        Yields:
            The result of ``to_data_point`` for every entry in the ``data``
            list of the JSON response.

        Raises:
            Whatever ``response.raise_for_status()`` raises when the server
            answers with an error status.
        """
        # NOTE(review): only a single request is made; if the endpoint
        # paginates, later pages are not fetched -- confirm this is intended.
        response = self.session.get(
            'https://gitfub.space/api/v1/repos/search',
            params={
                'private': True,
                'sort': 'updated',
                'order': 'desc',
            },
            # Send the token as a header rather than a query parameter so the
            # credential does not become part of the request URL.
            headers={
                'Authorization': f'token {secrets.gitea_access_token()}',
            },
        )
        response.raise_for_status()

        repos = response.json()['data']
        logger.info('Got %d results from Gitea', len(repos))
        for repo in repos:
            yield to_data_point(repo)
|