1
0

Ruff check
All checks were successful
Build container / Package-Python (push) Successful in 26s
Build container / Package-Container (push) Successful in 1m21s

This commit is contained in:
Jon Michael Aanes 2024-04-23 22:58:25 +02:00
parent afd2f4a0b3
commit 033f0dcf5b
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
10 changed files with 72 additions and 50 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
*.pyc
/output/
__pycache__/
/secrets/
*.sqlite

View File

@ -0,0 +1 @@
from ._version import __version__ # noqa:F401

View File

@ -1,13 +1,16 @@
import personal_data.main
import argparse
import logging
import personal_data.main
def parse_arguments() -> argparse.Namespace:
    """Parse the command-line arguments of the scraper entry point.

    Arguments are read from ``sys.argv`` by ``argparse``.

    Returns:
        The parsed namespace with two attributes: ``fetchers``, a list of one
        or more strings given positionally, and ``cookiejar``, a boolean that
        is ``True`` only when ``--cookiejar`` was passed.
    """
    parser = argparse.ArgumentParser()
    # nargs='+' makes argparse reject an invocation without any FETCHER.
    parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+')
    parser.add_argument('--cookiejar', action='store_true')
    return parser.parse_args()
def main():
logging.basicConfig()
logging.getLogger('personal_data').setLevel('INFO')
@ -15,5 +18,6 @@ def main():
scraper_filter = frozenset(args.fetchers)
personal_data.main.main(scraper_filter, use_cookiejar=args.cookiejar)
if __name__ == '__main__':
main()

View File

@ -1 +1 @@
__version__ = '0.1.0'
__version__ = '0.1.1'

View File

@ -11,6 +11,7 @@ class DeduplicateMode(Enum):
BY_ALL_COLUMNS = 2
ONLY_LATEST = 3
@dataclasses.dataclass(frozen=True)
class Scraper(abc.ABC):
session: requests.Session

View File

@ -1,17 +1,12 @@
import dataclasses
import datetime
import logging
import re
import json
import secrets
from decimal import Decimal
import email.utils
import json
import logging
from decimal import Decimal
import bs4
import personal_data.html_util
import personal_data.parse_util
from personal_data.data import DeduplicateMode, Scraper
from .. import secrets
logger = logging.getLogger(__name__)
@ -26,9 +21,11 @@ HOSTNAME = 'reader.partisiablockchain.com'
URL_ACCOUNT_PLUGIN = 'https://{hostname}/{shard}blockchain/accountPlugin/local'
URL_ACCOUNT_PLUGIN_GLOBAL = 'https://{hostname}/{shard}blockchain/accountPlugin/global'
def shard_id_for_address(address: str) -> str:
    """Return the shard path segment used in reader URLs for ``address``.

    The result fills the ``{shard}`` placeholder of the account-plugin URL
    templates, which is why it carries a trailing slash.
    """
    # TODO (kept from the original): the value is hard-coded and ``address``
    # is ignored; presumably the shard should be derived from the address.
    return 'shards/Shard2/'
@dataclasses.dataclass(frozen=True)
class MpcBalance(Scraper):
dataset_name = 'defi_mpc_balance'
@ -43,7 +40,9 @@ class MpcBalance(Scraper):
response = self.session.post(url, headers=headers, data=json.dumps(data))
response.raise_for_status()
date_text = response.headers.get('last-modified') or response.headers.get('date')
date_text = response.headers.get('last-modified') or response.headers.get(
'date',
)
date = email.utils.parsedate_to_datetime(date_text)
json_data = response.json()
if json_data is None:
@ -71,7 +70,12 @@ class MpcBalance(Scraper):
shard=shard_id_for_address(address),
)
data: dict = {'path':[{'type':'field','name':'accounts'},{'type':'avl','keyType':'BLOCKCHAIN_ADDRESS','key':address}]}
data: dict = {
'path': [
{'type': 'field', 'name': 'accounts'},
{'type': 'avl', 'keyType': 'BLOCKCHAIN_ADDRESS', 'key': address},
],
}
account_data, date = self.get_json(url, data=data)
data_point = {

View File

@ -159,7 +159,9 @@ class PsnProfilesScraper(Scraper):
yield d
def scrape_game_trophies(
self, psnprofiles_id: int, game_name: str,
self,
psnprofiles_id: int,
game_name: str,
) -> Iterator[dict]:
assert isinstance(psnprofiles_id, int), psnprofiles_id
assert isinstance(game_name, str), game_name
@ -167,7 +169,8 @@ class PsnProfilesScraper(Scraper):
logger.info('Getting Game Trophies %s', psnprofiles_id)
url = URL_USER_GAME_TROPHIES.format(
psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id,
psn_id=secrets.PLAYSTATION_PSN_ID,
game_id=psnprofiles_id,
)
response = self.session.get(url)
response.raise_for_status()

View File

@ -1,7 +1,9 @@
import requests
import personal_data.secrets as secrets
import logging
import requests
from personal_data import secrets
logger = logging.getLogger(__name__)
MAILGUN_API_ENDPOINT = 'https://api.mailgun.net/v3/{mailgun_domain}/messages'
@ -33,4 +35,3 @@ def send_email(session: requests.Session, subject: str, text: str):
response.raise_for_status()
logger.info('Email sent!')
return response

View File

@ -18,16 +18,13 @@ logger = logging.getLogger(__name__)
import personal_data.data
import personal_data.fetchers.crunchyroll
import personal_data.fetchers.ffxiv_lodestone
import personal_data.fetchers.partisia_blockchain
import personal_data.fetchers.playstation
import personal_data.fetchers.psnprofiles
import personal_data.fetchers.partisia_blockchain
from personal_data._version import __version__
from personal_data import mailgun
from . import mailgun
import personal_data.mailgun as mailgun
import personal_data.secrets as secrets
CSV_DIALECT = 'one_true_dialect'
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
@ -102,14 +99,16 @@ def extend_csv_file(
return frozendict(a) == frozendict(b)
if deduplicate_mode == personal_data.data.DeduplicateMode.ONLY_LATEST:
while len(dicts) >= 2 and equals_without_fields(dicts[-1], dicts[-2], deduplicate_ignore_columns):
while len(dicts) >= 2 and equals_without_fields(
dicts[-1],
dicts[-2],
deduplicate_ignore_columns,
):
del dicts[-1]
elif deduplicate_mode != personal_data.data.DeduplicateMode.NONE:
dicts = set(dicts)
dicts = sorted(dicts, key=lambda d: tuple(str(d.get(fn, '')) for fn in fieldnames))
csvfile_in_memory = io.StringIO()
@ -151,6 +150,7 @@ STANDARD_HEADERS = {
if cfscrape:
class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
pass
@ -165,12 +165,18 @@ def get_session(cookiejar, *, with_cfscrape: bool) -> requests.Session:
session.cookies.set_cookie(cookie)
return session
def send_notification(session: requests.Session, scraper_name: str, latest_dict: frozendict):
def send_notification(
    session: requests.Session,
    scraper_name: str,
    latest_dict: frozendict,
) -> None:
    """Email a notification listing the fields of a scraper's latest entry.

    Args:
        session: Session handed through to ``mailgun.send_email``.
        scraper_name: Name used in the subject and in the first body line.
        latest_dict: Mapping whose items are written one per line as
            ``key: value``, in the mapping's iteration order.
    """
    body = ['A new update has occured for ', scraper_name, '\n']
    body.extend(f'{k}: {v}\n' for k, v in latest_dict.items())
    mailgun.send_email(session, f'Updated {scraper_name}', ''.join(body))
def main(scraper_filter: frozenset[str], use_cookiejar: bool):
if use_cookiejar:
cookiejar = browsercookie.firefox()

View File

@ -6,6 +6,7 @@ logger.setLevel(logging.INFO)
ENV_KEY_PREFIX = 'CF_PD_'
def load_secret(env_key: str) -> str:
filepath = os.environ.get(ENV_KEY_PREFIX + env_key)
if filepath is None: