Ruff check
This commit is contained in:
parent
afd2f4a0b3
commit
033f0dcf5b
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,4 +1,5 @@
|
|||
*.pyc
|
||||
/output/
|
||||
__pycache__/
|
||||
/secrets/
|
||||
*.sqlite
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
from ._version import __version__ # noqa:F401
|
|
@ -1,13 +1,16 @@
|
|||
import personal_data.main
|
||||
import argparse
|
||||
import logging
|
||||
|
||||
import personal_data.main
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+')
|
||||
parser.add_argument('--cookiejar', action='store_true')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig()
|
||||
logging.getLogger('personal_data').setLevel('INFO')
|
||||
|
@ -15,5 +18,6 @@ def main():
|
|||
scraper_filter = frozenset(args.fetchers)
|
||||
personal_data.main.main(scraper_filter, use_cookiejar=args.cookiejar)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -1 +1 @@
|
|||
__version__ = '0.1.0'
|
||||
__version__ = '0.1.1'
|
||||
|
|
|
@ -11,6 +11,7 @@ class DeduplicateMode(Enum):
|
|||
BY_ALL_COLUMNS = 2
|
||||
ONLY_LATEST = 3
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Scraper(abc.ABC):
|
||||
session: requests.Session
|
||||
|
|
|
@ -1,17 +1,12 @@
|
|||
import dataclasses
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
import json
|
||||
import secrets
|
||||
from decimal import Decimal
|
||||
import email.utils
|
||||
import json
|
||||
import logging
|
||||
from decimal import Decimal
|
||||
|
||||
import bs4
|
||||
|
||||
import personal_data.html_util
|
||||
import personal_data.parse_util
|
||||
from personal_data.data import DeduplicateMode, Scraper
|
||||
|
||||
from .. import secrets
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -26,9 +21,11 @@ HOSTNAME = 'reader.partisiablockchain.com'
|
|||
URL_ACCOUNT_PLUGIN = 'https://{hostname}/{shard}blockchain/accountPlugin/local'
|
||||
URL_ACCOUNT_PLUGIN_GLOBAL = 'https://{hostname}/{shard}blockchain/accountPlugin/global'
|
||||
|
||||
|
||||
def shard_id_for_address(address: str) -> str:
    """Return the shard path segment to substitute into the URL templates.

    The per-address lookup is not implemented yet: ``address`` is ignored
    and every caller receives the same hard-coded segment.
    """
    placeholder_shard = 'shards/Shard2/'  # TODO
    return placeholder_shard
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class MpcBalance(Scraper):
|
||||
dataset_name = 'defi_mpc_balance'
|
||||
|
@ -43,7 +40,9 @@ class MpcBalance(Scraper):
|
|||
|
||||
response = self.session.post(url, headers=headers, data=json.dumps(data))
|
||||
response.raise_for_status()
|
||||
date_text = response.headers.get('last-modified') or response.headers.get('date')
|
||||
date_text = response.headers.get('last-modified') or response.headers.get(
|
||||
'date',
|
||||
)
|
||||
date = email.utils.parsedate_to_datetime(date_text)
|
||||
json_data = response.json()
|
||||
if json_data is None:
|
||||
|
@ -71,7 +70,12 @@ class MpcBalance(Scraper):
|
|||
shard=shard_id_for_address(address),
|
||||
)
|
||||
|
||||
data: dict = {'path':[{'type':'field','name':'accounts'},{'type':'avl','keyType':'BLOCKCHAIN_ADDRESS','key':address}]}
|
||||
data: dict = {
|
||||
'path': [
|
||||
{'type': 'field', 'name': 'accounts'},
|
||||
{'type': 'avl', 'keyType': 'BLOCKCHAIN_ADDRESS', 'key': address},
|
||||
],
|
||||
}
|
||||
account_data, date = self.get_json(url, data=data)
|
||||
|
||||
data_point = {
|
||||
|
|
|
@ -159,7 +159,9 @@ class PsnProfilesScraper(Scraper):
|
|||
yield d
|
||||
|
||||
def scrape_game_trophies(
|
||||
self, psnprofiles_id: int, game_name: str,
|
||||
self,
|
||||
psnprofiles_id: int,
|
||||
game_name: str,
|
||||
) -> Iterator[dict]:
|
||||
assert isinstance(psnprofiles_id, int), psnprofiles_id
|
||||
assert isinstance(game_name, str), game_name
|
||||
|
@ -167,7 +169,8 @@ class PsnProfilesScraper(Scraper):
|
|||
logger.info('Getting Game Trophies %s', psnprofiles_id)
|
||||
|
||||
url = URL_USER_GAME_TROPHIES.format(
|
||||
psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id,
|
||||
psn_id=secrets.PLAYSTATION_PSN_ID,
|
||||
game_id=psnprofiles_id,
|
||||
)
|
||||
response = self.session.get(url)
|
||||
response.raise_for_status()
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
import requests
|
||||
import personal_data.secrets as secrets
|
||||
|
||||
import logging
|
||||
|
||||
import requests
|
||||
|
||||
from personal_data import secrets
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAILGUN_API_ENDPOINT = 'https://api.mailgun.net/v3/{mailgun_domain}/messages'
|
||||
|
@ -33,4 +35,3 @@ def send_email(session: requests.Session, subject: str, text: str):
|
|||
response.raise_for_status()
|
||||
logger.info('Email sent!')
|
||||
return response
|
||||
|
||||
|
|
|
@ -18,16 +18,13 @@ logger = logging.getLogger(__name__)
|
|||
import personal_data.data
|
||||
import personal_data.fetchers.crunchyroll
|
||||
import personal_data.fetchers.ffxiv_lodestone
|
||||
import personal_data.fetchers.partisia_blockchain
|
||||
import personal_data.fetchers.playstation
|
||||
import personal_data.fetchers.psnprofiles
|
||||
import personal_data.fetchers.partisia_blockchain
|
||||
from personal_data._version import __version__
|
||||
from personal_data import mailgun
|
||||
|
||||
from . import mailgun
|
||||
|
||||
import personal_data.mailgun as mailgun
|
||||
import personal_data.secrets as secrets
|
||||
|
||||
CSV_DIALECT = 'one_true_dialect'
|
||||
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
||||
|
||||
|
@ -102,14 +99,16 @@ def extend_csv_file(
|
|||
|
||||
return frozendict(a) == frozendict(b)
|
||||
|
||||
|
||||
if deduplicate_mode == personal_data.data.DeduplicateMode.ONLY_LATEST:
|
||||
while len(dicts) >= 2 and equals_without_fields(dicts[-1], dicts[-2], deduplicate_ignore_columns):
|
||||
while len(dicts) >= 2 and equals_without_fields(
|
||||
dicts[-1],
|
||||
dicts[-2],
|
||||
deduplicate_ignore_columns,
|
||||
):
|
||||
del dicts[-1]
|
||||
elif deduplicate_mode != personal_data.data.DeduplicateMode.NONE:
|
||||
dicts = set(dicts)
|
||||
|
||||
|
||||
dicts = sorted(dicts, key=lambda d: tuple(str(d.get(fn, '')) for fn in fieldnames))
|
||||
|
||||
csvfile_in_memory = io.StringIO()
|
||||
|
@ -151,6 +150,7 @@ STANDARD_HEADERS = {
|
|||
|
||||
|
||||
if cfscrape:
|
||||
|
||||
class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
|
||||
pass
|
||||
|
||||
|
@ -165,12 +165,18 @@ def get_session(cookiejar, *, with_cfscrape: bool) -> requests.Session:
|
|||
session.cookies.set_cookie(cookie)
|
||||
return session
|
||||
|
||||
def send_notification(session: requests.Session, scraper_name: str, latest_dict: frozendict):
|
||||
|
||||
def send_notification(
    session: requests.Session,
    scraper_name: str,
    latest_dict: frozendict,
) -> None:
    """Email a plain-text summary of ``latest_dict`` for one scraper.

    Args:
        session: Session handed through unchanged to ``mailgun.send_email``.
        scraper_name: Name shown in the subject and in the first body line.
        latest_dict: Mapping whose items are listed one per line as
            ``key: value`` after the first body line.
    """
    lines = [f'A new update has occurred for {scraper_name}\n']
    lines.extend(f'{key}: {value}\n' for key, value in latest_dict.items())
    mailgun.send_email(session, f'Updated {scraper_name}', ''.join(lines))
|
||||
|
||||
|
||||
def main(scraper_filter: frozenset[str], use_cookiejar: bool):
|
||||
if use_cookiejar:
|
||||
cookiejar = browsercookie.firefox()
|
||||
|
|
|
@ -6,6 +6,7 @@ logger.setLevel(logging.INFO)
|
|||
|
||||
ENV_KEY_PREFIX = 'CF_PD_'
|
||||
|
||||
|
||||
def load_secret(env_key: str) -> str:
|
||||
filepath = os.environ.get(ENV_KEY_PREFIX + env_key)
|
||||
if filepath is None:
|
||||
|
|
Loading…
Reference in New Issue
Block a user