diff --git a/personal_data/__init__.py b/personal_data/__init__.py index c9bee9a..0c4635a 100644 --- a/personal_data/__init__.py +++ b/personal_data/__init__.py @@ -20,6 +20,7 @@ import personal_data.fetchers.crunchyroll import personal_data.fetchers.ffxiv_lodestone import personal_data.fetchers.playstation import personal_data.fetchers.psnprofiles +import personal_data.fetchers.partisia_blockchain from personal_data._version import __version__ CSV_DIALECT = 'one_true_dialect' @@ -133,12 +134,10 @@ def get_session(cookiejar, *, with_cfscrape: bool) -> requests.Session: return session -def main(): +def main(scraper_filter: frozenset[str]): cookiejar = browsercookie.firefox() logger.warning('Got cookiejar from firefox: %s cookies', len(cookiejar)) - scraper_filter = {'PsnProfilesScraper'} - for scraper_cls in personal_data.data.Scraper.__subclasses__(): session = get_session(cookiejar, with_cfscrape=scraper_cls.requires_cfscrape()) scraper = scraper_cls(session) diff --git a/personal_data/__main__.py b/personal_data/__main__.py index a2309fe..850acf0 100644 --- a/personal_data/__main__.py +++ b/personal_data/__main__.py @@ -1,4 +1,15 @@ import personal_data +import argparse + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+') + return parser.parse_args() + +def main(): + args = parse_arguments() + scraper_filter = frozenset(args.fetchers) + personal_data.main(scraper_filter) if __name__ == '__main__': - personal_data.main() + main() diff --git a/personal_data/data.py b/personal_data/data.py index 5ab54a4..0bddc51 100644 --- a/personal_data/data.py +++ b/personal_data/data.py @@ -15,22 +15,18 @@ class DeduplicateMode(Enum): class Scraper(abc.ABC): session: requests.Session - @abc.abstractmethod @staticmethod def dataset_name() -> str: pass - @abc.abstractmethod @staticmethod def deduplicate_mode() -> DeduplicateMode: pass - @abc.abstractmethod @staticmethod def dataset_format() -> str: return 'list-of-dicts' - @abc.abstractmethod @staticmethod def requires_cfscrape() -> bool: return False diff --git a/personal_data/fetchers/__init__.py b/personal_data/fetchers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/personal_data/fetchers/ffxiv_lodestone.py b/personal_data/fetchers/ffxiv_lodestone.py index dd0c1b7..4f22adc 100644 --- a/personal_data/fetchers/ffxiv_lodestone.py +++ b/personal_data/fetchers/ffxiv_lodestone.py @@ -47,8 +47,6 @@ class LodestoneAchievementScraper(Scraper): scripts=False, ) - # print(soup) - for entry in soup.select('.ldst__achievement ul li.entry'): time_acquired = str(entry.script.text).strip() time_acquired = re.search( diff --git a/personal_data/fetchers/partisia_blockchain.py b/personal_data/fetchers/partisia_blockchain.py new file mode 100644 index 0000000..412ded0 --- /dev/null +++ b/personal_data/fetchers/partisia_blockchain.py @@ -0,0 +1,52 @@ +import dataclasses +import datetime +import logging +import re +import secrets + +import bs4 + +import personal_data.html_util +import personal_data.parse_util +from personal_data.data import DeduplicateMode, Scraper + +logger = logging.getLogger(__name__) + + +# mainnet: https://reader.partisiablockchain.com +# testnet: https://node1.testnet.partisiablockchain.com + + +HOSTNAME = 'reader.partisiablockchain.com' + +URL_ACCOUNT_PLUGIN = 'https://{hostname}/{shard}blockchain/accountPlugin/local' + +@dataclasses.dataclass(frozen=True) +class MpcBalance(Scraper): + dataset_name = 'defi_mpc_balance' + deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS + + def scrape(self): + address = '0019e9a28c978dd65114cc4e0bcb876770805b0349' # TODO + + headers = { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + } + + url = URL_ACCOUNT_PLUGIN.format( + hostname = HOSTNAME, + shard = 'shards/Shard0/', + ) + + data = f"{{\"path\":[{{\"type\":\"field\",\"name\":\"accounts\"}},{{\"type\":\"avl\",\"keyType\":\"BLOCKCHAIN_ADDRESS\",\"key\":\"{address}\"}}]}}" + print(data) + response = self.session.post(url, headers = headers, data=data) + response.raise_for_status() + + json = response.json() + print(json) + + assert False + + diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29