1
0

Merged all dataset outputs

This commit is contained in:
Jon Michael Aanes 2024-10-25 21:47:44 +02:00
parent 9a9af4287a
commit ce63ad2d53
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
4 changed files with 14 additions and 9 deletions

View File

@@ -213,7 +213,7 @@ def import_watched_series_csv_from_file(vault: ObsidianVault) -> int:
def import_played_games_csv_from_file(vault: ObsidianVault) -> int: def import_played_games_csv_from_file(vault: ObsidianVault) -> int:
data_path = Path('output/games_played_playstation.csv') data_path = Path('output/games_played.csv')
return import_activity_sample_csv_from_file( return import_activity_sample_csv_from_file(
vault, vault,
data_path, data_path,

View File

@@ -18,12 +18,13 @@ URL_PROFILE_MOUNTS = (
'https://eu.finalfantasyxiv.com/lodestone/character/{character_id}/mount/' 'https://eu.finalfantasyxiv.com/lodestone/character/{character_id}/mount/'
) )
FORMAT_DATE_HEADER = '%d/%m/%YYYY' FFXIV_ARR_NAME = 'Final Fantasy XIV: A Realm Reborn'
FFXIV_ARR_RELEASE_DATE = datetime.date(2013,8,27)
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class LodestoneAchievementScraper(Scraper): class LodestoneAchievementScraper(Scraper):
dataset_name = 'games_played_playstation' dataset_name = 'games_played'
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
def scrape(self): def scrape(self):
@@ -67,7 +68,8 @@ class LodestoneAchievementScraper(Scraper):
trophy_icon = trophy_icon.src trophy_icon = trophy_icon.src
yield { yield {
'game.name': 'Final Fantasy XIV: A Realm Reborn', 'game.name': FFXIV_ARR_NAME,
'game.release_date': FFXIV_ARR_RELEASE_DATE,
'me.last_played_time': time_acquired, 'me.last_played_time': time_acquired,
# Trophy Data # Trophy Data
'trophy.name': trophy_name, 'trophy.name': trophy_name,

View File

@@ -34,7 +34,7 @@ MAX_NUMBER_GAMES_TO_PARSE = 10000
class PsnProfilesScraper(Scraper): class PsnProfilesScraper(Scraper):
"""Downloads all trophies for the given user.""" """Downloads all trophies for the given user."""
dataset_name = 'games_played_playstation' dataset_name = 'games_played'
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
@staticmethod @staticmethod

View File

@@ -24,12 +24,14 @@ FORMAT_DATE_HEADER = '%d/%m/%YYYY'
class SteamAchievementScraper(Scraper): class SteamAchievementScraper(Scraper):
"""Downloads all trophies for the given user.""" """Downloads all trophies for the given user."""
dataset_name = 'games_played_TODO' dataset_name = 'games_played'
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
def scrape(self) -> Iterator[dict[str, Any]]: def scrape(self) -> Iterator[dict[str, Any]]:
username = secrets.STEAM_USERNAME username: str = secrets.STEAM_USERNAME
for appid in self._determine_appids_from_recent_activity(username): appids = list(self._determine_appids_from_recent_activity(username))
logger.info('Found %d Steam Apps', len(appids))
for appid in appids:
yield from self._scrape_app_achievements(username, appid) yield from self._scrape_app_achievements(username, appid)
def _determine_appids_from_recent_activity(self, username: str) -> Iterator[int]: def _determine_appids_from_recent_activity(self, username: str) -> Iterator[int]:
@@ -75,7 +77,7 @@ class SteamAchievementScraper(Scraper):
for entry in soup.select('.achieveRow'): for entry in soup.select('.achieveRow'):
trophy_name: str = entry.select_one('h3').get_text() trophy_name: str = entry.select_one('h3').get_text()
trophy_desc: str = entry.select_one('h5').get_text() trophy_desc: str = entry.select_one('h5').get_text()
trophy_icon: str = entry.select_one('img').src trophy_icon: str = entry.select_one('img')['src']
time_acquired_html: str = entry.select_one('.achieveUnlockTime') time_acquired_html: str = entry.select_one('.achieveUnlockTime')
if time_acquired_html is None: if time_acquired_html is None:
@@ -87,6 +89,7 @@ class SteamAchievementScraper(Scraper):
yield { yield {
'game.name': game_name, 'game.name': game_name,
#'game.release_date': None,
'me.last_played_time': time_acquired, 'me.last_played_time': time_acquired,
# Trophy Data # Trophy Data
'trophy.name': trophy_name, 'trophy.name': trophy_name,