Improved csv sniffing
This commit is contained in:
parent
857be3cf2f
commit
af42e3ba90
|
@ -261,17 +261,22 @@ PATH_WATCHED = Path('output/show_episodes_watched.csv')
|
||||||
PATH_PLAYED = Path('output/games_played.csv')
|
PATH_PLAYED = Path('output/games_played.csv')
|
||||||
PATH_WORKOUT = Path('/home/jmaa/Notes/workout.csv')
|
PATH_WORKOUT = Path('/home/jmaa/Notes/workout.csv')
|
||||||
PATH_STEP_COUNTS = Path(
|
PATH_STEP_COUNTS = Path(
|
||||||
'/home/jmaa/personal-archive/misc-data/step_counts_2023-07-26_to_2024-09-21.csv',
|
'/home/jmaa/Notes/Rawbackupdata/Steps/exportStepCount_2025-03-15_22-58-20',
|
||||||
)
|
)
|
||||||
PATH_STEPMANIA = Path('output/stepmania.csv')
|
PATH_STEPMANIA = Path('output/stepmania.csv')
|
||||||
|
|
||||||
|
|
||||||
IMPORTERS = [
|
IMPORTERS = [
|
||||||
{'path': PATH_WORKOUT, 'import_rows': import_workout_csv},
|
{'path': PATH_WORKOUT, 'standard_variant': True, 'import_rows': import_workout_csv},
|
||||||
{'path': PATH_STEP_COUNTS, 'import_rows': import_step_counts_csv},
|
{'path': PATH_STEP_COUNTS, 'import_rows': import_step_counts_csv},
|
||||||
{'path': PATH_STEPMANIA, 'import_rows': import_stepmania_steps_csv},
|
{
|
||||||
|
'path': PATH_STEPMANIA,
|
||||||
|
'standard_variant': True,
|
||||||
|
'import_rows': import_stepmania_steps_csv,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'path': PATH_PLAYED,
|
'path': PATH_PLAYED,
|
||||||
|
'standard_variant': True,
|
||||||
'import_rows': lambda vault, rows: import_activity_sample_csv(
|
'import_rows': lambda vault, rows: import_activity_sample_csv(
|
||||||
vault,
|
vault,
|
||||||
rows,
|
rows,
|
||||||
|
@ -281,6 +286,7 @@ IMPORTERS = [
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'path': PATH_WATCHED,
|
'path': PATH_WATCHED,
|
||||||
|
'standard_variant': True,
|
||||||
'import_rows': lambda vault, rows: import_activity_sample_csv(
|
'import_rows': lambda vault, rows: import_activity_sample_csv(
|
||||||
vault,
|
vault,
|
||||||
rows,
|
rows,
|
||||||
|
@ -301,7 +307,9 @@ def import_data(obsidian_path: Path, dry_run=True):
|
||||||
import_def['path'],
|
import_def['path'],
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
rows = load_csv_file(import_def['path'])
|
rows = load_csv_file(
|
||||||
|
import_def['path'], sniff=not import_def.get('standard_variant'),
|
||||||
|
)
|
||||||
logger.info('Loaded CSV with %d lines', len(rows))
|
logger.info('Loaded CSV with %d lines', len(rows))
|
||||||
num_files_updated = import_def['import_rows'](vault, rows)
|
num_files_updated = import_def['import_rows'](vault, rows)
|
||||||
logger.info('Updated %d files', num_files_updated)
|
logger.info('Updated %d files', num_files_updated)
|
||||||
|
|
|
@ -2,6 +2,7 @@ import csv
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime
|
import datetime
|
||||||
import decimal
|
import decimal
|
||||||
|
import logging
|
||||||
import typing
|
import typing
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
|
@ -11,6 +12,8 @@ from typing import Any
|
||||||
|
|
||||||
from frozendict import frozendict
|
from frozendict import frozendict
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
CSV_DIALECT = 'one_true_dialect'
|
CSV_DIALECT = 'one_true_dialect'
|
||||||
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
||||||
|
|
||||||
|
@ -86,10 +89,12 @@ def load_csv_file(csv_file: Path, sniff=False) -> list[frozendict[str, typing.An
|
||||||
dicts: list[frozendict] = []
|
dicts: list[frozendict] = []
|
||||||
with open(csv_file) as csvfile:
|
with open(csv_file) as csvfile:
|
||||||
if sniff:
|
if sniff:
|
||||||
dialect = csv.Sniffer().sniff(csvfile.read(1024))
|
logger.warning('Sniffing CSV variant: %s', csv_file)
|
||||||
|
dialect = csv.Sniffer().sniff(csvfile.read(1024), delimiters=',;')
|
||||||
csvfile.seek(0)
|
csvfile.seek(0)
|
||||||
else:
|
else:
|
||||||
dialect = CSV_DIALECT
|
dialect = CSV_DIALECT
|
||||||
|
logger.warning('Loading CSV file: %s', csv_file)
|
||||||
reader = csv.DictReader(csvfile, dialect=dialect)
|
reader = csv.DictReader(csvfile, dialect=dialect)
|
||||||
for row in reader:
|
for row in reader:
|
||||||
for k in list(row.keys()):
|
for k in list(row.keys()):
|
||||||
|
|
|
@ -22,7 +22,8 @@ def scrape(watch_history: bool) -> list[dict[str, str]]:
|
||||||
'yt-dlp',
|
'yt-dlp',
|
||||||
url,
|
url,
|
||||||
'--dump-json',
|
'--dump-json',
|
||||||
'--cookies-from-browser', 'firefox:/home/jmaa/.cachy/mbui5xg7.default-release',
|
'--cookies-from-browser',
|
||||||
|
'firefox:/home/jmaa/.cachy/mbui5xg7.default-release',
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
url = f'https://www.youtube.com/playlist?list={PLAYLIST_ID}'
|
url = f'https://www.youtube.com/playlist?list={PLAYLIST_ID}'
|
||||||
|
|
Loading…
Reference in New Issue
Block a user