import argparse
import datetime
import urllib.parse
from collections.abc import Iterator
from pathlib import Path

from personal_data.util import load_csv_file

from ..data import WorkSample


def iterate_samples_from_dicts(rows: list[dict]) -> Iterator[WorkSample]:
    """Yield one ``WorkSample`` per row that contains a date-like value.

    Columns are classified by the runtime type of their value:

    * ``datetime.date`` instances (which includes ``datetime.datetime``)
      are time columns. The first one, in the row's key order, becomes
      the sample's ``end_at``.
    * ``urllib.parse.ParseResult`` instances are treated as image URLs and
      are left out of the labels.
    * Every remaining column is rendered as a ``'key:value'`` label, in
      the row's key order.

    Rows without any time column are skipped. ``start_at`` is always
    ``None``.

    Args:
        rows: Mappings from column name to an already-parsed value.

    Yields:
        A ``WorkSample`` for each row that has at least one time column.
    """
    for event_data in rows:
        time_keys = [
            k for k, v in event_data.items() if isinstance(v, datetime.date)
        ]
        if not time_keys:
            # Without a timestamp there is nothing to anchor the sample to.
            continue

        # Time and image columns never become labels.
        excluded_keys = set(time_keys)
        excluded_keys.update(
            k
            for k, v in event_data.items()
            if isinstance(v, urllib.parse.ParseResult)
        )

        labels = tuple(
            f'{k}:{v}' for k, v in event_data.items() if k not in excluded_keys
        )

        yield WorkSample(
            labels=labels,
            start_at=None,
            end_at=event_data[time_keys[0]],
        )


def iterate_samples_from_csv_file(file_path: Path) -> Iterator[WorkSample]:
    """Load ``file_path`` with ``load_csv_file`` and yield its samples.

    The loaded rows are passed unchanged to ``iterate_samples_from_dicts``.

    NOTE(review): presumably ``load_csv_file`` returns a list of dicts whose
    values are already parsed into dates / URLs / strings; confirm against
    ``personal_data.util``.
    """
    yield from iterate_samples_from_dicts(load_csv_file(file_path))