1
0
This repository has been archived on 2024-10-13. You can view files and clone it, but cannot push or open issues or pull requests.
git-time-tracker/git_time_tracker/source/csv_file.py

116 lines
3.5 KiB
Python
Raw Normal View History

2024-08-27 19:05:08 +00:00
import datetime
import urllib.parse
2024-09-26 22:03:41 +00:00
from typing import Any
2024-09-20 22:32:19 +00:00
from collections.abc import Iterator
from decimal import Decimal
2024-08-27 19:05:08 +00:00
from pathlib import Path
2024-09-26 22:03:41 +00:00
import dataclasses
2024-08-27 19:05:08 +00:00
from personal_data.util import load_csv_file
from ..data import WorkSample
2024-09-26 22:03:41 +00:00
@dataclasses.dataclass
class PossibleKeys:
    """Keys of a row dictionary, grouped by the role they may play.

    Each attribute holds a list of key names (strings). The groups are
    filled in by ``determine_possible_keys``.
    """

    # Candidate keys for the start timestamp of a sample.
    time_start: list[str]
    # Candidate keys for the end timestamp of a sample.
    time_end: list[str]
    # Candidate keys holding a duration.
    duration: list[str]
    # Candidate keys holding textual values.
    name: list[str]
    # Candidate keys holding image references.
    image: list[str]
    # Remaining keys that are not used for timestamps or images.
    misc: list[str]
2024-09-20 22:32:19 +00:00
2024-09-26 22:03:41 +00:00
def determine_possible_keys(event_data: dict[str, Any]) -> PossibleKeys:
    """Group the keys of a single row by the type of the associated value.

    Args:
        event_data: One row, mapping column name to an already parsed value.

    Returns:
        A ``PossibleKeys`` instance where

        * ``time_start`` / ``time_end`` are keys with ``datetime.date``
          values whose lower-cased name contains ``'start'``, respectively
          ``'end'`` or ``'stop'``,
        * ``duration`` are keys with ``Decimal`` values whose name contains
          ``'duration_seconds'``,
        * ``name`` are keys with ``str`` values,
        * ``image`` are keys with ``urllib.parse.ParseResult`` values,
        * ``misc`` are all keys except the image keys and the date keys.
    """
    date_keys: list[str] = []
    duration_keys: list[str] = []
    text_keys: list[str] = []
    url_keys: list[str] = []

    # Independent checks (not elif): each category is decided on its own.
    for key, value in event_data.items():
        if isinstance(value, datetime.date):
            date_keys.append(key)
        if isinstance(value, Decimal) and 'duration_seconds' in key:
            duration_keys.append(key)
        if isinstance(value, str):
            text_keys.append(key)
        if isinstance(value, urllib.parse.ParseResult):
            url_keys.append(key)

    # Everything that is neither an image nor a date stays available as misc.
    excluded = {*url_keys, *date_keys}
    remaining_keys = [key for key in event_data if key not in excluded]

    # NOTE: plain substring matching on the lower-cased key name.
    start_keys: list[str] = []
    end_keys: list[str] = []
    for key in date_keys:
        lowered = key.lower()
        if 'start' in lowered:
            start_keys.append(key)
        if 'end' in lowered or 'stop' in lowered:
            end_keys.append(key)

    return PossibleKeys(
        time_start=start_keys,
        time_end=end_keys,
        duration=duration_keys,
        name=text_keys,
        image=url_keys,
        misc=remaining_keys,
    )
def start_end(
    sample: dict[str, Any],
    keys: PossibleKeys,
) -> tuple[datetime.datetime | None, datetime.datetime | None]:
    """Extract the (start, end) pair of a row using the given candidate keys.

    Only the first candidate of each key group is consulted. The end is
    taken from the end key when one exists; otherwise it is derived from
    start + duration when both are available.

    Args:
        sample: One row, mapping column name to parsed value.
        keys: Candidate keys, as produced by ``determine_possible_keys``.

    Returns:
        A ``(start, end)`` tuple. Either element is ``None`` when it cannot
        be determined from the row.

    Raises:
        KeyError: If a selected key is not present in ``sample``.
    """
    start = sample[keys.time_start[0]] if keys.time_start else None

    # An explicit end column always takes precedence over a duration.
    if keys.time_end:
        return (start, sample[keys.time_end[0]])

    if keys.time_start and keys.duration:
        duration_seconds = sample[keys.duration[0]]
        # Individual rows may lack a value even though the column exists;
        # report an open-ended interval instead of failing on None arithmetic.
        if start is None or duration_seconds is None:
            return (start, None)
        duration = datetime.timedelta(seconds=float(duration_seconds))
        return (start, start + duration)

    return (start, None)
def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[WorkSample]:
    """Yield one ``WorkSample`` for every row.

    The role of each column is determined once, from the row in the middle
    of ``rows``, and then applied to all rows.

    Args:
        rows: Non-empty list of rows, each mapping column name to value.

    Yields:
        A ``WorkSample`` per row, with ``labels`` built as ``'key:value'``
        strings from the misc keys and ``start_at`` / ``end_at`` taken from
        ``start_end``.

    Raises:
        AssertionError: If ``rows`` is empty, or if the representative row
            has neither a start nor an end timestamp column.
    """
    assert len(rows) > 0, 'No rows given'

    # Hopefully select a useful representative.
    representative = rows[len(rows) // 2]
    possible_keys = determine_possible_keys(representative)
    assert (
        len(possible_keys.time_start) + len(possible_keys.time_end) >= 1
    ), 'No start or end time column found'

    for event_data in rows:
        start_at, end_at = start_end(event_data, possible_keys)
        labels = tuple(f'{k}:{event_data[k]}' for k in possible_keys.misc)

        # Create event
        yield WorkSample(
            labels=labels,
            start_at=start_at,
            end_at=end_at,
        )
2024-09-20 22:32:19 +00:00
2024-08-27 19:05:08 +00:00
def iterate_samples_from_csv_file(file_path: Path) -> Iterator[WorkSample]:
    """Load a CSV file and yield the work samples derived from its rows.

    All samples are collected before the first one is yielded, so the
    emptiness check below runs before any sample reaches the caller.

    Args:
        file_path: Path of the CSV file, passed to ``load_csv_file``.

    Yields:
        The samples produced by ``iterate_samples_from_dicts``.

    Raises:
        AssertionError: If no samples were produced.
    """
    rows = load_csv_file(file_path)
    collected = [*iterate_samples_from_dicts(rows)]
    assert len(collected) > 0, 'Did not found any samples'
    for sample in collected:
        yield sample