1
0
datagraph/schemeld.py

108 lines
3.3 KiB
Python
Raw Normal View History

2022-12-20 20:16:31 +00:00
2023-03-06 22:41:49 +00:00
import logging
2022-12-20 20:16:31 +00:00
import urllib.parse
2023-03-06 22:41:49 +00:00
import wikidata.entity
import datetime
from dataclasses import dataclass
from enforce_typing import enforce_types
from typing import List, Set, Optional, Union
from enum import Enum
# When true, Concept.__setitem__ asserts the shape of every assigned value
# (a list of dicts that each carry a 'value' entry), except under the '@id' key.
STRICT_VALIDATION = True
2022-12-20 20:16:31 +00:00
2023-09-17 10:09:17 +00:00
def canonical_keys(base_key, context):
    """Return the lookup keys for ``base_key``, most canonical first.

    A key is expanded only when it is a plain string that does not start
    with ``'@'`` and a ``context`` is supplied. In that case the result is
    ``[context._replace(path=base_key), base_key]``. Every other key
    (already-parsed URLs, non-strings, ``'@'``-prefixed strings) is returned
    unchanged as a single-element list.

    ``context`` only needs a namedtuple-style ``_replace`` method accepting
    ``path``; presumably it is a ``urllib.parse.ParseResult`` -- confirm
    against callers.
    """
    # A ParseResult is never a str, so this one test also covers keys that
    # are already fully qualified.
    expandable = isinstance(base_key, str) and not base_key.startswith('@')
    if context is None or not expandable:
        return [base_key]
    qualified = context._replace(path=base_key)
    return [qualified, base_key]
2022-12-20 20:16:31 +00:00
2023-09-17 10:09:17 +00:00
class Concept(object):
    """Insertion-ordered mapping whose entries can be reached through several keys.

    Each entry is a dict ``{'canonical_key', 'keys', 'values'}``:
    ``keys`` is the set of aliases that resolve to the entry,
    ``canonical_key`` is the key reported by :meth:`keys` and
    :meth:`to_dict`, and ``values`` is the stored payload.

    ``self.pairs`` holds the entries in insertion order; ``self.by_keys``
    is an index from every alias to its entry.
    """

    def __init__(self, context, pairs):
        """Build a concept from the mapping ``pairs``.

        Every key of ``pairs`` is expanded with ``canonical_keys(key,
        context)``; the first returned key becomes the canonical one and
        all returned keys become aliases of the entry.
        """
        self.pairs = []
        for k, v in pairs.items():
            keys = canonical_keys(k, context)
            self.pairs.append({'canonical_key': keys[0], 'keys': set(keys), 'values': v})
        self.regenerate_by_keys()

    def regenerate_by_keys(self):
        """Rebuild the alias index ``self.by_keys`` from ``self.pairs``."""
        self.by_keys = {k: pair for pair in self.pairs for k in pair['keys']}

    def __copy__(self):
        """Return a shallow copy.

        Entries and their alias sets are duplicated; the ``values``
        objects themselves are shared with the original.
        """
        new = Concept(None, {})
        new.pairs = [
            {'canonical_key': p['canonical_key'], 'keys': set(p['keys']), 'values': p['values']}
            for p in self.pairs
        ]
        new.regenerate_by_keys()
        return new

    def get(self, key, default=None):
        """Return the values stored under ``key``, or ``default`` if absent."""
        pair = self.by_keys.get(key)
        return pair['values'] if pair is not None else default

    def getlist(self, key):
        """Return the ``'value'`` field of every item stored under ``key``.

        Returns an empty list when ``key`` is absent (or maps to ``None``).
        Raises ``AssertionError`` if the stored values are not a list.
        """
        result = self.get(key)
        if result is None:
            return []
        assert isinstance(result, list), 'Not a list: ' + str(result)
        return [r['value'] for r in result]

    def keys(self):
        """Yield the canonical key of every entry, in insertion order."""
        for pair in self.pairs:
            yield pair['canonical_key']

    def setdefault(self, key, value):
        """Store ``value`` under ``key`` if it is absent; return the stored values."""
        if key not in self.by_keys:
            self[key] = value
        return self.by_keys[key]['values']

    def to_dict(self):
        """Return a plain dict mapping canonical keys to values."""
        return {p['canonical_key']: p['values'] for p in self.pairs}

    def __getitem__(self, key):
        """Return the values for ``key`` (any alias); raise ``KeyError`` if absent."""
        return self.by_keys[key]['values']

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``.

        An existing entry (found through any alias) has its values
        replaced; otherwise a new entry is appended with ``key`` as its
        canonical and only key.

        When ``STRICT_VALIDATION`` is on, every key except ``'@id'`` must
        receive a list of dicts, each containing ``'value'`` and holding
        no list-typed fields; violations raise ``AssertionError``.
        """
        if STRICT_VALIDATION:
            if not isinstance(key, str) or key != '@id':
                assert isinstance(value, list), value
                for v in value:
                    assert isinstance(v, dict), value
                    assert 'value' in v, value
                    for subk in v:
                        assert not isinstance(v[subk], list), value
        if key in self.by_keys:
            self.by_keys[key]['values'] = value
        else:
            pair = {'canonical_key': key, 'keys': {key}, 'values': value}
            self.pairs.append(pair)
            self.by_keys[key] = pair

    def __contains__(self, key):
        """Return whether ``key`` (any alias) resolves to an entry."""
        return key in self.by_keys

    def __delitem__(self, key):
        """Remove the entry reachable through ``key``; raise ``KeyError`` if absent.

        All aliases of the entry are dropped from the index, not just
        ``key``; otherwise the remaining aliases would keep resolving to
        an entry that is no longer part of ``self.pairs``.
        """
        pair = self.by_keys[key]
        self.pairs.remove(pair)
        for alias in pair['keys'] | {key}:
            # Only drop index slots that still point at the removed entry.
            if self.by_keys.get(alias) is pair:
                del self.by_keys[alias]

    def __repr__(self):
        """Show only the ``'@id'`` values when present, else all entries."""
        if id_pair := self.by_keys.get('@id'):
            return 'Concept {{ @id = {} }}'.format(id_pair['values'])

        return 'Concept ' + str(self.to_dict())

    def __str__(self):
        """Same text as :meth:`__repr__`."""
        return repr(self)

    def set_canonical_key(self, new_canonical_key, key=None):
        """Make ``new_canonical_key`` the canonical key of an existing entry.

        The entry is located through ``key`` (defaulting to
        ``new_canonical_key`` itself); ``KeyError`` is raised if that key
        is unknown.

        If ``new_canonical_key`` does not yet resolve to any entry it is
        also registered as an alias, so the key reported by :meth:`keys`
        and :meth:`to_dict` can always be used for lookup. An existing
        mapping of ``new_canonical_key`` is left untouched.
        """
        if key is None:
            key = new_canonical_key
        pair = self.by_keys[key]
        pair['canonical_key'] = new_canonical_key
        if new_canonical_key not in self.by_keys:
            pair['keys'].add(new_canonical_key)
            self.by_keys[new_canonical_key] = pair