1
0
datagraph/schemeld.py

52 lines
1.5 KiB
Python
Raw Normal View History

2022-12-20 20:16:31 +00:00
import urllib.parse
class Concept(object):
    """Dict-like wrapper around one JSON-LD node with context-qualified keys.

    Every string key is normalised through :meth:`canonical_key` before it
    touches ``self.data``, so a short key and the same key already prefixed
    with the context address the same entry.
    """

    def __init__(self, context, data):
        """Store ``context`` and a copy of ``data`` with canonicalised keys.

        Args:
            context: Prefix string prepended to unqualified keys.
            data: Mapping of keys to values for this node.
        """
        self.context = context
        self.data = {self.canonical_key(k): v for k, v in data.items()}

    def get(self, key, default=None):
        """Return the value stored under ``key``, or ``default`` if absent.

        The default is handed to the underlying dict lookup; it is not
        passed to :meth:`canonical_key`, which only accepts the key.
        """
        return self.data.get(self.canonical_key(key), default)

    def __getitem__(self, key):
        """Return the value under ``key``; raises ``KeyError`` if missing."""
        return self.data[self.canonical_key(key)]

    def __setitem__(self, key, value):
        """Store ``value`` under the canonical form of ``key``."""
        self.data[self.canonical_key(key)] = value

    def __contains__(self, key):
        """Return whether the canonical form of ``key`` is present."""
        return self.canonical_key(key) in self.data

    def __delitem__(self, key):
        """Remove the entry under ``key``; raises ``KeyError`` if missing."""
        del self.data[self.canonical_key(key)]

    def canonical_key(self, key):
        """Return ``key`` qualified with this concept's context.

        Non-string keys, keys starting with ``'@'`` and keys that already
        start with the context are returned unchanged. Anything else gets
        the context prepended verbatim (no separator is inserted).
        """
        if not isinstance(key, str):
            return key
        if key.startswith('@'):
            return key
        if key.startswith(self.context):
            return key
        return self.context + key
def determine_concepts_internal(json, context, outputs):
    """Walk a JSON-LD structure and append a Concept per leaf node.

    Args:
        json: A list of nodes or a single dict node.
        context: Context inherited from the enclosing node; a node's own
            ``'@context'`` entry overrides it.
        outputs: List that collected ``Concept`` objects are appended to.

    Raises:
        AssertionError: If a non-list item is not a dict, or the effective
            context's network location is not ``schema.org``.
    """
    if not isinstance(json, list):
        assert isinstance(json, dict)

        # A node-level '@context' takes precedence over the inherited one.
        effective = json['@context'] if '@context' in json else context
        parsed = urllib.parse.urlparse(effective)
        assert parsed.netloc == 'schema.org'

        if '@graph' not in json:
            outputs.append(Concept(effective, json))
            return

        # A '@graph' container is descended into rather than emitted itself.
        determine_concepts_internal(json['@graph'], effective, outputs)
        return

    for member in json:
        determine_concepts_internal(member, context, outputs)
def determine_concepts(json):
    """Return the list of concepts found in the parsed JSON-LD ``json``.

    Starts the recursive walk with an empty inherited context and a fresh
    result list.
    """
    collected = []
    determine_concepts_internal(json, context='', outputs=collected)
    return collected