52 lines
1.5 KiB
Python
52 lines
1.5 KiB
Python
|
|
||
|
import urllib.parse
|
||
|
|
||
|
class Concept(object):
    """Dict-like wrapper whose string keys are normalised against a context.

    Every key passed to the mapping-style methods is first run through
    ``canonical_key``, so a short key and the same key already prefixed
    with the context address the same entry in ``self.data``.
    """

    def __init__(self, context, data):
        """Store *context* and copy *data* with canonicalised keys.

        Args:
            context: Prefix prepended to short string keys.
            data: Mapping whose items are copied into ``self.data``.
        """
        # The context must be set first: canonical_key reads self.context.
        self.context = context
        self.data = {self.canonical_key(k): v for (k, v) in data.items()}

    def get(self, key, *args, **kwargs):
        """Return the value stored under *key*, like ``dict.get``.

        Any extra arguments (typically a default value) are forwarded to
        ``dict.get`` on the underlying data, not to ``canonical_key``,
        which accepts only the key.
        """
        return self.data.get(self.canonical_key(key), *args, **kwargs)

    def __getitem__(self, key):
        """Return the value for *key*; raises KeyError when absent."""
        return self.data[self.canonical_key(key)]

    def __setitem__(self, key, value):
        """Store *value* under the canonical form of *key*."""
        self.data[self.canonical_key(key)] = value

    def __contains__(self, key):
        """Return True when the canonical form of *key* is present."""
        return self.canonical_key(key) in self.data

    def __delitem__(self, key):
        """Remove the entry for *key*; raises KeyError when absent."""
        del self.data[self.canonical_key(key)]

    def canonical_key(self, key):
        """Return the form of *key* used for storage and lookup.

        Returned unchanged: non-string keys, keys starting with ``'@'``,
        and keys that already start with the context. Every other string
        key gets the context prepended. With an empty context every
        string key already "starts with" it and is therefore unchanged.
        """
        if not isinstance(key, str):
            return key
        if key.startswith('@') or key.startswith(self.context):
            return key
        return self.context + key
|
||
|
|
||
|
def determine_concepts_internal(json, context, outputs):
    """Recursively collect a Concept for each dict node without '@graph'.

    Lists are walked element by element. A dict that has an '@graph'
    entry is descended into; any other dict is wrapped in a Concept and
    appended to *outputs*.

    Args:
        json: A dict node, or a (possibly nested) list of such nodes.
        context: Context inherited from the enclosing node. A node's own
            '@context' entry, when present, replaces it for that node and
            everything below it.
        outputs: List that receives the Concept objects in traversal
            order; it is mutated in place.

    Raises:
        AssertionError: If a node is neither a list nor a dict, or if the
            effective context's network location is not 'schema.org'.
    """
    if isinstance(json, list):
        for member in json:
            determine_concepts_internal(member, context, outputs)
        return

    # The checks raise AssertionError explicitly instead of using `assert`
    # so that they still run under `python -O`, while callers that catch
    # AssertionError keep working.
    if not isinstance(json, dict):
        raise AssertionError(
            'expected a dict or list node, got %s' % type(json).__name__)

    context = json.get('@context', context)
    if urllib.parse.urlparse(context).netloc != 'schema.org':
        raise AssertionError('unsupported context: %r' % (context,))

    if '@graph' in json:
        determine_concepts_internal(json['@graph'], context, outputs)
    else:
        outputs.append(Concept(context, json))
|
||
|
|
||
|
def determine_concepts(json):
    """Return the list of Concept objects found in *json*.

    Starts the recursive walk with an empty inherited context and a fresh
    result list, then hands that list back to the caller.
    """
    collected = []
    determine_concepts_internal(json, context='', outputs=collected)
    return collected
|