import urllib.parse


class Concept(object):
    """Dict-like wrapper whose string keys are normalised against a context.

    Every string key is passed through :meth:`canonical_key` before it
    touches the underlying ``data`` dict, so ``concept['name']`` and
    ``concept[context + 'name']`` address the same entry.
    """

    def __init__(self, context, data):
        """Store *context* and copy *data* with canonicalised keys.

        Args:
            context: String prefix prepended to unqualified keys.
            data: Mapping of raw keys to values; it is not mutated.
        """
        self.context = context
        self.data = {self.canonical_key(k): v for k, v in data.items()}

    def get(self, key, *args, **kwargs):
        """Return the value stored under *key*, like ``dict.get``.

        Extra arguments (typically a positional default) are forwarded to
        the underlying ``dict.get``. They must not be handed to
        ``canonical_key``, which only accepts the key itself; doing so made
        every call that supplied a default raise ``TypeError``.
        """
        return self.data.get(self.canonical_key(key), *args, **kwargs)

    def __getitem__(self, key):
        """Return the value for *key*; raises ``KeyError`` if absent."""
        return self.data[self.canonical_key(key)]

    def __setitem__(self, key, value):
        """Store *value* under the canonical form of *key*."""
        self.data[self.canonical_key(key)] = value

    def __contains__(self, key):
        """Return True if the canonical form of *key* is present."""
        return self.canonical_key(key) in self.data

    def __delitem__(self, key):
        """Remove the entry for *key*; raises ``KeyError`` if absent."""
        del self.data[self.canonical_key(key)]

    def canonical_key(self, key):
        """Return *key* in canonical (context-qualified) form.

        Non-string keys, keys starting with ``'@'`` and keys that already
        start with the context are returned unchanged. Anything else gets
        the context prepended.
        """
        if not isinstance(key, str):
            return key
        if key.startswith('@'):
            return key
        if key.startswith(self.context):
            return key
        # NOTE(review): plain concatenation, no separator is inserted, so
        # the context presumably needs to end with '/' -- confirm with callers.
        return self.context + key


def determine_concepts_internal(json, context, outputs):
    """Recursively collect ``Concept`` objects from *json* into *outputs*.

    Args:
        json: A dict, or a list of such structures (looks like a JSON-LD
            document; only the '@context' and '@graph' keys are inspected).
        context: Context inherited from the enclosing structure; a dict's
            own '@context' entry, when present, takes precedence.
        outputs: List that receives the created ``Concept`` objects.

    Raises:
        AssertionError: If a non-list item is not a dict, or if the
            effective context's network location is not 'schema.org'.
    """
    if isinstance(json, list):
        for member in json:
            determine_concepts_internal(member, context, outputs)
        return

    # NOTE: these checks are asserts and therefore vanish under ``python -O``.
    assert isinstance(json, dict)
    context = json.get('@context', context)
    assert urllib.parse.urlparse(context).netloc == 'schema.org'

    if '@graph' in json:
        # A graph container is not a concept itself; descend into its members.
        determine_concepts_internal(json['@graph'], context, outputs)
    else:
        outputs.append(Concept(context, json))


def determine_concepts(json):
    """Return a list of every ``Concept`` found in *json*.

    Starts the traversal with an empty context, so each dict reached must
    obtain a schema.org '@context' from itself or from an enclosing dict.
    """
    concepts = []
    determine_concepts_internal(json, '', concepts)
    return concepts