# commit 94fa9f10b9c2069d010b5dc0ce9ec24db5954508
# Author: Jon Michael Aanes
# Date:   Tue Dec 20 21:16:31 2022 +0100
#
#     Working on datagraph capabilities
#
# The commit adds two new files, schemeld.py and wikidata.py. Their contents
# follow, one section per file.

import logging
import time
import urllib.parse


# ---------------------------------------------------------------------------
# schemeld.py (new file)
# ---------------------------------------------------------------------------

class Concept:
    """Dict-like wrapper around one JSON-LD node with context-qualified keys.

    Every string key is normalised through :meth:`canonical_key` before it
    touches ``self.data``, so ``concept['name']`` and
    ``concept[context + 'name']`` address the same entry.
    """

    def __init__(self, context, data):
        """Store *context* and copy *data* with all keys canonicalised."""
        self.context = context
        self.data = {self.canonical_key(k): v for (k, v) in data.items()}

    def get(self, key, default=None):
        """Return the value stored under *key*, or *default* if it is absent.

        The fallback is handed to ``dict.get`` rather than to
        ``canonical_key`` (which only accepts a key), so
        ``concept.get(key, fallback)`` no longer raises ``TypeError``.
        """
        return self.data.get(self.canonical_key(key), default)

    def __getitem__(self, key):
        return self.data[self.canonical_key(key)]

    def __setitem__(self, key, value):
        self.data[self.canonical_key(key)] = value

    def __contains__(self, key):
        return self.canonical_key(key) in self.data

    def __delitem__(self, key):
        del self.data[self.canonical_key(key)]

    def canonical_key(self, key):
        """Return the form of *key* that is used inside ``self.data``.

        Non-string keys, keys starting with ``'@'`` and keys that already
        start with the context are returned unchanged; any other string is
        prefixed with the context.
        """
        if not isinstance(key, str):
            return key
        # A single startswith call covers both "leave untouched" prefixes.
        if key.startswith(('@', self.context)):
            return key
        # NOTE(review): the key is appended with no separator, so the context
        # presumably has to end in '/' or '#' already -- confirm with callers.
        return self.context + key


def determine_concepts_internal(json, context, outputs):
    """Walk a JSON-LD structure and append a Concept per node to *outputs*.

    Lists are traversed element by element. A dict may override the
    inherited *context* through its ``'@context'`` entry; a dict holding
    ``'@graph'`` is descended into, any other dict becomes one ``Concept``.

    Raises:
        AssertionError: if a non-list element is not a dict, or if the
            effective context does not have ``schema.org`` as its host.
    """
    if isinstance(json, list):
        for m in json:
            determine_concepts_internal(m, context, outputs)
        return

    assert isinstance(json, dict), 'expected a JSON object or array'
    context = json.get('@context', context)
    assert urllib.parse.urlparse(context).netloc == 'schema.org', \
        'only schema.org contexts are supported'
    if '@graph' in json:
        determine_concepts_internal(json['@graph'], context, outputs)
    else:
        outputs.append(Concept(context, json))


def determine_concepts(json):
    """Return every Concept found in the parsed JSON-LD document *json*.

    The traversal starts with an empty context, so a ``'@context'`` must be
    present on the way to each node for the schema.org check to pass.
    """
    concepts = []
    determine_concepts_internal(json, '', concepts)
    return concepts


# ---------------------------------------------------------------------------
# wikidata.py (new file)
# ---------------------------------------------------------------------------

# NOTE(review): `requests` and `fmt_triple_value` are used below but are
# neither imported nor defined in the visible source; they have to be
# provided before get_triples can run.

def get_triples(client, subject=None, predicate=None, object=None):
    """Query the Wikidata ``/bigdata/ldf`` endpoint for matching triples.

    Args:
        client: object whose ``get(id, load=False)`` resolves an entity id.
        subject, predicate, object: pattern components, each passed through
            ``fmt_triple_value`` to build the query parameters.

    Returns:
        A list of ``(client.get(id, load=False), predicate, object)`` tuples,
        one per ``'wd:'`` item in the response's ``'@graph'`` that has
        ``predicate.id`` as a key. An empty list is returned (and an error
        logged) when the HTTP status is not 200. Only page 1 is requested.
    """
    # Fixed one-second pause before every request (presumably rate limiting).
    time.sleep(1)
    params = {
        'subject': fmt_triple_value(subject),
        'predicate': fmt_triple_value(predicate),
        'object': fmt_triple_value(object),
        'page': 1,
    }
    headers = {'accept': 'application/ld+json'}
    result = requests.get(
        'https://query.wikidata.org/bigdata/ldf',
        params=params,
        headers=headers,
    )

    if result.status_code != 200:
        logging.error('Got %s error message: %s', result.status_code,
                      repr((subject, predicate, object)))
        return []

    triples = []
    for item in result.json()['@graph']:
        # NOTE(review): predicate defaults to None, yet predicate.id is
        # dereferenced here, so a None predicate raises AttributeError as
        # soon as a 'wd:' item is seen -- confirm the intended behaviour.
        if item['@id'].startswith('wd:') and predicate.id in item:
            s = item['@id'][3:]  # drop the 'wd:' prefix
            triples.append((client.get(s, load=False), predicate, object))

    return triples