1
0

Working on datagraph capabilities

This commit is contained in:
Jon Michael Aanes 2022-12-20 21:16:31 +01:00
commit 94fa9f10b9
2 changed files with 77 additions and 0 deletions

51
schemeld.py Normal file
View File

@ -0,0 +1,51 @@
import urllib.parse
class Concept(object):
    """Dict-like wrapper around one JSON-LD node with context-qualified keys.

    Every string key is normalised through :meth:`canonical_key` both when
    the node is stored and when it is looked up, so a short key and its
    context-prefixed form address the same entry.
    """

    def __init__(self, context, data):
        """Store *context* and a copy of *data* with canonicalised keys.

        Args:
            context: Prefix string prepended to unqualified string keys.
            data: Mapping holding the node's properties.
        """
        self.context = context
        self.data = {self.canonical_key(k): v for (k, v) in data.items()}

    def get(self, key, *args, **kwargs):
        """Return the value stored under *key*, or a fallback if it is absent.

        Extra arguments are handed to ``dict.get``, so an optional
        positional default is supported: ``concept.get('name', 'n/a')``.
        """
        # Only the key is canonicalised; the extras (the default value)
        # belong to dict.get, not to canonical_key, which takes one argument.
        return self.data.get(self.canonical_key(key), *args, **kwargs)

    def __getitem__(self, key):
        return self.data[self.canonical_key(key)]

    def __setitem__(self, key, value):
        self.data[self.canonical_key(key)] = value

    def __contains__(self, key):
        return self.canonical_key(key) in self.data

    def __delitem__(self, key):
        del self.data[self.canonical_key(key)]

    def canonical_key(self, key):
        """Return the fully-qualified form of *key*.

        Non-string keys, keys starting with ``'@'`` and keys that already
        start with the context are returned unchanged; any other string
        gets the context prepended. With an empty context every string key
        is returned as-is, because every string starts with ``''``.
        """
        if not isinstance(key, str):
            return key
        if key.startswith('@') or key.startswith(self.context):
            return key
        return self.context + key
def determine_concepts_internal(json, context, outputs):
    """Recursively collect Concept objects from a JSON-LD structure.

    Lists are traversed element by element. A dict may override the
    inherited context through its ``'@context'`` entry; a dict carrying
    ``'@graph'`` is descended into, any other dict is wrapped in a Concept
    and appended to *outputs*.

    Args:
        json: A list or dict taken from the decoded document.
        context: Context string inherited from the enclosing node.
        outputs: List that receives the discovered Concept objects.

    Raises:
        AssertionError: If a non-list node is not a dict, or if the
            effective context's network location is not ``schema.org``.
    """
    if not isinstance(json, list):
        assert isinstance(json, dict)
        # A node-level '@context' takes precedence over the inherited one.
        context = json.get('@context', context)
        assert urllib.parse.urlparse(context).netloc == 'schema.org'
        if '@graph' not in json:
            outputs.append(Concept(context, json))
        else:
            determine_concepts_internal(json['@graph'], context, outputs)
        return

    for member in json:
        determine_concepts_internal(member, context, outputs)
def determine_concepts(json):
    """Return a new list of the Concept objects found in *json*.

    The traversal starts with an empty context string, so nodes are
    expected to supply their own ``'@context'``.
    """
    collected = []
    determine_concepts_internal(json, '', collected)
    return collected

26
wikidata.py Normal file
View File

@ -0,0 +1,26 @@
def get_triples(client, subject=None, predicate=None, object=None):
    """Query the Wikidata LDF endpoint and return the matching triples.

    Sleeps for one second, then requests page 1 of the triple pattern
    described by *subject*, *predicate* and *object* as JSON-LD.

    Args:
        client: Object whose ``get(id, load=False)`` is called to build the
            subject of every returned triple.
        subject: Subject filter, formatted through ``fmt_triple_value``.
        predicate: Predicate filter; its ``id`` attribute is looked up in
            each returned graph item.
        object: Object filter, formatted through ``fmt_triple_value``.

    Returns:
        A list of ``(subject_entity, predicate, object)`` tuples, or an
        empty list when the response status is not 200 (the failure is
        logged).
    """
    # NOTE(review): only the first result page is ever requested.
    # NOTE(review): with the default predicate=None, `predicate.id` raises
    # AttributeError as soon as a 'wd:' item is seen; confirm callers
    # always pass a predicate.
    time.sleep(1)

    query = {
        'subject': fmt_triple_value(subject),
        'predicate': fmt_triple_value(predicate),
        'object': fmt_triple_value(object),
        'page': 1,
    }
    accept_json_ld = {'accept': 'application/ld+json'}
    response = requests.get(
        'https://query.wikidata.org/bigdata/ldf',
        params=query,
        headers=accept_json_ld,
    )

    if response.status_code != 200:
        logging.error('Got %s error message: %s', response.status_code, repr((subject, predicate, object)))
        return []

    # Keep only 'wd:'-prefixed items that mention the predicate; the prefix
    # is stripped before the id is handed to the client.
    return [
        (client.get(node['@id'][3:], load=False), predicate, object)
        for node in response.json()['@graph']
        if node['@id'].startswith('wd:') and predicate.id in node
    ]