Ruff check fix
This commit is contained in:
parent
dfd2cf35ed
commit
d7ad42890b
|
@ -11,14 +11,10 @@ __all__ = [
|
||||||
'wikidata_ext',
|
'wikidata_ext',
|
||||||
]
|
]
|
||||||
|
|
||||||
import sys
|
|
||||||
import os.path
|
import os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.path.dirname(__file__)))
|
sys.path.append(os.path.join(os.path.dirname(__file__)))
|
||||||
|
|
||||||
|
from . import format, parse, schemeld, wikidata_ext
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
from . import format
|
|
||||||
from . import parse
|
|
||||||
from . import schemeld
|
|
||||||
from . import wikidata_ext
|
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
|
|
||||||
import datagraph.schemeld
|
|
||||||
import urllib.parse
|
|
||||||
import wikidata.entity
|
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
|
import wikidata.entity
|
||||||
|
|
||||||
|
import datagraph.schemeld
|
||||||
|
|
||||||
# Wikidata property ids that fmt_value rewrites from a 'P' prefix to an 'S'
# prefix when it formats a wikidata entity.
# NOTE(review): the literal used to list 'P813' twice; the duplicate was a
# no-op inside a set. Confirm no other property id was intended in its place.
REFERENCE_PROPERTIES = {'P813', 'P854', 'P248', 'P143'}
|
||||||
|
|
||||||
def fmt_value(c, prefer_reference = False):
|
def fmt_value(c, prefer_reference = False):
|
||||||
if isinstance(c, str):
|
if isinstance(c, str):
|
||||||
return '"{}"'.format(c) # TODO: Escape
|
return f'"{c}"' # TODO: Escape
|
||||||
elif isinstance(c, datagraph.schemeld.Concept):
|
elif isinstance(c, datagraph.schemeld.Concept):
|
||||||
if '@id' in c:
|
if '@id' in c:
|
||||||
return fmt_value(c['@id'], prefer_reference)
|
return fmt_value(c['@id'], prefer_reference)
|
||||||
|
@ -19,16 +21,16 @@ def fmt_value(c, prefer_reference = False):
|
||||||
elif isinstance(c, wikidata.entity.Entity):
|
elif isinstance(c, wikidata.entity.Entity):
|
||||||
s = c.id
|
s = c.id
|
||||||
if isinstance(s, int):
|
if isinstance(s, int):
|
||||||
s = 'P{}'.format(s)
|
s = f'P{s}'
|
||||||
if s in REFERENCE_PROPERTIES:
|
if s in REFERENCE_PROPERTIES:
|
||||||
s = s.replace('P', 'S', 1)
|
s = s.replace('P', 'S', 1)
|
||||||
return s
|
return s
|
||||||
elif isinstance(c, urllib.parse.ParseResult):
|
elif isinstance(c, urllib.parse.ParseResult):
|
||||||
return c.geturl() if prefer_reference else fmt_value(c.geturl(), prefer_reference)
|
return c.geturl() if prefer_reference else fmt_value(c.geturl(), prefer_reference)
|
||||||
elif isinstance(c, datetime.datetime):
|
elif isinstance(c, datetime.datetime):
|
||||||
return '+{}/11'.format(c.isoformat())
|
return f'+{c.isoformat()}/11'
|
||||||
elif isinstance(c, datetime.date):
|
elif isinstance(c, datetime.date):
|
||||||
return '+{}T00:00:00Z/11'.format(c.isoformat())
|
return f'+{c.isoformat()}T00:00:00Z/11'
|
||||||
|
|
||||||
return str(c)
|
return str(c)
|
||||||
|
|
||||||
|
@ -42,7 +44,7 @@ def fmt_predicate(pred, object):
|
||||||
elif pred.path == '/description':
|
elif pred.path == '/description':
|
||||||
return 'D'+lang
|
return 'D'+lang
|
||||||
elif pred.path == '/sameAs':
|
elif pred.path == '/sameAs':
|
||||||
return 'S{}wiki'.format(lang)
|
return f'S{lang}wiki'
|
||||||
else:
|
else:
|
||||||
assert False, pred
|
assert False, pred
|
||||||
return fmt_value(pred, prefer_reference = True)
|
return fmt_value(pred, prefer_reference = True)
|
||||||
|
@ -105,7 +107,7 @@ def to_quickstatements_v1(concepts):
|
||||||
for concept in concepts:
|
for concept in concepts:
|
||||||
to_quickstatements_v1_item(concept, lines)
|
to_quickstatements_v1_item(concept, lines)
|
||||||
|
|
||||||
logging.info("Produced %s statements for %s concepts", len(lines), len(concepts))
|
logging.info('Produced %s statements for %s concepts', len(lines), len(concepts))
|
||||||
commands = '\n'.join(['\t'.join(l) for l in lines])
|
commands = '\n'.join(['\t'.join(l) for l in lines])
|
||||||
|
|
||||||
assert '\tNone\t' not in commands, 'TODO'
|
assert '\tNone\t' not in commands, 'TODO'
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
|
|
||||||
|
import json
|
||||||
|
import urllib
|
||||||
|
|
||||||
import schemeld
|
import schemeld
|
||||||
import urllib
|
|
||||||
import json
|
|
||||||
|
|
||||||
def determine_concepts_internal(json, context, outputs):
|
def determine_concepts_internal(json, context, outputs):
|
||||||
if isinstance(json, list):
|
if isinstance(json, list):
|
||||||
|
@ -25,7 +27,7 @@ def determine_concepts(json):
|
||||||
|
|
||||||
def determine_concepts_in_soup(soup):
|
def determine_concepts_in_soup(soup):
|
||||||
# TODO: Check type
|
# TODO: Check type
|
||||||
ld_json_elements = soup.find_all('script', type="application/ld+json")
|
ld_json_elements = soup.find_all('script', type='application/ld+json')
|
||||||
concepts = []
|
concepts = []
|
||||||
for e in ld_json_elements:
|
for e in ld_json_elements:
|
||||||
json_data = json.loads(e.string)
|
json_data = json.loads(e.string)
|
||||||
|
|
|
@ -1,11 +1,5 @@
|
||||||
|
|
||||||
import logging
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import wikidata.entity
|
|
||||||
import datetime
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import List, Set, Optional, Union
|
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
STRICT_VALIDATION = True
|
STRICT_VALIDATION = True
|
||||||
|
|
||||||
|
@ -20,7 +14,7 @@ def canonical_keys(base_key, context):
|
||||||
return [base_key]
|
return [base_key]
|
||||||
return [context._replace(path = base_key), base_key]
|
return [context._replace(path = base_key), base_key]
|
||||||
|
|
||||||
class Concept(object):
|
class Concept:
|
||||||
|
|
||||||
def __init__(self, context, pairs):
|
def __init__(self, context, pairs):
|
||||||
self.pairs = []
|
self.pairs = []
|
||||||
|
|
|
@ -1,27 +1,27 @@
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
import ratelimit
|
import ratelimit
|
||||||
import urllib.parse
|
|
||||||
import wikidata.entity
|
|
||||||
import requests
|
import requests
|
||||||
import json
|
import wikidata.entity
|
||||||
import logging
|
|
||||||
|
|
||||||
REQUEST_SESSION = None # TODO?
|
REQUEST_SESSION = None # TODO?
|
||||||
|
|
||||||
def concept_uri(obj):
    """Return the parsed Wikidata URI for an entity.

    Ids starting with 'P' are placed under ``/prop/direct/`` and ids starting
    with 'Q' under ``/entity/`` on ``http://www.wikidata.org``.

    Args:
        obj: A ``wikidata.entity.Entity`` whose ``id`` is a string.

    Returns:
        The ``urllib.parse.ParseResult`` for the entity's URI.

    Raises:
        AssertionError: If ``obj`` is not a ``wikidata.entity.Entity``, or its
            id starts with neither 'P' nor 'Q'.
    """
    assert isinstance(obj, wikidata.entity.Entity), obj
    if obj.id.startswith('P'):
        return urllib.parse.urlparse(f'http://www.wikidata.org/prop/direct/{obj.id}')
    elif obj.id.startswith('Q'):
        return urllib.parse.urlparse(f'http://www.wikidata.org/entity/{obj.id}')
    else:
        # The original referenced the misspelled name `ojb`, so this branch
        # died with NameError instead of reporting the unsupported id.
        # Raised explicitly so the check is not stripped under `python -O`.
        raise AssertionError('TODO: ' + obj.id)
|
||||||
|
|
||||||
def fmt_triple_value(obj, prefer_obj = False):
|
def fmt_triple_value(obj, prefer_obj = False):
|
||||||
if obj is None:
|
if obj is None:
|
||||||
return ''
|
return ''
|
||||||
if isinstance(obj, str):
|
if isinstance(obj, str):
|
||||||
return '"{}"'.format(obj)
|
return f'"{obj}"'
|
||||||
elif isinstance(obj, urllib.parse.ParseResult):
|
elif isinstance(obj, urllib.parse.ParseResult):
|
||||||
return obj.geturl() if prefer_obj else fmt_triple_value(obj.geturl())
|
return obj.geturl() if prefer_obj else fmt_triple_value(obj.geturl())
|
||||||
elif isinstance(obj, wikidata.entity.Entity):
|
elif isinstance(obj, wikidata.entity.Entity):
|
||||||
|
@ -40,8 +40,8 @@ def fetch_by_url(url, headers):
|
||||||
return None
|
return None
|
||||||
return response
|
return response
|
||||||
|
|
||||||
ITEMS_PER_PAGE = "http://www.w3.org/ns/hydra/core#itemsPerPage"
|
ITEMS_PER_PAGE = 'http://www.w3.org/ns/hydra/core#itemsPerPage'
|
||||||
TOTAL_ITEMS = "http://www.w3.org/ns/hydra/core#totalItems"
|
TOTAL_ITEMS = 'http://www.w3.org/ns/hydra/core#totalItems'
|
||||||
|
|
||||||
def fmt_params(subject, predicate, object):
|
def fmt_params(subject, predicate, object):
|
||||||
derp = [x for x in [subject, predicate, object] if x]
|
derp = [x for x in [subject, predicate, object] if x]
|
||||||
|
@ -55,9 +55,8 @@ def fmt_params(subject, predicate, object):
|
||||||
return params
|
return params
|
||||||
|
|
||||||
def get_triples_count(subject = None, predicate = None, object = None):
|
def get_triples_count(subject = None, predicate = None, object = None):
|
||||||
'''
|
"""Fetches first page in order to determine amount of items.
|
||||||
Fetches first page in order to determine amount of items.
|
"""
|
||||||
'''
|
|
||||||
params = fmt_params(subject, predicate, object)
|
params = fmt_params(subject, predicate, object)
|
||||||
url = requests.Request(url = 'https://query.wikidata.org/bigdata/ldf', params = params).prepare().url
|
url = requests.Request(url = 'https://query.wikidata.org/bigdata/ldf', params = params).prepare().url
|
||||||
response = fetch_by_url(url, headers = {'accept': 'application/ld+json'})
|
response = fetch_by_url(url, headers = {'accept': 'application/ld+json'})
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
|
import requests_cache
|
||||||
import wikidata.client
|
import wikidata.client
|
||||||
|
|
||||||
import datagraph.schemeld
|
import datagraph.schemeld
|
||||||
import datagraph.wikidata_ext
|
import datagraph.wikidata_ext
|
||||||
|
|
||||||
import requests_cache
|
|
||||||
|
|
||||||
datagraph.wikidata_ext.REQUEST_SESSION = requests_cache.CachedSession('output/testing')
|
datagraph.wikidata_ext.REQUEST_SESSION = requests_cache.CachedSession('output/testing')
|
||||||
|
|
||||||
def test_version():
|
def test_version():
|
||||||
|
@ -14,12 +13,12 @@ def test_get_triples():
|
||||||
client = wikidata.client.Client()
|
client = wikidata.client.Client()
|
||||||
|
|
||||||
EQV_PROPERTY = client.get('P1628')
|
EQV_PROPERTY = client.get('P1628')
|
||||||
schema_root = "https://schema.org/"
|
schema_root = 'https://schema.org/'
|
||||||
schema_prop = "image"
|
schema_prop = 'image'
|
||||||
|
|
||||||
triples_iter = datagraph.wikidata_ext.get_triples(
|
triples_iter = datagraph.wikidata_ext.get_triples(
|
||||||
client = client,
|
client = client,
|
||||||
predicate = EQV_PROPERTY,
|
predicate = EQV_PROPERTY,
|
||||||
object = "{}{}".format(schema_root, schema_prop),
|
object = f'{schema_root}{schema_prop}',
|
||||||
)
|
)
|
||||||
assert triples_iter is not None
|
assert triples_iter is not None
|
||||||
|
|
Loading…
Reference in New Issue
Block a user