
Ruff check fix

Jon Michael Aanes 2024-07-08 18:53:33 +02:00
parent dfd2cf35ed
commit d7ad42890b
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
6 changed files with 35 additions and 43 deletions

View File

@@ -11,14 +11,10 @@ __all__ = [
     'wikidata_ext',
 ]
-import sys
 import os.path
+import sys
 sys.path.append(os.path.join(os.path.dirname(__file__)))
+from . import format, parse, schemeld, wikidata_ext
 from ._version import __version__
-from . import format
-from . import parse
-from . import schemeld
-from . import wikidata_ext
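
A sketch of how the top of this module presumably reads after the fix; the blank lines between import groups are an assumption, since the diff view above drops blank lines:

import os.path
import sys

sys.path.append(os.path.join(os.path.dirname(__file__)))

from . import format, parse, schemeld, wikidata_ext
from ._version import __version__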

View File

@@ -1,15 +1,17 @@
-import datagraph.schemeld
-import urllib.parse
-import wikidata.entity
 import datetime
 import logging
+import urllib.parse
+import wikidata.entity
+import datagraph.schemeld
 REFERENCE_PROPERTIES = {'P813', 'P854', 'P248', 'P143', 'P813'}
 def fmt_value(c, prefer_reference = False):
     if isinstance(c, str):
-        return '"{}"'.format(c) # TODO: Escape
+        return f'"{c}"' # TODO: Escape
     elif isinstance(c, datagraph.schemeld.Concept):
         if '@id' in c:
             return fmt_value(c['@id'], prefer_reference)
@@ -19,16 +21,16 @@ def fmt_value(c, prefer_reference = False):
     elif isinstance(c, wikidata.entity.Entity):
         s = c.id
         if isinstance(s, int):
-            s = 'P{}'.format(s)
+            s = f'P{s}'
         if s in REFERENCE_PROPERTIES:
             s = s.replace('P', 'S', 1)
         return s
     elif isinstance(c, urllib.parse.ParseResult):
         return c.geturl() if prefer_reference else fmt_value(c.geturl(), prefer_reference)
     elif isinstance(c, datetime.datetime):
-        return '+{}/11'.format(c.isoformat())
+        return f'+{c.isoformat()}/11'
     elif isinstance(c, datetime.date):
-        return '+{}T00:00:00Z/11'.format(c.isoformat())
+        return f'+{c.isoformat()}T00:00:00Z/11'
     return str(c)
@@ -42,7 +44,7 @@ def fmt_predicate(pred, object):
     elif pred.path == '/description':
         return 'D'+lang
     elif pred.path == '/sameAs':
-        return 'S{}wiki'.format(lang)
+        return f'S{lang}wiki'
     else:
         assert False, pred
     return fmt_value(pred, prefer_reference = True)
@@ -105,7 +107,7 @@ def to_quickstatements_v1(concepts):
     for concept in concepts:
         to_quickstatements_v1_item(concept, lines)
-    logging.info("Produced %s statements for %s concepts", len(lines), len(concepts))
+    logging.info('Produced %s statements for %s concepts', len(lines), len(concepts))
     commands = '\n'.join(['\t'.join(l) for l in lines])
     assert '\tNone\t' not in commands, 'TODO'
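
The changes in this file swap '...'.format(...) calls for f-strings in the QuickStatements value formatting. A minimal standalone sketch of the resulting output shapes, with invented inputs (the /11 suffix is the day-precision marker QuickStatements expects):

import datetime

# Same f-string patterns as fmt_value above; the inputs are made up for illustration.
name = 'Augusta Ada King'
print(f'"{name}"')                        # string values are quoted: "Augusta Ada King"

when = datetime.datetime(2024, 7, 8, 18, 53, 33)
print(f'+{when.isoformat()}/11')          # datetimes: +2024-07-08T18:53:33/11

day = datetime.date(2024, 7, 8)
print(f'+{day.isoformat()}T00:00:00Z/11') # dates: +2024-07-08T00:00:00Z/11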

View File

@@ -1,7 +1,9 @@
+import json
+import urllib
 import schemeld
-import urllib
-import json
 def determine_concepts_internal(json, context, outputs):
     if isinstance(json, list):
@@ -25,7 +27,7 @@ def determine_concepts(json):
 def determine_concepts_in_soup(soup):
     # TODO: Check type
-    ld_json_elements = soup.find_all('script', type="application/ld+json")
+    ld_json_elements = soup.find_all('script', type='application/ld+json')
     concepts = []
     for e in ld_json_elements:
         json_data = json.loads(e.string)
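
determine_concepts_in_soup only receives a pre-built soup object; a self-contained sketch of the same JSON-LD lookup, assuming the soup comes from BeautifulSoup and using an invented HTML snippet:

import json

from bs4 import BeautifulSoup

html = '<script type="application/ld+json">{"@type": "Person", "name": "Ada Lovelace"}</script>'
soup = BeautifulSoup(html, 'html.parser')

# Same query as determine_concepts_in_soup: every JSON-LD <script> element.
for element in soup.find_all('script', type='application/ld+json'):
    data = json.loads(element.string)
    print(data['name'])  # -> Ada Lovelace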

View File

@@ -1,11 +1,5 @@
 import logging
 import urllib.parse
-import wikidata.entity
-import datetime
-from dataclasses import dataclass
-from typing import List, Set, Optional, Union
-from enum import Enum
 STRICT_VALIDATION = True
@@ -20,7 +14,7 @@ def canonical_keys(base_key, context):
         return [base_key]
     return [context._replace(path = base_key), base_key]
-class Concept(object):
+class Concept:
     def __init__(self, context, pairs):
         self.pairs = []
View File

@@ -1,27 +1,27 @@
+import logging
+import urllib.parse
 import ratelimit
-import urllib.parse
-import wikidata.entity
 import requests
-import json
-import logging
+import wikidata.entity
 REQUEST_SESSION = None # TODO?
 def concept_uri(obj):
     assert isinstance(obj, wikidata.entity.Entity), obj
     if obj.id.startswith('P'):
-        return urllib.parse.urlparse('http://www.wikidata.org/prop/direct/{}'.format(obj.id))
+        return urllib.parse.urlparse(f'http://www.wikidata.org/prop/direct/{obj.id}')
     elif obj.id.startswith('Q'):
-        return urllib.parse.urlparse('http://www.wikidata.org/entity/{}'.format(obj.id))
+        return urllib.parse.urlparse(f'http://www.wikidata.org/entity/{obj.id}')
     else:
-        assert False, "TODO: " + obj.id
+        assert False, 'TODO: ' + obj.id
 def fmt_triple_value(obj, prefer_obj = False):
     if obj is None:
         return ''
     if isinstance(obj, str):
-        return '"{}"'.format(obj)
+        return f'"{obj}"'
     elif isinstance(obj, urllib.parse.ParseResult):
         return obj.geturl() if prefer_obj else fmt_triple_value(obj.geturl())
     elif isinstance(obj, wikidata.entity.Entity):
@@ -40,8 +40,8 @@ def fetch_by_url(url, headers):
         return None
     return response
-ITEMS_PER_PAGE = "http://www.w3.org/ns/hydra/core#itemsPerPage"
-TOTAL_ITEMS = "http://www.w3.org/ns/hydra/core#totalItems"
+ITEMS_PER_PAGE = 'http://www.w3.org/ns/hydra/core#itemsPerPage'
+TOTAL_ITEMS = 'http://www.w3.org/ns/hydra/core#totalItems'
 def fmt_params(subject, predicate, object):
     derp = [x for x in [subject, predicate, object] if x]
@@ -55,9 +55,8 @@ def fmt_params(subject, predicate, object):
     return params
 def get_triples_count(subject = None, predicate = None, object = None):
-    '''
-    Fetches first page in order to determine amount of items.
-    '''
+    """Fetches first page in order to determine amount of items.
+    """
     params = fmt_params(subject, predicate, object)
     url = requests.Request(url = 'https://query.wikidata.org/bigdata/ldf', params = params).prepare().url
     response = fetch_by_url(url, headers = {'accept': 'application/ld+json'})
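
get_triples_count builds its URL with requests.Request(...).prepare().url and then fetches it with an application/ld+json accept header. A minimal sketch of that pattern against the same endpoint; the parameter names and values are illustrative assumptions, since fmt_params is not shown in full here:

import requests

# Illustrative parameters; the real ones come from fmt_params(subject, predicate, object).
params = {
    'predicate': 'http://www.wikidata.org/prop/direct/P1628',
    'object': 'https://schema.org/image',
}

# prepare() URL-encodes the query string without sending a request.
url = requests.Request('GET', 'https://query.wikidata.org/bigdata/ldf', params=params).prepare().url
print(url)

response = requests.get(url, headers={'accept': 'application/ld+json'})
print(response.status_code)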

View File

@@ -1,10 +1,9 @@
+import requests_cache
 import wikidata.client
 import datagraph.schemeld
 import datagraph.wikidata_ext
-import requests_cache
 datagraph.wikidata_ext.REQUEST_SESSION = requests_cache.CachedSession('output/testing')
 def test_version():
@@ -14,12 +13,12 @@ def test_get_triples():
     client = wikidata.client.Client()
     EQV_PROPERTY = client.get('P1628')
-    schema_root = "https://schema.org/"
-    schema_prop = "image"
+    schema_root = 'https://schema.org/'
+    schema_prop = 'image'
     triples_iter = datagraph.wikidata_ext.get_triples(
         client = client,
         predicate = EQV_PROPERTY,
-        object = "{}{}".format(schema_root, schema_prop),
+        object = f'{schema_root}{schema_prop}',
     )
     assert triples_iter is not None
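
The test module routes its HTTP traffic through requests_cache (the CachedSession line above), so repeated test runs reuse cached responses instead of re-querying Wikidata. The same pattern in isolation; the URL is only an example:

import requests_cache

# Cache stored under 'output/testing', matching the test setup above.
session = requests_cache.CachedSession('output/testing')

response = session.get('https://www.wikidata.org/wiki/Special:EntityData/Q42.json')
print(response.from_cache)  # False on the first run, True once the response is cached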