1
0

Ruff check fix

This commit is contained in:
Jon Michael Aanes 2024-07-08 18:53:33 +02:00
parent dfd2cf35ed
commit d7ad42890b
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
6 changed files with 35 additions and 43 deletions

View File

@ -11,14 +11,10 @@ __all__ = [
'wikidata_ext', 'wikidata_ext',
] ]
import sys
import os.path import os.path
import sys
sys.path.append(os.path.join(os.path.dirname(__file__))) sys.path.append(os.path.join(os.path.dirname(__file__)))
from . import format, parse, schemeld, wikidata_ext
from ._version import __version__ from ._version import __version__
from . import format
from . import parse
from . import schemeld
from . import wikidata_ext

View File

@ -1,15 +1,17 @@
import datagraph.schemeld
import urllib.parse
import wikidata.entity
import datetime import datetime
import logging import logging
import urllib.parse
import wikidata.entity
import datagraph.schemeld
# Property ids that fmt_value rewrites from the 'P' prefix to the 'S' prefix.
# 'P813' was previously listed twice; a set literal silently collapses the
# duplicate, so the membership is unchanged.
# NOTE(review): confirm the second 'P813' was not meant to be a different id.
REFERENCE_PROPERTIES = {'P813', 'P854', 'P248', 'P143'}
def fmt_value(c, prefer_reference = False): def fmt_value(c, prefer_reference = False):
if isinstance(c, str): if isinstance(c, str):
return '"{}"'.format(c) # TODO: Escape return f'"{c}"' # TODO: Escape
elif isinstance(c, datagraph.schemeld.Concept): elif isinstance(c, datagraph.schemeld.Concept):
if '@id' in c: if '@id' in c:
return fmt_value(c['@id'], prefer_reference) return fmt_value(c['@id'], prefer_reference)
@ -19,16 +21,16 @@ def fmt_value(c, prefer_reference = False):
elif isinstance(c, wikidata.entity.Entity): elif isinstance(c, wikidata.entity.Entity):
s = c.id s = c.id
if isinstance(s, int): if isinstance(s, int):
s = 'P{}'.format(s) s = f'P{s}'
if s in REFERENCE_PROPERTIES: if s in REFERENCE_PROPERTIES:
s = s.replace('P', 'S', 1) s = s.replace('P', 'S', 1)
return s return s
elif isinstance(c, urllib.parse.ParseResult): elif isinstance(c, urllib.parse.ParseResult):
return c.geturl() if prefer_reference else fmt_value(c.geturl(), prefer_reference) return c.geturl() if prefer_reference else fmt_value(c.geturl(), prefer_reference)
elif isinstance(c, datetime.datetime): elif isinstance(c, datetime.datetime):
return '+{}/11'.format(c.isoformat()) return f'+{c.isoformat()}/11'
elif isinstance(c, datetime.date): elif isinstance(c, datetime.date):
return '+{}T00:00:00Z/11'.format(c.isoformat()) return f'+{c.isoformat()}T00:00:00Z/11'
return str(c) return str(c)
@ -42,7 +44,7 @@ def fmt_predicate(pred, object):
elif pred.path == '/description': elif pred.path == '/description':
return 'D'+lang return 'D'+lang
elif pred.path == '/sameAs': elif pred.path == '/sameAs':
return 'S{}wiki'.format(lang) return f'S{lang}wiki'
else: else:
assert False, pred assert False, pred
return fmt_value(pred, prefer_reference = True) return fmt_value(pred, prefer_reference = True)
@ -105,7 +107,7 @@ def to_quickstatements_v1(concepts):
for concept in concepts: for concept in concepts:
to_quickstatements_v1_item(concept, lines) to_quickstatements_v1_item(concept, lines)
logging.info("Produced %s statements for %s concepts", len(lines), len(concepts)) logging.info('Produced %s statements for %s concepts', len(lines), len(concepts))
commands = '\n'.join(['\t'.join(l) for l in lines]) commands = '\n'.join(['\t'.join(l) for l in lines])
assert '\tNone\t' not in commands, 'TODO' assert '\tNone\t' not in commands, 'TODO'

View File

@ -1,7 +1,9 @@
import json
import urllib
import schemeld import schemeld
import urllib
import json
def determine_concepts_internal(json, context, outputs): def determine_concepts_internal(json, context, outputs):
if isinstance(json, list): if isinstance(json, list):
@ -25,7 +27,7 @@ def determine_concepts(json):
def determine_concepts_in_soup(soup): def determine_concepts_in_soup(soup):
# TODO: Check type # TODO: Check type
ld_json_elements = soup.find_all('script', type="application/ld+json") ld_json_elements = soup.find_all('script', type='application/ld+json')
concepts = [] concepts = []
for e in ld_json_elements: for e in ld_json_elements:
json_data = json.loads(e.string) json_data = json.loads(e.string)

View File

@ -1,11 +1,5 @@
import logging
import urllib.parse import urllib.parse
import wikidata.entity
import datetime
from dataclasses import dataclass
from typing import List, Set, Optional, Union
from enum import Enum
STRICT_VALIDATION = True STRICT_VALIDATION = True
@ -20,7 +14,7 @@ def canonical_keys(base_key, context):
return [base_key] return [base_key]
return [context._replace(path = base_key), base_key] return [context._replace(path = base_key), base_key]
class Concept(object): class Concept:
def __init__(self, context, pairs): def __init__(self, context, pairs):
self.pairs = [] self.pairs = []

View File

@ -1,27 +1,27 @@
import logging
import urllib.parse
import ratelimit import ratelimit
import urllib.parse
import wikidata.entity
import requests import requests
import json import wikidata.entity
import logging
REQUEST_SESSION = None # TODO? REQUEST_SESSION = None # TODO?
def concept_uri(obj):
    """Return the parsed Wikidata URI for a property or item entity.

    Ids starting with 'P' map to
    ``http://www.wikidata.org/prop/direct/<id>``; ids starting with 'Q' map
    to ``http://www.wikidata.org/entity/<id>``.

    Args:
        obj: a ``wikidata.entity.Entity`` whose ``id`` is a string.

    Returns:
        The ``urllib.parse.ParseResult`` for the entity's URI.

    Raises:
        AssertionError: if ``obj`` is not an ``Entity`` (checked with
            ``assert``), or if its id starts with neither 'P' nor 'Q'.
    """
    assert isinstance(obj, wikidata.entity.Entity), obj
    if obj.id.startswith('P'):
        return urllib.parse.urlparse(f'http://www.wikidata.org/prop/direct/{obj.id}')
    elif obj.id.startswith('Q'):
        return urllib.parse.urlparse(f'http://www.wikidata.org/entity/{obj.id}')
    else:
        # The message used to reference the misspelled name `ojb`, so this
        # branch raised NameError instead of the intended assertion failure.
        # An explicit raise also keeps the failure when asserts are stripped.
        raise AssertionError('TODO: ' + obj.id)
def fmt_triple_value(obj, prefer_obj = False): def fmt_triple_value(obj, prefer_obj = False):
if obj is None: if obj is None:
return '' return ''
if isinstance(obj, str): if isinstance(obj, str):
return '"{}"'.format(obj) return f'"{obj}"'
elif isinstance(obj, urllib.parse.ParseResult): elif isinstance(obj, urllib.parse.ParseResult):
return obj.geturl() if prefer_obj else fmt_triple_value(obj.geturl()) return obj.geturl() if prefer_obj else fmt_triple_value(obj.geturl())
elif isinstance(obj, wikidata.entity.Entity): elif isinstance(obj, wikidata.entity.Entity):
@ -40,8 +40,8 @@ def fetch_by_url(url, headers):
return None return None
return response return response
ITEMS_PER_PAGE = "http://www.w3.org/ns/hydra/core#itemsPerPage" ITEMS_PER_PAGE = 'http://www.w3.org/ns/hydra/core#itemsPerPage'
TOTAL_ITEMS = "http://www.w3.org/ns/hydra/core#totalItems" TOTAL_ITEMS = 'http://www.w3.org/ns/hydra/core#totalItems'
def fmt_params(subject, predicate, object): def fmt_params(subject, predicate, object):
derp = [x for x in [subject, predicate, object] if x] derp = [x for x in [subject, predicate, object] if x]
@ -55,9 +55,8 @@ def fmt_params(subject, predicate, object):
return params return params
def get_triples_count(subject = None, predicate = None, object = None): def get_triples_count(subject = None, predicate = None, object = None):
''' """Fetches first page in order to determine amount of items.
Fetches first page in order to determine amount of items. """
'''
params = fmt_params(subject, predicate, object) params = fmt_params(subject, predicate, object)
url = requests.Request(url = 'https://query.wikidata.org/bigdata/ldf', params = params).prepare().url url = requests.Request(url = 'https://query.wikidata.org/bigdata/ldf', params = params).prepare().url
response = fetch_by_url(url, headers = {'accept': 'application/ld+json'}) response = fetch_by_url(url, headers = {'accept': 'application/ld+json'})

View File

@ -1,10 +1,9 @@
import requests_cache
import wikidata.client import wikidata.client
import datagraph.schemeld import datagraph.schemeld
import datagraph.wikidata_ext import datagraph.wikidata_ext
import requests_cache
datagraph.wikidata_ext.REQUEST_SESSION = requests_cache.CachedSession('output/testing') datagraph.wikidata_ext.REQUEST_SESSION = requests_cache.CachedSession('output/testing')
def test_version(): def test_version():
@ -14,12 +13,12 @@ def test_get_triples():
client = wikidata.client.Client() client = wikidata.client.Client()
EQV_PROPERTY = client.get('P1628') EQV_PROPERTY = client.get('P1628')
schema_root = "https://schema.org/" schema_root = 'https://schema.org/'
schema_prop = "image" schema_prop = 'image'
triples_iter = datagraph.wikidata_ext.get_triples( triples_iter = datagraph.wikidata_ext.get_triples(
client = client, client = client,
predicate = EQV_PROPERTY, predicate = EQV_PROPERTY,
object = "{}{}".format(schema_root, schema_prop), object = f'{schema_root}{schema_prop}',
) )
assert triples_iter is not None assert triples_iter is not None