diff --git a/datagraph/__init__.py b/datagraph/__init__.py index 8bbff5d..7ba4fbc 100644 --- a/datagraph/__init__.py +++ b/datagraph/__init__.py @@ -11,14 +11,10 @@ __all__ = [ 'wikidata_ext', ] -import sys import os.path +import sys sys.path.append(os.path.join(os.path.dirname(__file__))) +from . import format, parse, schemeld, wikidata_ext from ._version import __version__ - -from . import format -from . import parse -from . import schemeld -from . import wikidata_ext diff --git a/datagraph/format.py b/datagraph/format.py index 9fc6ea9..1dfe393 100644 --- a/datagraph/format.py +++ b/datagraph/format.py @@ -1,15 +1,17 @@ -import datagraph.schemeld -import urllib.parse -import wikidata.entity import datetime import logging +import urllib.parse + +import wikidata.entity + +import datagraph.schemeld REFERENCE_PROPERTIES = {'P813', 'P854', 'P248', 'P143', 'P813'} def fmt_value(c, prefer_reference = False): if isinstance(c, str): - return '"{}"'.format(c) # TODO: Escape + return f'"{c}"' # TODO: Escape elif isinstance(c, datagraph.schemeld.Concept): if '@id' in c: return fmt_value(c['@id'], prefer_reference) @@ -19,16 +21,16 @@ def fmt_value(c, prefer_reference = False): elif isinstance(c, wikidata.entity.Entity): s = c.id if isinstance(s, int): - s = 'P{}'.format(s) + s = f'P{s}' if s in REFERENCE_PROPERTIES: s = s.replace('P', 'S', 1) return s elif isinstance(c, urllib.parse.ParseResult): return c.geturl() if prefer_reference else fmt_value(c.geturl(), prefer_reference) elif isinstance(c, datetime.datetime): - return '+{}/11'.format(c.isoformat()) + return f'+{c.isoformat()}/11' elif isinstance(c, datetime.date): - return '+{}T00:00:00Z/11'.format(c.isoformat()) + return f'+{c.isoformat()}T00:00:00Z/11' return str(c) @@ -42,7 +44,7 @@ def fmt_predicate(pred, object): elif pred.path == '/description': return 'D'+lang elif pred.path == '/sameAs': - return 'S{}wiki'.format(lang) + return f'S{lang}wiki' else: assert False, pred return fmt_value(pred, prefer_reference = True) @@ -105,7 +107,7 @@ def to_quickstatements_v1(concepts): for concept in concepts: to_quickstatements_v1_item(concept, lines) - logging.info("Produced %s statements for %s concepts", len(lines), len(concepts)) + logging.info('Produced %s statements for %s concepts', len(lines), len(concepts)) commands = '\n'.join(['\t'.join(l) for l in lines]) assert '\tNone\t' not in commands, 'TODO' diff --git a/datagraph/parse.py b/datagraph/parse.py index ff54246..4b7084d 100644 --- a/datagraph/parse.py +++ b/datagraph/parse.py @@ -1,7 +1,9 @@ +import json +import urllib + import schemeld -import urllib -import json + def determine_concepts_internal(json, context, outputs): if isinstance(json, list): @@ -25,7 +27,7 @@ def determine_concepts(json): def determine_concepts_in_soup(soup): # TODO: Check type - ld_json_elements = soup.find_all('script', type="application/ld+json") + ld_json_elements = soup.find_all('script', type='application/ld+json') concepts = [] for e in ld_json_elements: json_data = json.loads(e.string) diff --git a/datagraph/schemeld.py b/datagraph/schemeld.py index 7172c8c..828da80 100644 --- a/datagraph/schemeld.py +++ b/datagraph/schemeld.py @@ -1,11 +1,5 @@ -import logging import urllib.parse -import wikidata.entity -import datetime -from dataclasses import dataclass -from typing import List, Set, Optional, Union -from enum import Enum STRICT_VALIDATION = True @@ -20,7 +14,7 @@ def canonical_keys(base_key, context): return [base_key] return [context._replace(path = base_key), base_key] -class Concept(object): +class Concept: def __init__(self, context, pairs): self.pairs = [] diff --git a/datagraph/wikidata_ext.py b/datagraph/wikidata_ext.py index 3f0c26c..b61e52c 100644 --- a/datagraph/wikidata_ext.py +++ b/datagraph/wikidata_ext.py @@ -1,27 +1,27 @@ +import logging +import urllib.parse + import ratelimit -import urllib.parse -import wikidata.entity import requests -import json -import logging +import wikidata.entity REQUEST_SESSION = None # TODO? def concept_uri(obj): assert isinstance(obj, wikidata.entity.Entity), obj if obj.id.startswith('P'): - return urllib.parse.urlparse('http://www.wikidata.org/prop/direct/{}'.format(obj.id)) + return urllib.parse.urlparse(f'http://www.wikidata.org/prop/direct/{obj.id}') elif obj.id.startswith('Q'): - return urllib.parse.urlparse('http://www.wikidata.org/entity/{}'.format(obj.id)) + return urllib.parse.urlparse(f'http://www.wikidata.org/entity/{obj.id}') else: - assert False, "TODO: " + ojb.id + assert False, 'TODO: ' + ojb.id def fmt_triple_value(obj, prefer_obj = False): if obj is None: return '' if isinstance(obj, str): - return '"{}"'.format(obj) + return f'"{obj}"' elif isinstance(obj, urllib.parse.ParseResult): return obj.geturl() if prefer_obj else fmt_triple_value(obj.geturl()) elif isinstance(obj, wikidata.entity.Entity): @@ -40,8 +40,8 @@ def fetch_by_url(url, headers): return None return response -ITEMS_PER_PAGE = "http://www.w3.org/ns/hydra/core#itemsPerPage" -TOTAL_ITEMS = "http://www.w3.org/ns/hydra/core#totalItems" +ITEMS_PER_PAGE = 'http://www.w3.org/ns/hydra/core#itemsPerPage' +TOTAL_ITEMS = 'http://www.w3.org/ns/hydra/core#totalItems' def fmt_params(subject, predicate, object): derp = [x for x in [subject, predicate, object] if x] @@ -55,9 +55,8 @@ def fmt_params(subject, predicate, object): return params def get_triples_count(subject = None, predicate = None, object = None): - ''' - Fetches first page in order to determine amount of items. - ''' + """Fetches first page in order to determine amount of items. + """ params = fmt_params(subject, predicate, object) url = requests.Request(url = 'https://query.wikidata.org/bigdata/ldf', params = params).prepare().url response = fetch_by_url(url, headers = {'accept': 'application/ld+json'}) diff --git a/test/test_datagraph.py b/test/test_datagraph.py index 48f67b5..f674688 100644 --- a/test/test_datagraph.py +++ b/test/test_datagraph.py @@ -1,10 +1,9 @@ +import requests_cache import wikidata.client import datagraph.schemeld import datagraph.wikidata_ext -import requests_cache - datagraph.wikidata_ext.REQUEST_SESSION = requests_cache.CachedSession('output/testing') def test_version(): @@ -14,12 +13,12 @@ def test_get_triples(): client = wikidata.client.Client() EQV_PROPERTY = client.get('P1628') - schema_root = "https://schema.org/" - schema_prop = "image" + schema_root = 'https://schema.org/' + schema_prop = 'image' triples_iter = datagraph.wikidata_ext.get_triples( client = client, predicate = EQV_PROPERTY, - object = "{}{}".format(schema_root, schema_prop), + object = f'{schema_root}{schema_prop}', ) assert triples_iter is not None