diff --git a/datagraph/parse.py b/datagraph/parse.py index ba767c6..b0fc0c0 100644 --- a/datagraph/parse.py +++ b/datagraph/parse.py @@ -1,17 +1,23 @@ import json -import urllib +import urllib.parse import schemeld -def determine_concepts_internal(json, context, outputs): +def parse_url(url: str | urllib.parse.ParseResult) -> urllib.parse.ParseResult: + if isinstance(url, urllib.parse.ParseResult): + return url + return urllib.parse.urlparse(url) + + +def determine_concepts_internal(json: dict | list, context, outputs: list[schemeld.Concept]) -> None: if isinstance(json, list): for m in json: determine_concepts_internal(m, context, outputs) return assert isinstance(json, dict), type(json) - context = urllib.parse.urlparse(json.get('@context', context)) + context = parse_url(json.get('@context', context)) assert context.netloc == 'schema.org' if '@graph' in json: @@ -20,13 +26,13 @@ def determine_concepts_internal(json, context, outputs): outputs.append(schemeld.Concept(context, json)) -def determine_concepts(json): +def determine_concepts(json: dict | list) -> list[schemeld.Concept]: concepts = [] determine_concepts_internal(json, '', concepts) return concepts -def determine_concepts_in_soup(soup): +def determine_concepts_in_soup(soup) -> list[schemeld.Concept]: # TODO: Check type ld_json_elements = soup.find_all('script', type='application/ld+json') concepts = [] diff --git a/datagraph/schemeld.py b/datagraph/schemeld.py index c51a6cb..fb4e2db 100644 --- a/datagraph/schemeld.py +++ b/datagraph/schemeld.py @@ -5,7 +5,7 @@ from typing import Any STRICT_VALIDATION = True Key = int | str | urllib.parse.ParseResult -Context = str # TODO +Context = urllib.parse.ParseResult # TODO def canonical_keys(base_key: Key, context: Context | None) -> list[Any]: