1
0

More typing

This commit is contained in:
Jon Michael Aanes 2024-05-12 16:34:47 +02:00
parent f73ba5ccc2
commit 1aa41a8414
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA

View File

@ -118,7 +118,7 @@ class WikidataInfo(object):
nickname_version_of: Optional[SocialSiteId] = None
WIKIDATA_PROPERTIES = {
WIKIDATA_PROPERTIES: dict[SocialSiteId, WikidataInfo] = {
SocialSiteId.EMAIL: WikidataInfo(968, None),
SocialSiteId.RSS_FEED: WikidataInfo(1079, None),
SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None),
@ -184,8 +184,7 @@ WIKIDATA_PROPERTIES = {
}
def re_social_subdomain(main_domain):
# return r'^(?:https?:\/\/)?([\w_-]+)\.'+re.escape(main_domain)+'\/?$'
def re_social_subdomain(main_domain: str) -> str:
    """Build a regex matching any subdomain of *main_domain*.

    Captures the subdomain label in group 1 and tolerates an optional
    scheme prefix and an optional trailing path.
    """
    fragments = [
        r'^(?:https?:\/\/)?([\w_-]+)\.',
        re.escape(main_domain),
        r'(\/.*)?$',
    ]
    return ''.join(fragments)
@ -194,23 +193,23 @@ RE_DUAL_ID = r'@?([^/]+/[^/]+)'
RE_ANY_SUBPATH = r'(|\/|\/.*)$'
def re_social_path(main_domain):
# return r'^(?:https?:\/\/)?(?:www\.)?'+re.escape(main_domain)+'\/'+RE_ID+'\/?$'
def re_social_path(main_domain: str) -> str:
    """Regex for a single identifier path directly under *main_domain*.

    Thin convenience wrapper: delegates to re_social_path_adv with the
    standard single-ID path segment.
    """
    single_id_segment = (RE_ID,)
    return re_social_path_adv(main_domain, *single_id_segment)
def re_social_path_adv(main_domain, *path):
assert not main_domain.startswith('www.'), 'Redundant www.'
l = [r'^', r'(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
def re_social_path_adv(main_domain: str, *path: str) -> str:
if main_domain.startswith('www.'):
msg = f'Redundant www: {main_domain}'
raise ValueError(msg)
regex_builder: list[str] = [r'^', r'(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
for p in path:
if p != RE_ANY_SUBPATH:
l.append(r'\/')
l.append(p if p in {RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH} else re.escape(p))
regex_builder.append(r'\/')
regex_builder.append(p if p in {RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH} else re.escape(p))
if path[-1] != RE_ANY_SUBPATH:
l.append(r'\/?$')
regex = ''.join(l)
return regex
regex_builder.append(r'\/?$')
return ''.join(regex_builder)
MAILTO_URL = r'^mailto:(?:[\w._.]+@[\w._.]+)$'
@ -264,7 +263,7 @@ URL_PARSE_DANBOORU_ARTIST = re_social_path_adv('danbooru.donmai.us', 'artists',
URL_PARSE_BANDCAMP = re_social_subdomain('bandcamp.com')
URL_PARSE_BLUESKY = re_social_path_adv('bsky.app', 'profile', RE_ID)
REGEXES = [
REGEXES: list[tuple[str, SocialSiteId]] = [
# Reddit
(REDDIT_SUBREDDIT_URL, SocialSiteId.REDDIT_SUBREDDIT),
(REDDIT_USER_URL, SocialSiteId.REDDIT_USER),
@ -364,7 +363,7 @@ REGEXES = [
(re_social_subdomain('blogspot.com'), SocialSiteId.GOOGLE_BLOGGER_PAGE),
]
WELL_KNOWN_MASTODON_INSTANCES = frozenset(
WELL_KNOWN_MASTODON_INSTANCES: frozenset[str] = frozenset(
{
# Includes all servers with 50 000+ users as of 6 july 2023.
# based on https://mastodonservers.net/servers/top
@ -388,7 +387,7 @@ WELL_KNOWN_MASTODON_INSTANCES = frozenset(
)
def determine_social_from_url_internally(url: str):
def determine_social_from_url_internally(url: str) -> tuple[SocialSiteId | None, str | None]:
assert isinstance(url, str)
# Regexes
@ -405,15 +404,23 @@ def determine_social_from_url_internally(url: str):
return (SocialSiteId.MASTODON_PAGE, None)
# Feed (?)
elif 'feed' in url or 'xml' in url or 'rss' in url or 'atom' in url:
if 'feed' in url or 'xml' in url or 'rss' in url or 'atom' in url:
return (SocialSiteId.RSS_FEED, None)
return (None, None)
def determine_social_from_url(url):
def to_parse_result(url: str | urllib.parse.ParseResult) -> urllib.parse.ParseResult:
if isinstance(url, str):
url = urllib.parse.urlparse(url)
return urllib.parse.urlparse(url)
if isinstance(url, urllib.parse.ParseResult):
return url
# Throw error
msg = f'Expected {urllib.parse.ParseResult} or {str}'
raise TypeError(msg)
def determine_social_from_url(url_not_normalized: str | urllib.parse.ParseResult) -> SocialLink | None:
url = to_parse_result(url_not_normalized)
(social_site_id, social_id) = determine_social_from_url_internally(
url._replace(query='', fragment='').geturl()
)