More typing

parent f73ba5ccc2
commit 1aa41a8414
@@ -118,7 +118,7 @@ class WikidataInfo(object):
     nickname_version_of: Optional[SocialSiteId] = None


-WIKIDATA_PROPERTIES = {
+WIKIDATA_PROPERTIES: dict[SocialSiteId, WikidataInfo] = {
     SocialSiteId.EMAIL: WikidataInfo(968, None),
     SocialSiteId.RSS_FEED: WikidataInfo(1079, None),
     SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None),
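Note: a minimal sketch of what the new dict annotation enables; the bad assignment below is hypothetical, for illustration only.

    # With WIKIDATA_PROPERTIES: dict[SocialSiteId, WikidataInfo], a checker
    # such as mypy can now reject a wrongly-typed key or value:
    WIKIDATA_PROPERTIES[SocialSiteId.EMAIL]                 # OK
    WIKIDATA_PROPERTIES['email'] = WikidataInfo(968, None)  # hypothetical
    # error: Invalid index type "str" for "dict[SocialSiteId, WikidataInfo]"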
|
@@ -184,8 +184,7 @@ WIKIDATA_PROPERTIES = {
 }


-def re_social_subdomain(main_domain):
-    # return r'^(?:https?:\/\/)?([\w_-]+)\.'+re.escape(main_domain)+'\/?$'
+def re_social_subdomain(main_domain: str) -> str:
     return r'^(?:https?:\/\/)?([\w_-]+)\.' + re.escape(main_domain) + r'(\/.*)?$'


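A sketch of what re_social_subdomain matches, using bandcamp.com (a domain this module already handles) inlined by hand:

    import re

    # Equivalent to re_social_subdomain('bandcamp.com'):
    pattern = r'^(?:https?:\/\/)?([\w_-]+)\.' + re.escape('bandcamp.com') + r'(\/.*)?$'

    # The first group captures the subdomain; any trailing path is allowed.
    m = re.match(pattern, 'https://someband.bandcamp.com/album/x')
    assert m and m.group(1) == 'someband'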
@@ -194,23 +193,23 @@ RE_DUAL_ID = r'@?([^/]+/[^/]+)'
 RE_ANY_SUBPATH = r'(|\/|\/.*)$'


-def re_social_path(main_domain):
-    # return r'^(?:https?:\/\/)?(?:www\.)?'+re.escape(main_domain)+'\/'+RE_ID+'\/?$'
+def re_social_path(main_domain: str) -> str:
     return re_social_path_adv(main_domain, RE_ID)


-def re_social_path_adv(main_domain, *path):
-    assert not main_domain.startswith('www.'), 'Redundant www.'
-    l = [r'^', r'(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
+def re_social_path_adv(main_domain: str, *path: str) -> str:
+    if main_domain.startswith('www.'):
+        msg = f'Redundant www: {main_domain}'
+        raise ValueError(msg)
+    regex_builder: list[str] = [r'^', r'(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]

     for p in path:
         if p != RE_ANY_SUBPATH:
-            l.append(r'\/')
-        l.append(p if p in {RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH} else re.escape(p))
+            regex_builder.append(r'\/')
+        regex_builder.append(p if p in {RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH} else re.escape(p))
     if path[-1] != RE_ANY_SUBPATH:
-        l.append(r'\/?$')
-    regex = ''.join(l)
-    return regex
+        regex_builder.append(r'\/?$')
+    return ''.join(regex_builder)


 MAILTO_URL = r'^mailto:(?:[\w._.]+@[\w._.]+)$'
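For reference, a minimal sketch of the pattern re_social_path_adv assembles, using a hypothetical domain and a literal path segment (RE_ID, RE_DUAL_ID and RE_ANY_SUBPATH as defined above):

    import re

    # Equivalent to re_social_path_adv('example.com', 'user'):
    pattern = r'^(?:https?:\/\/)?(?:www\.)?' + re.escape('example.com') + r'\/user\/?$'

    # Scheme and www. are optional; a single trailing slash is tolerated.
    assert re.match(pattern, 'https://www.example.com/user/')
    assert re.match(pattern, 'example.com/user')
    assert not re.match(pattern, 'https://example.com/other')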
|
@@ -264,7 +263,7 @@ URL_PARSE_DANBOORU_ARTIST = re_social_path_adv('danbooru.donmai.us', 'artists',
 URL_PARSE_BANDCAMP = re_social_subdomain('bandcamp.com')
 URL_PARSE_BLUESKY = re_social_path_adv('bsky.app', 'profile', RE_ID)

-REGEXES = [
+REGEXES: list[tuple[str, SocialSiteId]] = [
     # Reddit
     (REDDIT_SUBREDDIT_URL, SocialSiteId.REDDIT_SUBREDDIT),
     (REDDIT_USER_URL, SocialSiteId.REDDIT_USER),
@@ -364,7 +363,7 @@ REGEXES = [
     (re_social_subdomain('blogspot.com'), SocialSiteId.GOOGLE_BLOGGER_PAGE),
 ]

-WELL_KNOWN_MASTODON_INSTANCES = frozenset(
+WELL_KNOWN_MASTODON_INSTANCES: frozenset[str] = frozenset(
     {
         # Includes all servers with 50 000+ users as of 6 july 2023.
         # based on https://mastodonservers.net/servers/top
@@ -388,7 +387,7 @@ WELL_KNOWN_MASTODON_INSTANCES = frozenset(
 )


-def determine_social_from_url_internally(url: str):
+def determine_social_from_url_internally(url: str) -> tuple[SocialSiteId | None, str | None]:
     assert isinstance(url, str)

     # Regexes
@@ -405,15 +404,23 @@ def determine_social_from_url_internally(url: str):
         return (SocialSiteId.MASTODON_PAGE, None)

     # Feed (?)
-    elif 'feed' in url or 'xml' in url or 'rss' in url or 'atom' in url:
+    if 'feed' in url or 'xml' in url or 'rss' in url or 'atom' in url:
         return (SocialSiteId.RSS_FEED, None)

     return (None, None)

-def determine_social_from_url(url):
+def to_parse_result(url: str | urllib.parse.ParseResult) -> urllib.parse.ParseResult:
     if isinstance(url, str):
-        url = urllib.parse.urlparse(url)
+        return urllib.parse.urlparse(url)
+    if isinstance(url, urllib.parse.ParseResult):
+        return url
+
+    # Throw error
+    msg = f'Expected {urllib.parse.ParseResult} or {str}'
+    raise TypeError(msg)
+
+def determine_social_from_url(url_not_normalized: str | urllib.parse.ParseResult) -> SocialLink | None:
+    url = to_parse_result(url_not_normalized)
     (social_site_id, social_id) = determine_social_from_url_internally(
         url._replace(query='', fragment='').geturl()
     )
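A short usage sketch of the new split, assuming the definitions above: to_parse_result normalizes the argument, and determine_social_from_url strips query and fragment before handing the URL to the regexes.

    import urllib.parse

    # Accepted inputs after this change:
    #   to_parse_result('https://bsky.app/profile/someone')  -> ParseResult
    #   to_parse_result(urllib.parse.urlparse(...))          -> unchanged
    #   to_parse_result(42)                                  -> raises TypeError

    # Query and fragment are dropped before matching:
    url = urllib.parse.urlparse('https://bsky.app/profile/someone?utm_source=x#top')
    assert url._replace(query='', fragment='').geturl() == 'https://bsky.app/profile/someone'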