This commit is contained in:
parent
6e6e5e63be
commit
c067f1c9ae
|
@ -238,6 +238,8 @@ SONGKICK_ARTIST_URL = (
|
||||||
)
|
)
|
||||||
TUMBLR_PAGE_URL = re_social_path('tumblr.com')
|
TUMBLR_PAGE_URL = re_social_path('tumblr.com')
|
||||||
TUMBLR_PAGE_URL_2 = re_social_subdomain('tumblr.com')
|
TUMBLR_PAGE_URL_2 = re_social_subdomain('tumblr.com')
|
||||||
|
TUMBLR_PAGE_URL_3 = re_social_path('tumblr.com/blog')
|
||||||
|
TUMBLR_PAGE_URL_4 = re_social_path('tumblr.com/blog/view')
|
||||||
INSTAGRAM_URL = re_social_path('instagram.com')
|
INSTAGRAM_URL = re_social_path('instagram.com')
|
||||||
PATREON_URL = re_social_path_adv('patreon.com', RE_ID, RE_ANY_SUBPATH)
|
PATREON_URL = re_social_path_adv('patreon.com', RE_ID, RE_ANY_SUBPATH)
|
||||||
ARTSTATION_URL = re_social_path('artstation.com')
|
ARTSTATION_URL = re_social_path('artstation.com')
|
||||||
|
@ -299,6 +301,8 @@ REGEXES: list[tuple[str, SocialSiteId]] = [
|
||||||
# Tumblr
|
# Tumblr
|
||||||
(TUMBLR_PAGE_URL, SocialSiteId.TUMBLR),
|
(TUMBLR_PAGE_URL, SocialSiteId.TUMBLR),
|
||||||
(TUMBLR_PAGE_URL_2, SocialSiteId.TUMBLR),
|
(TUMBLR_PAGE_URL_2, SocialSiteId.TUMBLR),
|
||||||
|
(TUMBLR_PAGE_URL_3, SocialSiteId.TUMBLR),
|
||||||
|
(TUMBLR_PAGE_URL_4, SocialSiteId.TUMBLR),
|
||||||
# Instagram
|
# Instagram
|
||||||
(INSTAGRAM_URL, SocialSiteId.INSTAGRAM_PAGE),
|
(INSTAGRAM_URL, SocialSiteId.INSTAGRAM_PAGE),
|
||||||
# Tiktok
|
# Tiktok
|
||||||
|
@ -402,6 +406,7 @@ WELL_KNOWN_MASTODON_INSTANCES: frozenset[str] = frozenset(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
DISALLOWED_IDENTIFIERS: frozenset[str] = frozenset({'www'})
|
||||||
|
|
||||||
def determine_social_from_url_internally(
|
def determine_social_from_url_internally(
|
||||||
url: str,
|
url: str,
|
||||||
|
@ -414,7 +419,11 @@ def determine_social_from_url_internally(
|
||||||
for social_site_url_regex, social_site_id in REGEXES:
|
for social_site_url_regex, social_site_id in REGEXES:
|
||||||
if m := re.fullmatch(social_site_url_regex, url, re.I):
|
if m := re.fullmatch(social_site_url_regex, url, re.I):
|
||||||
groups = m.groups()
|
groups = m.groups()
|
||||||
return (social_site_id, groups[0] if len(groups) > 0 else None)
|
username_or_id = groups[0] if len(groups) > 0 else None
|
||||||
|
if username_or_id in DISALLOWED_IDENTIFIERS:
|
||||||
|
continue
|
||||||
|
return (social_site_id, username_or_id)
|
||||||
|
del social_site_url_regex, social_site_id, m
|
||||||
|
|
||||||
# Mastodon
|
# Mastodon
|
||||||
for mastodon_hostname in WELL_KNOWN_MASTODON_INSTANCES:
|
for mastodon_hostname in WELL_KNOWN_MASTODON_INSTANCES:
|
||||||
|
|
|
@ -2,6 +2,17 @@ import pytest
|
||||||
|
|
||||||
from socials_util import *
|
from socials_util import *
|
||||||
|
|
||||||
|
PARSABLE_SOCIAL_IDS_COMBINED = [
|
||||||
|
# Tumblr formats
|
||||||
|
('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
('https://tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
('https://tumblr.com/blog/view/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
('https://www.tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
('https://www.tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
('https://www.tumblr.com/blog/view/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
]
|
||||||
|
|
||||||
PARSABLE_SOCIAL_IDS = [
|
PARSABLE_SOCIAL_IDS = [
|
||||||
('http://www.twitter.com/dril', 'dril'),
|
('http://www.twitter.com/dril', 'dril'),
|
||||||
('http://worstdril.tumblr.com/', 'worstdril'),
|
('http://worstdril.tumblr.com/', 'worstdril'),
|
||||||
|
@ -13,7 +24,7 @@ PARSABLE_SOCIAL_IDS = [
|
||||||
('https://www.tiktok.com/@depthsofwikipedia?lang=en', 'depthsofwikipedia'),
|
('https://www.tiktok.com/@depthsofwikipedia?lang=en', 'depthsofwikipedia'),
|
||||||
('https://www.pixiv.net/users/14866303', '14866303'),
|
('https://www.pixiv.net/users/14866303', '14866303'),
|
||||||
('https://www.pixiv.net/member.php?id=109710', '109710'),
|
('https://www.pixiv.net/member.php?id=109710', '109710'),
|
||||||
]
|
] + [(a,c) for (a,b,c) in PARSABLE_SOCIAL_IDS_COMBINED]
|
||||||
|
|
||||||
PARSABLE_SOCIAL_SITE_IDS = [
|
PARSABLE_SOCIAL_SITE_IDS = [
|
||||||
('https://www.deviantart.com/solquiet', SocialSiteId.DEVIANT_ART_ACCOUNT),
|
('https://www.deviantart.com/solquiet', SocialSiteId.DEVIANT_ART_ACCOUNT),
|
||||||
|
@ -25,16 +36,15 @@ PARSABLE_SOCIAL_SITE_IDS = [
|
||||||
('https://www.instagram.com/timkongart/', SocialSiteId.INSTAGRAM_PAGE),
|
('https://www.instagram.com/timkongart/', SocialSiteId.INSTAGRAM_PAGE),
|
||||||
('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE),
|
('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE),
|
||||||
('https://www.instagram.com/itsbettyjiang', SocialSiteId.INSTAGRAM_PAGE),
|
('https://www.instagram.com/itsbettyjiang', SocialSiteId.INSTAGRAM_PAGE),
|
||||||
]
|
] + [(a,b) for (a,b,c) in PARSABLE_SOCIAL_IDS_COMBINED]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('url,expected_social_id', PARSABLE_SOCIAL_IDS)
|
@pytest.mark.parametrize('url,expected_social_id', PARSABLE_SOCIAL_IDS)
|
||||||
def test_parse_social_ids(url, expected_social_id):
|
def test_parse_social_ids(url, expected_social_id):
|
||||||
social_link = determine_social_from_url(url)
|
social_link = determine_social_from_url(url)
|
||||||
assert social_link.social_id == expected_social_id
|
assert social_link.social_id == expected_social_id, url
|
||||||
assert social_link.social_site_id is not None
|
assert social_link.social_site_id is not None, url
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('url,expected_social_site_id', PARSABLE_SOCIAL_SITE_IDS)
|
@pytest.mark.parametrize('url,expected_social_site_id', PARSABLE_SOCIAL_SITE_IDS)
|
||||||
def test_parse_social_site_ids(url, expected_social_site_id):
|
def test_parse_social_site_ids(url, expected_social_site_id):
|
||||||
assert determine_social_from_url(url).social_site_id == expected_social_site_id
|
assert determine_social_from_url(url).social_site_id == expected_social_site_id, url
|
||||||
|
|
Loading…
Reference in New Issue
Block a user