More precise tests, and better support for certain sites.
This commit is contained in:
parent
f4a71899e0
commit
eead04f620
|
@ -113,7 +113,7 @@ class WikidataInfo:
|
||||||
nickname_version_of: SocialSiteId | None = None
|
nickname_version_of: SocialSiteId | None = None
|
||||||
|
|
||||||
|
|
||||||
WIKIDATA_PROPERTIES: dict[SocialSiteId, WikidataInfo] = {
|
WIKIDATA_PROPERTIES: dict[SocialSiteId | int, WikidataInfo] = {
|
||||||
SocialSiteId.EMAIL: WikidataInfo(968, None),
|
SocialSiteId.EMAIL: WikidataInfo(968, None),
|
||||||
SocialSiteId.RSS_FEED: WikidataInfo(1079, None),
|
SocialSiteId.RSS_FEED: WikidataInfo(1079, None),
|
||||||
SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None),
|
SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None),
|
||||||
|
@ -207,7 +207,7 @@ def re_social_path_adv(main_domain: str, *path: str) -> str:
|
||||||
regex_builder: list[str] = [
|
regex_builder: list[str] = [
|
||||||
r'^',
|
r'^',
|
||||||
r'(?:https?:\/\/)?',
|
r'(?:https?:\/\/)?',
|
||||||
r'(?:www\.)?',
|
r'(?:www\.|m\.|mobile\.)?',
|
||||||
re.escape(main_domain),
|
re.escape(main_domain),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -228,8 +228,8 @@ REDDIT_SUBREDDIT_URL = r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/r\/([\w-]+)\/?$
|
||||||
REDDIT_USER_URL = (
|
REDDIT_USER_URL = (
|
||||||
r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/user\/([\w-]+)(?:|\/submitted)\/?$'
|
r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/user\/([\w-]+)(?:|\/submitted)\/?$'
|
||||||
)
|
)
|
||||||
TWITTER_HANDLE_URL_1 = re_social_path('twitter.com')
|
TWITTER_HANDLE_URL_1 = re_social_path_adv('twitter.com', RE_ID, RE_ANY_SUBPATH)
|
||||||
TWITTER_HANDLE_URL_2 = re_social_path('x.com')
|
TWITTER_HANDLE_URL_2 = re_social_path_adv('x.com', RE_ID, RE_ANY_SUBPATH)
|
||||||
LINKTREE_PAGE_URL = re_social_path('linktr.ee')
|
LINKTREE_PAGE_URL = re_social_path('linktr.ee')
|
||||||
TWITCH_STREAM_URL = re_social_path('twitch.tv')
|
TWITCH_STREAM_URL = re_social_path('twitch.tv')
|
||||||
WIKIDATA_ITEM_URL = re_social_path_adv('wikidata.org', 'wiki', RE_ID)
|
WIKIDATA_ITEM_URL = re_social_path_adv('wikidata.org', 'wiki', RE_ID)
|
||||||
|
@ -242,7 +242,8 @@ TUMBLR_PAGE_URL_3 = re_social_path('tumblr.com/blog')
|
||||||
TUMBLR_PAGE_URL_4 = re_social_path('tumblr.com/blog/view')
|
TUMBLR_PAGE_URL_4 = re_social_path('tumblr.com/blog/view')
|
||||||
INSTAGRAM_URL = re_social_path('instagram.com')
|
INSTAGRAM_URL = re_social_path('instagram.com')
|
||||||
PATREON_URL = re_social_path_adv('patreon.com', RE_ID, RE_ANY_SUBPATH)
|
PATREON_URL = re_social_path_adv('patreon.com', RE_ID, RE_ANY_SUBPATH)
|
||||||
ARTSTATION_URL = re_social_path('artstation.com')
|
ARTSTATION_URL_1 = re_social_path_adv('artstation.com', RE_ID, RE_ANY_SUBPATH)
|
||||||
|
ARTSTATION_URL_2 = re_social_subdomain('artstation.com')
|
||||||
INPRNT_URL = re_social_path_adv('inprnt.com', 'gallery', RE_ID)
|
INPRNT_URL = re_social_path_adv('inprnt.com', 'gallery', RE_ID)
|
||||||
FACEBOOK_PAGE_URL = re_social_path('facebook.com')
|
FACEBOOK_PAGE_URL = re_social_path('facebook.com')
|
||||||
SUBSTACK_PREFIX_URL = re_social_subdomain('substack.com')
|
SUBSTACK_PREFIX_URL = re_social_subdomain('substack.com')
|
||||||
|
@ -258,7 +259,9 @@ PIXIV_USER_NICKNAME_URL = re_social_path_adv('pixiv.net', 'stacc', RE_ID)
|
||||||
PIXIV_SKETCH_USER_NICKNAME_URL = re_social_path_adv('sketch.pixiv.net', RE_ID)
|
PIXIV_SKETCH_USER_NICKNAME_URL = re_social_path_adv('sketch.pixiv.net', RE_ID)
|
||||||
|
|
||||||
URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co')
|
URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co')
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1 = re_social_path_adv('youtube.com', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1 = re_social_path_adv(
|
||||||
|
'youtube.com', RE_ID, RE_ANY_SUBPATH
|
||||||
|
)
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2 = re_social_path_adv('youtube.com', 'c', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2 = re_social_path_adv('youtube.com', 'c', RE_ID)
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_ID = re_social_path_adv('youtube.com', 'channel', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_ID = re_social_path_adv('youtube.com', 'channel', RE_ID)
|
||||||
URL_PARSE_VIMEO_CHANNEL = re_social_path_adv('vimeo.com', RE_ID)
|
URL_PARSE_VIMEO_CHANNEL = re_social_path_adv('vimeo.com', RE_ID)
|
||||||
|
@ -281,7 +284,7 @@ URL_PARSE_DANBOORU_ARTIST = re_social_path_adv('danbooru.donmai.us', 'artists',
|
||||||
URL_PARSE_BANDCAMP = re_social_subdomain('bandcamp.com')
|
URL_PARSE_BANDCAMP = re_social_subdomain('bandcamp.com')
|
||||||
URL_PARSE_BLUESKY = re_social_path_adv('bsky.app', 'profile', RE_ID)
|
URL_PARSE_BLUESKY = re_social_path_adv('bsky.app', 'profile', RE_ID)
|
||||||
|
|
||||||
REGEXES: list[tuple[str, SocialSiteId]] = [
|
REGEXES: list[tuple[str, object]] = [
|
||||||
# Reddit
|
# Reddit
|
||||||
(REDDIT_SUBREDDIT_URL, SocialSiteId.REDDIT_SUBREDDIT),
|
(REDDIT_SUBREDDIT_URL, SocialSiteId.REDDIT_SUBREDDIT),
|
||||||
(REDDIT_USER_URL, SocialSiteId.REDDIT_USER),
|
(REDDIT_USER_URL, SocialSiteId.REDDIT_USER),
|
||||||
|
@ -316,7 +319,8 @@ REGEXES: list[tuple[str, SocialSiteId]] = [
|
||||||
# Patreon
|
# Patreon
|
||||||
(PATREON_URL, SocialSiteId.PATREON_PAGE),
|
(PATREON_URL, SocialSiteId.PATREON_PAGE),
|
||||||
# Artstation
|
# Artstation
|
||||||
(ARTSTATION_URL, SocialSiteId.ARTSTATION_PAGE),
|
(ARTSTATION_URL_1, SocialSiteId.ARTSTATION_PAGE),
|
||||||
|
(ARTSTATION_URL_2, SocialSiteId.ARTSTATION_PAGE),
|
||||||
# Inprnt
|
# Inprnt
|
||||||
(INPRNT_URL, SocialSiteId.INPRNT_PAGE),
|
(INPRNT_URL, SocialSiteId.INPRNT_PAGE),
|
||||||
# Email
|
# Email
|
||||||
|
@ -371,7 +375,10 @@ REGEXES: list[tuple[str, SocialSiteId]] = [
|
||||||
SocialSiteId.STEAM_APPLICATION_ID,
|
SocialSiteId.STEAM_APPLICATION_ID,
|
||||||
),
|
),
|
||||||
# Github
|
# Github
|
||||||
(re_social_path_adv('github.com', RE_DUAL_ID), SocialSiteId.GITHUB_REPOSITORY),
|
(
|
||||||
|
re_social_path_adv('github.com', RE_DUAL_ID, RE_ANY_SUBPATH),
|
||||||
|
SocialSiteId.GITHUB_REPOSITORY,
|
||||||
|
),
|
||||||
# Plurk
|
# Plurk
|
||||||
(re_social_path_adv('plurk.com', RE_ID), SocialSiteId.PLURK),
|
(re_social_path_adv('plurk.com', RE_ID), SocialSiteId.PLURK),
|
||||||
# Linked in
|
# Linked in
|
||||||
|
|
|
@ -2,7 +2,7 @@ import pytest
|
||||||
|
|
||||||
from socials_util import *
|
from socials_util import *
|
||||||
|
|
||||||
PARSABLE_SOCIAL_IDS_COMBINED = [
|
PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [
|
||||||
# Tumblr formats
|
# Tumblr formats
|
||||||
('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
|
('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
@ -19,7 +19,8 @@ PARSABLE_SOCIAL_IDS_COMBINED = [
|
||||||
SocialSiteId.TUMBLR,
|
SocialSiteId.TUMBLR,
|
||||||
'triviallytrue',
|
'triviallytrue',
|
||||||
),
|
),
|
||||||
|
('http://worstdril.tumblr.com/', SocialSiteId.TUMBLR, 'worstdril'),
|
||||||
|
('https://deep-dark-fears.tumblr.com', SocialSiteId.TUMBLR, 'deep-dark-fears'),
|
||||||
# Cohost formats
|
# Cohost formats
|
||||||
(
|
(
|
||||||
'https://cohost.org/andrewelmore?page=0',
|
'https://cohost.org/andrewelmore?page=0',
|
||||||
|
@ -31,58 +32,161 @@ PARSABLE_SOCIAL_IDS_COMBINED = [
|
||||||
SocialSiteId.COHOST_PROFILE,
|
SocialSiteId.COHOST_PROFILE,
|
||||||
'andrewelmore',
|
'andrewelmore',
|
||||||
),
|
),
|
||||||
|
|
||||||
# Reddit formats
|
# Reddit formats
|
||||||
('https://old.reddit.com/user/Harpsibored/submitted/',
|
(
|
||||||
SocialSiteId.REDDIT_USER ,
|
'https://old.reddit.com/user/Harpsibored/submitted/',
|
||||||
|
SocialSiteId.REDDIT_USER,
|
||||||
'Harpsibored',
|
'Harpsibored',
|
||||||
),
|
),
|
||||||
('https://old.reddit.com/user/Harpsibored/submitted',
|
(
|
||||||
SocialSiteId.REDDIT_USER ,
|
'https://old.reddit.com/user/Harpsibored/submitted',
|
||||||
|
SocialSiteId.REDDIT_USER,
|
||||||
'Harpsibored',
|
'Harpsibored',
|
||||||
),
|
),
|
||||||
('https://old.reddit.com/user/Harpsibored/',
|
(
|
||||||
SocialSiteId.REDDIT_USER ,
|
'https://old.reddit.com/user/Harpsibored/',
|
||||||
|
SocialSiteId.REDDIT_USER,
|
||||||
'Harpsibored',
|
'Harpsibored',
|
||||||
),
|
),
|
||||||
('https://old.reddit.com/user/Harpsibored',
|
(
|
||||||
SocialSiteId.REDDIT_USER ,
|
'https://old.reddit.com/user/Harpsibored',
|
||||||
|
SocialSiteId.REDDIT_USER,
|
||||||
'Harpsibored',
|
'Harpsibored',
|
||||||
),
|
),
|
||||||
|
# Ko-fi formats
|
||||||
|
('https://ko-fi.com/A627LI1/shop/', SocialSiteId.KO_FI, 'A627LI1'),
|
||||||
|
('https://ko-fi.com/A627LI1/shop', SocialSiteId.KO_FI, 'A627LI1'),
|
||||||
|
('https://ko-fi.com/A627LI1/', SocialSiteId.KO_FI, 'A627LI1'),
|
||||||
|
('https://ko-fi.com/A627LI1', SocialSiteId.KO_FI, 'A627LI1'),
|
||||||
|
# Twitter formats
|
||||||
|
('http://twitter.com/dril', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
('http://www.twitter.com/dril', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
('http://www.x.com/dril', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
('http://x.com/dril', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
('http://twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
('http://www.twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
('http://www.x.com/dril/media', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
('http://x.com/dril/media', SocialSiteId.TWITTER, 'dril'),
|
||||||
|
# Wikidata formats
|
||||||
|
('https://wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'),
|
||||||
|
('https://m.wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'),
|
||||||
|
# YouTube formats
|
||||||
|
(
|
||||||
|
'https://youtube.com/@WheelieYellow',
|
||||||
|
SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
|
||||||
|
'WheelieYellow',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://youtube.com/@WheelieYellow/',
|
||||||
|
SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
|
||||||
|
'WheelieYellow',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/@WheelieYellow',
|
||||||
|
SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
|
||||||
|
'WheelieYellow',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/@WheelieYellow/',
|
||||||
|
SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
|
||||||
|
'WheelieYellow',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/@WheelieYellow/featured',
|
||||||
|
SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
|
||||||
|
'WheelieYellow',
|
||||||
|
),
|
||||||
|
# GitHub
|
||||||
|
('https://github.com/love2d/love', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'),
|
||||||
|
('https://github.com/love2d/love/', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'),
|
||||||
|
(
|
||||||
|
'https://github.com/love2d/love/releases',
|
||||||
|
SocialSiteId.GITHUB_REPOSITORY,
|
||||||
|
'love2d/love',
|
||||||
|
),
|
||||||
|
# ArtStation
|
||||||
|
('https://toraji.artstation.com', SocialSiteId.ARTSTATION_PAGE, 'toraji'),
|
||||||
|
('https://www.artstation.com/toraji', SocialSiteId.ARTSTATION_PAGE, 'toraji'),
|
||||||
|
(
|
||||||
|
'https://www.artstation.com/toraji/profile',
|
||||||
|
SocialSiteId.ARTSTATION_PAGE,
|
||||||
|
'toraji',
|
||||||
|
),
|
||||||
|
# Tiktok
|
||||||
|
(
|
||||||
|
'https://tiktok.com/@depthsofwikipedia',
|
||||||
|
SocialSiteId.TIKTOK_USER,
|
||||||
|
'depthsofwikipedia',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.tiktok.com/@depthsofwikipedia',
|
||||||
|
SocialSiteId.TIKTOK_USER,
|
||||||
|
'depthsofwikipedia',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.tiktok.com/@depthsofwikipedia?lang=en',
|
||||||
|
SocialSiteId.TIKTOK_USER,
|
||||||
|
'depthsofwikipedia',
|
||||||
|
),
|
||||||
|
# Instagram
|
||||||
|
(
|
||||||
|
'https://instagram.com/_richardparry_',
|
||||||
|
SocialSiteId.INSTAGRAM_PAGE,
|
||||||
|
'_richardparry_',
|
||||||
|
),
|
||||||
|
('https://instagram.com/j_kmor/', SocialSiteId.INSTAGRAM_PAGE, 'j_kmor'),
|
||||||
|
(
|
||||||
|
'https://instagram.com/cullensartbox/',
|
||||||
|
SocialSiteId.INSTAGRAM_PAGE,
|
||||||
|
'cullensartbox',
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://www.instagram.com/timkongart/',
|
||||||
|
SocialSiteId.INSTAGRAM_PAGE,
|
||||||
|
'timkongart',
|
||||||
|
),
|
||||||
|
('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE, 'kcn.wu'),
|
||||||
|
(
|
||||||
|
'https://www.instagram.com/itsbettyjiang',
|
||||||
|
SocialSiteId.INSTAGRAM_PAGE,
|
||||||
|
'itsbettyjiang',
|
||||||
|
),
|
||||||
|
# Facebook
|
||||||
|
(
|
||||||
|
'https://www.facebook.com/fredagscafeen.dk/',
|
||||||
|
SocialSiteId.FACEBOOK_PAGE,
|
||||||
|
'fredagscafeen.dk',
|
||||||
|
),
|
||||||
|
# Pixiv
|
||||||
|
('https://www.pixiv.net/users/14866303', SocialSiteId.PIXIV_USER_ID, '14866303'),
|
||||||
|
(
|
||||||
|
'https://www.pixiv.net/member.php?id=109710',
|
||||||
|
SocialSiteId.PIXIV_USER_ID,
|
||||||
|
'109710',
|
||||||
|
),
|
||||||
|
# Etsy
|
||||||
|
(
|
||||||
|
'https://www.etsy.com/shop/aleksiremesart',
|
||||||
|
SocialSiteId.ETSY_SHOP,
|
||||||
|
'aleksiremesart',
|
||||||
|
),
|
||||||
|
# Deviantart
|
||||||
|
(
|
||||||
|
'https://www.deviantart.com/solquiet',
|
||||||
|
SocialSiteId.DEVIANT_ART_ACCOUNT,
|
||||||
|
'solquiet',
|
||||||
|
),
|
||||||
|
('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'),
|
||||||
]
|
]
|
||||||
|
|
||||||
PARSABLE_SOCIAL_IDS = [
|
|
||||||
('http://www.twitter.com/dril', 'dril'),
|
|
||||||
('http://worstdril.tumblr.com/', 'worstdril'),
|
|
||||||
('https://deep-dark-fears.tumblr.com', 'deep-dark-fears'),
|
|
||||||
('https://www.etsy.com/shop/aleksiremesart', 'aleksiremesart'),
|
|
||||||
('https://ko-fi.com/A627LI1/shop', 'A627LI1'),
|
|
||||||
('https://ko-fi.com/A627LI1/', 'A627LI1'),
|
|
||||||
('https://www.facebook.com/fredagscafeen.dk/', 'fredagscafeen.dk'),
|
|
||||||
('https://www.tiktok.com/@depthsofwikipedia?lang=en', 'depthsofwikipedia'),
|
|
||||||
('https://www.pixiv.net/users/14866303', '14866303'),
|
|
||||||
('https://www.pixiv.net/member.php?id=109710', '109710'),
|
|
||||||
] + [(a, c) for (a, b, c) in PARSABLE_SOCIAL_IDS_COMBINED]
|
|
||||||
|
|
||||||
PARSABLE_SOCIAL_SITE_IDS = [
|
@pytest.mark.parametrize(
|
||||||
('https://www.deviantart.com/solquiet', SocialSiteId.DEVIANT_ART_ACCOUNT),
|
'url,expected_social_site_id,expected_social_id', PARSABLE_SOCIAL_IDS_COMBINED
|
||||||
('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT),
|
)
|
||||||
('https://instagram.com/_richardparry_', SocialSiteId.INSTAGRAM_PAGE),
|
def test_parse_social_ids(url, expected_social_site_id, expected_social_id):
|
||||||
('https://instagram.com/j_kmor/', SocialSiteId.INSTAGRAM_PAGE),
|
social_link: SocialLink | None = determine_social_from_url(url)
|
||||||
('https://instagram.com/cullensartbox/', SocialSiteId.INSTAGRAM_PAGE),
|
assert social_link is not None, url
|
||||||
('https://www.instagram.com/timkongart/', SocialSiteId.INSTAGRAM_PAGE),
|
assert (social_link.social_id, social_link.social_site_id) == (
|
||||||
('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE),
|
expected_social_id,
|
||||||
('https://www.instagram.com/itsbettyjiang', SocialSiteId.INSTAGRAM_PAGE),
|
expected_social_site_id,
|
||||||
] + [(a, b) for (a, b, c) in PARSABLE_SOCIAL_IDS_COMBINED]
|
), url
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('url,expected_social_id', PARSABLE_SOCIAL_IDS)
|
|
||||||
def test_parse_social_ids(url, expected_social_id):
|
|
||||||
social_link = determine_social_from_url(url)
|
|
||||||
assert social_link.social_id == expected_social_id, url
|
|
||||||
assert social_link.social_site_id is not None, url
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('url,expected_social_site_id', PARSABLE_SOCIAL_SITE_IDS)
|
|
||||||
def test_parse_social_site_ids(url, expected_social_site_id):
|
|
||||||
assert determine_social_from_url(url).social_site_id == expected_social_site_id, url
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user