diff --git a/socials_util/__init__.py b/socials_util/__init__.py index 52f92e8..c15b7ac 100644 --- a/socials_util/__init__.py +++ b/socials_util/__init__.py @@ -113,7 +113,7 @@ class WikidataInfo: nickname_version_of: SocialSiteId | None = None -WIKIDATA_PROPERTIES: dict[SocialSiteId, WikidataInfo] = { +WIKIDATA_PROPERTIES: dict[SocialSiteId | int, WikidataInfo] = { SocialSiteId.EMAIL: WikidataInfo(968, None), SocialSiteId.RSS_FEED: WikidataInfo(1079, None), SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None), @@ -207,7 +207,7 @@ def re_social_path_adv(main_domain: str, *path: str) -> str: regex_builder: list[str] = [ r'^', r'(?:https?:\/\/)?', - r'(?:www\.)?', + r'(?:www\.|m\.|mobile\.)?', re.escape(main_domain), ] @@ -228,8 +228,8 @@ REDDIT_SUBREDDIT_URL = r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/r\/([\w-]+)\/?$ REDDIT_USER_URL = ( r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/user\/([\w-]+)(?:|\/submitted)\/?$' ) -TWITTER_HANDLE_URL_1 = re_social_path('twitter.com') -TWITTER_HANDLE_URL_2 = re_social_path('x.com') +TWITTER_HANDLE_URL_1 = re_social_path_adv('twitter.com', RE_ID, RE_ANY_SUBPATH) +TWITTER_HANDLE_URL_2 = re_social_path_adv('x.com', RE_ID, RE_ANY_SUBPATH) LINKTREE_PAGE_URL = re_social_path('linktr.ee') TWITCH_STREAM_URL = re_social_path('twitch.tv') WIKIDATA_ITEM_URL = re_social_path_adv('wikidata.org', 'wiki', RE_ID) @@ -242,7 +242,8 @@ TUMBLR_PAGE_URL_3 = re_social_path('tumblr.com/blog') TUMBLR_PAGE_URL_4 = re_social_path('tumblr.com/blog/view') INSTAGRAM_URL = re_social_path('instagram.com') PATREON_URL = re_social_path_adv('patreon.com', RE_ID, RE_ANY_SUBPATH) -ARTSTATION_URL = re_social_path('artstation.com') +ARTSTATION_URL_1 = re_social_path_adv('artstation.com', RE_ID, RE_ANY_SUBPATH) +ARTSTATION_URL_2 = re_social_subdomain('artstation.com') INPRNT_URL = re_social_path_adv('inprnt.com', 'gallery', RE_ID) FACEBOOK_PAGE_URL = re_social_path('facebook.com') SUBSTACK_PREFIX_URL = re_social_subdomain('substack.com') @@ -258,7 +259,9 @@ PIXIV_USER_NICKNAME_URL = re_social_path_adv('pixiv.net', 'stacc', RE_ID) PIXIV_SKETCH_USER_NICKNAME_URL = re_social_path_adv('sketch.pixiv.net', RE_ID) URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co') -URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1 = re_social_path_adv('youtube.com', RE_ID) +URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1 = re_social_path_adv( + 'youtube.com', RE_ID, RE_ANY_SUBPATH +) URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2 = re_social_path_adv('youtube.com', 'c', RE_ID) URL_PARSE_YOUTUBE_CHANNEL_ID = re_social_path_adv('youtube.com', 'channel', RE_ID) URL_PARSE_VIMEO_CHANNEL = re_social_path_adv('vimeo.com', RE_ID) @@ -281,7 +284,7 @@ URL_PARSE_DANBOORU_ARTIST = re_social_path_adv('danbooru.donmai.us', 'artists', URL_PARSE_BANDCAMP = re_social_subdomain('bandcamp.com') URL_PARSE_BLUESKY = re_social_path_adv('bsky.app', 'profile', RE_ID) -REGEXES: list[tuple[str, SocialSiteId]] = [ +REGEXES: list[tuple[str, object]] = [ # Reddit (REDDIT_SUBREDDIT_URL, SocialSiteId.REDDIT_SUBREDDIT), (REDDIT_USER_URL, SocialSiteId.REDDIT_USER), @@ -316,7 +319,8 @@ REGEXES: list[tuple[str, SocialSiteId]] = [ # Patreon (PATREON_URL, SocialSiteId.PATREON_PAGE), # Artstation - (ARTSTATION_URL, SocialSiteId.ARTSTATION_PAGE), + (ARTSTATION_URL_1, SocialSiteId.ARTSTATION_PAGE), + (ARTSTATION_URL_2, SocialSiteId.ARTSTATION_PAGE), # Inprnt (INPRNT_URL, SocialSiteId.INPRNT_PAGE), # Email @@ -371,7 +375,10 @@ REGEXES: list[tuple[str, SocialSiteId]] = [ SocialSiteId.STEAM_APPLICATION_ID, ), # Github - (re_social_path_adv('github.com', RE_DUAL_ID), SocialSiteId.GITHUB_REPOSITORY), + ( + re_social_path_adv('github.com', RE_DUAL_ID, RE_ANY_SUBPATH), + SocialSiteId.GITHUB_REPOSITORY, + ), # Plurk (re_social_path_adv('plurk.com', RE_ID), SocialSiteId.PLURK), # Linked in diff --git a/test/test_parsing.py b/test/test_parsing.py index d63b939..6cd3f74 100644 --- a/test/test_parsing.py +++ b/test/test_parsing.py @@ -2,7 +2,7 @@ import pytest from socials_util import * -PARSABLE_SOCIAL_IDS_COMBINED = [ +PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [ # Tumblr formats ('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'), ('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'), @@ -19,7 +19,8 @@ PARSABLE_SOCIAL_IDS_COMBINED = [ SocialSiteId.TUMBLR, 'triviallytrue', ), - + ('http://worstdril.tumblr.com/', SocialSiteId.TUMBLR, 'worstdril'), + ('https://deep-dark-fears.tumblr.com', SocialSiteId.TUMBLR, 'deep-dark-fears'), # Cohost formats ( 'https://cohost.org/andrewelmore?page=0', @@ -31,58 +32,161 @@ PARSABLE_SOCIAL_IDS_COMBINED = [ SocialSiteId.COHOST_PROFILE, 'andrewelmore', ), - # Reddit formats - ('https://old.reddit.com/user/Harpsibored/submitted/', - SocialSiteId.REDDIT_USER , + ( + 'https://old.reddit.com/user/Harpsibored/submitted/', + SocialSiteId.REDDIT_USER, 'Harpsibored', ), - ('https://old.reddit.com/user/Harpsibored/submitted', - SocialSiteId.REDDIT_USER , + ( + 'https://old.reddit.com/user/Harpsibored/submitted', + SocialSiteId.REDDIT_USER, 'Harpsibored', ), - ('https://old.reddit.com/user/Harpsibored/', - SocialSiteId.REDDIT_USER , + ( + 'https://old.reddit.com/user/Harpsibored/', + SocialSiteId.REDDIT_USER, 'Harpsibored', ), - ('https://old.reddit.com/user/Harpsibored', - SocialSiteId.REDDIT_USER , + ( + 'https://old.reddit.com/user/Harpsibored', + SocialSiteId.REDDIT_USER, 'Harpsibored', ), + # Ko-fi formats + ('https://ko-fi.com/A627LI1/shop/', SocialSiteId.KO_FI, 'A627LI1'), + ('https://ko-fi.com/A627LI1/shop', SocialSiteId.KO_FI, 'A627LI1'), + ('https://ko-fi.com/A627LI1/', SocialSiteId.KO_FI, 'A627LI1'), + ('https://ko-fi.com/A627LI1', SocialSiteId.KO_FI, 'A627LI1'), + # Twitter formats + ('http://twitter.com/dril', SocialSiteId.TWITTER, 'dril'), + ('http://www.twitter.com/dril', SocialSiteId.TWITTER, 'dril'), + ('http://www.x.com/dril', SocialSiteId.TWITTER, 'dril'), + ('http://x.com/dril', SocialSiteId.TWITTER, 'dril'), + ('http://twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'), + ('http://www.twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'), + ('http://www.x.com/dril/media', SocialSiteId.TWITTER, 'dril'), + ('http://x.com/dril/media', SocialSiteId.TWITTER, 'dril'), + # Wikidata formats + ('https://wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'), + ('https://m.wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'), + # YouTube formats + ( + 'https://youtube.com/@WheelieYellow', + SocialSiteId.YOUTUBE_CHANNEL_HANDLE, + 'WheelieYellow', + ), + ( + 'https://youtube.com/@WheelieYellow/', + SocialSiteId.YOUTUBE_CHANNEL_HANDLE, + 'WheelieYellow', + ), + ( + 'https://www.youtube.com/@WheelieYellow', + SocialSiteId.YOUTUBE_CHANNEL_HANDLE, + 'WheelieYellow', + ), + ( + 'https://www.youtube.com/@WheelieYellow/', + SocialSiteId.YOUTUBE_CHANNEL_HANDLE, + 'WheelieYellow', + ), + ( + 'https://www.youtube.com/@WheelieYellow/featured', + SocialSiteId.YOUTUBE_CHANNEL_HANDLE, + 'WheelieYellow', + ), + # GitHub + ('https://github.com/love2d/love', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'), + ('https://github.com/love2d/love/', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'), + ( + 'https://github.com/love2d/love/releases', + SocialSiteId.GITHUB_REPOSITORY, + 'love2d/love', + ), + # ArtStation + ('https://toraji.artstation.com', SocialSiteId.ARTSTATION_PAGE, 'toraji'), + ('https://www.artstation.com/toraji', SocialSiteId.ARTSTATION_PAGE, 'toraji'), + ( + 'https://www.artstation.com/toraji/profile', + SocialSiteId.ARTSTATION_PAGE, + 'toraji', + ), + # Tiktok + ( + 'https://tiktok.com/@depthsofwikipedia', + SocialSiteId.TIKTOK_USER, + 'depthsofwikipedia', + ), + ( + 'https://www.tiktok.com/@depthsofwikipedia', + SocialSiteId.TIKTOK_USER, + 'depthsofwikipedia', + ), + ( + 'https://www.tiktok.com/@depthsofwikipedia?lang=en', + SocialSiteId.TIKTOK_USER, + 'depthsofwikipedia', + ), + # Instagram + ( + 'https://instagram.com/_richardparry_', + SocialSiteId.INSTAGRAM_PAGE, + '_richardparry_', + ), + ('https://instagram.com/j_kmor/', SocialSiteId.INSTAGRAM_PAGE, 'j_kmor'), + ( + 'https://instagram.com/cullensartbox/', + SocialSiteId.INSTAGRAM_PAGE, + 'cullensartbox', + ), + ( + 'https://www.instagram.com/timkongart/', + SocialSiteId.INSTAGRAM_PAGE, + 'timkongart', + ), + ('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE, 'kcn.wu'), + ( + 'https://www.instagram.com/itsbettyjiang', + SocialSiteId.INSTAGRAM_PAGE, + 'itsbettyjiang', + ), + # Facebook + ( + 'https://www.facebook.com/fredagscafeen.dk/', + SocialSiteId.FACEBOOK_PAGE, + 'fredagscafeen.dk', + ), + # Pixiv + ('https://www.pixiv.net/users/14866303', SocialSiteId.PIXIV_USER_ID, '14866303'), + ( + 'https://www.pixiv.net/member.php?id=109710', + SocialSiteId.PIXIV_USER_ID, + '109710', + ), + # Etsy + ( + 'https://www.etsy.com/shop/aleksiremesart', + SocialSiteId.ETSY_SHOP, + 'aleksiremesart', + ), + # Deviantart + ( + 'https://www.deviantart.com/solquiet', + SocialSiteId.DEVIANT_ART_ACCOUNT, + 'solquiet', + ), + ('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'), ] -PARSABLE_SOCIAL_IDS = [ - ('http://www.twitter.com/dril', 'dril'), - ('http://worstdril.tumblr.com/', 'worstdril'), - ('https://deep-dark-fears.tumblr.com', 'deep-dark-fears'), - ('https://www.etsy.com/shop/aleksiremesart', 'aleksiremesart'), - ('https://ko-fi.com/A627LI1/shop', 'A627LI1'), - ('https://ko-fi.com/A627LI1/', 'A627LI1'), - ('https://www.facebook.com/fredagscafeen.dk/', 'fredagscafeen.dk'), - ('https://www.tiktok.com/@depthsofwikipedia?lang=en', 'depthsofwikipedia'), - ('https://www.pixiv.net/users/14866303', '14866303'), - ('https://www.pixiv.net/member.php?id=109710', '109710'), -] + [(a, c) for (a, b, c) in PARSABLE_SOCIAL_IDS_COMBINED] -PARSABLE_SOCIAL_SITE_IDS = [ - ('https://www.deviantart.com/solquiet', SocialSiteId.DEVIANT_ART_ACCOUNT), - ('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT), - ('https://instagram.com/_richardparry_', SocialSiteId.INSTAGRAM_PAGE), - ('https://instagram.com/j_kmor/', SocialSiteId.INSTAGRAM_PAGE), - ('https://instagram.com/cullensartbox/', SocialSiteId.INSTAGRAM_PAGE), - ('https://www.instagram.com/timkongart/', SocialSiteId.INSTAGRAM_PAGE), - ('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE), - ('https://www.instagram.com/itsbettyjiang', SocialSiteId.INSTAGRAM_PAGE), -] + [(a, b) for (a, b, c) in PARSABLE_SOCIAL_IDS_COMBINED] - - -@pytest.mark.parametrize('url,expected_social_id', PARSABLE_SOCIAL_IDS) -def test_parse_social_ids(url, expected_social_id): - social_link = determine_social_from_url(url) - assert social_link.social_id == expected_social_id, url - assert social_link.social_site_id is not None, url - - -@pytest.mark.parametrize('url,expected_social_site_id', PARSABLE_SOCIAL_SITE_IDS) -def test_parse_social_site_ids(url, expected_social_site_id): - assert determine_social_from_url(url).social_site_id == expected_social_site_id, url +@pytest.mark.parametrize( + 'url,expected_social_site_id,expected_social_id', PARSABLE_SOCIAL_IDS_COMBINED +) +def test_parse_social_ids(url, expected_social_site_id, expected_social_id): + social_link: SocialLink | None = determine_social_from_url(url) + assert social_link is not None, url + assert (social_link.social_id, social_link.social_site_id) == ( + expected_social_id, + expected_social_site_id, + ), url