From ccdfe635d1b13a4ceec0070abd3a19dcd619cd70 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Mon, 6 Mar 2023 23:34:30 +0100 Subject: [PATCH] Additional sites --- __init__.py | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/__init__.py b/__init__.py index bc76607..587df60 100644 --- a/__init__.py +++ b/__init__.py @@ -7,7 +7,10 @@ import re import urllib.parse class SocialSiteId(Enum): - REDDIT = 1 + REDDIT = 1 # Should have been named REDDIT_SUBREDDIT + REDDIT_USER = 22 + REDDIT_SUBREDDIT = REDDIT + TWITTER = 2 RSS_FEED = 3 PAGE_WATCH = 4 @@ -25,6 +28,9 @@ class SocialSiteId(Enum): EMAIL = 16 JSON_LD = 17 # Similar to PAGE_WATCH, but focused on embedded microdata SUBSTACK = 18 + ETSY_SHOP = 19 + KO_FI = 20 + BEHANCE_PAGE = 21 def wikidata_property(self, client): return client.get(WIKIDATA_PROPERTIES[self]) @@ -41,28 +47,30 @@ WIKIDATA_PROPERTIES = { SocialSiteId.FACEBOOK_PAGE: 2013, SocialSiteId.INSTAGRAM_PAGE: 2003, SocialSiteId.LINKTREE_PAGE: 11079, - SocialSiteId.REDDIT: 3984, + SocialSiteId.REDDIT_SUBREDDIT: 3984, + SocialSiteId.REDDIT_USER: 4265, SocialSiteId.RSS_FEED: 1019, SocialSiteId.SONGKICK_ARTIST: 3478, SocialSiteId.TWITCH: 5797, SocialSiteId.TWITTER: 2002, SocialSiteId.WIKIDATA: 43649390, - #SocialSiteId.TUMBLR: 2000 + 9, + SocialSiteId.TUMBLR: 3943, #SocialSiteId.MASTODON_PAGE: 2000 + 10, #SocialSiteId.PATREON_PAGE: 2000 + 12, #SocialSiteId.ARTSTATION_PAGE: 2000 + 13, #SocialSiteId.INPRNT_PAGE: 2000 + 14, } -REDDIT_SUBSCRIPTION_URL = r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/r\/(\w+)\/?$' +REDDIT_SUBREDDIT_URL = r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/r\/(\w+)\/?$' +REDDIT_USER_URL = r'^(?:https?:\/\/)?(?:old\.)?reddit\.com\/user\/(\w+)(?:|\/submitted)\/?$' TWITTER_HANDLE_URL = r'^(?:https?:\/\/)?(?:www\.)?twitter\.com\/(\w+)\/?$' LINKTREE_PAGE_URL = r'^(?:https?:\/\/)?(?:www\.)?linktr\.ee\/(\w+)\/?$' TWITCH_STREAM_URL = r'^(?:https?:\/\/)?(?:www\.)?twitch\.tv\/(\w+)\/?$' WIKIDATA_ITEM_URL = r'^(?:https?:\/\/)?(?:www\.)?wikidata\.org\/wiki\/(\w+)\/?$' SONGKICK_ARTIST_URL = r'^(?:https?:\/\/)?(?:www\.)?songkick\.com\/artists\/(\d+)([\w-]*)\/?$' TUMBLR_PAGE_URL = r'^(?:https?:\/\/)?(?:www\.)?tumblr\.com\/([\w-]+)(?:\/|\/rss)?\/?$' -TUMBLR_PAGE_URL_2 = r'^(?:https?:\/\/)?(\w+)\.tumblr\.com\/?$' +TUMBLR_PAGE_URL_2 = r'^(?:https?:\/\/)?([\w-]+)\.tumblr\.com\/?$' INSTAGRAM_URL = r'^(?:https?:\/\/)?(?:www\.)?instagram\.com\/([\w_.-]+)\/?$' PATREON_URL = r'^(?:https?:\/\/)?(?:www\.)?patreon\.com\/([\w-]+)\/?$' ARTSTATION_URL = r'^(?:https?:\/\/)?(?:www\.)?artstation\.com\/([\w-]+)\/?$' @@ -70,10 +78,14 @@ INPRNT_URL = r'^(?:https?:\/\/)?(?:www\.)?inprnt\.com\/gallery\/([\w-]+)\/?$' MAILTO_URL = r'^mailto:(?:[\w._.]+@[\w._.]+)$' FACEBOOK_PAGE_URL = r'^(?:https?:\/\/)?(?:www\.)?facebook\.com\/([\w-]+)\/?$' SUBSTACK_PREFIX_URL = r'^(?:https?:\/\/)?(\w+)\.substack\.com\/?$' +ETSY_SHOP_URL = r'^(?:https?:\/\/)?(?:www\.)?etsy\.com\/shop\/([\w-]+)\/?$' +KO_FI_URL = r'^(?:https?:\/\/)?(?:www\.)?ko\-fi\.com\/([\w-]+)(?:|\/shop)\/?$' +BEHANCE_PAGE_URL = r'^(?:https?:\/\/)?(?:www\.)?behance\.net\/([\w-]+)\/?$' REGEXES = [ - # Subreddits - (REDDIT_SUBSCRIPTION_URL, SocialSiteId.REDDIT), + # Reddit + (REDDIT_SUBREDDIT_URL, SocialSiteId.REDDIT_SUBREDDIT), + (REDDIT_USER_URL, SocialSiteId.REDDIT_USER), # Twitter (TWITTER_HANDLE_URL, SocialSiteId.TWITTER), @@ -114,13 +126,22 @@ REGEXES = [ # Substack (SUBSTACK_PREFIX_URL, SocialSiteId.SUBSTACK), + + # Etsy shop + (ETSY_SHOP_URL, SocialSiteId.ETSY_SHOP), + + # Ko-fi + (KO_FI_URL, SocialSiteId.KO_FI), + + # Behance + (BEHANCE_PAGE_URL, SocialSiteId.BEHANCE_PAGE), ] def determine_social_from_url_internally(url): # Regexes for (social_site_url_regex, social_site_id) in REGEXES: - if m := re.match(social_site_url_regex, url): + if m := re.match(social_site_url_regex, url, re.I): groups = m.groups() return (social_site_id, groups[0] if len(groups) > 0 else None) @@ -143,5 +164,11 @@ def determine_social_from_url(url): return None return SocialLink(url, social_site_id, social_id) -assert determine_social_from_url('http://www.twitter.com/dril').social_id == 'dril' -assert determine_social_from_url('http://worstdril.tumblr.com/') +TEST = True +if TEST: + assert determine_social_from_url('http://www.twitter.com/dril').social_id == 'dril' + assert determine_social_from_url('http://worstdril.tumblr.com/') + assert determine_social_from_url('https://deep-dark-fears.tumblr.com').social_id == 'deep-dark-fears' + assert determine_social_from_url('https://www.etsy.com/shop/aleksiremesart').social_id == 'aleksiremesart' + assert determine_social_from_url('https://ko-fi.com/A627LI1/shop').social_id == 'A627LI1' + assert determine_social_from_url('https://ko-fi.com/A627LI1/').social_id == 'A627LI1'