1
0
This commit is contained in:
Jon Michael Aanes 2023-07-22 21:56:31 +02:00
parent b07177b920
commit 5aad3e1cab

View File

@ -1,12 +1,12 @@
from dataclasses import dataclass from dataclasses import dataclass
from enforce_typing import enforce_types from enforce_typing import enforce_types
from typing import List, Set, Optional, Union from typing import List, Set, Optional, Union
from enum import Enum import enum
import datetime import datetime
import re import re
import urllib.parse import urllib.parse
class SocialSiteId(Enum): class SocialSiteId(enum.Enum):
REDDIT = 1 # Should have been named REDDIT_SUBREDDIT REDDIT = 1 # Should have been named REDDIT_SUBREDDIT
REDDIT_USER = 22 REDDIT_USER = 22
REDDIT_SUBREDDIT = REDDIT REDDIT_SUBREDDIT = REDDIT
@ -31,13 +31,14 @@ class SocialSiteId(Enum):
ETSY_SHOP = 19 ETSY_SHOP = 19
KO_FI = 20 KO_FI = 20
BEHANCE_PAGE = 21 BEHANCE_PAGE = 21
TIKTOK_USER = 22 TIKTOK_USER = 7085
PIXIV_USER = 23 PIXIV_USER = 23
CARRD_PAGE = 24 CARRD_PAGE = 24
HENTAI_FOUNDRY = 25 HENTAI_FOUNDRY = 25
YOUTUBE_CHANNEL_HANDLE = 26 YOUTUBE_CHANNEL_HANDLE = 26
VIMEO_CHANNEL = 27 VIMEO_CHANNEL = 27
NEWGROUNDS_PAGE = 28 NEWGROUNDS_PAGE = 28
ARTSY_ARTIST = 2042
def wikidata_property(self, client): def wikidata_property(self, client):
return client.get(WIKIDATA_PROPERTIES[self]) return client.get(WIKIDATA_PROPERTIES[self])
@ -62,6 +63,8 @@ class WikidataInfo(object):
WIKIDATA_PROPERTIES = { WIKIDATA_PROPERTIES = {
SocialSiteId.EMAIL: WikidataInfo(968, None), SocialSiteId.EMAIL: WikidataInfo(968, None),
SocialSiteId.RSS_FEED: WikidataInfo(1079, None),
SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None), SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None),
SocialSiteId.INSTAGRAM_PAGE: WikidataInfo(2003, None), SocialSiteId.INSTAGRAM_PAGE: WikidataInfo(2003, None),
SocialSiteId.LINKTREE_PAGE: WikidataInfo(11079, None), SocialSiteId.LINKTREE_PAGE: WikidataInfo(11079, None),
@ -71,7 +74,7 @@ WIKIDATA_PROPERTIES = {
SocialSiteId.SONGKICK_ARTIST: WikidataInfo(3478, None), SocialSiteId.SONGKICK_ARTIST: WikidataInfo(3478, None),
SocialSiteId.TWITCH: WikidataInfo(5797, None), SocialSiteId.TWITCH: WikidataInfo(5797, None),
SocialSiteId.TWITTER: WikidataInfo(2002, None), SocialSiteId.TWITTER: WikidataInfo(2002, None),
SocialSiteId.WIKIDATA: WikidataInfo(43649390, None), SocialSiteId.WIKIDATA: WikidataInfo(None, 2013),
SocialSiteId.TUMBLR: WikidataInfo(3943, None), SocialSiteId.TUMBLR: WikidataInfo(3943, None),
SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None), SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
@ -87,13 +90,14 @@ WIKIDATA_PROPERTIES = {
SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866), SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866),
SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376), SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655), SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
} }
def re_social_subdomain(main_domain):
    """Build a regex source string for profiles hosted on a subdomain.

    The returned pattern matches, in order:

    * an optional ``http://`` or ``https://`` scheme,
    * a subdomain label made of word characters, ``_`` or ``-``
      (capture group 1),
    * a literal dot followed by ``main_domain``, which is escaped so that it
      is matched verbatim,
    * an optional path starting with ``/`` (capture group 2).

    Args:
        main_domain: Domain the subdomain must belong to, for example
            ``'newgrounds.com'``.

    Returns:
        The uncompiled pattern as a ``str``.
    """
    # Every fragment is a raw string: '\/' inside a plain literal is an
    # invalid escape sequence and triggers a warning on current Python
    # versions, even though the resulting text is the same.
    return (
        r'^(?:https?:\/\/)?([\w_-]+)\.'
        + re.escape(main_domain)
        + r'(\/.*)?$'
    )
RE_ID = r'@?([\w_.-]+)' RE_ID = r'@?([^/]+)'
def re_social_path(main_domain): def re_social_path(main_domain):
#return r'^(?:https?:\/\/)?(?:www\.)?'+re.escape(main_domain)+'\/'+RE_ID+'\/?$' #return r'^(?:https?:\/\/)?(?:www\.)?'+re.escape(main_domain)+'\/'+RE_ID+'\/?$'
@ -137,6 +141,7 @@ URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1= re_social_path_adv('youtube.com', RE_ID)
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID) URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID)
URL_PARSE_VIMEO_CHANNEL= re_social_path_adv('vimeo.com', RE_ID) URL_PARSE_VIMEO_CHANNEL= re_social_path_adv('vimeo.com', RE_ID)
URL_PARSE_NEWGROUNDS_PAGE = re_social_subdomain('newgrounds.com') URL_PARSE_NEWGROUNDS_PAGE = re_social_subdomain('newgrounds.com')
URL_PARSE_ARTSY_ARTIST = re_social_path_adv('artsy.net', 'artist', RE_ID)
REGEXES = [ REGEXES = [
# Reddit # Reddit
@ -215,9 +220,13 @@ REGEXES = [
# Newgrounds # Newgrounds
(URL_PARSE_NEWGROUNDS_PAGE, SocialSiteId.NEWGROUNDS_PAGE), (URL_PARSE_NEWGROUNDS_PAGE, SocialSiteId.NEWGROUNDS_PAGE),
# Artsy
(URL_PARSE_ARTSY_ARTIST, SocialSiteId.ARTSY_ARTIST),
] ]
def determine_social_from_url_internally(url): def determine_social_from_url_internally(url):
assert isinstance(url, str)
# Regexes # Regexes
for (social_site_url_regex, social_site_id) in REGEXES: for (social_site_url_regex, social_site_id) in REGEXES: