1
0
This commit is contained in:
Jon Michael Aanes 2023-07-22 21:56:31 +02:00
parent b07177b920
commit 5aad3e1cab

View File

@ -1,12 +1,12 @@
from dataclasses import dataclass
from enforce_typing import enforce_types
from typing import List, Set, Optional, Union
from enum import Enum
import enum
import datetime
import re
import urllib.parse
class SocialSiteId(Enum):
class SocialSiteId(enum.Enum):
REDDIT = 1 # Should have been named REDDIT_SUBREDDIT
REDDIT_USER = 22
REDDIT_SUBREDDIT = REDDIT
@ -31,13 +31,14 @@ class SocialSiteId(Enum):
ETSY_SHOP = 19
KO_FI = 20
BEHANCE_PAGE = 21
TIKTOK_USER = 22
TIKTOK_USER = 7085
PIXIV_USER = 23
CARRD_PAGE = 24
HENTAI_FOUNDRY = 25
YOUTUBE_CHANNEL_HANDLE = 26
VIMEO_CHANNEL = 27
NEWGROUNDS_PAGE = 28
ARTSY_ARTIST = 2042
def wikidata_property(self, client):
    """Fetch this site's Wikidata entry through ``client``.

    Looks up the member in the module-level ``WIKIDATA_PROPERTIES`` mapping
    and hands the mapped value to ``client.get``, returning whatever that
    call returns.
    """
    property_info = WIKIDATA_PROPERTIES[self]
    return client.get(property_info)
@ -62,6 +63,8 @@ class WikidataInfo(object):
WIKIDATA_PROPERTIES = {
SocialSiteId.EMAIL: WikidataInfo(968, None),
SocialSiteId.RSS_FEED: WikidataInfo(1079, None),
SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None),
SocialSiteId.INSTAGRAM_PAGE: WikidataInfo(2003, None),
SocialSiteId.LINKTREE_PAGE: WikidataInfo(11079, None),
@ -71,7 +74,7 @@ WIKIDATA_PROPERTIES = {
SocialSiteId.SONGKICK_ARTIST: WikidataInfo(3478, None),
SocialSiteId.TWITCH: WikidataInfo(5797, None),
SocialSiteId.TWITTER: WikidataInfo(2002, None),
SocialSiteId.WIKIDATA: WikidataInfo(43649390, None),
SocialSiteId.WIKIDATA: WikidataInfo(None, 2013),
SocialSiteId.TUMBLR: WikidataInfo(3943, None),
SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
@ -87,13 +90,14 @@ WIKIDATA_PROPERTIES = {
SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866),
SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
}
def re_social_subdomain(main_domain):
    """Build a regex source string matching URLs on a subdomain of ``main_domain``.

    The pattern accepts an optional ``http://`` or ``https://`` scheme, then a
    single subdomain label made of word characters, ``_`` or ``-`` (captured as
    group 1), then ``main_domain`` taken literally, then an optional path
    starting with ``/`` (captured as group 2).

    Args:
        main_domain: Domain name such as ``'newgrounds.com'``; it is escaped
            with ``re.escape`` so that its dots match literally.

    Returns:
        The pattern as a ``str`` (not compiled).
    """
    # A trailing path is allowed; a stricter earlier variant accepted only an
    # optional final slash after the domain.
    #
    # Every piece is a raw string: the trailing piece used to be a plain
    # literal containing ``\/``, an invalid escape sequence that newer Python
    # versions warn about. The resulting pattern text is unchanged.
    return (
        r'^(?:https?:\/\/)?([\w_-]+)\.'
        + re.escape(main_domain)
        + r'(\/.*)?$'
    )
# Regex fragment that captures an account identifier; an optional leading "@"
# is matched but kept outside the capture group.
# NOTE(review): RE_ID is assigned twice in a row here. These look like the
# before/after lines of a diff; as written, the second assignment is the one
# that takes effect.
RE_ID = r'@?([\w_.-]+)'
# Broader form: the identifier is any non-empty run of characters other than "/".
RE_ID = r'@?([^/]+)'
def re_social_path(main_domain):
#return r'^(?:https?:\/\/)?(?:www\.)?'+re.escape(main_domain)+'\/'+RE_ID+'\/?$'
@ -137,6 +141,7 @@ URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1= re_social_path_adv('youtube.com', RE_ID)
# URL patterns for individual sites. re_social_path_adv is defined outside
# this view; presumably it takes the domain followed by the path parts to
# match, with RE_ID marking where the identifier is captured (confirm against
# its definition).
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID)
URL_PARSE_VIMEO_CHANNEL= re_social_path_adv('vimeo.com', RE_ID)
# Newgrounds identifies the page by subdomain rather than by path.
URL_PARSE_NEWGROUNDS_PAGE = re_social_subdomain('newgrounds.com')
URL_PARSE_ARTSY_ARTIST = re_social_path_adv('artsy.net', 'artist', RE_ID)
REGEXES = [
# Reddit
@ -215,9 +220,13 @@ REGEXES = [
# Newgrounds
(URL_PARSE_NEWGROUNDS_PAGE, SocialSiteId.NEWGROUNDS_PAGE),
# Artsy
(URL_PARSE_ARTSY_ARTIST, SocialSiteId.ARTSY_ARTIST),
]
def determine_social_from_url_internally(url):
assert isinstance(url, str)
# Regexes
for (social_site_url_regex, social_site_id) in REGEXES: