Additional Mastodon pages and DeviantArt parsing.
This commit is contained in:
parent
5aad3e1cab
commit
bdf60219fd
42
__init__.py
42
__init__.py
|
@ -36,9 +36,12 @@ class SocialSiteId(enum.Enum):
|
||||||
CARRD_PAGE = 24
|
CARRD_PAGE = 24
|
||||||
HENTAI_FOUNDRY = 25
|
HENTAI_FOUNDRY = 25
|
||||||
YOUTUBE_CHANNEL_HANDLE = 26
|
YOUTUBE_CHANNEL_HANDLE = 26
|
||||||
|
YOUTUBE_CHANNEL_ID = 2397
|
||||||
VIMEO_CHANNEL = 27
|
VIMEO_CHANNEL = 27
|
||||||
NEWGROUNDS_PAGE = 28
|
NEWGROUNDS_PAGE = 28
|
||||||
ARTSY_ARTIST = 2042
|
ARTSY_ARTIST = 2042
|
||||||
|
LINK_COLLECTION_PAGE = 29
|
||||||
|
DEVIANT_ART_ACCOUNT = 7737
|
||||||
|
|
||||||
def wikidata_property(self, client):
|
def wikidata_property(self, client):
|
||||||
return client.get(WIKIDATA_PROPERTIES[self])
|
return client.get(WIKIDATA_PROPERTIES[self])
|
||||||
|
@ -80,7 +83,7 @@ WIKIDATA_PROPERTIES = {
|
||||||
SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
|
SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
|
||||||
|
|
||||||
SocialSiteId.PIXIV_USER: WikidataInfo(None, 306956),
|
SocialSiteId.PIXIV_USER: WikidataInfo(None, 306956),
|
||||||
#SocialSiteId.MASTODON_PAGE: 2000 + 10,
|
SocialSiteId.MASTODON_PAGE: WikidataInfo(4033, None),
|
||||||
SocialSiteId.PATREON_PAGE: WikidataInfo(4175, 15861362),
|
SocialSiteId.PATREON_PAGE: WikidataInfo(4175, 15861362),
|
||||||
SocialSiteId.ARTSTATION_PAGE: WikidataInfo(None, 65551500),
|
SocialSiteId.ARTSTATION_PAGE: WikidataInfo(None, 65551500),
|
||||||
#SocialSiteId.INPRNT_PAGE: WikidataInfo(None, None),
|
#SocialSiteId.INPRNT_PAGE: WikidataInfo(None, None),
|
||||||
|
@ -88,9 +91,11 @@ WIKIDATA_PROPERTIES = {
|
||||||
SocialSiteId.CARRD_PAGE: WikidataInfo(None, 106036503),
|
SocialSiteId.CARRD_PAGE: WikidataInfo(None, 106036503),
|
||||||
SocialSiteId.HENTAI_FOUNDRY: WikidataInfo(None, 115903301),
|
SocialSiteId.HENTAI_FOUNDRY: WikidataInfo(None, 115903301),
|
||||||
SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866),
|
SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866),
|
||||||
|
SocialSiteId.YOUTUBE_CHANNEL_ID: WikidataInfo(2397, None),
|
||||||
SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
|
SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
|
||||||
SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
|
SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
|
||||||
SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
|
SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
|
||||||
|
SocialSiteId.DEVIANT_ART_ACCOUNT: WikidataInfo(7737, None),
|
||||||
}
|
}
|
||||||
|
|
||||||
def re_social_subdomain(main_domain):
|
def re_social_subdomain(main_domain):
|
||||||
|
@ -104,6 +109,7 @@ def re_social_path(main_domain):
|
||||||
return re_social_path_adv(main_domain, RE_ID)
|
return re_social_path_adv(main_domain, RE_ID)
|
||||||
|
|
||||||
def re_social_path_adv(main_domain, *path):
|
def re_social_path_adv(main_domain, *path):
|
||||||
|
assert not main_domain.startswith('www.'), 'Redundant www.'
|
||||||
l = [r'^', '(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
|
l = [r'^', '(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
|
||||||
|
|
||||||
for p in path:
|
for p in path:
|
||||||
|
@ -139,9 +145,13 @@ URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co')
|
||||||
URL_PARSE_HENTAI_FOUNDRY= re_social_path_adv('hentai-foundry.com', 'user', RE_ID, 'profile')
|
URL_PARSE_HENTAI_FOUNDRY= re_social_path_adv('hentai-foundry.com', 'user', RE_ID, 'profile')
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1= re_social_path_adv('youtube.com', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1= re_social_path_adv('youtube.com', RE_ID)
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID)
|
||||||
|
URL_PARSE_YOUTUBE_CHANNEL_ID= re_social_path_adv('youtube.com', 'channel', RE_ID)
|
||||||
URL_PARSE_VIMEO_CHANNEL= re_social_path_adv('vimeo.com', RE_ID)
|
URL_PARSE_VIMEO_CHANNEL= re_social_path_adv('vimeo.com', RE_ID)
|
||||||
URL_PARSE_NEWGROUNDS_PAGE = re_social_subdomain('newgrounds.com')
|
URL_PARSE_NEWGROUNDS_PAGE = re_social_subdomain('newgrounds.com')
|
||||||
URL_PARSE_ARTSY_ARTIST = re_social_path_adv('artsy.net', 'artist', RE_ID)
|
URL_PARSE_ARTSY_ARTIST = re_social_path_adv('artsy.net', 'artist', RE_ID)
|
||||||
|
URL_PARSE_DEVIANT_ART_ACCOUNT = re_social_path_adv('deviantart.com', RE_ID)
|
||||||
|
URL_PARSE_DEVIANT_ART_ACCOUNT_2 = re_social_subdomain('deviantart.com')
|
||||||
|
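# TODO: https://<ID>.deviantart.com -- NOTE(review): this form appears to be covered by URL_PARSE_DEVIANT_ART_ACCOUNT_2; confirm and drop this TODO.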
|
||||||
|
|
||||||
REGEXES = [
|
REGEXES = [
|
||||||
# Reddit
|
# Reddit
|
||||||
|
@ -214,6 +224,7 @@ REGEXES = [
|
||||||
# Youtube
|
# Youtube
|
||||||
(URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1, SocialSiteId.YOUTUBE_CHANNEL_HANDLE),
|
(URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1, SocialSiteId.YOUTUBE_CHANNEL_HANDLE),
|
||||||
(URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2, SocialSiteId.YOUTUBE_CHANNEL_HANDLE),
|
(URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2, SocialSiteId.YOUTUBE_CHANNEL_HANDLE),
|
||||||
|
(URL_PARSE_YOUTUBE_CHANNEL_ID, SocialSiteId.YOUTUBE_CHANNEL_ID),
|
||||||
|
|
||||||
# Vimeo
|
# Vimeo
|
||||||
(URL_PARSE_VIMEO_CHANNEL, SocialSiteId.VIMEO_CHANNEL),
|
(URL_PARSE_VIMEO_CHANNEL, SocialSiteId.VIMEO_CHANNEL),
|
||||||
|
@ -223,8 +234,32 @@ REGEXES = [
|
||||||
|
|
||||||
# Artsy
|
# Artsy
|
||||||
(URL_PARSE_ARTSY_ARTIST, SocialSiteId.ARTSY_ARTIST),
|
(URL_PARSE_ARTSY_ARTIST, SocialSiteId.ARTSY_ARTIST),
|
||||||
|
|
||||||
|
# Deviant art
|
||||||
|
(URL_PARSE_DEVIANT_ART_ACCOUNT, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
||||||
|
(URL_PARSE_DEVIANT_ART_ACCOUNT_2, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
WELL_KNOWN_MASTODON_INSTANCES = frozenset({
|
||||||
|
# Includes all servers with 50 000+ users as of 6 July 2023,
|
||||||
|
# based on https://mastodonservers.net/servers/top
|
||||||
|
'mastodon.social',
|
||||||
|
'pawoo.net',
|
||||||
|
'baraag.net',
|
||||||
|
'mstdn.jp',
|
||||||
|
'mastodon.cloud',
|
||||||
|
'mstdn.social',
|
||||||
|
'mastodon.online',
|
||||||
|
'mas.to',
|
||||||
|
'mastodon.world',
|
||||||
|
'mastodon.lol',
|
||||||
|
'mastodon.sdf.org',
|
||||||
|
'c.im',
|
||||||
|
'mastodon.uno',
|
||||||
|
'mastodonapp.uk',
|
||||||
|
'fosstodon.org',
|
||||||
|
})
|
||||||
|
|
||||||
def determine_social_from_url_internally(url):
|
def determine_social_from_url_internally(url):
|
||||||
assert isinstance(url, str)
|
assert isinstance(url, str)
|
||||||
|
|
||||||
|
@ -235,6 +270,9 @@ def determine_social_from_url_internally(url):
|
||||||
return (social_site_id, groups[0] if len(groups) > 0 else None)
|
return (social_site_id, groups[0] if len(groups) > 0 else None)
|
||||||
|
|
||||||
# Mastodon
|
# Mastodon
|
||||||
|
for mastodon_hostname in WELL_KNOWN_MASTODON_INSTANCES:
|
||||||
|
if url.startswith('https://' + mastodon_hostname):
|
||||||
|
return (SocialSiteId.MASTODON_PAGE, None)
|
||||||
if 'mastodon' in url:
|
if 'mastodon' in url:
|
||||||
return (SocialSiteId.MASTODON_PAGE, None)
|
return (SocialSiteId.MASTODON_PAGE, None)
|
||||||
|
|
||||||
|
@ -266,6 +304,8 @@ def run_tests():
|
||||||
assert determine_social_from_url('https://www.tiktok.com/@depthsofwikipedia?lang=en').social_id == 'depthsofwikipedia'
|
assert determine_social_from_url('https://www.tiktok.com/@depthsofwikipedia?lang=en').social_id == 'depthsofwikipedia'
|
||||||
assert determine_social_from_url('https://www.pixiv.net/users/14866303').social_id == '14866303'
|
assert determine_social_from_url('https://www.pixiv.net/users/14866303').social_id == '14866303'
|
||||||
assert determine_social_from_url('https://www.pixiv.net/member.php?id=109710').social_id == '109710'
|
assert determine_social_from_url('https://www.pixiv.net/member.php?id=109710').social_id == '109710'
|
||||||
|
assert determine_social_from_url('https://www.deviantart.com/solquiet').social_site_id == SocialSiteId.DEVIANT_ART_ACCOUNT
|
||||||
|
assert determine_social_from_url('https://solquiet.deviantart.com/').social_site_id == SocialSiteId.DEVIANT_ART_ACCOUNT
|
||||||
|
|
||||||
INSTAGRAMS = [
|
INSTAGRAMS = [
|
||||||
'https://instagram.com/_richardparry_',
|
'https://instagram.com/_richardparry_',
|
||||||
|
|
Loading…
Reference in New Issue
Block a user