Additional mastodon pages and deviant art parsing.
This commit is contained in:
parent
5aad3e1cab
commit
bdf60219fd
42
__init__.py
42
__init__.py
|
@ -36,9 +36,12 @@ class SocialSiteId(enum.Enum):
|
|||
CARRD_PAGE = 24
|
||||
HENTAI_FOUNDRY = 25
|
||||
YOUTUBE_CHANNEL_HANDLE = 26
|
||||
YOUTUBE_CHANNEL_ID = 2397
|
||||
VIMEO_CHANNEL = 27
|
||||
NEWGROUNDS_PAGE = 28
|
||||
ARTSY_ARTIST = 2042
|
||||
LINK_COLLECTION_PAGE = 29
|
||||
DEVIANT_ART_ACCOUNT = 7737
|
||||
|
||||
def wikidata_property(self, client):
|
||||
return client.get(WIKIDATA_PROPERTIES[self])
|
||||
|
@ -80,7 +83,7 @@ WIKIDATA_PROPERTIES = {
|
|||
SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
|
||||
|
||||
SocialSiteId.PIXIV_USER: WikidataInfo(None, 306956),
|
||||
#SocialSiteId.MASTODON_PAGE: 2000 + 10,
|
||||
SocialSiteId.MASTODON_PAGE: WikidataInfo(4033, None),
|
||||
SocialSiteId.PATREON_PAGE: WikidataInfo(4175, 15861362),
|
||||
SocialSiteId.ARTSTATION_PAGE: WikidataInfo(None, 65551500),
|
||||
#SocialSiteId.INPRNT_PAGE: WikidataInfo(None, None),
|
||||
|
@ -88,9 +91,11 @@ WIKIDATA_PROPERTIES = {
|
|||
SocialSiteId.CARRD_PAGE: WikidataInfo(None, 106036503),
|
||||
SocialSiteId.HENTAI_FOUNDRY: WikidataInfo(None, 115903301),
|
||||
SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866),
|
||||
SocialSiteId.YOUTUBE_CHANNEL_ID: WikidataInfo(2397, None),
|
||||
SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
|
||||
SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
|
||||
SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
|
||||
SocialSiteId.DEVIANT_ART_ACCOUNT: WikidataInfo(7737, None),
|
||||
}
|
||||
|
||||
def re_social_subdomain(main_domain):
|
||||
|
@ -104,6 +109,7 @@ def re_social_path(main_domain):
|
|||
return re_social_path_adv(main_domain, RE_ID)
|
||||
|
||||
def re_social_path_adv(main_domain, *path):
|
||||
assert not main_domain.startswith('www.'), 'Redundant www.'
|
||||
l = [r'^', '(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
|
||||
|
||||
for p in path:
|
||||
|
@ -139,9 +145,13 @@ URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co')
|
|||
URL_PARSE_HENTAI_FOUNDRY= re_social_path_adv('hentai-foundry.com', 'user', RE_ID, 'profile')
|
||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1= re_social_path_adv('youtube.com', RE_ID)
|
||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID)
|
||||
URL_PARSE_YOUTUBE_CHANNEL_ID= re_social_path_adv('youtube.com', 'channel', RE_ID)
|
||||
URL_PARSE_VIMEO_CHANNEL= re_social_path_adv('vimeo.com', RE_ID)
|
||||
URL_PARSE_NEWGROUNDS_PAGE = re_social_subdomain('newgrounds.com')
|
||||
URL_PARSE_ARTSY_ARTIST = re_social_path_adv('artsy.net', 'artist', RE_ID)
|
||||
URL_PARSE_DEVIANT_ART_ACCOUNT = re_social_path_adv('deviantart.com', RE_ID)
|
||||
URL_PARSE_DEVIANT_ART_ACCOUNT_2 = re_social_subdomain('deviantart.com')
|
||||
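# TODO(review): https://<ID>.deviantart.com appears to be covered by URL_PARSE_DEVIANT_ART_ACCOUNT_2 above; remove this note once confirmed.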
|
||||
|
||||
REGEXES = [
|
||||
# Reddit
|
||||
|
@ -214,6 +224,7 @@ REGEXES = [
|
|||
# Youtube
|
||||
(URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1, SocialSiteId.YOUTUBE_CHANNEL_HANDLE),
|
||||
(URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2, SocialSiteId.YOUTUBE_CHANNEL_HANDLE),
|
||||
(URL_PARSE_YOUTUBE_CHANNEL_ID, SocialSiteId.YOUTUBE_CHANNEL_ID),
|
||||
|
||||
# Vimeo
|
||||
(URL_PARSE_VIMEO_CHANNEL, SocialSiteId.VIMEO_CHANNEL),
|
||||
|
@ -223,8 +234,32 @@ REGEXES = [
|
|||
|
||||
# Artsy
|
||||
(URL_PARSE_ARTSY_ARTIST, SocialSiteId.ARTSY_ARTIST),
|
||||
|
||||
# DeviantArt
|
||||
(URL_PARSE_DEVIANT_ART_ACCOUNT, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
||||
(URL_PARSE_DEVIANT_ART_ACCOUNT_2, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
||||
]
|
||||
|
||||
WELL_KNOWN_MASTODON_INSTANCES = frozenset({
|
||||
# Includes all servers with 50 000+ users as of 6 July 2023.
|
||||
# based on https://mastodonservers.net/servers/top
|
||||
'mastodon.social',
|
||||
'pawoo.net',
|
||||
'baraag.net',
|
||||
'mstdn.jp',
|
||||
'mastodon.cloud',
|
||||
'mstdn.social',
|
||||
'mastodon.online',
|
||||
'mas.to',
|
||||
'mastodon.world',
|
||||
'mastodon.lol',
|
||||
'mastodon.sdf.org',
|
||||
'c.im',
|
||||
'mastodon.uno',
|
||||
'mastodonapp.uk',
|
||||
'fosstodon.org',
|
||||
})
|
||||
|
||||
def determine_social_from_url_internally(url):
|
||||
assert isinstance(url, str)
|
||||
|
||||
|
@ -235,6 +270,9 @@ def determine_social_from_url_internally(url):
|
|||
return (social_site_id, groups[0] if len(groups) > 0 else None)
|
||||
|
||||
# Mastodon
|
||||
for mastodon_hostname in WELL_KNOWN_MASTODON_INSTANCES:
|
||||
if url.startswith('https://' + mastodon_hostname):
|
||||
return (SocialSiteId.MASTODON_PAGE, None)
|
||||
if 'mastodon' in url:
|
||||
return (SocialSiteId.MASTODON_PAGE, None)
|
||||
|
||||
|
@ -266,6 +304,8 @@ def run_tests():
|
|||
assert determine_social_from_url('https://www.tiktok.com/@depthsofwikipedia?lang=en').social_id == 'depthsofwikipedia'
|
||||
assert determine_social_from_url('https://www.pixiv.net/users/14866303').social_id == '14866303'
|
||||
assert determine_social_from_url('https://www.pixiv.net/member.php?id=109710').social_id == '109710'
|
||||
assert determine_social_from_url('https://www.deviantart.com/solquiet').social_site_id == SocialSiteId.DEVIANT_ART_ACCOUNT
|
||||
assert determine_social_from_url('https://solquiet.deviantart.com/').social_site_id == SocialSiteId.DEVIANT_ART_ACCOUNT
|
||||
|
||||
INSTAGRAMS = [
|
||||
'https://instagram.com/_richardparry_',
|
||||
|
|
Loading…
Reference in New Issue
Block a user