Pixiv has too many link formats
This commit is contained in:
parent
bdf60219fd
commit
2bdd5b43fa
68
__init__.py
68
__init__.py
|
@ -32,7 +32,8 @@ class SocialSiteId(enum.Enum):
|
||||||
KO_FI = 20
|
KO_FI = 20
|
||||||
BEHANCE_PAGE = 21
|
BEHANCE_PAGE = 21
|
||||||
TIKTOK_USER = 7085
|
TIKTOK_USER = 7085
|
||||||
PIXIV_USER = 23
|
PIXIV_USER_ID = 5435
|
||||||
|
PIXIV_USER_NICKNAME = 31
|
||||||
CARRD_PAGE = 24
|
CARRD_PAGE = 24
|
||||||
HENTAI_FOUNDRY = 25
|
HENTAI_FOUNDRY = 25
|
||||||
YOUTUBE_CHANNEL_HANDLE = 26
|
YOUTUBE_CHANNEL_HANDLE = 26
|
||||||
|
@ -42,6 +43,8 @@ class SocialSiteId(enum.Enum):
|
||||||
ARTSY_ARTIST = 2042
|
ARTSY_ARTIST = 2042
|
||||||
LINK_COLLECTION_PAGE = 29
|
LINK_COLLECTION_PAGE = 29
|
||||||
DEVIANT_ART_ACCOUNT = 7737
|
DEVIANT_ART_ACCOUNT = 7737
|
||||||
|
DANBOORU_ARTIST = 30
|
||||||
|
BANDCAMP_PROFILE = 3283
|
||||||
|
|
||||||
def wikidata_property(self, client):
|
def wikidata_property(self, client):
|
||||||
return client.get(WIKIDATA_PROPERTIES[self])
|
return client.get(WIKIDATA_PROPERTIES[self])
|
||||||
|
@ -49,7 +52,13 @@ class SocialSiteId(enum.Enum):
|
||||||
def is_aggregator(self):
|
def is_aggregator(self):
|
||||||
return self in AGGERAGOR_SOCIALS
|
return self in AGGERAGOR_SOCIALS
|
||||||
|
|
||||||
AGGERAGOR_SOCIALS = {SocialSiteId.LINKTREE_PAGE, SocialSiteId.WIKIDATA}
|
AGGERAGOR_SOCIALS = {
|
||||||
|
SocialSiteId.LINKTREE_PAGE,
|
||||||
|
SocialSiteId.WIKIDATA,
|
||||||
|
SocialSiteId.CARRD_PAGE,
|
||||||
|
SocialSiteId.LINK_COLLECTION_PAGE,
|
||||||
|
SocialSiteId.DANBOORU_ARTIST,
|
||||||
|
}
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
@dataclass(frozen = True)
|
@dataclass(frozen = True)
|
||||||
|
@ -63,6 +72,8 @@ class SocialLink(object):
|
||||||
class WikidataInfo(object):
|
class WikidataInfo(object):
|
||||||
property_id: Optional[int]
|
property_id: Optional[int]
|
||||||
issuer_id: Optional[int]
|
issuer_id: Optional[int]
|
||||||
|
id_version_of: Optional[SocialSiteId] = None
|
||||||
|
nickname_version_of: Optional[SocialSiteId] = None
|
||||||
|
|
||||||
WIKIDATA_PROPERTIES = {
|
WIKIDATA_PROPERTIES = {
|
||||||
SocialSiteId.EMAIL: WikidataInfo(968, None),
|
SocialSiteId.EMAIL: WikidataInfo(968, None),
|
||||||
|
@ -82,7 +93,9 @@ WIKIDATA_PROPERTIES = {
|
||||||
SocialSiteId.TUMBLR: WikidataInfo(3943, None),
|
SocialSiteId.TUMBLR: WikidataInfo(3943, None),
|
||||||
SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
|
SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
|
||||||
|
|
||||||
SocialSiteId.PIXIV_USER: WikidataInfo(None, 306956),
|
SocialSiteId.PIXIV_USER_ID: WikidataInfo(5435, 306956, id_version_of = SocialSiteId.PIXIV_USER_NICKNAME),
|
||||||
|
SocialSiteId.PIXIV_USER_NICKNAME: WikidataInfo(None, 306956, nickname_version_of = SocialSiteId.PIXIV_USER_ID),
|
||||||
|
|
||||||
SocialSiteId.MASTODON_PAGE: WikidataInfo(4033, None),
|
SocialSiteId.MASTODON_PAGE: WikidataInfo(4033, None),
|
||||||
SocialSiteId.PATREON_PAGE: WikidataInfo(4175, 15861362),
|
SocialSiteId.PATREON_PAGE: WikidataInfo(4175, 15861362),
|
||||||
SocialSiteId.ARTSTATION_PAGE: WikidataInfo(None, 65551500),
|
SocialSiteId.ARTSTATION_PAGE: WikidataInfo(None, 65551500),
|
||||||
|
@ -90,12 +103,14 @@ WIKIDATA_PROPERTIES = {
|
||||||
|
|
||||||
SocialSiteId.CARRD_PAGE: WikidataInfo(None, 106036503),
|
SocialSiteId.CARRD_PAGE: WikidataInfo(None, 106036503),
|
||||||
SocialSiteId.HENTAI_FOUNDRY: WikidataInfo(None, 115903301),
|
SocialSiteId.HENTAI_FOUNDRY: WikidataInfo(None, 115903301),
|
||||||
SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866),
|
SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(11245, 866, nickname_version_of = SocialSiteId.YOUTUBE_CHANNEL_ID),
|
||||||
SocialSiteId.YOUTUBE_CHANNEL_ID: WikidataInfo(2397, None),
|
SocialSiteId.YOUTUBE_CHANNEL_ID: WikidataInfo(2397, 866, id_version_of = SocialSiteId.YOUTUBE_CHANNEL_HANDLE),
|
||||||
SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
|
SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
|
||||||
SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
|
SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
|
||||||
SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
|
SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
|
||||||
SocialSiteId.DEVIANT_ART_ACCOUNT: WikidataInfo(7737, None),
|
SocialSiteId.DEVIANT_ART_ACCOUNT: WikidataInfo(7737, None),
|
||||||
|
SocialSiteId.DANBOORU_ARTIST: WikidataInfo(None, 64514853),
|
||||||
|
SocialSiteId.BANDCAMP_PROFILE: WikidataInfo(3283, 545966),
|
||||||
}
|
}
|
||||||
|
|
||||||
def re_social_subdomain(main_domain):
|
def re_social_subdomain(main_domain):
|
||||||
|
@ -103,6 +118,7 @@ def re_social_subdomain(main_domain):
|
||||||
return r'^(?:https?:\/\/)?([\w_-]+)\.'+re.escape(main_domain)+'(\/.*)?$'
|
return r'^(?:https?:\/\/)?([\w_-]+)\.'+re.escape(main_domain)+'(\/.*)?$'
|
||||||
|
|
||||||
RE_ID = r'@?([^/]+)'
|
RE_ID = r'@?([^/]+)'
|
||||||
|
RE_ANY_SUBPATH = r'(|\/|\/.*)$'
|
||||||
|
|
||||||
def re_social_path(main_domain):
|
def re_social_path(main_domain):
|
||||||
#return r'^(?:https?:\/\/)?(?:www\.)?'+re.escape(main_domain)+'\/'+RE_ID+'\/?$'
|
#return r'^(?:https?:\/\/)?(?:www\.)?'+re.escape(main_domain)+'\/'+RE_ID+'\/?$'
|
||||||
|
@ -113,10 +129,13 @@ def re_social_path_adv(main_domain, *path):
|
||||||
l = [r'^', '(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
|
l = [r'^', '(?:https?:\/\/)?', r'(?:www\.)?', re.escape(main_domain)]
|
||||||
|
|
||||||
for p in path:
|
for p in path:
|
||||||
l.append(r'\/')
|
if p != RE_ANY_SUBPATH:
|
||||||
l.append(RE_ID if p == RE_ID else re.escape(p))
|
l.append(r'\/')
|
||||||
l.append(r'\/?$')
|
l.append(p if p in {RE_ID, RE_ANY_SUBPATH} else re.escape(p))
|
||||||
return ''.join(l)
|
if path[-1] != RE_ANY_SUBPATH:
|
||||||
|
l.append(r'\/?$')
|
||||||
|
regex = ''.join(l)
|
||||||
|
return regex
|
||||||
|
|
||||||
MAILTO_URL = r'^mailto:(?:[\w._.]+@[\w._.]+)$'
|
MAILTO_URL = r'^mailto:(?:[\w._.]+@[\w._.]+)$'
|
||||||
|
|
||||||
|
@ -130,7 +149,7 @@ SONGKICK_ARTIST_URL = r'^(?:https?:\/\/)?(?:www\.)?songkick\.com\/artists\/(\d+)
|
||||||
TUMBLR_PAGE_URL = re_social_path('tumblr.com')
|
TUMBLR_PAGE_URL = re_social_path('tumblr.com')
|
||||||
TUMBLR_PAGE_URL_2 = re_social_subdomain('tumblr.com')
|
TUMBLR_PAGE_URL_2 = re_social_subdomain('tumblr.com')
|
||||||
INSTAGRAM_URL = re_social_path('instagram.com')
|
INSTAGRAM_URL = re_social_path('instagram.com')
|
||||||
PATREON_URL = re_social_path('patreon.com')
|
PATREON_URL = re_social_path_adv('patreon.com', RE_ID, RE_ANY_SUBPATH)
|
||||||
ARTSTATION_URL = re_social_path('artstation.com')
|
ARTSTATION_URL = re_social_path('artstation.com')
|
||||||
INPRNT_URL = re_social_path_adv('inprnt.com', 'gallery', RE_ID)
|
INPRNT_URL = re_social_path_adv('inprnt.com', 'gallery', RE_ID)
|
||||||
FACEBOOK_PAGE_URL = re_social_path('facebook.com')
|
FACEBOOK_PAGE_URL = re_social_path('facebook.com')
|
||||||
|
@ -138,11 +157,14 @@ SUBSTACK_PREFIX_URL = re_social_subdomain('substack.com')
|
||||||
ETSY_SHOP_URL = re_social_path_adv('etsy.com', 'shop', RE_ID)
|
ETSY_SHOP_URL = re_social_path_adv('etsy.com', 'shop', RE_ID)
|
||||||
BEHANCE_PAGE_URL = re_social_path('behance.net')
|
BEHANCE_PAGE_URL = re_social_path('behance.net')
|
||||||
TIKTOK_USER_URL = re_social_path('tiktok.com')
|
TIKTOK_USER_URL = re_social_path('tiktok.com')
|
||||||
PIXIV_USER_URL = r'^(?:https?:\/\/)?(?:www\.)?pixiv\.net(?:\/en)?\/users/(\d+)\/?$'
|
PIXIV_USER_ID_URL = r'^(?:https?:\/\/)?(?:www\.)?pixiv\.net(?:\/en)?\/users/(\d+)\/?$'
|
||||||
PIXIV_USER_URL_2 = r'^(?:https?:\/\/)?(?:www\.)?pixiv\.net(?:\/en)?\/member\.php\/?[?]id=(\d+)$'
|
PIXIV_USER_ID_URL_2 = r'^(?:https?:\/\/)?(?:www\.)?pixiv\.net(?:\/en)?\/member\.php\/?[?]id=(\d+)$'
|
||||||
|
PIXIV_FANBOX_USER_NICKNAME_URL = re_social_subdomain('fanbox.cc')
|
||||||
|
PIXIV_USER_NICKNAME_URL = re_social_path_adv('pixiv.net', 'stacc', RE_ID)
|
||||||
|
PIXIV_SKETCH_USER_NICKNAME_URL = re_social_path_adv('sketch.pixiv.net', RE_ID)
|
||||||
|
|
||||||
URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co')
|
URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co')
|
||||||
URL_PARSE_HENTAI_FOUNDRY= re_social_path_adv('hentai-foundry.com', 'user', RE_ID, 'profile')
|
URL_PARSE_HENTAI_FOUNDRY = re_social_path_adv('hentai-foundry.com', 'user', RE_ID, RE_ANY_SUBPATH)
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1= re_social_path_adv('youtube.com', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1= re_social_path_adv('youtube.com', RE_ID)
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2= re_social_path_adv('youtube.com', 'c', RE_ID)
|
||||||
URL_PARSE_YOUTUBE_CHANNEL_ID= re_social_path_adv('youtube.com', 'channel', RE_ID)
|
URL_PARSE_YOUTUBE_CHANNEL_ID= re_social_path_adv('youtube.com', 'channel', RE_ID)
|
||||||
|
@ -151,6 +173,9 @@ URL_PARSE_NEWGROUNDS_PAGE = re_social_subdomain('newgrounds.com')
|
||||||
URL_PARSE_ARTSY_ARTIST = re_social_path_adv('artsy.net', 'artist', RE_ID)
|
URL_PARSE_ARTSY_ARTIST = re_social_path_adv('artsy.net', 'artist', RE_ID)
|
||||||
URL_PARSE_DEVIANT_ART_ACCOUNT = re_social_path_adv('deviantart.com', RE_ID)
|
URL_PARSE_DEVIANT_ART_ACCOUNT = re_social_path_adv('deviantart.com', RE_ID)
|
||||||
URL_PARSE_DEVIANT_ART_ACCOUNT_2 = re_social_subdomain('deviantart.com')
|
URL_PARSE_DEVIANT_ART_ACCOUNT_2 = re_social_subdomain('deviantart.com')
|
||||||
|
URL_PARSE_DANBOORU_ARTIST = re_social_path_adv('danbooru.donmai.us', 'artists', RE_ID)
|
||||||
|
URL_PARSE_BANDCAMP = re_social_subdomain('bandcamp.com')
|
||||||
|
|
||||||
# TODO: https://<ID>.deviantart.com
|
# TODO: https://<ID>.deviantart.com
|
||||||
|
|
||||||
REGEXES = [
|
REGEXES = [
|
||||||
|
@ -187,8 +212,11 @@ REGEXES = [
|
||||||
(TIKTOK_USER_URL, SocialSiteId.TIKTOK_USER),
|
(TIKTOK_USER_URL, SocialSiteId.TIKTOK_USER),
|
||||||
|
|
||||||
# Pixiv
|
# Pixiv
|
||||||
(PIXIV_USER_URL, SocialSiteId.PIXIV_USER),
|
(PIXIV_USER_ID_URL, SocialSiteId.PIXIV_USER_ID),
|
||||||
(PIXIV_USER_URL_2, SocialSiteId.PIXIV_USER),
|
(PIXIV_USER_ID_URL_2, SocialSiteId.PIXIV_USER_ID),
|
||||||
|
(PIXIV_FANBOX_USER_NICKNAME_URL , SocialSiteId.PIXIV_USER_NICKNAME),
|
||||||
|
(PIXIV_USER_NICKNAME_URL , SocialSiteId.PIXIV_USER_NICKNAME),
|
||||||
|
(PIXIV_SKETCH_USER_NICKNAME_URL, SocialSiteId.PIXIV_USER_NICKNAME),
|
||||||
|
|
||||||
# Patreon
|
# Patreon
|
||||||
(PATREON_URL, SocialSiteId.PATREON_PAGE),
|
(PATREON_URL, SocialSiteId.PATREON_PAGE),
|
||||||
|
@ -238,6 +266,12 @@ REGEXES = [
|
||||||
# Deviant art
|
# Deviant art
|
||||||
(URL_PARSE_DEVIANT_ART_ACCOUNT, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
(URL_PARSE_DEVIANT_ART_ACCOUNT, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
||||||
(URL_PARSE_DEVIANT_ART_ACCOUNT_2, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
(URL_PARSE_DEVIANT_ART_ACCOUNT_2, SocialSiteId.DEVIANT_ART_ACCOUNT),
|
||||||
|
|
||||||
|
# Danbooru
|
||||||
|
(URL_PARSE_DANBOORU_ARTIST, SocialSiteId.DANBOORU_ARTIST),
|
||||||
|
|
||||||
|
# Bandcamp
|
||||||
|
(URL_PARSE_BANDCAMP, SocialSiteId.BANDCAMP_PROFILE),
|
||||||
]
|
]
|
||||||
|
|
||||||
WELL_KNOWN_MASTODON_INSTANCES = frozenset({
|
WELL_KNOWN_MASTODON_INSTANCES = frozenset({
|
||||||
|
@ -260,12 +294,12 @@ WELL_KNOWN_MASTODON_INSTANCES = frozenset({
|
||||||
'fosstodon.org',
|
'fosstodon.org',
|
||||||
})
|
})
|
||||||
|
|
||||||
def determine_social_from_url_internally(url):
|
def determine_social_from_url_internally(url: str):
|
||||||
assert isinstance(url, str)
|
assert isinstance(url, str)
|
||||||
|
|
||||||
# Regexes
|
# Regexes
|
||||||
for (social_site_url_regex, social_site_id) in REGEXES:
|
for (social_site_url_regex, social_site_id) in REGEXES:
|
||||||
if m := re.match(social_site_url_regex, url, re.I):
|
if m := re.fullmatch(social_site_url_regex, url, re.I):
|
||||||
groups = m.groups()
|
groups = m.groups()
|
||||||
return (social_site_id, groups[0] if len(groups) > 0 else None)
|
return (social_site_id, groups[0] if len(groups) > 0 else None)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user