Avoid parsing twitter intent as username
This commit is contained in:
parent
9dd51ac012
commit
2afb80fc1e
|
@ -188,12 +188,15 @@ WIKIDATA_PROPERTIES: dict[SocialSiteId | int, WikidataInfo] = {
|
||||||
|
|
||||||
|
|
||||||
def re_social_subdomain(main_domain: str) -> str:
|
def re_social_subdomain(main_domain: str) -> str:
|
||||||
return r'^(?:https?:\/\/)?([\w_-]+)\.' + re.escape(main_domain) + r'(\/.*)?$'
|
return r'^(?:https?:\/\/)?(?:www\.)?([\w_-]+)\.' + re.escape(main_domain) + r'(\/.*)?$'
|
||||||
|
|
||||||
|
|
||||||
RE_ID = r'@?([^/]+)'
|
RE_ID = r'@?([^\s/]+)'
|
||||||
RE_DUAL_ID = r'@?([^/]+/[^/]+)'
|
RE_DUAL_ID = r'@?([^\s/]+/[^\s/]+)'
|
||||||
RE_ANY_SUBPATH = r'(|\/|\/.*)$'
|
RE_ANY_SUBPATH = r'(|\/|\/\S*)$'
|
||||||
|
|
||||||
|
SPECIAL_REGEX_LITERALS = frozenset({RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH})
|
||||||
|
DOES_NOT_NEED_AUTO_SLASH = frozenset({RE_ANY_SUBPATH})
|
||||||
|
|
||||||
|
|
||||||
def re_social_path(main_domain: str) -> str:
|
def re_social_path(main_domain: str) -> str:
|
||||||
|
@ -212,12 +215,13 @@ def re_social_path_adv(main_domain: str, *path: str) -> str:
|
||||||
]
|
]
|
||||||
|
|
||||||
for p in path:
|
for p in path:
|
||||||
if p != RE_ANY_SUBPATH:
|
if p not in DOES_NOT_NEED_AUTO_SLASH:
|
||||||
regex_builder.append(r'\/')
|
regex_builder.append(r'\/')
|
||||||
regex_builder.append(
|
regex_builder.append(
|
||||||
p if p in {RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH} else re.escape(p),
|
p if p in SPECIAL_REGEX_LITERALS else re.escape(p),
|
||||||
)
|
)
|
||||||
if path[-1] != RE_ANY_SUBPATH:
|
del p
|
||||||
|
if path[-1] not in DOES_NOT_NEED_AUTO_SLASH:
|
||||||
regex_builder.append(r'\/?$')
|
regex_builder.append(r'\/?$')
|
||||||
return ''.join(regex_builder)
|
return ''.join(regex_builder)
|
||||||
|
|
||||||
|
@ -413,8 +417,7 @@ WELL_KNOWN_MASTODON_INSTANCES: frozenset[str] = frozenset(
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
DISALLOWED_IDENTIFIERS: frozenset[str] = frozenset({'www'})
|
DISALLOWED_IDENTIFIERS: frozenset[str] = frozenset({'www', 'intent', 'user'})
|
||||||
|
|
||||||
|
|
||||||
def determine_social_from_url_internally(
|
def determine_social_from_url_internally(
|
||||||
url: str,
|
url: str,
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from socials_util import *
|
from socials_util import determine_social_from_url, SocialSiteId, SocialLink
|
||||||
|
|
||||||
PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [
|
PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [
|
||||||
# Tumblr formats
|
# Tumblr formats
|
||||||
('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
|
('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
|
('https://www.triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
('https://tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
('https://tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
|
||||||
(
|
(
|
||||||
|
@ -179,6 +180,13 @@ PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [
|
||||||
('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'),
|
('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
NOT_PARSABLE = [
|
||||||
|
# Twitter intents are not supported
|
||||||
|
'twitter.com/intent/user?user_id=123',
|
||||||
|
'https://twitter.com/intent/user?user_id=123',
|
||||||
|
'https://twitter.com/intent/user',
|
||||||
|
'https://twitter.com/intent',
|
||||||
|
]
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
'url,expected_social_site_id,expected_social_id', PARSABLE_SOCIAL_IDS_COMBINED
|
'url,expected_social_site_id,expected_social_id', PARSABLE_SOCIAL_IDS_COMBINED
|
||||||
|
@ -190,3 +198,7 @@ def test_parse_social_ids(url, expected_social_site_id, expected_social_id):
|
||||||
expected_social_id,
|
expected_social_id,
|
||||||
expected_social_site_id,
|
expected_social_site_id,
|
||||||
), url
|
), url
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('url', NOT_PARSABLE)
|
||||||
|
def test_not_parsable(url: str):
|
||||||
|
assert determine_social_from_url(url) is None
|
||||||
|
|
Loading…
Reference in New Issue
Block a user