1
0

Compare commits

...

2 Commits

Author SHA1 Message Date
2f6dc95a7f
Ruff format
All checks were successful
Test Python / Test (push) Successful in 21s
2024-06-01 20:46:38 +02:00
2afb80fc1e
Avoid parsing twitter intent as username 2024-06-01 20:45:50 +02:00
4 changed files with 38 additions and 12 deletions

View File

@@ -15,6 +15,7 @@ PACKAGE_NAME = 'socials_util'
with open('README.md') as f: with open('README.md') as f:
readme = f.read() readme = f.read()
def parse_version_file(text: str) -> str: def parse_version_file(text: str) -> str:
match = re.match(r'^__version__\s*=\s*(["\'])([\d\.]+)\1$', text) match = re.match(r'^__version__\s*=\s*(["\'])([\d\.]+)\1$', text)
if match is None: if match is None:
@@ -22,9 +23,11 @@ def parse_version_file(text: str) -> str:
raise Exception(msg) raise Exception(msg)
return match.group(2) return match.group(2)
with open(PACKAGE_NAME + '/_version.py') as f: with open(PACKAGE_NAME + '/_version.py') as f:
version = parse_version_file(f.read()) version = parse_version_file(f.read())
def parse_requirements(text: str) -> list[str]: def parse_requirements(text: str) -> list[str]:
return text.strip().split('\n') return text.strip().split('\n')

View File

@@ -188,12 +188,17 @@ WIKIDATA_PROPERTIES: dict[SocialSiteId | int, WikidataInfo] = {
def re_social_subdomain(main_domain: str) -> str: def re_social_subdomain(main_domain: str) -> str:
return r'^(?:https?:\/\/)?([\w_-]+)\.' + re.escape(main_domain) + r'(\/.*)?$' return (
r'^(?:https?:\/\/)?(?:www\.)?([\w_-]+)\.' + re.escape(main_domain) + r'(\/.*)?$'
)
RE_ID = r'@?([^/]+)' RE_ID = r'@?([^\s/]+)'
RE_DUAL_ID = r'@?([^/]+/[^/]+)' RE_DUAL_ID = r'@?([^\s/]+/[^\s/]+)'
RE_ANY_SUBPATH = r'(|\/|\/.*)$' RE_ANY_SUBPATH = r'(|\/|\/\S*)$'
SPECIAL_REGEX_LITERALS = frozenset({RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH})
DOES_NOT_NEED_AUTO_SLASH = frozenset({RE_ANY_SUBPATH})
def re_social_path(main_domain: str) -> str: def re_social_path(main_domain: str) -> str:
@@ -212,12 +217,13 @@ def re_social_path_adv(main_domain: str, *path: str) -> str:
] ]
for p in path: for p in path:
if p != RE_ANY_SUBPATH: if p not in DOES_NOT_NEED_AUTO_SLASH:
regex_builder.append(r'\/') regex_builder.append(r'\/')
regex_builder.append( regex_builder.append(
p if p in {RE_ID, RE_DUAL_ID, RE_ANY_SUBPATH} else re.escape(p), p if p in SPECIAL_REGEX_LITERALS else re.escape(p),
) )
if path[-1] != RE_ANY_SUBPATH: del p
if path[-1] not in DOES_NOT_NEED_AUTO_SLASH:
regex_builder.append(r'\/?$') regex_builder.append(r'\/?$')
return ''.join(regex_builder) return ''.join(regex_builder)
@@ -260,7 +266,9 @@ PIXIV_SKETCH_USER_NICKNAME_URL = re_social_path_adv('sketch.pixiv.net', RE_ID)
URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co') URL_PARSE_CARRD_PAGE = re_social_subdomain('carrd.co')
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1 = re_social_path_adv( URL_PARSE_YOUTUBE_CHANNEL_HANDLE_1 = re_social_path_adv(
'youtube.com', RE_ID, RE_ANY_SUBPATH 'youtube.com',
RE_ID,
RE_ANY_SUBPATH,
) )
URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2 = re_social_path_adv('youtube.com', 'c', RE_ID) URL_PARSE_YOUTUBE_CHANNEL_HANDLE_2 = re_social_path_adv('youtube.com', 'c', RE_ID)
URL_PARSE_YOUTUBE_CHANNEL_ID = re_social_path_adv('youtube.com', 'channel', RE_ID) URL_PARSE_YOUTUBE_CHANNEL_ID = re_social_path_adv('youtube.com', 'channel', RE_ID)
@@ -413,7 +421,7 @@ WELL_KNOWN_MASTODON_INSTANCES: frozenset[str] = frozenset(
}, },
) )
DISALLOWED_IDENTIFIERS: frozenset[str] = frozenset({'www'}) DISALLOWED_IDENTIFIERS: frozenset[str] = frozenset({'www', 'intent', 'user'})
def determine_social_from_url_internally( def determine_social_from_url_internally(

View File

@@ -1,10 +1,11 @@
import pytest import pytest
from socials_util import * from socials_util import SocialLink, SocialSiteId, determine_social_from_url
PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [ PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [
# Tumblr formats # Tumblr formats
('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'), ('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
('https://www.triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'), ('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
('https://tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'), ('https://tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
( (
@@ -179,9 +180,18 @@ PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str]] = [
('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'), ('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'),
] ]
NOT_PARSABLE = [
# Twitter intents are not supported
'twitter.com/intent/user?user_id=123',
'https://twitter.com/intent/user?user_id=123',
'https://twitter.com/intent/user',
'https://twitter.com/intent',
]
@pytest.mark.parametrize( @pytest.mark.parametrize(
'url,expected_social_site_id,expected_social_id', PARSABLE_SOCIAL_IDS_COMBINED 'url,expected_social_site_id,expected_social_id',
PARSABLE_SOCIAL_IDS_COMBINED,
) )
def test_parse_social_ids(url, expected_social_site_id, expected_social_id): def test_parse_social_ids(url, expected_social_site_id, expected_social_id):
social_link: SocialLink | None = determine_social_from_url(url) social_link: SocialLink | None = determine_social_from_url(url)
@@ -190,3 +200,8 @@ def test_parse_social_ids(url, expected_social_site_id, expected_social_id):
expected_social_id, expected_social_id,
expected_social_site_id, expected_social_site_id,
), url ), url
@pytest.mark.parametrize('url', NOT_PARSABLE)
def test_not_parsable(url: str):
assert determine_social_from_url(url) is None