1
0
socials-util/test/test_parsing.py

253 lines
8.0 KiB
Python
Raw Permalink Normal View History

2024-11-01 20:43:46 +00:00
import urllib.parse
2024-03-31 22:50:52 +00:00
import pytest
2023-12-16 22:09:27 +00:00
2024-11-01 20:42:16 +00:00
import socials_util
2024-06-01 18:46:38 +00:00
from socials_util import SocialLink, SocialSiteId, determine_social_from_url
2024-03-31 22:37:15 +00:00
2024-11-01 20:42:16 +00:00
# Table of URLs that must be recognised by ``determine_social_from_url``.
# Each entry is ``(url, expected social site id, expected social id)``; the
# expected social id is ``None`` for entries where the parser is expected to
# report no id (see the Mastodon and feed entries below).
PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, object, str | None]] = [
    # Tumblr formats
    ('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://www.triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    (
        'https://tumblr.com/blog/view/triviallytrue',
        SocialSiteId.TUMBLR,
        'triviallytrue',
    ),
    ('https://www.tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://www.tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    (
        'https://www.tumblr.com/blog/view/triviallytrue',
        SocialSiteId.TUMBLR,
        'triviallytrue',
    ),
    ('http://worstdril.tumblr.com/', SocialSiteId.TUMBLR, 'worstdril'),
    ('https://deep-dark-fears.tumblr.com', SocialSiteId.TUMBLR, 'deep-dark-fears'),
    # Cohost formats
    (
        'https://cohost.org/andrewelmore?page=0',
        SocialSiteId.COHOST_PROFILE,
        'andrewelmore',
    ),
    (
        'https://cohost.org/andrewelmore',
        SocialSiteId.COHOST_PROFILE,
        'andrewelmore',
    ),
    # Reddit formats
    (
        'https://old.reddit.com/user/Harpsibored/submitted/',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://old.reddit.com/user/Harpsibored/submitted',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://old.reddit.com/user/Harpsibored/',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://old.reddit.com/user/Harpsibored',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://www.reddit.com/user/Harpsibored',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    # Ko-fi formats
    ('https://ko-fi.com/A627LI1/shop/', SocialSiteId.KO_FI, 'A627LI1'),
    ('https://ko-fi.com/A627LI1/shop', SocialSiteId.KO_FI, 'A627LI1'),
    ('https://ko-fi.com/A627LI1/', SocialSiteId.KO_FI, 'A627LI1'),
    ('https://ko-fi.com/A627LI1', SocialSiteId.KO_FI, 'A627LI1'),
    # Twitter formats
    ('http://twitter.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://www.twitter.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://www.x.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://x.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    ('http://www.twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    ('http://www.x.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    ('http://x.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    # Wikidata formats
    ('https://wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'),
    ('https://m.wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'),
    # YouTube formats
    (
        'https://youtube.com/@WheelieYellow',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://youtube.com/@WheelieYellow/',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/@WheelieYellow',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/@WheelieYellow/',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/@WheelieYellow/featured',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    # GitHub
    ('https://github.com/love2d/love', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'),
    ('https://github.com/love2d/love/', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'),
    (
        'https://github.com/love2d/love/releases',
        SocialSiteId.GITHUB_REPOSITORY,
        'love2d/love',
    ),
    # ArtStation
    ('https://toraji.artstation.com', SocialSiteId.ARTSTATION_PAGE, 'toraji'),
    ('https://www.artstation.com/toraji', SocialSiteId.ARTSTATION_PAGE, 'toraji'),
    (
        'https://www.artstation.com/toraji/profile',
        SocialSiteId.ARTSTATION_PAGE,
        'toraji',
    ),
    # TikTok
    (
        'https://tiktok.com/@depthsofwikipedia',
        SocialSiteId.TIKTOK_USER,
        'depthsofwikipedia',
    ),
    (
        'https://www.tiktok.com/@depthsofwikipedia',
        SocialSiteId.TIKTOK_USER,
        'depthsofwikipedia',
    ),
    (
        'https://www.tiktok.com/@depthsofwikipedia?lang=en',
        SocialSiteId.TIKTOK_USER,
        'depthsofwikipedia',
    ),
    # Instagram
    (
        'https://instagram.com/_richardparry_',
        SocialSiteId.INSTAGRAM_PAGE,
        '_richardparry_',
    ),
    ('https://instagram.com/j_kmor/', SocialSiteId.INSTAGRAM_PAGE, 'j_kmor'),
    (
        'https://instagram.com/cullensartbox/',
        SocialSiteId.INSTAGRAM_PAGE,
        'cullensartbox',
    ),
    (
        'https://www.instagram.com/timkongart/',
        SocialSiteId.INSTAGRAM_PAGE,
        'timkongart',
    ),
    ('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE, 'kcn.wu'),
    (
        'https://www.instagram.com/itsbettyjiang',
        SocialSiteId.INSTAGRAM_PAGE,
        'itsbettyjiang',
    ),
    # Facebook
    (
        'https://www.facebook.com/fredagscafeen.dk/',
        SocialSiteId.FACEBOOK_PAGE,
        'fredagscafeen.dk',
    ),
    # Pixiv
    ('https://www.pixiv.net/users/14866303', SocialSiteId.PIXIV_USER_ID, '14866303'),
    (
        'https://www.pixiv.net/member.php?id=109710',
        SocialSiteId.PIXIV_USER_ID,
        '109710',
    ),
    # Etsy
    (
        'https://www.etsy.com/shop/aleksiremesart',
        SocialSiteId.ETSY_SHOP,
        'aleksiremesart',
    ),
    # DeviantArt
    (
        'https://www.deviantart.com/solquiet',
        SocialSiteId.DEVIANT_ART_ACCOUNT,
        'solquiet',
    ),
    ('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'),
    # Cara
    ('https://cara.app/simzart', SocialSiteId.CARA_PROFILE, 'simzart'),
    ('https://cara.app/simzart/all', SocialSiteId.CARA_PROFILE, 'simzart'),
    # Mastodon
    ('https://idlethumbs.social/@testtest', SocialSiteId.MASTODON_PAGE, None),
    ('https://mastodon.example.org/testtest', SocialSiteId.MASTODON_PAGE, None),
    # Feeds
    ('https://example.org/main.atom', SocialSiteId.RSS_FEED, None),
    # Bluesky
    ('https://bsky.app/profile/bsky.app', SocialSiteId.BLUESKY_PROFILE, 'bsky.app'),
]
# URLs for which ``determine_social_from_url`` is expected to return ``None``.
NOT_PARSABLE: list[str] = [
    # Twitter intents are not supported
    'twitter.com/intent/user?user_id=123',
    'https://twitter.com/intent/user?user_id=123',
    'https://twitter.com/intent/user',
    'https://twitter.com/intent',
]
2024-03-31 22:50:52 +00:00
2024-06-01 18:46:38 +00:00
@pytest.mark.parametrize(
    ('url', 'expected_social_site_id', 'expected_social_id'),
    PARSABLE_SOCIAL_IDS_COMBINED,
)
def test_parse_social_ids(
    url: str,
    expected_social_site_id: SocialSiteId,
    expected_social_id: str | None,
) -> None:
    """Check that each known URL parses to the expected site and id.

    Args:
        url: The URL handed to ``determine_social_from_url``.
        expected_social_site_id: Site id the resulting link must carry.
        expected_social_id: Social id the resulting link must carry; some
            table entries expect ``None`` here.
    """
    social_link: SocialLink | None = determine_social_from_url(url)
    # The URL is passed as the assertion message so a failure names its input.
    assert social_link is not None, url
    assert (social_link.social_id, social_link.social_site_id) == (
        expected_social_id,
        expected_social_site_id,
    ), url
2024-06-01 18:46:38 +00:00
@pytest.mark.parametrize('url', NOT_PARSABLE)
def test_not_parsable(url: str) -> None:
    """Check that unsupported URLs yield ``None`` from the parser."""
    assert determine_social_from_url(url) is None
2024-11-01 20:42:16 +00:00
2024-11-01 20:43:46 +00:00
2024-11-01 20:42:16 +00:00
def test_wrong_parse_type() -> None:
    """Check that ``to_parse_result`` raises ``TypeError`` when given ``None``."""
    with pytest.raises(TypeError):
        # No assert needed: the call itself is expected to raise, and
        # pytest.raises fails the test if it does not.
        socials_util.to_parse_result(None)
2024-11-01 20:43:46 +00:00
2024-11-01 20:42:16 +00:00
def test_from_parse_result() -> None:
    """Check that an already-parsed URL is handed back as the same object."""
    urlresult = urllib.parse.urlparse(
        'https://old.reddit.com/user/Harpsibored/submitted/',
    )
    # Identity, not equality: the very same object must come back.
    assert socials_util.to_parse_result(urlresult) is urlresult
2024-11-01 20:43:46 +00:00
2024-11-01 20:42:16 +00:00
def test_determine_social_from_url_internally() -> None:
    """Check that the internal entry point raises ``TypeError`` for ``None``."""
    with pytest.raises(TypeError):
        # No assert needed: the call itself is expected to raise, and
        # pytest.raises fails the test if it does not.
        socials_util.determine_social_from_url_internally(None)
2024-11-09 16:53:48 +00:00
2024-11-09 16:54:15 +00:00
2024-11-09 16:53:48 +00:00
def test_normalize_url() -> None:
    """Check that a parsed Twitter link exposes the ``https://x.com`` URL."""
    social_link = determine_social_from_url('http://twitter.com/dril')
    assert social_link is not None
    assert social_link.url.geturl() == 'https://x.com/dril'