1
0
socials-util/test/test_parsing.py
Jon Michael Aanes d71db727d5
Some checks failed
Run Python tests (through Pytest) / Test (push) Waiting to run
Verify Python project can be installed, loaded and have version checked / Test (push) Waiting to run
Python Ruff Code Quality / ruff (push) Has been cancelled
Fixed parsing of YouTube urls
2024-11-21 19:39:38 +01:00

268 lines
8.5 KiB
Python

import urllib.parse
import pytest
import socials_util
from socials_util import SocialLink, SocialSiteId, determine_social_from_url
# Table of URLs that determine_social_from_url is expected to recognise.
# Each row is (url, expected social site id, expected social id); the last
# element is None for the rows where no social id is expected (the Mastodon
# and feed rows). Consumed by test_parse_social_ids via parametrize.
PARSABLE_SOCIAL_IDS_COMBINED: list[tuple[str, SocialSiteId, str | None]] = [
    # Tumblr formats: blog subdomain, and /<name>, /blog/<name>, /blog/view/<name> paths
    ('https://triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://www.triviallytrue.tumblr.com/', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    (
        'https://tumblr.com/blog/view/triviallytrue',
        SocialSiteId.TUMBLR,
        'triviallytrue',
    ),
    ('https://www.tumblr.com/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    ('https://www.tumblr.com/blog/triviallytrue', SocialSiteId.TUMBLR, 'triviallytrue'),
    (
        'https://www.tumblr.com/blog/view/triviallytrue',
        SocialSiteId.TUMBLR,
        'triviallytrue',
    ),
    ('http://worstdril.tumblr.com/', SocialSiteId.TUMBLR, 'worstdril'),
    ('https://deep-dark-fears.tumblr.com', SocialSiteId.TUMBLR, 'deep-dark-fears'),
    # Cohost formats: with and without a query string
    (
        'https://cohost.org/andrewelmore?page=0',
        SocialSiteId.COHOST_PROFILE,
        'andrewelmore',
    ),
    (
        'https://cohost.org/andrewelmore',
        SocialSiteId.COHOST_PROFILE,
        'andrewelmore',
    ),
    # Reddit formats: old./www. hosts, with and without sub-page or trailing slash
    (
        'https://old.reddit.com/user/Harpsibored/submitted/',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://old.reddit.com/user/Harpsibored/submitted',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://old.reddit.com/user/Harpsibored/',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://old.reddit.com/user/Harpsibored',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    (
        'https://www.reddit.com/user/Harpsibored',
        SocialSiteId.REDDIT_USER,
        'Harpsibored',
    ),
    # Ko-fi formats: with and without the /shop sub-page or trailing slash
    ('https://ko-fi.com/A627LI1/shop/', SocialSiteId.KO_FI, 'A627LI1'),
    ('https://ko-fi.com/A627LI1/shop', SocialSiteId.KO_FI, 'A627LI1'),
    ('https://ko-fi.com/A627LI1/', SocialSiteId.KO_FI, 'A627LI1'),
    ('https://ko-fi.com/A627LI1', SocialSiteId.KO_FI, 'A627LI1'),
    # Twitter formats: both twitter.com and x.com hosts map to TWITTER
    ('http://twitter.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://www.twitter.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://www.x.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://x.com/dril', SocialSiteId.TWITTER, 'dril'),
    ('http://twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    ('http://www.twitter.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    ('http://www.x.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    ('http://x.com/dril/media', SocialSiteId.TWITTER, 'dril'),
    # Wikidata formats: desktop and mobile (m.) hosts
    ('https://wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'),
    ('https://m.wikidata.org/wiki/Q594400', SocialSiteId.WIKIDATA, 'Q594400'),
    # YouTube formats: handles (bare, '@'-prefixed, /c/) and /channel/<id>
    (
        'https://youtube.com/WheelieYellow',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://youtube.com/@WheelieYellow',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://youtube.com/@WheelieYellow/',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/@WheelieYellow',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/@WheelieYellow/',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/c/WheelieYellow',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/@WheelieYellow/featured',
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        'WheelieYellow',
    ),
    (
        'https://www.youtube.com/channel/UCe8PM1vX4w_spGwYy7gQV4w',
        SocialSiteId.YOUTUBE_CHANNEL_ID,
        'UCe8PM1vX4w_spGwYy7gQV4w',
    ),
    # GitHub formats: the expected id is '<owner>/<repository>'
    ('https://github.com/love2d/love', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'),
    ('https://github.com/love2d/love/', SocialSiteId.GITHUB_REPOSITORY, 'love2d/love'),
    (
        'https://github.com/love2d/love/releases',
        SocialSiteId.GITHUB_REPOSITORY,
        'love2d/love',
    ),
    # ArtStation formats: user subdomain and /<name> path
    ('https://toraji.artstation.com', SocialSiteId.ARTSTATION_PAGE, 'toraji'),
    ('https://www.artstation.com/toraji', SocialSiteId.ARTSTATION_PAGE, 'toraji'),
    (
        'https://www.artstation.com/toraji/profile',
        SocialSiteId.ARTSTATION_PAGE,
        'toraji',
    ),
    # TikTok formats: the expected id drops the leading '@'
    (
        'https://tiktok.com/@depthsofwikipedia',
        SocialSiteId.TIKTOK_USER,
        'depthsofwikipedia',
    ),
    (
        'https://www.tiktok.com/@depthsofwikipedia',
        SocialSiteId.TIKTOK_USER,
        'depthsofwikipedia',
    ),
    (
        'https://www.tiktok.com/@depthsofwikipedia?lang=en',
        SocialSiteId.TIKTOK_USER,
        'depthsofwikipedia',
    ),
    # Instagram formats: names containing underscores and dots
    (
        'https://instagram.com/_richardparry_',
        SocialSiteId.INSTAGRAM_PAGE,
        '_richardparry_',
    ),
    ('https://instagram.com/j_kmor/', SocialSiteId.INSTAGRAM_PAGE, 'j_kmor'),
    (
        'https://instagram.com/cullensartbox/',
        SocialSiteId.INSTAGRAM_PAGE,
        'cullensartbox',
    ),
    (
        'https://www.instagram.com/timkongart/',
        SocialSiteId.INSTAGRAM_PAGE,
        'timkongart',
    ),
    ('https://www.instagram.com/kcn.wu/', SocialSiteId.INSTAGRAM_PAGE, 'kcn.wu'),
    (
        'https://www.instagram.com/itsbettyjiang',
        SocialSiteId.INSTAGRAM_PAGE,
        'itsbettyjiang',
    ),
    # Facebook formats
    (
        'https://www.facebook.com/fredagscafeen.dk/',
        SocialSiteId.FACEBOOK_PAGE,
        'fredagscafeen.dk',
    ),
    # Pixiv formats: /users/<id> path and member.php?id=<id> query
    ('https://www.pixiv.net/users/14866303', SocialSiteId.PIXIV_USER_ID, '14866303'),
    (
        'https://www.pixiv.net/member.php?id=109710',
        SocialSiteId.PIXIV_USER_ID,
        '109710',
    ),
    # Etsy formats
    (
        'https://www.etsy.com/shop/aleksiremesart',
        SocialSiteId.ETSY_SHOP,
        'aleksiremesart',
    ),
    # DeviantArt formats: /<name> path and user subdomain
    (
        'https://www.deviantart.com/solquiet',
        SocialSiteId.DEVIANT_ART_ACCOUNT,
        'solquiet',
    ),
    ('https://solquiet.deviantart.com/', SocialSiteId.DEVIANT_ART_ACCOUNT, 'solquiet'),
    # Cara formats
    ('https://cara.app/simzart', SocialSiteId.CARA_PROFILE, 'simzart'),
    ('https://cara.app/simzart/all', SocialSiteId.CARA_PROFILE, 'simzart'),
    # Mastodon formats: only the site id is expected, the social id is None
    ('https://idlethumbs.social/@testtest', SocialSiteId.MASTODON_PAGE, None),
    ('https://mastodon.example.org/testtest', SocialSiteId.MASTODON_PAGE, None),
    # Feeds: only the site id is expected, the social id is None
    ('https://example.org/main.atom', SocialSiteId.RSS_FEED, None),
    # Bluesky formats
    ('https://bsky.app/profile/bsky.app', SocialSiteId.BLUESKY_PROFILE, 'bsky.app'),
]
# URLs for which determine_social_from_url is expected to return None.
# Consumed by test_not_parsable via parametrize.
NOT_PARSABLE: list[str] = [
    # Twitter intents are not supported (the first entry has no URL scheme)
    'twitter.com/intent/user?user_id=123',
    'https://twitter.com/intent/user?user_id=123',
    'https://twitter.com/intent/user',
    'https://twitter.com/intent',
]
@pytest.mark.parametrize(
    ('url', 'expected_social_site_id', 'expected_social_id'),
    PARSABLE_SOCIAL_IDS_COMBINED,
)
def test_parse_social_ids(
    url: str,
    expected_social_site_id: SocialSiteId,
    expected_social_id: str | None,
) -> None:
    """Check that a known URL resolves to the expected site id and social id.

    Args:
        url: The URL handed to ``determine_social_from_url``.
        expected_social_site_id: Site id the resulting link must carry.
        expected_social_id: Social id the resulting link must carry; ``None``
            for table rows that expect no id (e.g. the Mastodon and feed rows).
    """
    social_link: SocialLink | None = determine_social_from_url(url)
    # The URL is used as the assertion message so a failure names the input.
    assert social_link is not None, url
    # Compare both fields at once so a failure shows id and site id together.
    assert (social_link.social_id, social_link.social_site_id) == (
        expected_social_id,
        expected_social_site_id,
    ), url
@pytest.mark.parametrize('url', NOT_PARSABLE)
def test_not_parsable(url: str) -> None:
    """Check that an unsupported URL does not produce a social link."""
    outcome = determine_social_from_url(url)
    assert outcome is None
def test_wrong_parse_type() -> None:
    """Check that ``to_parse_result`` raises ``TypeError`` when given ``None``."""
    with pytest.raises(TypeError):
        # No ``assert`` here: the call is expected to raise, so its return
        # value is never inspected.
        socials_util.to_parse_result(None)
def test_from_parse_result() -> None:
    """Check that ``to_parse_result`` returns an already-parsed URL as the same object."""
    raw_url = 'https://old.reddit.com/user/Harpsibored/submitted/'
    parsed = urllib.parse.urlparse(raw_url)
    # Identity, not equality: the very same object must come back.
    assert socials_util.to_parse_result(parsed) is parsed
def test_determine_social_from_url_internally() -> None:
    """Check that ``determine_social_from_url_internally`` raises ``TypeError`` for ``None``."""
    with pytest.raises(TypeError):
        # No ``assert`` here: the call is expected to raise, so its return
        # value is never inspected.
        socials_util.determine_social_from_url_internally(None)
def test_normalize_url() -> None:
    """Check that an ``http://twitter.com`` link comes back as ``https://x.com``."""
    social_link = determine_social_from_url('http://twitter.com/dril')
    assert social_link is not None
    # Both the scheme (http -> https) and the host (twitter.com -> x.com)
    # are expected to differ from the input.
    assert social_link.url.geturl() == 'https://x.com/dril'