"""# Socials-util.

Python library for parsing and processing URLs of Social Media Sites.

Used by one-page-internet.
"""

import dataclasses
import re
import urllib.parse

import aenum
import enforce_typing

from socials_util._version import __version__

# Names exported by a star-import of this module.
# NOTE(review): 'AGGERAGOR_SOCIALS' matches the (misspelled) name of the set
# defined further down in this file; it has to stay in sync with that name.
__all__ = [
    '__version__',
    'SocialSiteId',
    'SocialLink',
    'WikidataInfo',
    'AGGERAGOR_SOCIALS',
    'determine_social_from_url',
]


class SocialSiteId(aenum.Enum):
    """The great social website enum.

    Each member names one kind of page/profile that a URL can be classified
    as. Members are used as keys of `WIKIDATA_PROPERTIES` and as the site tag
    in `URL_FORMATS` / `REGEXES`.
    """

    # Reddit-like
    REDDIT = 1  # Should have been named REDDIT_SUBREDDIT
    REDDIT_USER = 22
    # Same value as REDDIT, so this is a second name for that member.
    REDDIT_SUBREDDIT = REDDIT

    # Microblogging
    TWITTER = 2
    MASTODON_PAGE = 10
    INSTAGRAM_PAGE = 11
    BLUESKY_PROFILE = 12361
    BLUESKY_DID = 12409
    THREADS_USERNAME = 11892
    COHOST_PROFILE = 117203288
    PLURK = 32111

    # Blogs and feeds
    RSS_FEED = 3
    PATREON_PAGE = 12
    TUMBLR = 9
    SUBSTACK = 18
    MEDIUM_BLOG = 3899
    GOOGLE_BLOGGER_PAGE = 171186

    # Video
    TWITCH = 6

    # Artists portfolio
    ARTSTATION_PAGE = 13
    PIXIV_USER_ID = 5435
    PIXIV_USER_NICKNAME = 31
    INPRNT_PAGE = 14
    BEHANCE_PAGE = 21
    NEWGROUNDS_PAGE = 28
    ARTSY_ARTIST = 2042
    ARTNET_ARTIST = 3782
    DEVIANT_ART_ACCOUNT = 7737
    CARA_PROFILE = 35

    # Socials aggregators
    WIKIDATA = 7
    CARRD_PAGE = 24
    LINK_COLLECTION_PAGE = 29
    DANBOORU_ARTIST = 30

    # Misc
    PAGE_WATCH = 4
    LINKTREE_PAGE = 5
    SONGKICK_ARTIST = 8
    FACEBOOK_PAGE = 15
    EMAIL = 16
    JSON_LD = 17  # Similar to PAGE_WATCH, but focused on embedded microdata
    ETSY_SHOP = 19
    KO_FI = 20
    TIKTOK_USER = 7085
    YOUTUBE_CHANNEL_HANDLE = 26
    YOUTUBE_CHANNEL_ID = 2397
    VIMEO_CHANNEL = 27
    BANDCAMP_PROFILE = 3283
    ITCH_IO_DEVELOPER = 8176
    SOUNDCLOUD_ARTIST = 3040
    IGDB_GAME_ID = 5794
    STEAM_APPLICATION_ID = 1733
    GITHUB_REPOSITORY = 364
    LINKEDIN_PERSONAL_PROFILE = 6634

    # Browser bookmarks
    FIREFOX_PROFILE_BOOKMARKS = 33
    FALKON_PROFILE_BOOKMARKS = 34

    def wikidata_property(self, client):
        """Look this member up in `WIKIDATA_PROPERTIES` and pass it to `client.get`.

        Raises `KeyError` if the member has no entry in `WIKIDATA_PROPERTIES`.

        NOTE(review): the argument handed to `client.get` is the whole
        `WikidataInfo` record, not its `property_id`; confirm that this is
        what the client expects.
        """
        return client.get(WIKIDATA_PROPERTIES[self])

    def is_aggregator(self) -> bool:
        """Return whether this member is listed in `AGGERAGOR_SOCIALS`."""
        return self in AGGERAGOR_SOCIALS


# Members for which `SocialSiteId.is_aggregator()` returns True.
# NOTE(review): the name is a misspelling of "AGGREGATOR", but it is exported
# through `__all__`, so renaming it would break importers.
AGGERAGOR_SOCIALS = {
    SocialSiteId.LINKTREE_PAGE,
    SocialSiteId.WIKIDATA,
    SocialSiteId.CARRD_PAGE,
    SocialSiteId.LINK_COLLECTION_PAGE,
    SocialSiteId.DANBOORU_ARTIST,
    SocialSiteId.IGDB_GAME_ID,
}


@enforce_typing.enforce_types
@dataclasses.dataclass(frozen=True)
class SocialLink:
    """Result of classifying a URL, as returned by `determine_social_from_url`."""

    # Parsed URL; replaced by the canonical form from `to_url` when one exists.
    url: urllib.parse.ParseResult
    # Which kind of site/page the URL was recognised as.
    social_site_id: SocialSiteId
    # First regex capture group of the matching pattern, or None when the
    # match came from a pattern without groups or from a substring heuristic.
    social_id: str | None


@enforce_typing.enforce_types
@dataclasses.dataclass(frozen=True)
class WikidataInfo:
    """Wikidata identifiers recorded for one `SocialSiteId`.

    Instances are the values of the `WIKIDATA_PROPERTIES` table.
    """

    # Numeric property id, or None when none is recorded.
    # (presumably the number of a Wikidata "P…" property; TODO confirm)
    property_id: int | None
    # Numeric issuer id, or None when none is recorded.
    # (presumably the number of the Wikidata "Q…" item for the site; TODO confirm)
    issuer_id: int | None
    # Set on an id-based entry to point at its nickname-based counterpart
    # (e.g. PIXIV_USER_ID -> PIXIV_USER_NICKNAME in `WIKIDATA_PROPERTIES`).
    id_version_of: SocialSiteId | None = None
    # Set on a nickname-based entry to point at its id-based counterpart
    # (e.g. PIXIV_USER_NICKNAME -> PIXIV_USER_ID in `WIKIDATA_PROPERTIES`).
    nickname_version_of: SocialSiteId | None = None


# Wikidata identifiers for each site, looked up by
# `SocialSiteId.wikidata_property`. Every key must appear exactly once: a
# repeated key in a dict literal silently keeps only the last value.
WIKIDATA_PROPERTIES: dict[SocialSiteId | int, WikidataInfo] = {
    SocialSiteId.EMAIL: WikidataInfo(968, None),
    # This key used to be listed twice (1079 here, 1019 further down); 1019 was
    # the value that took effect at runtime, so it is the one kept.
    SocialSiteId.RSS_FEED: WikidataInfo(1019, None),
    SocialSiteId.FACEBOOK_PAGE: WikidataInfo(2013, None),
    SocialSiteId.INSTAGRAM_PAGE: WikidataInfo(2003, None),
    SocialSiteId.LINKTREE_PAGE: WikidataInfo(11079, None),
    SocialSiteId.REDDIT_SUBREDDIT: WikidataInfo(3984, None),
    SocialSiteId.REDDIT_USER: WikidataInfo(4265, None),
    SocialSiteId.SONGKICK_ARTIST: WikidataInfo(3478, None),
    SocialSiteId.TWITCH: WikidataInfo(5797, None),
    SocialSiteId.TWITTER: WikidataInfo(2002, None),
    SocialSiteId.WIKIDATA: WikidataInfo(None, 2013),
    SocialSiteId.TUMBLR: WikidataInfo(3943, None),
    SocialSiteId.TIKTOK_USER: WikidataInfo(7085, None),
    SocialSiteId.PIXIV_USER_ID: WikidataInfo(
        5435,
        306956,
        id_version_of=SocialSiteId.PIXIV_USER_NICKNAME,
    ),
    SocialSiteId.PIXIV_USER_NICKNAME: WikidataInfo(
        None,
        306956,
        nickname_version_of=SocialSiteId.PIXIV_USER_ID,
    ),
    SocialSiteId.MASTODON_PAGE: WikidataInfo(4033, None),
    SocialSiteId.PATREON_PAGE: WikidataInfo(4175, 15861362),
    SocialSiteId.ARTSTATION_PAGE: WikidataInfo(None, 65551500),
    SocialSiteId.CARRD_PAGE: WikidataInfo(None, 106036503),
    SocialSiteId.YOUTUBE_CHANNEL_HANDLE: WikidataInfo(
        11245,
        866,
        nickname_version_of=SocialSiteId.YOUTUBE_CHANNEL_ID,
    ),
    SocialSiteId.YOUTUBE_CHANNEL_ID: WikidataInfo(
        2397,
        866,
        id_version_of=SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
    ),
    SocialSiteId.VIMEO_CHANNEL: WikidataInfo(4015, 156376),
    SocialSiteId.NEWGROUNDS_PAGE: WikidataInfo(None, 263655),
    SocialSiteId.ARTSY_ARTIST: WikidataInfo(2042, 4796642),
    SocialSiteId.ARTNET_ARTIST: WikidataInfo(3782, 266566),
    SocialSiteId.DEVIANT_ART_ACCOUNT: WikidataInfo(7737, None),
    SocialSiteId.DANBOORU_ARTIST: WikidataInfo(None, 64514853),
    SocialSiteId.BANDCAMP_PROFILE: WikidataInfo(3283, 545966),
    SocialSiteId.BLUESKY_PROFILE: WikidataInfo(12361, 78194383),
    SocialSiteId.BLUESKY_DID: WikidataInfo(12409, 78194383),
    SocialSiteId.THREADS_USERNAME: WikidataInfo(11892, 120281745),
    SocialSiteId.ITCH_IO_DEVELOPER: WikidataInfo(8176, 22905933),
    SocialSiteId.COHOST_PROFILE: WikidataInfo(None, 117203288),
    SocialSiteId.SOUNDCLOUD_ARTIST: WikidataInfo(3040, None),
    SocialSiteId.IGDB_GAME_ID: WikidataInfo(5794, None),
    SocialSiteId.STEAM_APPLICATION_ID: WikidataInfo(1733, None),
    SocialSiteId.GITHUB_REPOSITORY: WikidataInfo(None, 364),
    SocialSiteId.LINKEDIN_PERSONAL_PROFILE: WikidataInfo(6634, None),
    SocialSiteId.MEDIUM_BLOG: WikidataInfo(3899, None),
    SocialSiteId.SUBSTACK: WikidataInfo(12007, None),
    SocialSiteId.INPRNT_PAGE: WikidataInfo(None, None),
    SocialSiteId.ETSY_SHOP: WikidataInfo(None, 1353939),
    SocialSiteId.KO_FI: WikidataInfo(None, 77949925),
    SocialSiteId.BEHANCE_PAGE: WikidataInfo(None, 4880667),
    SocialSiteId.PLURK: WikidataInfo(None, 32111),
    SocialSiteId.GOOGLE_BLOGGER_PAGE: WikidataInfo(None, 171186),
    SocialSiteId.CARA_PROFILE: WikidataInfo(None, None),
    # Weird internal
    SocialSiteId.LINK_COLLECTION_PAGE: WikidataInfo(None, None),
    SocialSiteId.PAGE_WATCH: WikidataInfo(None, None),
    SocialSiteId.JSON_LD: WikidataInfo(None, None),
    SocialSiteId.FIREFOX_PROFILE_BOOKMARKS: WikidataInfo(None, None),
    SocialSiteId.FALKON_PROFILE_BOOKMARKS: WikidataInfo(None, None),
}


# Regex fragments that `social_path_format_adv` accepts as path segments.
# Each one captures a single group.
#
# One path segment (no whitespace, no slash), with an optional leading '@'
# that is kept outside the captured group.
RE_ID = r'@?([^\s/]+)'
# Same, but the leading '@' is mandatory.
RE_ID_AT = r'@([^\s/]+)'
# Same, without any '@' prefix in the pattern.
RE_ID_NOAT = r'([^\s/]+)'
# Two slash-separated segments captured together as one group.
RE_DUAL_ID = r'@?([^\s/]+/[^\s/]+)'
# Nothing, a lone '/', or '/' followed by non-whitespace; anchored at the end.
RE_ANY_SUBPATH = r'(|\/|\/\S*)$'

# What each fragment contributes to the canonical URL template.
REGEX_LITERALS_TO_FORMATTER = {
    RE_ID: '{id}',
    RE_ID_AT: '@{id}',
    RE_ID_NOAT: '{id}',
    RE_DUAL_ID: '{id}',
    RE_ANY_SUBPATH: '',
}
# Segments inserted into a regex verbatim instead of being escaped: exactly
# the fragments that have a formatter entry above.
SPECIAL_REGEX_LITERALS = frozenset(REGEX_LITERALS_TO_FORMATTER)
# Fragments that already carry their own leading '/' and end anchor.
DOES_NOT_NEED_AUTO_SLASH = frozenset((RE_ANY_SUBPATH,))


@enforce_typing.enforce_types
@dataclasses.dataclass(frozen=True)
class SocialPathFormat:
    """A URL-matching regex paired with the template for the canonical URL."""

    # Pattern source; `determine_social_from_url_internally` applies it to
    # the whole URL with `re.fullmatch` and `re.IGNORECASE`.
    regex: str
    # `str.format` template; `to_url` fills it with `id=<social id>`.
    formatter: str


def social_path_format_adv(main_domain: str, *path: str) -> SocialPathFormat:
    """Build the matching regex and canonical URL template for a site.

    The regex accepts an optional ``http(s)://`` scheme and an optional
    ``www.``, ``m.`` or ``mobile.`` prefix in front of ``main_domain``.

    Args:
        main_domain: Host (optionally followed by a fixed path prefix) without
            a leading ``www.``.
        *path: Path segments in order. A segment that is one of
            ``SPECIAL_REGEX_LITERALS`` is inserted into the regex verbatim and
            replaced by its ``REGEX_LITERALS_TO_FORMATTER`` entry in the
            template; any other segment is matched literally. May be empty,
            in which case only the bare domain (with optional trailing slash)
            is matched.

    Returns:
        The combined ``SocialPathFormat``.

    Raises:
        ValueError: If ``main_domain`` starts with ``www.``.
    """
    if main_domain.startswith('www.'):
        msg = f'Redundant www: {main_domain}'
        raise ValueError(msg)
    regex_builder: list[str] = [
        r'^',
        r'(?:https?:\/\/)?',
        r'(?:www\.|m\.|mobile\.)?',
        re.escape(main_domain),
    ]
    formatter_builder = ['https://', main_domain]

    for segment in path:
        # Segments in DOES_NOT_NEED_AUTO_SLASH bring their own leading slash.
        if segment not in DOES_NOT_NEED_AUTO_SLASH:
            regex_builder.append(r'\/')
            formatter_builder.append('/')
        regex_builder.append(
            segment if segment in SPECIAL_REGEX_LITERALS else re.escape(segment),
        )
        formatter_builder.append(
            REGEX_LITERALS_TO_FORMATTER.get(segment, segment),
        )

    # Close the pattern with an optional trailing slash unless the last
    # segment already anchors the end. `not path` covers the domain-only
    # call, which previously failed with IndexError on `path[-1]`.
    if not path or path[-1] not in DOES_NOT_NEED_AUTO_SLASH:
        regex_builder.append(r'\/?$')
        formatter_builder.append('/')

    return SocialPathFormat(
        regex=''.join(regex_builder),
        formatter=''.join(formatter_builder),
    )


def social_path_format(main_domain: str) -> SocialPathFormat:
    """Return the format for ``<main_domain>/<id>`` style profile URLs.

    Shorthand for `social_path_format_adv` with `RE_ID` as the only path
    segment.
    """
    single_id_path = (RE_ID,)
    return social_path_format_adv(main_domain, *single_id_path)


def re_social_subdomain(main_domain: str) -> SocialPathFormat:
    """Return the format for ``<id>.<main_domain>`` style profile URLs.

    The regex captures the subdomain as the identifier and tolerates an
    optional scheme, an optional ``www.`` prefix and any trailing path. The
    canonical template is ``https://{id}.<main_domain>`` with no trailing
    slash.
    """
    pattern_parts = (
        r'^(?:https?:\/\/)?(?:www\.)?([\w_-]+)\.',
        re.escape(main_domain),
        r'(\/.*)?$',
    )
    url_template = f'https://{{id}}.{main_domain}'
    return SocialPathFormat(
        regex=''.join(pattern_parts),
        formatter=url_template,
    )


def re_social_path(main_domain: str) -> str:
    """Return only the regex for ``<main_domain>/<id>`` style profile URLs."""
    pattern = re_social_path_adv(main_domain, RE_ID)
    return pattern


def re_social_path_adv(main_domain: str, *path: str) -> str:
    """Return only the regex part of `social_path_format_adv(main_domain, *path)`."""
    path_format = social_path_format_adv(main_domain, *path)
    return path_format.regex


# Hand-written URL patterns, listed in `REGEXES` ahead of the generated ones.

# No capture group, so e-mail links are classified without an identifier.
# The character classes accept '-' and '+' (e.g. "first-last+tag@my-site.org"),
# which the previous `[\w._.]` class rejected.
MAILTO_URL = r'^mailto:(?:[\w.+-]+@[\w.-]+)$'

# Accepts the same optional "old." / "www." host prefixes as REDDIT_USER_URL,
# so that https://www.reddit.com/r/<name> is recognised as well.
REDDIT_SUBREDDIT_URL = (
    r'^(?:https?:\/\/)?(?:old\.|www\.)?reddit\.com\/r\/([\w-]+)\/?$'
)
REDDIT_USER_URL = (
    r'^(?:https?:\/\/)?(?:old\.|www\.)?reddit\.com\/user\/([\w-]+)(?:|\/submitted)\/?$'
)

# Group 1 is the numeric artist id; group 2 is whatever slug follows it.
SONGKICK_ARTIST_URL = (
    r'^(?:https?:\/\/)?(?:www\.)?songkick\.com\/artists\/(\d+)([\w-]*)\/?$'
)

PIXIV_USER_ID_URL = r'^(?:https?:\/\/)?(?:www\.)?pixiv\.net(?:\/en)?\/users/(\d+)\/?$'
# The "member.php?id=<n>" form keeps the id in the query string.
PIXIV_USER_ID_URL_2 = (
    r'^(?:https?:\/\/)?(?:www\.)?pixiv\.net(?:\/en)?\/member\.php\/?[?]id=(\d+)$'
)

# Ordered table of (site, format) pairs. The order is significant:
# - `to_url` uses the FIRST format listed for a site as its canonical URL;
# - `REGEXES` appends these patterns in this order, and
#   `determine_social_from_url_internally` returns on the first match.
URL_FORMATS: list[tuple[object, SocialPathFormat]] = [
    # Twitter
    (SocialSiteId.TWITTER, social_path_format_adv('x.com', RE_ID, RE_ANY_SUBPATH)),
    (
        SocialSiteId.TWITTER,
        social_path_format_adv('twitter.com', RE_ID, RE_ANY_SUBPATH),
    ),
    # Linktr.ee
    (SocialSiteId.LINKTREE_PAGE, social_path_format('linktr.ee')),
    # Twitch.tv
    (SocialSiteId.TWITCH, social_path_format('twitch.tv')),
    # Wikidata
    (SocialSiteId.WIKIDATA, social_path_format_adv('wikidata.org', 'wiki', RE_ID)),
    # Tumblr
    (SocialSiteId.TUMBLR, social_path_format('tumblr.com')),
    (SocialSiteId.TUMBLR, re_social_subdomain('tumblr.com')),
    (SocialSiteId.TUMBLR, social_path_format('tumblr.com/blog')),
    (SocialSiteId.TUMBLR, social_path_format('tumblr.com/blog/view')),
    # Instagram
    (SocialSiteId.INSTAGRAM_PAGE, social_path_format('instagram.com')),
    # Patreon
    (
        SocialSiteId.PATREON_PAGE,
        social_path_format_adv('patreon.com', RE_ID, RE_ANY_SUBPATH),
    ),
    # Artstation
    (
        SocialSiteId.ARTSTATION_PAGE,
        social_path_format_adv('artstation.com', RE_ID, RE_ANY_SUBPATH),
    ),
    (SocialSiteId.ARTSTATION_PAGE, re_social_subdomain('artstation.com')),
    # Inprnt
    (SocialSiteId.INPRNT_PAGE, social_path_format_adv('inprnt.com', 'gallery', RE_ID)),
    # Facebook
    (SocialSiteId.FACEBOOK_PAGE, social_path_format('facebook.com')),
    # Substack
    (SocialSiteId.SUBSTACK, re_social_subdomain('substack.com')),
    # Etsy shop
    (SocialSiteId.ETSY_SHOP, social_path_format_adv('etsy.com', 'shop', RE_ID)),
    # Behance
    (SocialSiteId.BEHANCE_PAGE, social_path_format('behance.net')),
    # Tiktok
    (SocialSiteId.TIKTOK_USER, social_path_format('tiktok.com')),
    # Pixiv
    (
        SocialSiteId.PIXIV_USER_NICKNAME,
        social_path_format_adv('pixiv.net', 'stacc', RE_ID),
    ),
    (SocialSiteId.PIXIV_USER_NICKNAME, re_social_subdomain('fanbox.cc')),
    (
        SocialSiteId.PIXIV_USER_NICKNAME,
        social_path_format_adv('sketch.pixiv.net', RE_ID),
    ),
    # Carrd
    (SocialSiteId.CARRD_PAGE, re_social_subdomain('carrd.co')),
    # Youtube
    (
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        social_path_format_adv('youtube.com', RE_ID_AT, RE_ANY_SUBPATH),
    ),
    (
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        social_path_format_adv('youtube.com', 'c', RE_ID),
    ),
    (
        SocialSiteId.YOUTUBE_CHANNEL_ID,
        social_path_format_adv('youtube.com', 'channel', RE_ID),
    ),
    (
        SocialSiteId.YOUTUBE_CHANNEL_HANDLE,
        social_path_format_adv('youtube.com', RE_ID_NOAT, RE_ANY_SUBPATH),
    ),
    # Vimeo
    (SocialSiteId.VIMEO_CHANNEL, social_path_format_adv('vimeo.com', RE_ID)),
    # Newgrounds
    (SocialSiteId.NEWGROUNDS_PAGE, re_social_subdomain('newgrounds.com')),
    # Artsy
    (
        SocialSiteId.ARTSY_ARTIST,
        social_path_format_adv('artsy.net', 'artist', RE_ID, RE_ANY_SUBPATH),
    ),
    # Artnet
    (
        SocialSiteId.ARTNET_ARTIST,
        social_path_format_adv('artnet.com', 'artists', RE_ID, RE_ANY_SUBPATH),
    ),
    # Deviant art
    (SocialSiteId.DEVIANT_ART_ACCOUNT, social_path_format_adv('deviantart.com', RE_ID)),
    (SocialSiteId.DEVIANT_ART_ACCOUNT, re_social_subdomain('deviantart.com')),
    # Danbooru
    (
        SocialSiteId.DANBOORU_ARTIST,
        social_path_format_adv('danbooru.donmai.us', 'artists', RE_ID),
    ),
    # Bandcamp
    (SocialSiteId.BANDCAMP_PROFILE, re_social_subdomain('bandcamp.com')),
    # Bluesky
    (
        SocialSiteId.BLUESKY_PROFILE,
        social_path_format_adv('bsky.app', 'profile', RE_ID),
    ),
    # Medium
    (
        SocialSiteId.MEDIUM_BLOG,
        social_path_format_adv('medium.com', RE_ID),
    ),
    (
        SocialSiteId.MEDIUM_BLOG,
        re_social_subdomain('medium.com'),
    ),
    # Ko-fi
    (
        SocialSiteId.KO_FI,
        social_path_format_adv('ko-fi.com', RE_ID),
    ),
    (
        SocialSiteId.KO_FI,
        social_path_format_adv('ko-fi.com', RE_ID, 'shop'),
    ),
    # Threads
    (
        SocialSiteId.THREADS_USERNAME,
        social_path_format_adv('threads.net', RE_ID),
    ),
    # Itch.io
    (
        SocialSiteId.ITCH_IO_DEVELOPER,
        re_social_subdomain('itch.io'),
    ),
    # Cohost
    (
        SocialSiteId.COHOST_PROFILE,
        social_path_format_adv('cohost.org', RE_ID),
    ),
    # Soundcloud
    (
        SocialSiteId.SOUNDCLOUD_ARTIST,
        social_path_format_adv('soundcloud.com', RE_ID),
    ),
    # IGDB
    (
        SocialSiteId.IGDB_GAME_ID,
        social_path_format_adv('igdb.com', 'games', RE_ID),
    ),
    # Steam game
    (
        SocialSiteId.STEAM_APPLICATION_ID,
        social_path_format_adv('store.steampowered.com', 'app', RE_ID, RE_ANY_SUBPATH),
    ),
    # Github
    (
        SocialSiteId.GITHUB_REPOSITORY,
        social_path_format_adv('github.com', RE_DUAL_ID, RE_ANY_SUBPATH),
    ),
    # Plurk
    (
        SocialSiteId.PLURK,
        social_path_format_adv('plurk.com', RE_ID),
    ),
    # Linked in
    (
        SocialSiteId.LINKEDIN_PERSONAL_PROFILE,
        social_path_format_adv('linkedin.com', 'in', RE_ID),
    ),
    # Google Blogger
    (
        SocialSiteId.GOOGLE_BLOGGER_PAGE,
        re_social_subdomain('blogspot.com'),
    ),
    # Cara
    (
        SocialSiteId.CARA_PROFILE,
        social_path_format_adv('cara.app', RE_ID, RE_ANY_SUBPATH),
    ),
]

# Every (pattern, site) pair tried by `determine_social_from_url_internally`,
# in priority order: hand-written patterns first, then one pattern per
# `URL_FORMATS` entry in table order.
REGEXES: list[tuple[str, object]] = [
    # Reddit
    (REDDIT_SUBREDDIT_URL, SocialSiteId.REDDIT_SUBREDDIT),
    (REDDIT_USER_URL, SocialSiteId.REDDIT_USER),
    # Songkick
    (SONGKICK_ARTIST_URL, SocialSiteId.SONGKICK_ARTIST),
    # Pixiv
    (PIXIV_USER_ID_URL, SocialSiteId.PIXIV_USER_ID),
    (PIXIV_USER_ID_URL_2, SocialSiteId.PIXIV_USER_ID),
    # Email
    (MAILTO_URL, SocialSiteId.EMAIL),
    # Generated from the format table
    *((path_format.regex, site) for (site, path_format) in URL_FORMATS),
]

# Hostnames treated as Mastodon servers even when the URL does not contain
# the word "mastodon".
# Includes all servers with 50 000+ users as of 6 july 2023,
# based on https://mastodonservers.net/servers/top
# ('pawoo.net' is left out of the set.)
WELL_KNOWN_MASTODON_INSTANCES: frozenset[str] = frozenset(
    (
        'baraag.net',
        'c.im',
        'fosstodon.org',
        'idlethumbs.social',
        'mas.to',
        'mastodon.cloud',
        'mastodon.lol',
        'mastodon.online',
        'mastodon.sdf.org',
        'mastodon.social',
        'mastodon.uno',
        'mastodon.world',
        'mastodonapp.uk',
        'mstdn.jp',
        'mstdn.social',
    ),
)

# Captured identifiers that are never accepted as a social id; a regex match
# producing one of these is skipped by `determine_social_from_url_internally`.
DISALLOWED_IDENTIFIERS: frozenset[str] = frozenset(('intent', 'user', 'www'))


def determine_social_from_url_internally(
    url: str,
) -> tuple[SocialSiteId | None, str | None]:
    """Classify a URL string as a social site and extract its identifier.

    The patterns in `REGEXES` are tried in order against the whole URL,
    ignoring case. The first match whose first capture group is not one of
    `DISALLOWED_IDENTIFIERS` wins. If no pattern applies, substring
    heuristics for Mastodon and for feeds are used; those never yield an
    identifier.

    Returns:
        ``(site, identifier)``; either element may be ``None``, and both are
        ``None`` when nothing matched.

    Raises:
        TypeError: If ``url`` is not a ``str``.
    """
    if not isinstance(url, str):
        msg = f'Url must be {str}'
        raise TypeError(msg)

    # Regexes
    for pattern, site in REGEXES:
        match = re.fullmatch(pattern, url, re.IGNORECASE)
        if match is None:
            continue
        captured = match.groups()
        identifier = captured[0] if captured else None
        # A disallowed identifier only rejects this pattern; later patterns
        # still get their chance.
        if identifier not in DISALLOWED_IDENTIFIERS:
            return (site, identifier)

    # Mastodon: a well-known instance over https, or "mastodon" anywhere.
    on_known_instance = any(
        url.startswith('https://' + hostname)
        for hostname in WELL_KNOWN_MASTODON_INSTANCES
    )
    if on_known_instance or 'mastodon' in url:
        return (SocialSiteId.MASTODON_PAGE, None)

    # Feed (?)
    if any(marker in url for marker in ('feed', 'xml', 'rss', 'atom')):
        return (SocialSiteId.RSS_FEED, None)

    return (None, None)


def to_parse_result(url: str | urllib.parse.ParseResult) -> urllib.parse.ParseResult:
    if isinstance(url, str):
        return urllib.parse.urlparse(url)
    if isinstance(url, urllib.parse.ParseResult):
        return url

    # Throw error
    msg = f'Expected {urllib.parse.ParseResult} or {str}'
    raise TypeError(msg)


def to_url(
    social_site_id: SocialSiteId,
    social_id: str,
) -> urllib.parse.ParseResult | None:
    """Build the canonical URL for ``social_id`` on ``social_site_id``.

    The first `URL_FORMATS` entry for the site supplies the template.
    Returns ``None`` when the site has no entry in that table.
    """
    formats_for_site = (
        path_format
        for (site, path_format) in URL_FORMATS
        if site == social_site_id
    )
    canonical = next(formats_for_site, None)
    if canonical is None:
        return None
    return to_parse_result(canonical.formatter.format(id=social_id))


def determine_social_from_url(
    url_not_normalized: str | urllib.parse.ParseResult,
) -> SocialLink | None:
    """Classify a URL and return it as a `SocialLink`, or ``None`` if unknown.

    The URL is first tried with both query and fragment removed, then with
    only the fragment removed. When an identifier was extracted and the site
    has a canonical format, the returned link carries the canonical URL;
    otherwise it carries the parsed input URL.

    Raises:
        TypeError: If the argument is neither a ``str`` nor a ``ParseResult``.
    """
    parsed = to_parse_result(url_not_normalized)

    # Most specific stripping first; the query is kept only as a fallback.
    candidates = (
        parsed._replace(query='', fragment=''),
        parsed._replace(fragment=''),
    )
    for candidate in candidates:
        (social_site_id, social_id) = determine_social_from_url_internally(
            candidate.geturl(),
        )
        if social_site_id:
            break
    else:
        return None

    # Normalize url if possible
    if social_id is not None:
        parsed = to_url(social_site_id, social_id) or parsed

    return SocialLink(parsed, social_site_id, social_id)