93 lines
2.7 KiB
Python
93 lines
2.7 KiB
Python
"""# Common HTTP/REST clients interface
|
|
"""
|
|
import abc
import copy
import logging
import urllib.parse
from collections.abc import Sequence
from typing import Any

import bs4
import lxml.html
import requests
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Key whose presence in a decoded JSON payload makes `fetch_json` raise
# `ApiError`; the value stored under it is included in the error message.
API_ERROR_KEY = 'error'
|
|
|
|
|
|
class ApiError(RuntimeError):
    """Raised when an endpoint's JSON payload contains an error entry."""
|
|
|
|
|
|
class AbstractClient(abc.ABC):
    """Base class for HTTP/REST clients that share one `requests.Session`.

    Every `fetch*` helper ends up in `_fetch`, which sends the request through
    `self.session` after filling in default values for `method`,
    `allow_redirects` and the `Origin` / `Alt-Used` headers. All other keyword
    arguments are forwarded unchanged to `requests.Session.request`.

    Header mappings passed by callers are never modified: defaults are applied
    to a shallow copy, so one mapping can safely be reused across calls.
    """

    # Not read by any method of this class. Presumably the names of secrets a
    # concrete client requires -- TODO confirm against the subclasses.
    SECRETS: Sequence[str] = ()

    def __init__(self, session: requests.Session):
        """Remember the session that all requests are sent through.

        Args:
            session: The `requests.Session` used by every `fetch*` method.
        """
        assert isinstance(session, requests.Session)
        self.session = session

    @staticmethod
    def _apply_default_headers(
        kwargs: dict[str, Any],
        defaults: dict[str, Any],
    ) -> None:
        """Fill headers missing from `kwargs['headers']` using `defaults`.

        The caller-supplied mapping is shallow-copied first (keeping its type,
        e.g. a case-insensitive mapping), so it is never mutated. A missing or
        `None` `headers` entry is treated as an empty mapping.

        Args:
            kwargs: Request keyword arguments; `kwargs['headers']` is replaced
                by the completed copy.
            defaults: Header names and values applied only where the caller
                did not already provide that header.
        """
        supplied = kwargs.get('headers')
        headers = {} if supplied is None else copy.copy(supplied)
        for name, value in defaults.items():
            headers.setdefault(name, value)
        kwargs['headers'] = headers

    def fetch_or_none(
        self,
        url: str,
        **kwargs,
    ) -> requests.Response | None:
        """Request `url` and map a 404 answer to `None`.

        Any other response, whatever its status code, is returned as-is
        without further status checking.

        Args:
            url: Absolute URL to request.
            **kwargs: Forwarded to `_fetch`.

        Returns:
            The response, or `None` if the status code is 404.
        """
        r = self._fetch(url, **kwargs)
        if r.status_code == 404:
            return None
        return r

    def fetch(self, url: str, **kwargs) -> requests.Response:
        """Request `url` and fail on redirects or HTTP error statuses.

        Args:
            url: Absolute URL to request.
            **kwargs: Forwarded to `_fetch`.

        Returns:
            The response, once it has passed the status checks.

        Raises:
            Exception: If the final response is a 301, 302 or 303 redirect.
            requests.HTTPError: Raised by `raise_for_status()`.
        """
        r = self._fetch(url, **kwargs)
        # Redirects are followed by default, so a redirect status normally
        # only reaches this point when the caller passed
        # `allow_redirects=False`.
        if r.status_code in {301, 302, 303}:
            # `.get` keeps the intended error message even when the server
            # sent a redirect status without a Location header.
            location = r.headers.get('Location')
            msg = f'Redirection: {r.request.method} {url} -> GET {location}'
            raise Exception(msg)
        r.raise_for_status()
        return r

    def _fetch(self, url: str, **kwargs) -> requests.Response:
        """Send the request through the session with default settings applied.

        Defaults (each overridable via `kwargs`): `method='GET'`,
        `allow_redirects=True`, an `Origin` header holding the scheme and
        network location of `url`, and an `Alt-Used` header holding its
        hostname.

        Args:
            url: Absolute URL to request.
            **kwargs: Forwarded to `requests.Session.request`.

        Returns:
            The response, without any status checking.
        """
        kwargs.setdefault('method', 'GET')
        kwargs.setdefault('allow_redirects', True)

        url_parsed = urllib.parse.urlparse(url)
        # Blank everything after the network location to get "scheme://host".
        origin_url = url_parsed._replace(
            path='',
            params='',
            query='',
            fragment='',
        ).geturl()

        self._apply_default_headers(
            kwargs,
            {'Origin': origin_url, 'Alt-Used': url_parsed.hostname},
        )
        return self.session.request(
            url=url,
            **kwargs,
        )

    def fetch_text(self, url: str, **kwargs) -> str:
        """Return the decoded body of a successful request to `url`.

        Args:
            url: Absolute URL to request.
            **kwargs: Forwarded to `fetch`.
        """
        return self.fetch(url, **kwargs).text

    def fetch_lxml_soup(
        self,
        url: str,
        **kwargs,
    ) -> None | lxml.html.HtmlElement:
        """Fetch `url` and parse the body as an HTML document with lxml.

        Despite the name, the result comes from
        `lxml.html.document_fromstring`, not from BeautifulSoup. An
        `Accept: text/html` header is sent unless the caller provides one.

        Args:
            url: Absolute URL to request.
            **kwargs: Forwarded to `fetch_text`.

        Returns:
            The parsed document, or `None` if `fetch_text` returned `None`.
        """
        self._apply_default_headers(kwargs, {'Accept': 'text/html'})
        text = self.fetch_text(url, **kwargs)
        # `fetch_text` as defined on this class always returns a `str`; this
        # branch only matters for a subclass overriding it.
        # NOTE(review): confirm whether any subclass does.
        if text is None:
            return None
        return lxml.html.document_fromstring(text)

    def fetch_soup(self, url: str, **kwargs) -> None | bs4.BeautifulSoup:
        """Fetch `url` and parse the body with BeautifulSoup's `html.parser`.

        An `Accept: text/html` header is sent unless the caller provides one.

        Args:
            url: Absolute URL to request.
            **kwargs: Forwarded to `fetch_text`.

        Returns:
            The parsed soup, or `None` if `fetch_text` returned `None`.
        """
        self._apply_default_headers(kwargs, {'Accept': 'text/html'})
        text = self.fetch_text(url, **kwargs)
        # `fetch_text` as defined on this class always returns a `str`; this
        # branch only matters for a subclass overriding it.
        # NOTE(review): confirm whether any subclass does.
        if text is None:
            return None
        return bs4.BeautifulSoup(text, 'html.parser')

    def fetch_json(self, url: str, **kwargs) -> None | dict[str, Any]:
        """Fetch `url` and decode the body as JSON.

        An `Accept: application/json` header is sent unless the caller
        provides one.

        Args:
            url: Absolute URL to request.
            **kwargs: Forwarded to `fetch`.

        Returns:
            The decoded JSON payload.

        Raises:
            ApiError: If the payload is a JSON object containing
                `API_ERROR_KEY`.
        """
        self._apply_default_headers(kwargs, {'Accept': 'application/json'})

        response = self.fetch(url=url, **kwargs)
        loaded_json = response.json()
        # Only JSON objects can carry the error key. Without the type check a
        # `null`, numeric, string or list payload raises TypeError here
        # instead of being returned.
        if isinstance(loaded_json, dict) and API_ERROR_KEY in loaded_json:
            msg = f'Error from endpoint: {loaded_json[API_ERROR_KEY]}'
            raise ApiError(msg)
        return loaded_json
|