From f4d677ce671f60c596a048da92357ff12548fb90 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Thu, 3 Jul 2025 22:18:28 +0200 Subject: [PATCH] Improving header handling --- clients_protocol/__init__.py | 46 +++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/clients_protocol/__init__.py b/clients_protocol/__init__.py index 8b94f5d..049aeb9 100644 --- a/clients_protocol/__init__.py +++ b/clients_protocol/__init__.py @@ -1,5 +1,6 @@ """# Common HTTP/REST clients interface """ +import urllib.parse import abc import logging @@ -30,59 +31,60 @@ class AbstractClient(abc.ABC): def fetch_or_none( self, url: str, - params=None, **kwargs, ) -> requests.Response | None: - r = self._fetch(url, params, **kwargs) + r = self._fetch(url, **kwargs) if r.status_code == 404: return None return r - def fetch(self, url: str, params=None, **kwargs) -> requests.Response: - r = self._fetch(url, params, **kwargs) + def fetch(self, url: str, **kwargs) -> requests.Response: + r = self._fetch(url, **kwargs) + if r.status_code in {301,302,303}: + msg = f'Redirection: {r.request.method} {url} -> GET {r.headers["Location"]}' + raise Exception(msg) r.raise_for_status() return r - def _fetch(self, url: str, params=None, **kwargs) -> requests.Response: - method = 'GET' - if 'method' in kwargs: - method = kwargs['method'] - del kwargs['method'] - kwargs.setdefault('headers', {}).setdefault('Origin', url) # TODO? + def _fetch(self, url: str, **kwargs) -> requests.Response: + kwargs.setdefault('method', 'GET') + kwargs.setdefault('allow_redirects', True) + + url_parsed = urllib.parse.urlparse(url) + origin_url = url_parsed._replace(path='',params='',query='',fragment='').geturl() + + kwargs.setdefault('headers', {}).setdefault('Origin', origin_url) + kwargs.setdefault('headers', {}).setdefault('Alt-Used', url_parsed.hostname) return self.session.request( - method, - url, - params=params, - allow_redirects=True, + url=url, **kwargs, ) - def fetch_text(self, url: str, params=None, **kwargs) -> str: - return self.fetch(url, params, **kwargs).text + def fetch_text(self, url: str, **kwargs) -> str: + return self.fetch(url, **kwargs).text def fetch_lxml_soup( self, url: str, - params=None, **kwargs, ) -> None | bs4.BeautifulSoup: kwargs.setdefault('headers', {}).setdefault('Accept', 'text/html') - text = self.fetch_text(url, params, **kwargs) + text = self.fetch_text(url, **kwargs) if text is None: return None return lxml.html.document_fromstring(text) - def fetch_soup(self, url: str, params=None, **kwargs) -> None | bs4.BeautifulSoup: + def fetch_soup(self, url: str, **kwargs) -> None | bs4.BeautifulSoup: kwargs.setdefault('headers', {}).setdefault('Accept', 'text/html') - text = self.fetch_text(url, params, **kwargs) + text = self.fetch_text(url, **kwargs) if text is None: return None return bs4.BeautifulSoup(text, 'html.parser') - def fetch_json(self, url: str, params=None, **kwargs) -> None | dict[str, Any]: + def fetch_json(self, url: str, **kwargs) -> None | dict[str, Any]: kwargs.setdefault('headers', {}).setdefault('Accept', 'application/json') - response = self.fetch(url, params, **kwargs) + response = self.fetch(url=url, **kwargs) loaded_json = response.json() if API_ERROR_KEY in loaded_json: msg = f'Error from endpoint: {loaded_json[API_ERROR_KEY]}'