Improving header handling
This commit is contained in:
parent
7a7cda6071
commit
f4d677ce67
|
@@ -1,5 +1,6 @@
|
||||||
"""# Common HTTP/REST clients interface
|
"""# Common HTTP/REST clients interface
|
||||||
"""
|
"""
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
import logging
|
import logging
|
||||||
|
@@ -30,59 +31,60 @@ class AbstractClient(abc.ABC):
|
||||||
def fetch_or_none(
|
def fetch_or_none(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
params=None,
|
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> requests.Response | None:
|
) -> requests.Response | None:
|
||||||
r = self._fetch(url, params, **kwargs)
|
r = self._fetch(url, **kwargs)
|
||||||
if r.status_code == 404:
|
if r.status_code == 404:
|
||||||
return None
|
return None
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def fetch(self, url: str, params=None, **kwargs) -> requests.Response:
|
def fetch(self, url: str, **kwargs) -> requests.Response:
|
||||||
r = self._fetch(url, params, **kwargs)
|
r = self._fetch(url, **kwargs)
|
||||||
|
if r.status_code in {301,302,303}:
|
||||||
|
msg = f'Redirection: {r.request.method} {url} -> GET {r.headers["Location"]}'
|
||||||
|
raise Exception(msg)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r
|
return r
|
||||||
|
|
||||||
def _fetch(self, url: str, params=None, **kwargs) -> requests.Response:
|
def _fetch(self, url: str, **kwargs) -> requests.Response:
|
||||||
method = 'GET'
|
kwargs.setdefault('method', 'GET')
|
||||||
if 'method' in kwargs:
|
kwargs.setdefault('allow_redirects', True)
|
||||||
method = kwargs['method']
|
|
||||||
del kwargs['method']
|
url_parsed = urllib.parse.urlparse(url)
|
||||||
kwargs.setdefault('headers', {}).setdefault('Origin', url) # TODO?
|
origin_url = url_parsed._replace(path='',params='',query='',fragment='').geturl()
|
||||||
|
|
||||||
|
kwargs.setdefault('headers', {}).setdefault('Origin', origin_url)
|
||||||
|
kwargs.setdefault('headers', {}).setdefault('Alt-Used', url_parsed.hostname)
|
||||||
return self.session.request(
|
return self.session.request(
|
||||||
method,
|
url=url,
|
||||||
url,
|
|
||||||
params=params,
|
|
||||||
allow_redirects=True,
|
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def fetch_text(self, url: str, params=None, **kwargs) -> str:
|
def fetch_text(self, url: str, **kwargs) -> str:
|
||||||
return self.fetch(url, params, **kwargs).text
|
return self.fetch(url, **kwargs).text
|
||||||
|
|
||||||
def fetch_lxml_soup(
|
def fetch_lxml_soup(
|
||||||
self,
|
self,
|
||||||
url: str,
|
url: str,
|
||||||
params=None,
|
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> None | bs4.BeautifulSoup:
|
) -> None | bs4.BeautifulSoup:
|
||||||
kwargs.setdefault('headers', {}).setdefault('Accept', 'text/html')
|
kwargs.setdefault('headers', {}).setdefault('Accept', 'text/html')
|
||||||
text = self.fetch_text(url, params, **kwargs)
|
text = self.fetch_text(url, **kwargs)
|
||||||
if text is None:
|
if text is None:
|
||||||
return None
|
return None
|
||||||
return lxml.html.document_fromstring(text)
|
return lxml.html.document_fromstring(text)
|
||||||
|
|
||||||
def fetch_soup(self, url: str, params=None, **kwargs) -> None | bs4.BeautifulSoup:
|
def fetch_soup(self, url: str, **kwargs) -> None | bs4.BeautifulSoup:
|
||||||
kwargs.setdefault('headers', {}).setdefault('Accept', 'text/html')
|
kwargs.setdefault('headers', {}).setdefault('Accept', 'text/html')
|
||||||
text = self.fetch_text(url, params, **kwargs)
|
text = self.fetch_text(url, **kwargs)
|
||||||
if text is None:
|
if text is None:
|
||||||
return None
|
return None
|
||||||
return bs4.BeautifulSoup(text, 'html.parser')
|
return bs4.BeautifulSoup(text, 'html.parser')
|
||||||
|
|
||||||
def fetch_json(self, url: str, params=None, **kwargs) -> None | dict[str, Any]:
|
def fetch_json(self, url: str, **kwargs) -> None | dict[str, Any]:
|
||||||
kwargs.setdefault('headers', {}).setdefault('Accept', 'application/json')
|
kwargs.setdefault('headers', {}).setdefault('Accept', 'application/json')
|
||||||
|
|
||||||
response = self.fetch(url, params, **kwargs)
|
response = self.fetch(url=url, **kwargs)
|
||||||
loaded_json = response.json()
|
loaded_json = response.json()
|
||||||
if API_ERROR_KEY in loaded_json:
|
if API_ERROR_KEY in loaded_json:
|
||||||
msg = f'Error from endpoint: {loaded_json[API_ERROR_KEY]}'
|
msg = f'Error from endpoint: {loaded_json[API_ERROR_KEY]}'
|
||||||
|
|
Loading…
Reference in New Issue
Block a user