api_provider.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. import asyncio
  2. import re
  3. from http import HTTPStatus
  4. from io import BytesIO
  5. from typing import Any
  6. from typing import Awaitable
  7. from typing import Callable
  8. from typing import Dict
  9. from typing import Optional
  10. from urllib.parse import quote
  11. from urllib.parse import urlencode
  12. from urllib.parse import urljoin
  13. import aiohttp
  14. from aiohttp import ClientResponse
  15. from aiohttp import ClientSession
  16. from pydantic import AnyHttpUrl
  17. from pydantic import field_validator
  18. from clean_python import Conflict
  19. from clean_python import Json
  20. from clean_python import ValueObject
  21. from .exceptions import ApiException
  22. from .response import Response
  23. __all__ = ["ApiProvider", "FileFormPost"]
  # Response status codes for which a request is attempted again (like in urllib3):
  # 413, 429 and 503.
  RETRY_STATUSES = frozenset((413, 429, 503))
  def is_success(status: HTTPStatus) -> bool:
      """Tell whether ``status`` lies in the 2xx range."""
      return 200 <= int(status) <= 299
  def check_exception(status: HTTPStatus, body: Json) -> None:
      """Raise the exception that corresponds to an unsuccessful response.

      Args:
          status: The HTTP status of the response.
          body: The decoded JSON body of the response.

      Raises:
          Conflict: on a 409 status; the message is the body's "message" entry,
              or the whole body rendered as text when that entry is absent.
          ApiException: on any other status outside the 2xx range.
      """
      if status == HTTPStatus.CONFLICT:
          message = body.get("message", str(body))
          raise Conflict(message)
      if is_success(status):
          return
      raise ApiException(body, status=status)
  # Matches "application/json" as well as structured-syntax variants such as
  # "application/problem+json", optionally followed by parameters
  # ("; charset=utf-8"). Media types are case-insensitive, hence IGNORECASE.
  JSON_CONTENT_TYPE_REGEX = re.compile(
      r"^application\/[^+]*[+]?(json);?.*$", re.IGNORECASE
  )


  def is_json_content_type(content_type: Optional[str]) -> bool:
      """Tell whether a Content-Type header value denotes a JSON payload.

      Args:
          content_type: The raw header value; may be None or empty when the
              header is missing.

      Returns:
          True if the value matches ``JSON_CONTENT_TYPE_REGEX`` (compared
          case-insensitively), False otherwise or when no value is given.
      """
      if not content_type:
          return False
      return JSON_CONTENT_TYPE_REGEX.match(content_type) is not None
  def join(url: str, path: str, trailing_slash: bool = False) -> str:
      """Join ``path`` onto ``url`` and normalise the trailing slash.

      Args:
          url: The base url; must end with a slash.
          path: The relative path; must not start with a slash.
          trailing_slash: When True the result ends with a slash (one is appended
              if missing); when False a single trailing slash is removed.

      Returns:
          The joined url.
      """
      assert url.endswith("/")
      assert not path.startswith("/")
      joined = urljoin(url, path)
      ends_with_slash = joined.endswith("/")
      if trailing_slash:
          return joined if ends_with_slash else joined + "/"
      return joined[:-1] if ends_with_slash else joined
  def add_query_params(url: str, params: Optional[Json]) -> str:
      """Append ``params`` to ``url`` as a query string.

      Sequence values are expanded into repeated keys (``doseq=True``).

      Args:
          url: The url to extend; it is expected not to contain a query string yet.
          params: The query parameters, or None.

      Returns:
          ``url`` unchanged when ``params`` is None or empty (so no dangling "?"
          is produced), otherwise ``url`` followed by "?" and the encoded params.
      """
      if not params:
          return url
      return url + "?" + urlencode(params, doseq=True)
  class FileFormPost(ValueObject):
      """Value object bundling a file with its name, form field name and content type."""

      file_name: str
      file: Any  # bytes or a readable object; proper BinaryIO / BytesIO typing is hard
      field_name: str = "file"
      content_type: str = "application/octet-stream"

      @field_validator("file")
      @classmethod
      def validate_file(cls, v):
          """Wrap raw ``bytes`` in a ``BytesIO``; anything else must have a ``read`` attribute."""
          if not isinstance(v, bytes):
              # duck-typed stand-in for a real BinaryIO check
              assert hasattr(v, "read")
              return v
          return BytesIO(v)
  class ApiProvider:
      """Basic JSON API provider with retry policy and bearer tokens.

      Every request is attempted at most ``retries`` times. Before each repeated
      attempt the provider sleeps ``backoff_factor * 2 ** (n - 1)`` seconds, where
      ``n`` is the number of attempts already made. With the defaults this means
      up to 3 attempts, separated by pauses of 1 and 2 seconds. An attempt is
      repeated when it fails with an ``aiohttp.ClientError`` or a timeout, or when
      the response status is one of ``RETRY_STATUSES``.

      Args:
          url: The url of the API (a trailing slash is appended when missing)
          headers_factory: Coroutine that returns headers (for e.g. authorization)
          retries: Maximum number of attempts per request (must be at least 1)
          backoff_factor: Multiplier for retry delay times (1, 2, 4, ...)
          trailing_slash: Whether to automatically add or remove trailing slashes.

      Raises:
          ValueError: If ``retries`` is smaller than 1.
      """

      def __init__(
          self,
          url: AnyHttpUrl,
          headers_factory: Optional[Callable[[], Awaitable[Dict[str, str]]]] = None,
          retries: int = 3,
          backoff_factor: float = 1.0,
          trailing_slash: bool = False,
      ):
          # Validate explicitly instead of with ``assert``: assertions are removed
          # under ``python -O`` and a non-positive value would make the retry loop
          # finish without ever producing a response.
          if retries <= 0:
              raise ValueError("retries must be a positive integer")
          self._url = str(url)
          if not self._url.endswith("/"):
              self._url += "/"
          self._headers_factory = headers_factory
          self._retries = retries
          self._backoff_factor = backoff_factor
          self._trailing_slash = trailing_slash

      @property
      def _session(self) -> ClientSession:
          # There seems to be an issue if the ClientSession is instantiated before
          # the event loop runs. So we do that delayed in a property. Use this property
          # in a context manager.
          # TODO It is more efficient to reuse the connection / connection pools. One idea
          # is to expose .session as a context manager (like with the SQLProvider.transaction)
          return ClientSession()

      async def _request_with_retry(
          self,
          method: str,
          path: str,
          params: Optional[Json],
          json: Optional[Json],
          fields: Optional[Json],
          file: Optional[FileFormPost],
          headers: Optional[Dict[str, str]],
          timeout: float,
      ) -> ClientResponse:
          """Send a request, repeating it according to the retry policy.

          Args:
              method: The HTTP method.
              path: Path relative to the API url; it is percent-quoted before joining.
              params: Query parameters, or None.
              json: Value passed to the session as ``json``.
              fields: Value passed to the session as ``data``.
              file: Not supported yet; must be None.
              headers: Extra headers; these take precedence over the ones returned
                  by ``headers_factory``.
              timeout: Value passed to the session as ``timeout``.

          Returns:
              The response of the first attempt whose status is not in
              ``RETRY_STATUSES``, or else the response of the last attempt. The
              body has already been read.

          Raises:
              NotImplementedError: If ``file`` is given.
              aiohttp.ClientError, asyncio.TimeoutError: If the last attempt fails
                  with one of these.
          """
          if file is not None:
              raise NotImplementedError("ApiProvider doesn't yet support file uploads")
          request_kwargs = {
              "method": method,
              "url": add_query_params(
                  join(self._url, quote(path), self._trailing_slash), params
              ),
              "timeout": timeout,
              "json": json,
              "data": fields,
          }
          # The headers are resolved once and reused for every attempt.
          actual_headers = {}
          if self._headers_factory is not None:
              actual_headers.update(await self._headers_factory())
          if headers:
              actual_headers.update(headers)
          for attempt in range(self._retries):
              if attempt > 0:
                  # exponential backoff: factor * (1, 2, 4, ...)
                  backoff = self._backoff_factor * 2 ** (attempt - 1)
                  await asyncio.sleep(backoff)

              try:
                  async with self._session as session:
                      response = await session.request(
                          headers=actual_headers, **request_kwargs
                      )
                      # read the body while the session is still open
                      await response.read()
              except (aiohttp.ClientError, asyncio.exceptions.TimeoutError):
                  if attempt == self._retries - 1:
                      raise  # propagate ClientError in case no retries left
              else:
                  if response.status not in RETRY_STATUSES:
                      return response  # on all non-retry statuses: return response

          return response  # retries exceeded; return the (possibly error) response

      async def request(
          self,
          method: str,
          path: str,
          params: Optional[Json] = None,
          json: Optional[Json] = None,
          fields: Optional[Json] = None,
          file: Optional[FileFormPost] = None,
          headers: Optional[Dict[str, str]] = None,
          timeout: float = 5.0,
      ) -> Optional[Json]:
          """Send a request and return the decoded JSON body.

          Returns:
              The decoded JSON body, or None when the response status is 204.

          Raises:
              ApiException: If the response is not of a JSON content type, or if
                  the status is outside the 2xx range (and not 409).
              Conflict: If the response status is 409.
          """
          response = await self._request_with_retry(
              method, path, params, json, fields, file, headers, timeout
          )
          status = HTTPStatus(response.status)
          content_type = response.headers.get("Content-Type")
          if status is HTTPStatus.NO_CONTENT:
              return None
          if not is_json_content_type(content_type):
              raise ApiException(
                  f"Unexpected content type '{content_type}'", status=status
              )
          body = await response.json()
          check_exception(status, body)
          return body

      async def request_raw(
          self,
          method: str,
          path: str,
          params: Optional[Json] = None,
          json: Optional[Json] = None,
          fields: Optional[Json] = None,
          file: Optional[FileFormPost] = None,
          headers: Optional[Dict[str, str]] = None,
          timeout: float = 5.0,
      ) -> Response:
          """Send a request and return status, raw body and content type.

          Unlike ``request``, the status and content type are not checked here;
          they are handed to the caller as-is in the returned ``Response``.
          """
          response = await self._request_with_retry(
              method, path, params, json, fields, file, headers, timeout
          )
          return Response(
              status=response.status,
              data=await response.read(),
              content_type=response.headers.get("Content-Type"),
          )
  185. )