api_provider.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. import asyncio
  2. import re
  3. from http import HTTPStatus
  4. from io import BytesIO
  5. from typing import Any
  6. from typing import Awaitable
  7. from typing import Callable
  8. from typing import Dict
  9. from typing import Optional
  10. from urllib.parse import quote
  11. from urllib.parse import urlencode
  12. from urllib.parse import urljoin
  13. import aiohttp
  14. from aiohttp import ClientResponse
  15. from aiohttp import ClientSession
  16. from pydantic import AnyHttpUrl
  17. from pydantic import field_validator
  18. from clean_python import Conflict
  19. from clean_python import Json
  20. from clean_python import ValueObject
  21. from .exceptions import ApiException
  22. from .response import Response
# Names exported by ``from <module> import *``.
__all__ = ["ApiProvider", "FileFormPost"]
# Response statuses for which ApiProvider repeats the request instead of
# returning the response right away.
RETRY_STATUSES = frozenset({413, 429, 503})  # like in urllib3
  25. def is_success(status: HTTPStatus) -> bool:
  26. """Returns True on 2xx status"""
  27. return (int(status) // 100) == 2
  28. def check_exception(status: HTTPStatus, body: Json) -> None:
  29. if status == HTTPStatus.CONFLICT:
  30. raise Conflict(body.get("message", str(body)))
  31. elif not is_success(status):
  32. raise ApiException(body, status=status)
  33. JSON_CONTENT_TYPE_REGEX = re.compile(r"^application\/[^+]*[+]?(json);?.*$")
  34. def is_json_content_type(content_type: Optional[str]) -> bool:
  35. if not content_type:
  36. return False
  37. return bool(JSON_CONTENT_TYPE_REGEX.match(content_type))
  38. def join(url: str, path: str, trailing_slash: bool = False) -> str:
  39. """Results in a full url without trailing slash"""
  40. assert url.endswith("/")
  41. assert not path.startswith("/")
  42. result = urljoin(url, path)
  43. if trailing_slash and not result.endswith("/"):
  44. result = result + "/"
  45. elif not trailing_slash and result.endswith("/"):
  46. result = result[:-1]
  47. return result
  48. def add_query_params(url: str, params: Optional[Json]) -> str:
  49. if params is None:
  50. return url
  51. return url + "?" + urlencode(params, doseq=True)
  52. class FileFormPost(ValueObject):
  53. file_name: str
  54. file: Any # typing of BinaryIO / BytesIO is hard!
  55. field_name: str = "file"
  56. content_type: str = "application/octet-stream"
  57. @field_validator("file")
  58. @classmethod
  59. def validate_file(cls, v):
  60. if isinstance(v, bytes):
  61. return BytesIO(v)
  62. assert hasattr(v, "read") # poor-mans BinaryIO validation
  63. return v
  64. class ApiProvider:
  65. """Basic JSON API provider with retry policy and bearer tokens.
  66. The default retry policy has 3 retries with 1, 2, 4 second intervals.
  67. Args:
  68. url: The url of the API (with trailing slash)
  69. headers_factory: Coroutine that returns headers (for e.g. authorization)
  70. retries: Total number of retries per request
  71. backoff_factor: Multiplier for retry delay times (1, 2, 4, ...)
  72. trailing_slash: Wether to automatically add or remove trailing slashes.
  73. """
  74. def __init__(
  75. self,
  76. url: AnyHttpUrl,
  77. headers_factory: Optional[Callable[[], Awaitable[Dict[str, str]]]] = None,
  78. retries: int = 3,
  79. backoff_factor: float = 1.0,
  80. trailing_slash: bool = False,
  81. ):
  82. self._url = str(url)
  83. if not self._url.endswith("/"):
  84. self._url += "/"
  85. self._headers_factory = headers_factory
  86. assert retries > 0
  87. self._retries = retries
  88. self._backoff_factor = backoff_factor
  89. self._trailing_slash = trailing_slash
  90. self._session = ClientSession()
  91. async def _request_with_retry(
  92. self,
  93. method: str,
  94. path: str,
  95. params: Optional[Json],
  96. json: Optional[Json],
  97. fields: Optional[Json],
  98. file: Optional[FileFormPost],
  99. timeout: float,
  100. ) -> ClientResponse:
  101. if file is not None:
  102. raise NotImplementedError("ApiProvider doesn't yet support file uploads")
  103. request_kwargs = {
  104. "method": method,
  105. "url": add_query_params(
  106. join(self._url, quote(path), self._trailing_slash), params
  107. ),
  108. "timeout": timeout,
  109. "json": json,
  110. "data": fields,
  111. }
  112. if self._headers_factory is not None:
  113. request_kwargs["headers"] = await self._headers_factory()
  114. for attempt in range(self._retries):
  115. if attempt > 0:
  116. backoff = self._backoff_factor * 2 ** (attempt - 1)
  117. await asyncio.sleep(backoff)
  118. try:
  119. response = await self._session.request(**request_kwargs)
  120. await response.read()
  121. except (aiohttp.ClientError, asyncio.exceptions.TimeoutError):
  122. if attempt == self._retries - 1:
  123. raise # propagate ClientError in case no retries left
  124. else:
  125. if response.status not in RETRY_STATUSES:
  126. return response # on all non-retry statuses: return response
  127. return response # retries exceeded; return the (possibly error) response
  128. async def request(
  129. self,
  130. method: str,
  131. path: str,
  132. params: Optional[Json] = None,
  133. json: Optional[Json] = None,
  134. fields: Optional[Json] = None,
  135. file: Optional[FileFormPost] = None,
  136. timeout: float = 5.0,
  137. ) -> Optional[Json]:
  138. response = await self._request_with_retry(
  139. method, path, params, json, fields, file, timeout
  140. )
  141. status = HTTPStatus(response.status)
  142. content_type = response.headers.get("Content-Type")
  143. if status is HTTPStatus.NO_CONTENT:
  144. return None
  145. if not is_json_content_type(content_type):
  146. raise ApiException(
  147. f"Unexpected content type '{content_type}'", status=status
  148. )
  149. body = await response.json()
  150. check_exception(status, body)
  151. return body
  152. async def request_raw(
  153. self,
  154. method: str,
  155. path: str,
  156. params: Optional[Json] = None,
  157. json: Optional[Json] = None,
  158. fields: Optional[Json] = None,
  159. file: Optional[FileFormPost] = None,
  160. timeout: float = 5.0,
  161. ) -> Response:
  162. response = await self._request_with_retry(
  163. method, path, params, json, fields, file, timeout
  164. )
  165. return Response(
  166. status=response.status,
  167. data=await response.read(),
  168. content_type=response.headers.get("Content-Type"),
  169. )