chris.util.search
import copy
import logging
from dataclasses import dataclass
from typing import (
    Optional,
    TypeVar,
    AsyncGenerator,
    Type,
    AsyncIterable,
    Any,
    Generic,
    AsyncIterator,
)

import aiohttp
import yarl
from serde import deserialize
from serde.json import from_json

from chris.link.linked import deserialize_linked, Linked
from chris.util.errors import BaseClientError, raise_for_status, NonsenseResponseError

logger = logging.getLogger(__name__)

T = TypeVar("T")


@deserialize
class _Paginated:
    """
    Response from a paginated endpoint.
    """

    # total number of items across every page of the collection
    count: int
    # URL of the next page, or None on the last page
    next: Optional[str]
    # URL of the previous page, or None on the first page
    previous: Optional[str]
    # raw, not-yet-deserialized items of this page
    results: list[Any]


@dataclass
class Search(Generic[T], AsyncIterable[T]):
    """
    Abstraction over paginated collection responses from *CUBE*.
    `Search` objects are returned by methods for search endpoints of the *CUBE* API.
    It is an [asynchronous iterable](https://docs.python.org/3/glossary.html#term-asynchronous-iterable)
    which produces items from responses that return multiple results.
    HTTP requests are fired as needed; they happen in the background during iteration.
    No request is made before the first time a `Search` object is called.

    .. note:: Pagination is handled internally and automatically.
        The query parameters `limit` and `offset` can be explicitly given, but they shouldn't.

    Examples
    --------

    Use an `async for` loop to print the name of every feed:

    ```python
    all_feeds = chris.search_feeds()  # returns a Search[Feed]
    async for feed in all_feeds:
        print(feed.name)
    ```
    """

    # URL of the collection; "search/" is appended to it for queries
    base_url: str
    # query parameters of this search
    params: dict[str, Any]
    # client providing the HTTP session and link deserialization context
    client: Linked
    # type each result item is deserialized into
    Item: Type[T]
    # pagination request budget; -1 means unlimited
    max_requests: int = 100

    def __aiter__(self) -> AsyncIterator[T]:
        return self._paginate(self.url)

    async def first(self) -> Optional[T]:
        """
        Get the first item.

        See also
        --------
        `get_only` : similar use, but more strict
        """
        return await anext(self._first_aiter(), None)

    async def get_only(self, allow_multiple=False) -> T:
        """
        Get the *only* item from a search with one result.

        Examples
        --------

        This method is very commonly used for getting "one thing" from CUBE.

        ```python
        await chris.search_plugins(name_exact="pl-dircopy", version="2.1.1").get_only()
        ```

        In the example above, a search for plugins given (`name_exact`, `version`)
        is guaranteed to return either 0 or 1 result.

        Raises
        ------
        chris.util.search.NoneSearchError
            If this search is empty.
        chris.util.search.ManySearchError
            If this search has more than one item and `allow_multiple` is `False`

        See also
        --------
        `first` : does the same thing but without checks.

        Parameters
        ----------
        allow_multiple: bool
            if `True`, do not raise `ManySearchError` if `count > 1`
        """
        one = await self._get_one()
        if one.count == 0:
            raise NoneSearchError(self.url)
        if not allow_multiple and one.count > 1:
            raise ManySearchError(self.url)
        if len(one.results) < 1:
            # count > 0 yet no results: the server response is self-contradictory
            raise NonsenseResponseError(
                f"Response has count={one.count} but the results are empty.", one
            )
        return deserialize_linked(self.client, self.Item, one.results[0])

    async def count(self) -> int:
        """
        Get the number of items in this collection search.

        Examples
        --------

        `count` is useful for rendering a progress bar. TODO example with files
        """
        one = await self._get_one()
        return one.count

    async def _get_one(self) -> _Paginated:
        """Request the first page (limit=1) and return its raw pagination envelope."""
        async with self.client.s.get(self._first_url) as res:
            await raise_for_status(res)
            return from_json(_Paginated, await res.text())

    def _paginate(self, url: yarl.URL) -> AsyncIterator[T]:
        """Create an iterator over all items starting from the given page URL."""
        return _get_paginated(
            client=self.client,
            url=url,
            item_type=self.Item,
            max_requests=self.max_requests,
        )

    @property
    def url(self) -> yarl.URL:
        """Search URL with this search's query parameters."""
        return self._search_url_with(self.params)

    def _first_aiter(self) -> AsyncIterator[T]:
        """Iterate starting from a page of at most one item (for `first`)."""
        return self._paginate(self._first_url)

    @property
    def _first_url(self) -> yarl.URL:
        """Search URL constrained to return only the first item."""
        params = copy.copy(self.params)
        params["limit"] = 1
        params["offset"] = 0
        return self._search_url_with(params)

    @property
    def _search_url(self) -> yarl.URL:
        return yarl.URL(self.base_url) / "search/"

    def _search_url_with(self, query: dict[str, Any]) -> yarl.URL:
        return yarl.URL(self._search_url).with_query(query)


async def _get_paginated(
    client: Linked,
    url: yarl.URL | str,
    item_type: Type[T],
    max_requests: int,
) -> AsyncGenerator[T, None]:
    """
    Make HTTP GET requests to a paginated endpoint. Further requests to the
    "next" URL are made in the background as needed.

    Parameters
    ----------
    client:
        client whose session makes the requests and which provides
        deserialization context for the produced items
    url:
        URL of the first page to fetch
    item_type:
        type each element of `results` is deserialized into
    max_requests:
        maximum number of page requests allowed, or -1 for unlimited

    Raises
    ------
    TooMuchPaginationError
        If fetching the collection would exceed `max_requests` requests.
    """
    next_url: yarl.URL | str | None = url
    requests_left = max_requests
    # Loop over pages instead of recursing so a deeply-paginated collection
    # does not build up a chain of nested async generators.
    while next_url is not None:
        logger.debug("GET, max_requests=%d, --> %s", requests_left, next_url)
        # requests_left counts down from max_requests and never reaches 0
        # when max_requests=-1, which therefore means "unlimited".
        if requests_left == 0:
            raise TooMuchPaginationError(
                f"too many requests made to {next_url}. "
                f"If this is expected, then pass the argument max_search_requests=-1 to "
                f"the client constructor classmethod."
            )
        async with client.s.get(next_url) as res:
            # Raise a proper error on 4XX/5XX (consistent with Search._get_one)
            # instead of failing later with a confusing deserialization error.
            await raise_for_status(res)
            data: _Paginated = from_json(_Paginated, await res.text())
        for element in data.results:
            yield deserialize_linked(client, item_type, element)
        next_url = data.next
        requests_left -= 1


async def acollect(async_iterable: AsyncIterable[T]) -> list[T]:
    """
    Simple helper to convert a `Search` to a [`list`](https://docs.python.org/3/library/stdtypes.html#list).

    Using this function is not recommended unless you can assume the collection is small.
    """
    # nb: using tuple here causes
    #     TypeError: 'async_generator' object is not iterable
    # return tuple(e async for e in async_iterable)
    return [e async for e in async_iterable]


class TooMuchPaginationError(BaseClientError):
    """Specified maximum number of requests exceeded while retrieving results from a paginated resource."""

    pass


class GetOnlyError(BaseClientError):
    """Search does not have exactly one result."""

    pass


class NoneSearchError(GetOnlyError):
    """A search expected to have at least one element, has none."""

    pass


class ManySearchError(GetOnlyError):
    """A search expected to have only one result, has several."""

    pass
41@dataclass 42class Search(Generic[T], AsyncIterable[T]): 43 """ 44 Abstraction over paginated collection responses from *CUBE*. 45 `Search` objects are returned by methods for search endpoints of the *CUBE* API. 46 It is an [asynchronous iterable](https://docs.python.org/3/glossary.html#term-asynchronous-iterable) 47 which produces items from responses that return multiple results. 48 HTTP requests are fired as-neede, they happen in the background during iteration. 49 No request is made before the first time a `Search` object is called. 50 51 .. note:: Pagination is handled internally and automatically. 52 The query parameters `limit` and `offset` can be explicitly given, but they shouldn't. 53 54 Examples 55 -------- 56 57 Use an `async for` loop to print the name of every feed: 58 59 ```python 60 all_feeds = chris.search_feeds() # returns a Search[Feed] 61 async for feed in all_feeds: 62 print(feed.name) 63 ``` 64 """ 65 66 base_url: str 67 params: dict[str, Any] 68 client: Linked 69 Item: Type[T] 70 max_requests: int = 100 71 72 def __aiter__(self) -> AsyncIterator[T]: 73 return self._paginate(self.url) 74 75 async def first(self) -> Optional[T]: 76 """ 77 Get the first item. 78 79 See also 80 -------- 81 `get_only` : similar use, but more strict 82 """ 83 return await anext(self._first_aiter(), None) 84 85 async def get_only(self, allow_multiple=False) -> T: 86 """ 87 Get the *only* item from a search with one result. 88 89 Examples 90 -------- 91 92 This method is very commonly used for getting "one thing" from CUBE. 93 94 ```python 95 await chris.search_plugins(name_exact="pl-dircopy", version="2.1.1").get_only() 96 ``` 97 98 In the example above, a search for plugins given (`name_exact`, `version`) 99 is guaranteed to return either 0 or 1 result. 100 101 Raises 102 ------ 103 chris.util.search.NoneSearchError 104 If this search is empty. 
105 chris.util.search.ManySearchError 106 If this search has more than one item and `allow_multiple` is `False` 107 108 See also 109 -------- 110 `first` : does the same thing but without checks. 111 112 Parameters 113 ---------- 114 allow_multiple: bool 115 if `True`, do not raise `ManySearchError` if `count > 1` 116 """ 117 one = await self._get_one() 118 if one.count == 0: 119 raise NoneSearchError(self.url) 120 if not allow_multiple and one.count > 1: 121 raise ManySearchError(self.url) 122 if len(one.results) < 1: 123 raise NonsenseResponseError( 124 f"Response has count={one.count} but the results are empty.", one 125 ) 126 return deserialize_linked(self.client, self.Item, one.results[0]) 127 128 async def count(self) -> int: 129 """ 130 Get the number of items in this collection search. 131 132 Examples 133 -------- 134 135 `count` is useful for rendering a progress bar. TODO example with files 136 """ 137 one = await self._get_one() 138 return one.count 139 140 async def _get_one(self) -> _Paginated: 141 async with self.client.s.get(self._first_url) as res: 142 await raise_for_status(res) 143 return from_json(_Paginated, await res.text()) 144 145 def _paginate(self, url: yarl.URL) -> AsyncIterator[T]: 146 return _get_paginated( 147 client=self.client, 148 url=url, 149 item_type=self.Item, 150 max_requests=self.max_requests, 151 ) 152 153 @property 154 def url(self) -> yarl.URL: 155 return self._search_url_with(self.params) 156 157 def _first_aiter(self) -> AsyncIterator[T]: 158 return self._paginate(self._first_url) 159 160 @property 161 def _first_url(self) -> yarl.URL: 162 params = copy.copy(self.params) 163 params["limit"] = 1 164 params["offset"] = 0 165 return self._search_url_with(params) 166 167 @property 168 def _search_url(self) -> yarl.URL: 169 return yarl.URL(self.base_url) / "search/" 170 171 def _search_url_with(self, query: dict[str, Any]): 172 return yarl.URL(self._search_url).with_query(query)
Abstraction over paginated collection responses from CUBE.
Search
objects are returned by methods for search endpoints of the CUBE API.
It is an asynchronous iterable
which produces items from responses that return multiple results.
HTTP requests are fired as needed; they happen in the background during iteration.
No request is made before the first time a Search
object is called.
Pagination is handled internally and automatically.
The query parameters `limit` and `offset` can be explicitly given, but they shouldn't.
Examples
Use an async for
loop to print the name of every feed:
all_feeds = chris.search_feeds() # returns a Search[Feed]
async for feed in all_feeds:
print(feed.name)
85 async def get_only(self, allow_multiple=False) -> T: 86 """ 87 Get the *only* item from a search with one result. 88 89 Examples 90 -------- 91 92 This method is very commonly used for getting "one thing" from CUBE. 93 94 ```python 95 await chris.search_plugins(name_exact="pl-dircopy", version="2.1.1").get_only() 96 ``` 97 98 In the example above, a search for plugins given (`name_exact`, `version`) 99 is guaranteed to return either 0 or 1 result. 100 101 Raises 102 ------ 103 chris.util.search.NoneSearchError 104 If this search is empty. 105 chris.util.search.ManySearchError 106 If this search has more than one item and `allow_multiple` is `False` 107 108 See also 109 -------- 110 `first` : does the same thing but without checks. 111 112 Parameters 113 ---------- 114 allow_multiple: bool 115 if `True`, do not raise `ManySearchError` if `count > 1` 116 """ 117 one = await self._get_one() 118 if one.count == 0: 119 raise NoneSearchError(self.url) 120 if not allow_multiple and one.count > 1: 121 raise ManySearchError(self.url) 122 if len(one.results) < 1: 123 raise NonsenseResponseError( 124 f"Response has count={one.count} but the results are empty.", one 125 ) 126 return deserialize_linked(self.client, self.Item, one.results[0])
Get the only item from a search with one result.
Examples
This method is very commonly used for getting "one thing" from CUBE.
await chris.search_plugins(name_exact="pl-dircopy", version="2.1.1").get_only()
In the example above, a search for plugins given (`name_exact`, `version`)
is guaranteed to return either 0 or 1 result.
Raises
- chris.util.search.NoneSearchError: If this search is empty.
- chris.util.search.ManySearchError: If this search has more than one item and `allow_multiple` is `False`
See also
`first`: does the same thing but without checks.
Parameters
- allow_multiple (bool):
if `True`, do not raise `ManySearchError` if `count > 1`
128 async def count(self) -> int: 129 """ 130 Get the number of items in this collection search. 131 132 Examples 133 -------- 134 135 `count` is useful for rendering a progress bar. TODO example with files 136 """ 137 one = await self._get_one() 138 return one.count
Get the number of items in this collection search.
Examples
`count` is useful for rendering a progress bar. TODO example with files
202async def acollect(async_iterable: AsyncIterable[T]) -> list[T]: 203 """ 204 Simple helper to convert a `Search` to a [`list`](https://docs.python.org/3/library/stdtypes.html#list). 205 206 Using this function is not recommended unless you can assume the collection is small. 207 """ 208 # nb: using tuple here causes 209 # TypeError: 'async_generator' object is not iterable 210 # return tuple(e async for e in async_iterable) 211 return [e async for e in async_iterable]
214class TooMuchPaginationError(BaseClientError): 215 """Specified maximum number of requests exceeded while retrieving results from a paginated resource.""" 216 217 pass
Specified maximum number of requests exceeded while retrieving results from a paginated resource.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
220class GetOnlyError(BaseClientError): 221 """Search does not have exactly one result.""" 222 223 pass
Search does not have exactly one result.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
226class NoneSearchError(GetOnlyError): 227 """A search expected to have at least one element, has none.""" 228 229 pass
A search expected to have at least one element, has none.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback
232class ManySearchError(GetOnlyError): 233 """A search expected to have only one result, has several.""" 234 235 pass
A search expected to have only one result, has several.
Inherited Members
- builtins.Exception
- Exception
- builtins.BaseException
- with_traceback