Coverage for /usr/local/lib/python3.8/site-packages/tgclients/ 86%

69 statements  

« prev     ^ index     » next v7.4.4, created at 2024-04-02 16:49 +0000

1# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen 


3# SPDX-License-Identifier: LGPL-3.0-or-later 


5"""TextGrid Search API.""" 

6import logging 

7from io import BytesIO 

8from typing import Optional, List 


10import requests 

11from requests.models import Response 

12from xsdata.formats.dataclass.context import XmlContext 

13from xsdata.formats.dataclass.parsers import XmlParser 


15from tgclients.config import TextgridConfig 

16from tgclients.databinding.tgsearch import Response as SearchResponse, TextgridUris 


18logger = logging.getLogger(__name__) 



class TextgridSearchRequest:
    """Provide low level access to the TextGrid search service, returning the response objects."""

    def __init__(self, config: Optional[TextgridConfig] = None, nonpublic: bool = False) -> None:
        """Select the service URL and create a reusable HTTP session.

        Args:
            config (Optional[TextgridConfig]): client configuration. A new ``TextgridConfig()``
                                               is created when omitted. Defaults to None.
            nonpublic (bool): use the nonpublic search endpoint instead of the public one.
                              Defaults to False.
        """
        # Create the default per call: a `TextgridConfig()` default in the signature would be
        # evaluated once when the class is defined and then shared by every instance.
        if config is None:
            config = TextgridConfig()
        if nonpublic:
            # NOTE(review): the right-hand side of this assignment was missing in the source
            # under review; `config.search` is assumed to be the nonpublic counterpart of
            # `config.search_public` - confirm against TextgridConfig.
            self._url = config.search
        else:
            self._url = config.search_public
        self._config = config
        # reuse tcp connections:
        self._requests = requests.Session()

    def _get(self, path: str, params: dict) -> Response:
        """Send a GET request to a path below the service URL and check the response.

        Args:
            path (str): path appended to the service URL, starting with '/'
            params (dict): query parameters handed to the HTTP session

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        response = self._requests.get(
            self._url + path, params=params, timeout=self._config.http_timeout)
        return self._handle_response(response)

    def info(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """Retrieve metadata for a textgrid object specified by its textgrid-uri.

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: metadata for uri
        """
        return self._get('/info/' + textgrid_uri, {'sid': sid})

    def list_project_root(self, project_id: str, sid: Optional[str] = None) -> Response:
        """Get objects belonging to a project, filtered by objects that are in
        an aggregation in the same project.

        Args:
            project_id (str): the ID of the project to list
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service, containing a list of textgrid metadata entries
        """
        return self._get('/navigation/' + project_id, {'sid': sid})

    def list_aggregation(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """Get child resources of an aggregation.

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service, containing a list of textgrid metadata entries
        """
        return self._get('/navigation/agg/' + textgrid_uri, {'sid': sid})

    # pylint: disable=too-many-arguments,too-many-locals
    def search(self,
               query: Optional[str] = '*',
               sid: Optional[str] = None,
               target: Optional[str] = None,
               order: Optional[str] = None,
               start: Optional[int] = None,
               limit: Optional[int] = None,
               kwic_width: Optional[int] = None,
               word_distance: Optional[int] = None,
               path: Optional[bool] = None,
               all_projects: Optional[bool] = None,
               sandbox: Optional[bool] = None,
               filters: Optional[List[str]] = None,
               facet: Optional[List[str]] = None,
               facet_limit: Optional[int] = None) -> Response:
        """Run fulltext queries or filters on TextGrid metadata and fulltext objects.

        Please note: as the defaults of this function are mostly set to None, the defaults from
        the service are used, and also noted in this docstring (see the tgsearch service
        documentation).

        Args:
            query (str, optional): Lucene search string. Defaults to '*'.
            sid (str, optional): TextGrid SessionID from tgauth. Defaults to None.
            target (str, optional): where to do fulltext-searches: one of 'structure',
                                    'metadata' and 'both'. Defaults to 'both'.
            order (str, optional): key-value ascending (asc) or descending (desc) and
                                   metadata-field like asc:title or desc:author.
                                   Defaults to 'relevance'.
            start (int, optional): result number to start with. Defaults to 0.
            limit (int, optional): number of entries to return. Defaults to 20.
            kwic_width (int, optional): number of chars before and after a kwic match.
                                        Defaults to 40.
            word_distance (int, optional): max distance between two words in fulltext query.
                                           ignored if set to a number < 0, then for a hit all
                                           words must be contained in one document.
                                           Defaults to -1.
            path (bool, optional): path of found result(work->edition->aggregations) should be
                                   applied to hit. Defaults to false.
            all_projects (bool, optional): all Projects should be searched for public data,
                                           warning: this query may be slow, if many results found.
                                           Defaults to false.
            sandbox (bool, optional): show sandboxed (not yet finally published) data.
                                      Defaults to false.
            filters (List[str], optional): add filter on query results, e.g. for faceting.
                                           Defaults to None.
            facet (List[str], optional): get facets for query results. Defaults to None.
            facet_limit (int, optional): number of results to return for each facet.
                                         Defaults to 10.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service - a list of textgrid metadata entries,
            KWIC hits, paths and facets if requested
        """
        # Map the pythonic argument names onto the query parameter names of the service.
        params = {
            'q': query,
            'sid': sid,
            'target': target,
            'order': order,
            'start': start,
            'limit': limit,
            'kwicWidth': kwic_width,
            'wordDistance': word_distance,
            'path': path,
            'allProjects': all_projects,
            'sandbox': sandbox,
            'filter': filters,
            'facet': facet,
            'facetLimit': facet_limit,
        }
        return self._get('/search', params)
    # pylint: enable=too-many-arguments,too-many-locals

    def edition_work_metadata_for(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """Find parent edition for an object and the edition and work metadata.

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service - edition and work metadata for given object
            from first matching parent edition
        """
        return self._get('/info/' + textgrid_uri + '/editionWorkMeta', {'sid': sid})

    def children(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """List URIs for all children of this aggregation and its child aggregations.

        Args:
            textgrid_uri (str): Textgrid URI of an aggregation
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service - URIs for children of this
            aggregation and its child aggregations
        """
        return self._get('/info/' + textgrid_uri + '/children', {'sid': sid})

    @staticmethod
    def _handle_response(response: Response) -> Response:
        """Error handling for responses from tgsearch.

        Args:
            response (Response): a response from tgsearch

        Raises:
            TextgridSearchException: if HTTP status code >= 400

        Returns:
            Response: the response
        """
        if not response.ok:
            # Only the first 255 characters of the body go into the message.
            message = f'[Error] HTTP Code: {response.status_code} - {response.text[0:255]}'
            logger.warning(message)
            raise TextgridSearchException(message)
        return response



class TextgridSearch(TextgridSearchRequest):
    """Provide access to the TextGrid search service using a XML data binding."""

    def __init__(self, config: Optional[TextgridConfig] = None, nonpublic: bool = False) -> None:
        """Set up the underlying request client and the XML parser.

        Args:
            config (Optional[TextgridConfig]): client configuration. A new ``TextgridConfig()``
                                               is created when omitted. Defaults to None.
            nonpublic (bool): passed on to TextgridSearchRequest. Defaults to False.
        """
        # Create the default per call: a `TextgridConfig()` default in the signature would be
        # evaluated once when the class is defined and then shared by every instance.
        if config is None:
            config = TextgridConfig()
        super().__init__(config, nonpublic)
        # It’s recommended to either reuse the same parser/serializer instance
        # or reuse the context instance, so one parser is kept for all calls.
        context = XmlContext()
        self._parser = XmlParser(context=context)

    def _parse_search_response(self, response: Response) -> SearchResponse:
        """Parse the body of an HTTP response into a SearchResponse object.

        Args:
            response (Response): HTTP response whose content is parsed

        Returns:
            SearchResponse: the data binding object built from the response content
        """
        return self._parser.parse(BytesIO(response.content), SearchResponse)

    def info(self, textgrid_uri: str, sid: Optional[str] = None) -> SearchResponse:
        """Retrieve metadata for a textgrid object specified by its textgrid-uri.

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            SearchResponse: metadata for uri
        """
        return self._parse_search_response(super().info(textgrid_uri, sid))

    def list_project_root(self, project_id: str, sid: Optional[str] = None) -> SearchResponse:
        """Get objects belonging to a project, filtered by objects that are in
        an aggregation in the same project.

        Args:
            project_id (str): the ID of the project to list
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            SearchResponse: A list of textgrid metadata entries
        """
        return self._parse_search_response(super().list_project_root(project_id, sid))

    def list_aggregation(self, textgrid_uri: str, sid: Optional[str] = None) -> SearchResponse:
        """Get child resources of an aggregation.

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            SearchResponse: A list of textgrid metadata entries
        """
        return self._parse_search_response(super().list_aggregation(textgrid_uri, sid))

    # pylint: disable=too-many-arguments,too-many-locals
    def search(self,
               query: Optional[str] = '*',
               sid: Optional[str] = None,
               target: Optional[str] = None,
               order: Optional[str] = None,
               start: Optional[int] = None,
               limit: Optional[int] = None,
               kwic_width: Optional[int] = None,
               word_distance: Optional[int] = None,
               path: Optional[bool] = None,
               all_projects: Optional[bool] = None,
               sandbox: Optional[bool] = None,
               filters: Optional[List[str]] = None,
               facet: Optional[List[str]] = None,
               facet_limit: Optional[int] = None) -> SearchResponse:
        """Run fulltext queries or filters on TextGrid metadata and fulltext objects.

        Please note: as the defaults of this function are mostly set to None, the defaults from
        the service are used, and also noted in this docstring (see the tgsearch service
        documentation).

        Args:
            query (str, optional): Lucene search string. Defaults to '*'.
            sid (str, optional): TextGrid SessionID from tgauth. Defaults to None.
            target (str, optional): where to do fulltext-searches: one of 'structure',
                                    'metadata' and 'both'. Defaults to 'both'.
            order (str, optional): key-value ascending (asc) or descending (desc) and
                                   metadata-field like asc:title or desc:author.
                                   Defaults to 'relevance'.
            start (int, optional): result number to start with. Defaults to 0.
            limit (int, optional): number of entries to return. Defaults to 20.
            kwic_width (int, optional): number of chars before and after a kwic match.
                                        Defaults to 40.
            word_distance (int, optional): max distance between two words in fulltext query.
                                           ignored if set to a number < 0, then for a hit all
                                           words must be contained in one document.
                                           Defaults to -1.
            path (bool, optional): path of found result(work->edition->aggregations) should be
                                   applied to hit. Defaults to false.
            all_projects (bool, optional): all Projects should be searched for public data,
                                           warning: this query may be slow, if many results found.
                                           Defaults to false.
            sandbox (bool, optional): show sandboxed (not yet finally published) data.
                                      Defaults to false.
            filters (List[str], optional): add filter on query results, e.g. for faceting.
                                           Defaults to None.
            facet (List[str], optional): get facets for query results. Defaults to None.
            facet_limit (int, optional): number of results to return for each facet.
                                         Defaults to 10.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            SearchResponse: a list of textgrid metadata entries,
            KWIC hits, paths and facets if requested
        """
        response = super().search(query=query,
                                  sid=sid,
                                  target=target,
                                  order=order,
                                  start=start,
                                  limit=limit,
                                  kwic_width=kwic_width,
                                  word_distance=word_distance,
                                  path=path,
                                  all_projects=all_projects,
                                  sandbox=sandbox,
                                  filters=filters,
                                  facet=facet,
                                  facet_limit=facet_limit)
        return self._parse_search_response(response)
    # pylint: enable=too-many-arguments,too-many-locals

    def edition_work_metadata_for(self, textgrid_uri: str,
                                  sid: Optional[str] = None) -> SearchResponse:
        """Find parent edition for an object and the edition and work metadata.

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            SearchResponse: Edition and work metadata for given object
            from first matching parent edition
        """
        return self._parse_search_response(super().edition_work_metadata_for(textgrid_uri, sid))

    def children(self, textgrid_uri: str, sid: Optional[str] = None) -> TextgridUris:
        """List URIs for all children of this aggregation and its child aggregations.

        Args:
            textgrid_uri (str): Textgrid URI of an aggregation
            sid (Optional[str], optional): Session ID. Defaults to None.

        Raises:
            TextgridSearchException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            TextgridUris: URIs for children of this aggregation and its child aggregations
        """
        response = super().children(textgrid_uri, sid)
        # This endpoint is parsed into TextgridUris, not SearchResponse.
        return self._parser.parse(BytesIO(response.content), TextgridUris)



class TextgridSearchException(Exception):
    """Error raised when communication with the tgsearch service fails."""