Coverage for /usr/local/lib/python3.9/site-packages/tgclients/crud.py: 95%
115 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-02 16:49 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-02 16:49 +0000
1# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen
2#
3# SPDX-License-Identifier: LGPL-3.0-or-later
5"""TextGrid CRUD API."""
6import logging
7from io import BytesIO
8from typing import Optional
10from bs4 import BeautifulSoup
12import requests
13from requests.models import Response
14from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
15from xsdata.formats.dataclass.context import XmlContext
16from xsdata.formats.dataclass.parsers import XmlParser
17from xsdata.formats.dataclass.serializers import XmlSerializer
19from tgclients.config import TextgridConfig
20from tgclients.databinding.textgrid_metadata_2010 import MetadataContainerType
# Encoding forced onto every tgcrud response before its text is read.
RESPONSE_ENCODING = 'utf-8'

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class TextgridCrudRequest:
    """Provide low level access to the TextGrid CRUD Service.

    Every public method sends a single HTTP request through one shared
    ``requests.Session`` and hands back the raw ``Response``. Responses that
    are not ``ok`` are converted into a ``TextgridCrudException``.
    """

    def __init__(self, config: Optional[TextgridConfig] = None,
                 for_publication: bool = False) -> None:
        """Set up the HTTP session and the XML parser/serializer.

        Args:
            config (Optional[TextgridConfig]): service configuration. If omitted, a new
                ``TextgridConfig()`` is created for this instance.
            for_publication (bool): If `True`, use ``config.crud_public`` as endpoint
                instead of ``config.crud``. Default: `False`
        """
        # Create the default here rather than in the signature: a call in a default
        # argument is evaluated once at import time and shared by all instances.
        if config is None:
            config = TextgridConfig()

        if for_publication:
            logger.warning('for_publication set. this tgcrud client is able to publish data to '
                           + 'the public repository, please make sure you know what you are doing.')
            self._url = config.crud_public
        else:
            self._url = config.crud
        self._config = config
        # reuse tcp connections: https://requests.readthedocs.io/en/latest/user/advanced/#session-objects
        self._requests = requests.Session()

        # It’s recommended to either reuse the same parser/serializer instance
        # or reuse the context instance. see https://xsdata.readthedocs.io/en/latest/xml.html
        context = XmlContext()
        self._parser = XmlParser(context=context)
        self._serializer = XmlSerializer(context=context)

    def read_data(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """Read Data

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        # defer downloading the response body until accessing Response.content
        response = self._requests.get(self._url + '/' + textgrid_uri + '/data',
                                      params={'sessionId': sid},
                                      stream=True,
                                      timeout=self._config.http_timeout)
        return self._handle_response(response)

    def read_metadata(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """Read Metadata

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        response = self._requests.get(self._url + '/' + textgrid_uri + '/metadata',
                                      params={'sessionId': sid},
                                      stream=True,
                                      timeout=self._config.http_timeout)
        return self._handle_response(response)

    def create_resource(self, sid: str, project_id: str, data, metadata) -> Response:
        """Create a TextGrid object

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            data ([type]): the data
            metadata ([type]): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with metadata from newly created object
        """
        encoder = self._prepare_multipart(metadata, data)
        params = {'sessionId': sid, 'projectId': project_id,
                  'createRevision': 'false'}
        response = self._requests.post(
            self._url + '/' + 'create', params=params, data=encoder,
            headers={'Content-Type': encoder.content_type},
            timeout=self._config.http_timeout)
        return self._handle_response(response)

    def create_revision(self, sid: str, project_id: str, textgrid_uri: str,
                        data, metadata: str) -> Response:
        """Create a TextGrid object revision.

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata (str): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with metadata from newly created object revision
        """
        encoder = self._prepare_multipart(metadata, data)
        # same endpoint as create_resource, but with the uri and createRevision=true
        params = {'sessionId': sid, 'uri': textgrid_uri,
                  'createRevision': 'true', 'projectId': project_id}
        response = self._requests.post(
            self._url + '/' + 'create', params=params, data=encoder,
            headers={'Content-Type': encoder.content_type},
            timeout=self._config.http_timeout)
        return self._handle_response(response)

    def update_resource(self, sid: str, textgrid_uri: str, data, metadata,
                        create_revision: bool = False) -> Response:
        """Update a TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata ([type]): the metadata
            create_revision (bool): If `True`, create new textgrid object revision. Default: `False`

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with updated metadata
        """
        if create_revision:
            # the project id needed by create_revision is read from the metadata itself
            metadata_obj = self._parser.from_string(metadata, MetadataContainerType)
            project_id = metadata_obj.object_value.generic.generated.project.id
            return self.create_revision(sid, project_id, textgrid_uri, data, metadata)

        encoder = self._prepare_multipart(metadata, data)
        params = {'sessionId': sid}
        response = self._requests.post(
            self._url + '/' + textgrid_uri + '/update', params=params, data=encoder,
            headers={'Content-Type': encoder.content_type},
            timeout=self._config.http_timeout)
        return self._handle_response(response)

    def update_metadata(self, sid: str, textgrid_uri: str, metadata) -> Response:
        """Update metadata for TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            metadata ([type]): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with updated metadata
        """
        # metadata only: the multipart body carries no tgObjectData part
        encoder = self._prepare_multipart(metadata)
        params = {'sessionId': sid}
        response = self._requests.post(
            self._url + '/' + textgrid_uri + '/updateMetadata', params=params, data=encoder,
            headers={'Content-Type': encoder.content_type},
            timeout=self._config.http_timeout)
        return self._handle_response(response)

    def delete_resource(self, sid: str, textgrid_uri: str) -> Response:
        """Delete a TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        params = {'sessionId': sid}
        # the delete operation is requested with GET on <uri>/delete
        response = self._requests.get(
            self._url + '/' + textgrid_uri + '/delete', params=params,
            timeout=self._config.http_timeout)
        return self._handle_response(response)

    @staticmethod
    def _handle_response(response: Response) -> Response:
        """Error handling for responses from crud

        Args:
            response (Response): a response from tgcrud

        Raises:
            TextgridCrudException: if HTTP status code >= 400

        Returns:
            Response: the response
        """
        # set the encoding first, so that response.text below is decoded with it
        response.encoding = RESPONSE_ENCODING
        if not response.ok:
            error = TextgridCrudRequest._error_msg_from_html(response.text)
            message = f'[Error] HTTP Code: {response.status_code} - {error}'
            logger.warning(message)
            raise TextgridCrudException(message)
        return response

    @staticmethod
    def _error_msg_from_html(html: str) -> str:
        """Extract error message from html, as the text string for
        the error is in <meta name="description" content="error message">

        Args:
            html: an error response body from tgcrud

        Returns:
            str: the content of the meta[name=description] tag, or the first 255
                characters of the body if no such tag/attribute is present
        """
        soup = BeautifulSoup(html, 'html.parser')
        metatag = soup.select_one('meta[name="description"]')
        # .get() instead of [] so a description tag without content does not raise KeyError
        content = metatag.get('content') if metatag is not None else None
        if content is None:
            return html[0:255]
        return content

    @staticmethod
    def _prepare_multipart(metadata, data=None):
        """Create a streaming multipart object.
        Monitor the upload progress if log level is DEBUG.

        See also: https://toolbelt.readthedocs.io/en/latest/uploading-data.html

        Args:
            metadata ([type]): the metadata
            data ([type]): the data

        Returns:
            [MultipartEncoder]: Multipart containing data and metadata
        """
        fields = {
            'tgObjectMetadata': ('tgObjectMetadata', metadata, 'text/xml')
        }
        # falsy data (None, empty bytes/str) is left out of the multipart body
        if data:
            fields['tgObjectData'] = (
                'tgObjectData', data, 'application/octet-stream')

        encoder = MultipartEncoder(fields=fields)
        if logger.isEnabledFor(logging.DEBUG):
            return MultipartEncoderMonitor(encoder, TextgridCrudRequest._debug_monitor_callback)

        return encoder

    @staticmethod
    def _debug_monitor_callback(monitor: MultipartEncoderMonitor):
        """Callback for _prepare_multipart.
        Helper to log upload progress for streaming multipart when log level is DEBUG.

        Args:
            monitor (MultipartEncoderMonitor): the monitor
        """
        logger.debug('[debug multipart upload] bytes read: %s ',
                     monitor.bytes_read)
class TextgridCrudException(Exception):
    """Error raised when an exchange with tgcrud fails."""
class TextgridCrud(TextgridCrudRequest):
    """Provide access to the Textgrid CRUD Service using a XML data binding.

    Wraps the request methods of ``TextgridCrudRequest``: metadata arguments are
    ``MetadataContainerType`` objects that get serialized before sending, and
    metadata responses are parsed back into ``MetadataContainerType``.
    """

    def __init__(self, config: Optional[TextgridConfig] = None,
                 for_publication: bool = False) -> None:
        """Set up the client.

        Args:
            config (Optional[TextgridConfig]): service configuration. If omitted, a new
                ``TextgridConfig()`` is created for this instance.
            for_publication (bool): passed on to ``TextgridCrudRequest``. Default: `False`
        """
        # Resolve the default here: a ``TextgridConfig()`` call in the signature would be
        # evaluated once at import time and shared between all instances.
        if config is None:
            config = TextgridConfig()
        super().__init__(config, for_publication)

    def _metadata_from_response(self, response: Response) -> MetadataContainerType:
        """Parse the body of a tgcrud response into a MetadataContainerType."""
        return self._parser.parse(BytesIO(response.content), MetadataContainerType)

    def create_resource(self, sid: str, project_id: str,
                        data, metadata: MetadataContainerType) -> MetadataContainerType:
        """Create a TextGrid object

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            data ([type]): the data
            metadata (MetadataContainerType): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: metadata for newly created object
        """
        metadata_string = self._serializer.render(metadata)
        response = super().create_resource(sid, project_id, data, metadata_string)
        return self._metadata_from_response(response)

    def create_revision(self, sid: str, project_id: str, textgrid_uri: str,
                        data, metadata: MetadataContainerType) -> MetadataContainerType:
        """Create a TextGrid object revision.

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata (MetadataContainerType): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: metadata from newly created object revision
        """
        metadata_string = self._serializer.render(metadata)
        response = super().create_revision(sid, project_id, textgrid_uri, data, metadata_string)
        return self._metadata_from_response(response)

    def read_metadata(self, textgrid_uri: str, sid: Optional[str] = None) -> MetadataContainerType:
        """Read Metadata

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: metadata for object
        """
        response = super().read_metadata(textgrid_uri, sid)
        return self._metadata_from_response(response)

    def update_metadata(self, sid: str, textgrid_uri: str,
                        metadata: MetadataContainerType) -> MetadataContainerType:
        """Update metadata for TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            metadata (MetadataContainerType): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: updated metadata
        """
        metadata_string = self._serializer.render(metadata)
        response = super().update_metadata(sid, textgrid_uri, metadata_string)
        return self._metadata_from_response(response)

    def update_resource(self, sid: str, textgrid_uri: str,
                        data, metadata: MetadataContainerType,
                        create_revision: bool = False) -> MetadataContainerType:
        """Update a TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata (MetadataContainerType): the metadata
            create_revision (bool): If `True`, create a new textgrid object revision. Default: `False`

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: updated metadata
        """
        if create_revision:
            # the project id needed by create_revision is taken from the metadata object
            project_id = str(metadata.object_value.generic.generated.project.id)
            return self.create_revision(sid, project_id, textgrid_uri, data, metadata)

        metadata_string = self._serializer.render(metadata)
        response = super().update_resource(sid, textgrid_uri, data, metadata_string)
        return self._metadata_from_response(response)