Coverage for /usr/local/lib/python3.8/site-packages/tgclients/crud.py: 95%

115 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-02 16:49 +0000

1# SPDX-FileCopyrightText: 2022 Georg-August-Universität Göttingen 

2# 

3# SPDX-License-Identifier: LGPL-3.0-or-later 

4 

5"""TextGrid CRUD API.""" 

6import logging 

7from io import BytesIO 

8from typing import Optional 

9 

10from bs4 import BeautifulSoup 

11 

12import requests 

13from requests.models import Response 

14from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor 

15from xsdata.formats.dataclass.context import XmlContext 

16from xsdata.formats.dataclass.parsers import XmlParser 

17from xsdata.formats.dataclass.serializers import XmlSerializer 

18 

19from tgclients.config import TextgridConfig 

20from tgclients.databinding.textgrid_metadata_2010 import MetadataContainerType 

21 

22logger = logging.getLogger(__name__) 

23RESPONSE_ENCODING = 'utf-8' 

24 

25 

class TextgridCrudRequest:
    """Provide low level access to the TextGrid CRUD Service.

    Every public method issues one HTTP request through a shared
    ``requests.Session`` and returns the raw ``Response``. Responses with an
    error status are turned into a ``TextgridCrudException``.
    """

    # NOTE(review): the default ``TextgridConfig()`` is evaluated once, when the
    # ``def`` statement runs, so all instances created without an explicit
    # config share that single object.
    def __init__(self, config: TextgridConfig = TextgridConfig(),
                 for_publication: bool = False) -> None:
        """Set up endpoint URL, HTTP session and XML parser/serializer.

        Args:
            config (TextgridConfig): configuration providing the CRUD endpoint
                URLs and the HTTP timeout
            for_publication (bool): If `True`, use ``config.crud_public`` instead
                of ``config.crud`` as endpoint. Default: `False`
        """
        if for_publication:
            logger.warning('for_publication set. this tgcrud client is able to publish data to '
                           'the public repository, please make sure you know what you are doing.')
            self._url = config.crud_public
        else:
            self._url = config.crud
        self._config = config
        # reuse tcp connections: https://requests.readthedocs.io/en/latest/user/advanced/#session-objects
        self._requests = requests.Session()

        # It’s recommended to either reuse the same parser/serializer instance
        # or reuse the context instance. see https://xsdata.readthedocs.io/en/latest/xml.html
        # Parser and serializer therefore share one context.
        context = XmlContext()
        self._parser = XmlParser(context=context)
        self._serializer = XmlSerializer(context=context)

    def read_data(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """Read Data

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        # defer downloading the response body until accessing Response.content
        response = self._requests.get(f'{self._url}/{textgrid_uri}/data',
                                      params={'sessionId': sid},
                                      stream=True,
                                      timeout=self._config.http_timeout)
        return self._handle_response(response)

    def read_metadata(self, textgrid_uri: str, sid: Optional[str] = None) -> Response:
        """Read Metadata

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        response = self._requests.get(f'{self._url}/{textgrid_uri}/metadata',
                                      params={'sessionId': sid},
                                      stream=True,
                                      timeout=self._config.http_timeout)
        return self._handle_response(response)

    def create_resource(self, sid: str, project_id: str, data, metadata) -> Response:
        """Create a TextGrid object

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            data ([type]): the data
            metadata ([type]): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with metadata from newly created object
        """
        params = {'sessionId': sid, 'projectId': project_id,
                  'createRevision': 'false'}
        return self._post_multipart('create', params, metadata, data)

    def create_revision(self, sid: str, project_id: str, textgrid_uri: str,
                        data, metadata: str) -> Response:
        """Create a TextGrid object revision.

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata (str): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with metadata from newly created object revision
        """
        params = {'sessionId': sid, 'uri': textgrid_uri,
                  'createRevision': 'true', 'projectId': project_id}
        return self._post_multipart('create', params, metadata, data)

    def update_resource(self, sid: str, textgrid_uri: str, data, metadata,
                        create_revision: bool = False) -> Response:
        """Update a TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata ([type]): the metadata
            create_revision (bool): If `True`, create new textgrid object revision. Default: `False`

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with updated metadata
        """
        if create_revision:
            # a revision is created through the 'create' endpoint, which needs
            # the project id; it is read from the metadata itself
            metadata_obj = self._parser.from_string(metadata, MetadataContainerType)
            project_id = metadata_obj.object_value.generic.generated.project.id
            return self.create_revision(sid, project_id, textgrid_uri, data, metadata)

        return self._post_multipart(f'{textgrid_uri}/update', {'sessionId': sid},
                                    metadata, data)

    def update_metadata(self, sid: str, textgrid_uri: str, metadata) -> Response:
        """Update metadata for TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            metadata ([type]): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service with updated metadata
        """
        return self._post_multipart(f'{textgrid_uri}/updateMetadata', {'sessionId': sid},
                                    metadata)

    def delete_resource(self, sid: str, textgrid_uri: str) -> Response:
        """Delete a TextGrid object

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        # the delete operation is requested with GET on the '/delete' path
        response = self._requests.get(
            f'{self._url}/{textgrid_uri}/delete', params={'sessionId': sid},
            timeout=self._config.http_timeout)
        return self._handle_response(response)

    def _post_multipart(self, path: str, params: dict, metadata, data=None) -> Response:
        """POST metadata (and optionally data) as multipart body to tgcrud.

        Args:
            path (str): path below the service URL, without leading slash
            params (dict): query parameters for the request
            metadata ([type]): the metadata
            data ([type]): the data. Defaults to None.

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            Response: HTTP response from service
        """
        encoder = self._prepare_multipart(metadata, data)
        response = self._requests.post(
            f'{self._url}/{path}', params=params, data=encoder,
            # the encoder generates the multipart boundary, so the header has to come from it
            headers={'Content-Type': encoder.content_type},
            timeout=self._config.http_timeout)
        return self._handle_response(response)

    @staticmethod
    def _handle_response(response: Response) -> Response:
        """Error handling for responses from crud

        Args:
            response (Response): a response from tgcrud

        Raises:
            TextgridCrudException: if HTTP status code >= 400

        Returns:
            Response: the response
        """
        # set the encoding explicitly, it is used when decoding response.text
        response.encoding = RESPONSE_ENCODING
        if not response.ok:
            error = TextgridCrudRequest._error_msg_from_html(response.text)
            message = f'[Error] HTTP Code: {response.status_code} - {error}'
            logger.warning(message)
            raise TextgridCrudException(message)
        return response

    @staticmethod
    def _error_msg_from_html(html: str) -> str:
        """Extract error message from html, as the text string for
        the error is in <meta name="description" content="error message">

        Args:
            html: an error response body from tgcrud

        Returns:
            str: the content of the meta[name=description] tag, or the first
                255 characters of the body if there is no such tag or the tag
                has no content attribute
        """
        soup = BeautifulSoup(html, 'html.parser')
        metatag = soup.select_one('meta[name="description"]')
        # .get() instead of indexing: a description tag without content
        # attribute must not raise while an error is being reported
        content = metatag.get('content') if metatag is not None else None
        if content is None:
            return html[0:255]
        return content

    @staticmethod
    def _prepare_multipart(metadata, data=None):
        """Create a streaming multipart object.
        Monitor the upload progress if log level is DEBUG.

        See also: https://toolbelt.readthedocs.io/en/latest/uploading-data.html

        Args:
            metadata ([type]): the metadata
            data ([type]): the data. Defaults to None.

        Returns:
            [MultipartEncoder]: Multipart containing data and metadata
        """
        fields = {
            'tgObjectMetadata': ('tgObjectMetadata', metadata, 'text/xml')
        }
        # NOTE(review): truthiness check, so empty data ('' or b'') is left out
        # of the multipart just like None - confirm this is intended
        if data:
            fields['tgObjectData'] = (
                'tgObjectData', data, 'application/octet-stream')

        encoder = MultipartEncoder(fields=fields)
        if logger.isEnabledFor(logging.DEBUG):
            return MultipartEncoderMonitor(encoder, TextgridCrudRequest._debug_monitor_callback)

        return encoder

    @staticmethod
    def _debug_monitor_callback(monitor: MultipartEncoderMonitor) -> None:
        """Callback for _prepare_multipart.
        Helper to log upload progress for streaming multipart when log level is DEBUG.

        Args:
            monitor (MultipartEncoderMonitor): the monitor
        """
        logger.debug('[debug multipart upload] bytes read: %s ',
                     monitor.bytes_read)

283 

284 

class TextgridCrudException(Exception):
    """Error raised when communication with tgcrud fails.

    Carries the error message as its only argument.
    """

287 

288 

class TextgridCrud(TextgridCrudRequest):
    """Access the Textgrid CRUD Service through the XML data binding.

    Metadata is passed in and handed back as ``MetadataContainerType`` objects;
    the (de)serialization to XML and the HTTP requests are delegated to
    ``TextgridCrudRequest``.
    """

    def __init__(self, config: TextgridConfig = TextgridConfig(),
                 for_publication: bool = False) -> None:
        super().__init__(config, for_publication)

    def _metadata_from_response(self, response: Response) -> MetadataContainerType:
        """Parse the body of a tgcrud response into the metadata data binding.

        Args:
            response (Response): a successful response from tgcrud

        Returns:
            MetadataContainerType: the metadata contained in the response body
        """
        body = BytesIO(response.content)
        return self._parser.parse(body, MetadataContainerType)

    def create_resource(self, sid: str, project_id: str,
                        data, metadata: MetadataContainerType) -> MetadataContainerType:
        """Create a new TextGrid object.

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            data ([type]): the data
            metadata (MetadataContainerType): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: metadata of the newly created object
        """
        serialized = self._serializer.render(metadata)
        return self._metadata_from_response(
            super().create_resource(sid, project_id, data, serialized))

    def create_revision(self, sid: str, project_id: str, textgrid_uri: str,
                        data, metadata: MetadataContainerType) -> MetadataContainerType:
        """Create a new revision of a TextGrid object.

        Args:
            sid (str): Session ID
            project_id (str): Project ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata (MetadataContainerType): the metadata

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: metadata of the newly created object revision
        """
        serialized = self._serializer.render(metadata)
        return self._metadata_from_response(
            super().create_revision(sid, project_id, textgrid_uri, data, serialized))

    def read_metadata(self, textgrid_uri: str, sid: Optional[str] = None) -> MetadataContainerType:
        """Read the metadata of a TextGrid object.

        Args:
            textgrid_uri (str): Textgrid URI
            sid (Optional[str]): Session ID. Defaults to None.

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: metadata of the object
        """
        return self._metadata_from_response(super().read_metadata(textgrid_uri, sid))

    def update_metadata(self, sid: str, textgrid_uri: str,
                        metadata: MetadataContainerType) -> MetadataContainerType:
        """Update the metadata of a TextGrid object.

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            metadata (MetadataContainerType): the metadata

        Returns:
            MetadataContainerType: the updated metadata
        """
        serialized = self._serializer.render(metadata)
        return self._metadata_from_response(
            super().update_metadata(sid, textgrid_uri, serialized))

    def update_resource(self, sid: str, textgrid_uri: str,
                        data, metadata: MetadataContainerType,
                        create_revision: bool = False) -> MetadataContainerType:
        """Update data and metadata of a TextGrid object.

        Args:
            sid (str): Session ID
            textgrid_uri (str): Textgrid URI
            data ([type]): the data
            metadata (MetadataContainerType): the metadata
            create_revision (bool): If `True`, create a new textgrid object revision. Default: `False`

        Raises:
            TextgridCrudException: if HTTP status code >= 400 (# noqa: DAR402)

        Returns:
            MetadataContainerType: the updated metadata
        """
        if not create_revision:
            serialized = self._serializer.render(metadata)
            return self._metadata_from_response(
                super().update_resource(sid, textgrid_uri, data, serialized))

        # a revision needs the project id, which is taken from the metadata object
        project_id = str(metadata.object_value.generic.generated.project.id)
        return self.create_revision(sid, project_id, textgrid_uri, data, metadata)