1 | #!/usr/bin/env python3 |
||
2 | # -*- coding: utf-8 -*- |
||
3 | """ |
||
4 | Created on Fri Oct 5 11:22:33 2018 |
||
5 | |||
6 | @author: Paolo Cozzi <[email protected]> |
||
7 | |||
8 | Useful stuff to deal with the validation process |
||
9 | |||
10 | """ |
||
11 | |||
12 | import json |
||
13 | import traceback |
||
14 | |||
15 | from collections import Counter, defaultdict |
||
16 | from celery.utils.log import get_task_logger |
||
17 | |||
18 | from common.constants import ( |
||
19 | READY, ERROR, LOADED, NEED_REVISION, COMPLETED, SUBMITTED, STATUSES, |
||
20 | KNOWN_STATUSES) |
||
21 | from common.helpers import send_mail_to_admins |
||
22 | from common.tasks import BaseTask, NotifyAdminTaskMixin |
||
23 | from image.celery import app as celery_app |
||
24 | from uid.models import Sample, Animal |
||
25 | from submissions.tasks import SubmissionTaskMixin |
||
26 | from validation.models import ValidationSummary |
||
27 | |||
28 | from .models import ValidationResult as ValidationResultModel |
||
29 | from .helpers import MetaDataValidation, OntologyCacheError, RulesetError |
||
30 | |||
# Module-level logger, obtained through the Celery task logging helper
logger = get_task_logger(__name__)

# Lookup table from a status key to its name (ie {0: 'Waiting'}): the value
# of each STATUSES member is used as a (key, name) pair
key2status = dict(x.value for x in STATUSES)
36 | |||
37 | |||
# A class to deal with validation errors
class ValidationError(Exception):
    """Raised when validation ends with a status this module can't handle."""
41 | |||
42 | |||
class ValidateSubmission(object):
    """Helper class that validates the records of a single submission.

    It validates ``Animal`` and ``Sample`` objects against a ruleset, stores
    the outcome on each record and accumulates the counters and messages
    required to build the ``ValidationSummary`` objects, so that this data
    can be passed between the steps of the validation task.
    """

    def __init__(self, submission_obj, ruleset):
        """Track submission and ruleset and initialize all accumulators.

        Args:
            submission_obj: the submission under validation
            ruleset: the object used to validate records; it has to provide
                the ``check_usi_structure()`` and ``validate()`` methods
        """

        # track submission object
        self.submission_obj = submission_obj

        # track ruleset
        self.ruleset = ruleset

        # collect all unique messages for samples and animals: each
        # comparable message is mapped to the list of primary keys of the
        # records which raised it
        self.animals_messages = defaultdict(list)
        self.samples_messages = defaultdict(list)

        # map each comparable message to its offending column
        self.animals_offending_columns = dict()
        self.samples_offending_columns = dict()

        # track global statuses for animals and samples
        # Don't set keys: if you take a key which doesn't exist, you will
        # get 0 instead of a KeyError. This is how a Counter differs from a
        # default dictionary object
        self.animals_statuses = Counter()
        self.samples_statuses = Counter()

    def check_valid_statuses(self):
        """Check if validation returned an unsupported status.

        Returns:
            bool: False (after logging the first offending status) if a
            tracked status is not in KNOWN_STATUSES, True otherwise
        """

        # animals are checked before samples
        for statuses in (self.animals_statuses, self.samples_statuses):
            for key in statuses:
                if key not in KNOWN_STATUSES:
                    logger.error(
                        "Unsupported status '%s' from validation", key)
                    return False

        # if I arrive here, all validation statuses are handled
        return True

    def __has_key_in_rules(self, key):
        """Return True if at least one animal or sample got ``key`` status"""

        # counters return 0 for a missing key (and don't create it)
        return (
            self.animals_statuses[key] > 0 or
            self.samples_statuses[key] > 0)

    def has_errors_in_rules(self):
        """Return True if there are any errors in validation rules"""

        return self.__has_key_in_rules('Error')

    def has_warnings_in_rules(self):
        """Return True if there are any warnings in validation rules"""

        return self.__has_key_in_rules('Warning')

    def validate_model(self, model):
        """Validate a single record and track its validation status.

        Args:
            model (Sample/Animal): the record to validate

        Raises:
            TypeError: if model is neither a Sample nor an Animal
        """

        logger.debug("Validating %s", model)

        # this could be animal or sample
        if isinstance(model, Sample):
            model_statuses = self.samples_statuses

        elif isinstance(model, Animal):
            model_statuses = self.animals_statuses

        else:
            # fail immediately instead of getting an UnboundLocalError
            # after data were already converted and checked
            raise TypeError(
                "Can't validate %r: expecting a Sample or an Animal "
                "instance" % (model,))

        # get data in biosample format
        data = model.to_biosample()

        # TODO: remove this when IMAGE-metadata rules will support
        # IMAGE submission id
        del data['attributes']['IMAGE submission id']

        # input is a list object
        usi_result = self.ruleset.check_usi_structure([data])

        # if I have errors here, JSON isn't valid: this is not an error
        # on user's data but on InjectTool itself
        if usi_result.get_overall_status() != 'Pass':
            # update statuses (update counters), mark model and return:
            # it makes no sense to continue validation since JSON is wrong
            self.update_statuses(model_statuses, model, usi_result)
            return

        # no check_duplicates: it checks against alias (that is a pk)
        # HINT: improve check_duplicates or implement database constraints

        # check against image metadata
        ruleset_result = self.ruleset.validate(data)

        # update status and track data in an overall variable
        self.update_statuses(model_statuses, model, ruleset_result)

    # inspired from validation.deal_with_validation_results
    def update_statuses(self, model_statuses, model, result):
        """
        Update validation summary counter and then mark model with an
        appropriate status (READY for Pass and Warning, NEED_REVISION for
        the remaining statuses)

        Args:
            model_statuses (Counter): a counter object for animal or sample
                validation statuses
            model (Sample/Animal): a Sample or Animal object
            result (ValidationResultRecord): a validation result for a record
        """

        # get overall status (ie Pass, Error)
        overall = result.get_overall_status()

        # set model as valid even if it has some warnings
        if overall in ("Pass", "Warning"):
            self.mark_model(model, result, READY)

        else:
            model_statuses['Issues'] += 1
            self.mark_model(model, result, NEED_REVISION)

        # count the overall status and track that this record was validated
        model_statuses[overall] += 1
        model_statuses['Known'] += 1

    def mark_model(self, model, result, status):
        """Set status to a model and store its validation result.

        Comparable messages are collected for the validation summary, the
        ValidationResult object related to model is updated (or created)
        and then the model status is updated, unless the record is already
        COMPLETED or SUBMITTED and passes validation.

        Args:
            model (Sample/Animal): a Sample or Animal object
            result (ValidationResultRecord): a validation result for a record
            status (int): the status to set (READY or NEED_REVISION)
        """

        messages = result.get_messages()

        # get comparable messages (and their columns) for batch update
        comparable_messages = [
            (item.get_comparable_str(), item.get_field_name())
            for item in result.result_set]

        overall_status = result.get_overall_status()

        # choose where messages need to be saved for validation summary
        if isinstance(model, Sample):
            summary = (self.samples_messages, self.samples_offending_columns)

        elif isinstance(model, Animal):
            summary = (self.animals_messages, self.animals_offending_columns)

        else:
            # other objects are not tracked in validation summary
            summary = None

        if summary is not None:
            model_messages, offending_columns = summary

            for message, offending_column in comparable_messages:
                # model_messages is a defaultdict of lists
                model_messages[message].append(model.pk)
                offending_columns[message] = offending_column

        # get a validation result model or create a new one
        validationresult = model.validationresult

        if not validationresult:
            validationresult = ValidationResultModel()
            model.validationresult = validationresult

        # setting validation tool results and save
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # ok, don't update statuses for submitted objects which
        # already are in biosamples and pass validation
        if model.status in (COMPLETED, SUBMITTED) and status == READY:
            # NOTE(review): model isn't saved in this branch, so a
            # validation result created above may not be linked to the
            # stored model -- confirm this is the desired behaviour
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK",
                model, key2status[model.status])

        else:
            logger.debug(
                "Marking %s with '%s' status (%s)",
                model, key2status[status], messages)

            # update model status and save
            model.status = status
            model.save()

    def create_validation_summary(self):
        """
        This function will create ValidationSummary object that will be used
        on validation_summary view
        """

        # the data collected for each type of record
        collected = (
            ('animal', self.animals_messages, self.animals_statuses,
             self.animals_offending_columns),
            ('sample', self.samples_messages, self.samples_statuses,
             self.samples_offending_columns),
        )

        for model_type, messages, model_statuses, offending_column in (
                collected):
            summary_obj, created = ValidationSummary.objects.get_or_create(
                submission=self.submission_obj, type=model_type)

            if created:
                logger.debug(
                    "Created %s validationSummary for %s",
                    model_type, self.submission_obj)

            # reset all_count
            summary_obj.reset_all_count()

            summary_obj.submission = self.submission_obj

            # they are counter objects, so no KeyError and returns 0
            summary_obj.pass_count = model_statuses['Pass']
            summary_obj.warning_count = model_statuses['Warning']
            summary_obj.error_count = model_statuses['Error']
            summary_obj.issues_count = model_statuses['Issues']
            summary_obj.validation_known_count = model_statuses['Known']

            # one entry for each unique message
            summary_obj.messages = [
                {
                    'message': message,
                    'count': len(ids),
                    'ids': ids,
                    'offending_column': offending_column[message]
                }
                for message, ids in messages.items()]

            summary_obj.type = model_type
            summary_obj.save()

        logger.debug(
            "Results for submission %s: animals - %s, samples - %s",
            self.submission_obj,
            dict(self.animals_statuses),
            dict(self.samples_statuses))
290 | |||
291 | |||
class ValidateTask(SubmissionTaskMixin, NotifyAdminTaskMixin, BaseTask):
    """Task which validates the animals and samples of a submission"""

    name = "Validate Submission"
    description = """Validate submission data against IMAGE rules"""
    action = "validation"

    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
    # A task is not instantiated for every request, but is registered in
    # the task registry as a global instance. This means that the __init__
    # constructor will only be called once per process, and that the
    # task class is semantically closer to an Actor. If you have a task and
    # you route every request to the same process, then it will keep state
    # between requests. This can also be useful to cache resources, for
    # example, a base Task class that caches a database connection

    # override SubmissionTaskMixin update_submission_status
    def update_submission_status(
            self, submission_obj, status, message, construct_message=True):
        """Mark submission with status, then send message

        Args:
            submission_obj (uid.models.Submission): an UID submission
                object
            status (int): a :py:class:`common.constants.STATUSES` value
            message (str): the message to send
            construct_message (bool): construct validation message or not
        """

        # same call as the parent one: only the default value of
        # construct_message is defined here
        super().update_submission_status(
            submission_obj, status, message, construct_message)

    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Update the submission object and send emails after an exception
        was raised

        Args:
            submission_obj (uid.models.Submission): an UID submission
                object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # mark submission with its status
        self.update_submission_status(submission_obj, status, message)

        # the stacktrace of the exception currently handled
        stacktrace = traceback.format_exc()

        # send a mail to the user with the stacktrace
        subject = "Error in IMAGE Validation: %s" % message
        body = (
            "Something goes wrong with validation. Please report "
            "this to InjectTool team\n\n %s" % stacktrace)

        self.mail_to_owner(submission_obj, subject, body)

        # this is a common.helpers function that is called only on request
        if notify_admins:
            send_mail_to_admins(subject, body)

    # TODO: define a method to inform user for error in validation (Task run
    # with success but errors in data)

    def temporary_error_report(self, exc, submission_obj):
        """
        Deal with known issues in validation task. Notify the user using
        email and set status as LOADED in order to recall this task

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (uid.models.Submission): an UID submission
                object

        Return
            str: "success" since this task is correctly managed
        """

        logger.error("Error in validation: %s" % exc)

        user_message = "Errors in EBI API endpoints. Please try again later"
        logger.error(user_message)

        # the generic report updates submission and sends email
        self.__generic_error_report(submission_obj, LOADED, user_message)

        return "success"

    def ruleset_error_report(self, exc, submission_obj):
        """
        Deal with ruleset issue in validation task. Notify the user (and
        the admins) using email and set status as ERROR, since the user
        can't do anything without admin intervention

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (uid.models.Submission): an UID submission
                object

        Return
            str: "success" since this task is correctly managed
        """

        logger.error("Error ruleset: %s" % exc)

        user_message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")
        logger.error(user_message)

        # the generic report updates submission and sends email
        self.__generic_error_report(
            submission_obj, ERROR, user_message, notify_admins=True)

        return "success"

    def run(self, submission_id):
        """Perform validation steps for a submission.

        Args:
            submission_id: the value given to ``get_uid_submission`` to
                retrieve the submission

        Return
            str: "success" when the task is correctly managed

        Raises:
            ValidationError: if validation returns an unsupported status
        """

        logger.info("Validate Submission started")

        # get submission object
        submission_obj = self.get_uid_submission(submission_id)

        # read rules when task starts. Model issues when starting
        # OntologyCache at start
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # get a submission data helper instance
        helper = ValidateSubmission(submission_obj, self.ruleset)

        try:
            # validate all the animals, then all the samples
            for model_class in (Animal, Sample):
                records = model_class.objects.filter(
                    submission=submission_obj).order_by('id')

                for record in records:
                    helper.validate_model(record)

        # TODO: errors in validation should raise custom exception
        except json.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except Exception as exc:
            raise self.retry(exc=exc)

        # if error messages change in IMAGE-ValidationTool, all this
        # stuff isn't valid and I throw an exception
        if not helper.check_valid_statuses():
            message = (
                "Unsupported validation status for submission %s" % (
                    submission_obj))

            # debug: print error in log
            logger.error(message)

            # create validation summary
            helper.create_validation_summary()

            # mark submission with ERROR (this is not related to user data)
            # calling the appropriate method passing ERROR as status
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since it is an InjectTool issue
            raise ValidationError(message)

        # set a proper value for status (READY or NEED_REVISION): errors
        # in rules require a revision, warnings don't
        if helper.has_errors_in_rules():
            # create validation summary
            helper.create_validation_summary()

            # mark submission with NEED_REVISION
            self.submission_fail(
                submission_obj,
                "Error in metadata. Need revisions before submit")

            logger.warning(
                "Error in metadata for submission %s" % (submission_obj))

        # WOW: I can submit those data
        elif helper.has_warnings_in_rules():
            # create validation summary
            helper.create_validation_summary()

            # mark submission with READY status
            self.submission_ready(
                submission_obj, "Submission validated with some warnings")

            logger.info(
                "Submission %s validated with some warning" % (
                    submission_obj))

        else:
            # create validation summary
            helper.create_validation_summary()

            # mark submission with READY status
            self.submission_ready(
                submission_obj, "Submission validated with success")

            logger.info(
                "Submission %s validated with success" % (submission_obj))

        logger.info("Validate Submission completed")

        return "success"

    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """Mark a submission with status (NEED_REVISION by default)"""

        # the message is prefixed before being stored
        self.update_submission_status(
            submission_obj, status, "Validation got errors: %s" % message)

    def submission_ready(self, submission_obj, message):
        """Mark a submission with READY status"""

        self.update_submission_status(submission_obj, READY, message)
528 | |||
529 | |||
# Class-based tasks are registered explicitly in the celery application
# task registry, see:
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
celery_app.tasks.register(ValidateTask)
533 |