Completed
Push — master ( 2c16e2...2c16e2 )
by Paolo
13s queued 11s
created

validation.tasks.ValidateTask.on_failure()   A

Complexity

Conditions 1

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 12
rs 10
c 0
b 0
f 0
cc 1
nop 6
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Oct  5 11:22:33 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
Useful stuff to deal with the validation process
9
10
"""
11
12
import json
13
import traceback
14
15
from collections import Counter, defaultdict
16
from celery.utils.log import get_task_logger
17
18
from common.constants import (
19
    READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES, KNOWN_STATUSES)
20
from common.helpers import send_mail_to_admins
21
from common.tasks import BaseTask, NotifyAdminTaskMixin
22
from image.celery import app as celery_app
23
from image_app.models import Sample, Animal
24
from submissions.tasks import SubmissionTaskMixin
25
from validation.models import ValidationSummary
26
27
from .models import ValidationResult as ValidationResultModel
28
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError
29
30
# Module-level logger used by every class and task defined below
logger = get_task_logger(__name__)

# Map each status value to its name (ie {0: 'Waiting'}); built from the
# value pairs carried by the STATUSES members
key2status = dict(status.value for status in STATUSES)
35
36
37
# A class to deal with validation errors
38
class ValidationError(Exception):
    """Raised when validation ends with a status this module can't handle"""
40
41
42
class ValidateSubmission(object):
    """
    An helper class for submission task, useful to pass parameters like
    submission data between tasks.

    It validates Animal/Sample objects against a ruleset, marks each
    object with the resulting status and accumulates the per-type
    counters and messages used to build the ValidationSummary objects
    """

    def __init__(self, submission_obj, ruleset):
        """
        Args:
            submission_obj: the submission the validated objects belong to
            ruleset: an object providing ``check_usi_structure()`` and
                ``validate()`` (ie a MetaDataValidation instance)
        """

        # track submission object
        self.submission_obj = submission_obj

        # track ruleset
        self.ruleset = ruleset

        # comparable message -> list of pk of the objects which raised it
        self.animals_messages = defaultdict(list)
        self.samples_messages = defaultdict(list)

        # comparable message -> name of the offending column
        self.animals_offending_columns = dict()
        self.samples_offending_columns = dict()

        # track global statuses for animals and samples
        # Don't set keys: if you take a key which doesn't exists, you will
        # get 0 instead of key errors. This is how Counter differ from a
        # default dictionary object
        self.animals_statuses = Counter()
        self.samples_statuses = Counter()

    def check_valid_statuses(self):
        """
        Check if validation return with an unsupported status message

        Returns:
            bool: False (after logging the first offending key) if a key
            tracked in the animals or samples counters is not in
            KNOWN_STATUSES, True otherwise
        """

        # animals are inspected before samples
        for statuses in (self.animals_statuses, self.samples_statuses):
            for key in statuses:
                if key not in KNOWN_STATUSES:
                    logger.error(
                        "Unsupported status '%s' from validation", key)
                    return False

        # if I arrive here, all validation statuses are handled
        return True

    def __has_key_in_rules(self, key):
        """
        Generic function to test errors in validation rules

        Returns:
            bool: True if ``key`` was counted at least once for animals
            or samples
        """

        # reading a missing key from a Counter returns 0 and doesn't
        # insert the key
        return (self.animals_statuses[key] > 0 or
                self.samples_statuses[key] > 0)

    def has_errors_in_rules(self):
        """Return True if there is any errors in validation rules"""

        return self.__has_key_in_rules('Error')

    def has_warnings_in_rules(self):
        """Return True if there is any warnings in validation rules"""

        return self.__has_key_in_rules('Warning')

    def validate_model(self, model):
        """
        Validate a single object and track its status

        The object is converted in biosample format and checked for USI
        structure first: if this check doesn't pass, the result is tracked
        and the ruleset validation is skipped

        Args:
            model (Sample/Animal): a Sample or Animal object

        Raises:
            TypeError: if model is neither a Sample nor an Animal
        """

        logger.debug("Validating %s", model)

        # this could be animal or sample
        if isinstance(model, Sample):
            model_statuses = self.samples_statuses

        elif isinstance(model, Animal):
            model_statuses = self.animals_statuses

        else:
            # fail immediately with a clear message, instead of reaching
            # update_statuses with an undefined counter
            raise TypeError(
                "Can't validate %r: expected a Sample or Animal instance" % (
                    model,))

        # get data in biosample format
        data = model.to_biosample()

        # input is a list object
        usi_result = self.ruleset.check_usi_structure([data])

        # if I have errors here, JSON isn't valid: this is not an error
        # on user's data but on InjectTool itself
        if usi_result.get_overall_status() != 'Pass':
            # update statuses (update counters), mark model and return
            self.update_statuses(model_statuses, model, usi_result)

            # It make no sense continue validation since JSON is wrong
            return

        # no check_duplicates: it checks against alias (that is a pk)
        # HINT: improve check_duplicates or implement database constraints

        # check against image metadata
        ruleset_result = self.ruleset.validate(data)

        # update status and track data in a overall variable
        self.update_statuses(model_statuses, model, ruleset_result)

    # inspired from validation.deal_with_validation_results
    def update_statuses(self, model_statuses, model, result):
        """
        Update validation summary counter and then mark model with an
        appropriate status (READY for Pass and Warning, NEED_REVISION for
        the remaining statuses)

        Args:
            model_statuses (Counter): a counter object for animal or sample
            validation statuses
            model (Sample/Animal): a Sample or Animal object
            result (ValidationResultRecord): a validation result for a record
        """

        # get overall status (ie Pass, Error)
        overall = result.get_overall_status()

        # set model as valid even if has some warnings
        if overall in ("Pass", "Warning"):
            self.mark_model(model, result, READY)

        else:
            model_statuses.update(['Issues'])
            self.mark_model(model, result, NEED_REVISION)

        # count the overall status, then count the object as validated
        model_statuses.update([overall])
        model_statuses.update(['Known'])

    def mark_model(self, model, result, status):
        """
        Set status to a model and instantiate a ValidationResult obj

        Args:
            model (Sample/Animal): a Sample or Animal object
            result (ValidationResultRecord): a validation result for a record
            status (int): the status to set on ``model.name`` (skipped when
            the name is COMPLETED and status is READY)
        """

        messages = result.get_messages()

        # get comparable messages for batch update
        comparable_messages = [
            {
                'message': result_set.get_comparable_str(),
                'offending_column': result_set.get_field_name(),
            }
            for result_set in result.result_set
        ]
        overall_status = result.get_overall_status()

        # choose where to save messages for validation summary. Objects
        # which are neither samples nor animals are not tracked
        if isinstance(model, Sample):
            tracked_messages = self.samples_messages
            tracked_columns = self.samples_offending_columns

        elif isinstance(model, Animal):
            tracked_messages = self.animals_messages
            tracked_columns = self.animals_offending_columns

        else:
            tracked_messages = tracked_columns = None

        if tracked_messages is not None:
            for item in comparable_messages:
                # tracked_messages is a defaultdict(list) of object pks
                tracked_messages[item['message']].append(model.pk)
                tracked_columns[item['message']] = item['offending_column']

        # get a validation result model or create a new one
        if hasattr(model.name, 'validationresult'):
            validationresult = model.name.validationresult

        else:
            validationresult = ValidationResultModel()
            model.name.validationresult = validationresult

        # setting validationtool results and save
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # ok, don't update Name statuses for submitted objects which
        # already are in biosamples and pass validation
        if model.name.status == COMPLETED and status == READY:
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK",
                model, key2status[model.name.status])

        else:
            logger.debug(
                "Marking %s with '%s' status (%s)",
                model, key2status[status], messages)

            # update model status and save
            model.name.status = status
            model.name.save()

    def create_validation_summary(self):
        """
        This function will create ValidationSummary object that will be used
        on validation_summary view.

        One summary is fetched (or created) for the 'animal' type and one
        for the 'sample' type, then filled with the tracked counters and
        messages
        """

        for model_type in ['animal', 'sample']:
            summary_obj, created = ValidationSummary.objects.get_or_create(
                submission=self.submission_obj, type=model_type)

            if created:
                logger.debug(
                    "Created %s validationSummary for %s",
                    model_type, self.submission_obj)

            # reset all_count
            summary_obj.reset_all_count()

            # I'm cycling with animal and sample type
            if model_type == 'animal':
                messages = self.animals_messages
                model_statuses = self.animals_statuses
                offending_column = self.animals_offending_columns

            else:
                messages = self.samples_messages
                model_statuses = self.samples_statuses
                offending_column = self.samples_offending_columns

            summary_obj.submission = self.submission_obj

            # they are counter object, so no Keyerror and returns 0
            summary_obj.pass_count = model_statuses['Pass']
            summary_obj.warning_count = model_statuses['Warning']
            summary_obj.error_count = model_statuses['Error']
            summary_obj.issues_count = model_statuses['Issues']
            summary_obj.validation_known_count = model_statuses['Known']

            # one entry for each unique message
            summary_obj.messages = [
                {
                    'message': message,
                    'count': len(ids),
                    'ids': ids,
                    'offending_column': offending_column[message],
                }
                for message, ids in messages.items()
            ]
            summary_obj.type = model_type
            summary_obj.save()

        logger.debug(
            "Results for submission %s: animals - %s, samples - %s",
            self.submission_obj,
            dict(self.animals_statuses),
            dict(self.samples_statuses))
285
286
287
class ValidateTask(SubmissionTaskMixin, NotifyAdminTaskMixin, BaseTask):
    """
    Task which validates every Animal and Sample of a submission against
    the IMAGE ruleset and then marks the submission with the outcome
    """

    name = "Validate Submission"
    description = """Validate submission data against IMAGE rules"""
    action = "validation"

    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
    # A task is not instantiated for every request, but is registered in
    # the task registry as a global instance. This means that the __init__
    # constructor will only be called once per process, and that the
    # task class is semantically closer to an Actor. if you have a task and
    # you route every request to the same process, then it will keep state
    # between requests. This can also be useful to cache resources, For
    # example, a base Task class that caches a database connection

    # override SubmissionTaskMixin update_submission_status
    def update_submission_status(
            self, submission_obj, status, message, construct_message=True):
        """Mark submission with status, then send message

        This override only forwards its arguments to the parent
        implementation, with ``construct_message`` defaulting to True

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` value
            message (str): the message to send
            construct_message (bool): construct validation message or not
        """

        parent = super()
        parent.update_submission_status(
            submission_obj, status, message, construct_message)

    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Update the submission status, then mail the current traceback to
        the submission owner (and optionally to the admins)

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # first of all, track the status on the submission
        self.update_submission_status(submission_obj, status, message)

        # text of the exception currently being handled
        stacktrace = traceback.format_exc()

        # compose the mail: the stacktrace goes in the body
        email_subject = "Error in IMAGE Validation: %s" % (message)
        email_template = (
            "Something goes wrong with validation. "
            "Please report this to InjectTool team\n\n %s")
        email_message = email_template % str(stacktrace)

        self.mail_to_owner(submission_obj, email_subject, email_message)

        # admins receive the same mail only when requested
        if not notify_admins:
            return

        # this is a common.helpers function
        send_mail_to_admins(email_subject, email_message)

    # TODO: define a method to inform user for error in validation (Task run
    # with success but errors in data)

    def temporary_error_report(self, exc, submission_obj):
        """
        Deal with known issues in validation task. Notify the user using
        email and set the submission status to LOADED

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        message = "Errors in EBI API endpoints. Please try again later"

        # log the original exception, then the message for the user
        logger.error("Error in validation: %s" % exc)
        logger.error(message)

        # update submission and send email (admins aren't notified)
        self.__generic_error_report(submission_obj, LOADED, message)

        return "success"

    def ruleset_error_report(self, exc, submission_obj):
        """
        Deal with ruleset issue in validation task. Notify the user and
        the admins using email and set status as ERROR, since the user
        can't do anything without admin intervention

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")

        # log the original exception, then the message for the user
        logger.error("Error ruleset: %s" % exc)
        logger.error(message)

        # update submission and send email to both owner and admins
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        return "success"

    def run(self, submission_id):
        """
        Perform validation steps for a submission

        Args:
            submission_id: the identifier passed to get_uid_submission()

        Return
            str: "success" when validation is performed or when a known
            issue is reported to the user

        Raises:
            ValidationError: if validation returns an unsupported status
        """

        logger.info("Validate Submission started")

        # get submission object
        submission_obj = self.get_uid_submission(submission_id)

        # read rules when task starts. Model issues when starting
        # OntologyCache at start
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # get a submission data helper instance
        helper = ValidateSubmission(submission_obj, self.ruleset)

        try:
            # all the animals first, then all the samples (ordered by id)
            for model_class in (Animal, Sample):
                records = model_class.objects.filter(
                    name__submission=submission_obj).order_by('id')

                for record in records:
                    helper.validate_model(record)

        # TODO: errors in validation should raise custom exception
        except json.decoder.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except Exception as exc:
            raise self.retry(exc=exc)

        # if error messages changes in IMAGE-ValidationTool, all this
        # stuff isn't valid and I throw an exception
        if not helper.check_valid_statuses():
            message = (
                "Unsupported validation status for submission %s" % (
                    submission_obj))

            # debug: print error in log
            logger.error(message)

            # create validation summary
            helper.create_validation_summary()

            # mark submission with ERROR (this is not related to user data)
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since is an InjectTool issue
            raise ValidationError(message)

        # choose how to mark the submission and what to log: errors need
        # revision, while warnings and clean data can be submitted
        if helper.has_errors_in_rules():
            mark_submission = self.submission_fail
            message = "Error in metadata. Need revisions before submit"
            log = logger.warning
            log_template = "Error in metadata for submission %s"

        elif helper.has_warnings_in_rules():
            mark_submission = self.submission_ready
            message = "Submission validated with some warnings"
            log = logger.info
            log_template = "Submission %s validated with some warning"

        else:
            mark_submission = self.submission_ready
            message = "Submission validated with success"
            log = logger.info
            log_template = "Submission %s validated with success"

        # create validation summary, mark the submission and log
        helper.create_validation_summary()
        mark_submission(submission_obj, message)
        log(log_template % submission_obj)

        logger.info("Validate Submission completed")

        return "success"

    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """
        Mark a submission as failed, prefixing the message with
        "Validation got errors: "

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            message (str): the reason of the failure
            status (int): the status to set (NEED_REVISION by default)
        """

        # override message
        failure_message = "Validation got errors: %s" % (message)
        self.update_submission_status(
            submission_obj, status, failure_message)

    def submission_ready(self, submission_obj, message):
        """Mark a submission with READY status"""

        self.update_submission_status(submission_obj, READY, message)
523
524
525
# register explicitly tasks
526
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
527
# Add ValidateTask to the task registry of the Celery application
# NOTE(review): presumably required because class-based tasks are not
# registered automatically - confirm against the Celery version in use
celery_app.tasks.register(ValidateTask)
528