Passed
Pull Request — master (#38)
by Paolo
03:00
created

validation.tasks   B

Complexity

Total Complexity 50

Size/Duplication

Total Lines 551
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 50
eloc 241
dl 0
loc 551
rs 8.4
c 0
b 0
f 0

18 Methods

Rating   Name   Duplication   Size   Complexity  
A ValidateSubmission.has_warnings_in_rules() 0 4 1
A ValidateSubmission.__has_key_in_rules() 0 9 3
A ValidateSubmission.check_valid_statuses() 0 16 5
A ValidateSubmission.has_errors_in_rules() 0 4 1
A ValidateSubmission.__init__() 0 17 1
A ValidateSubmission.update_statuses() 0 27 2
A ValidateTask.submission_ready() 0 4 1
A ValidateTask.temporary_error_report() 0 23 1
A ValidateTask.send_message() 0 13 1
C ValidateTask.run() 0 103 10
A ValidateSubmission.validate_model() 0 33 4
C ValidateSubmission.mark_model() 0 50 9
A ValidateTask.ruleset_error_report() 0 26 1
A ValidateTask.__mark_submission() 0 8 1
A ValidateTask.__generic_error_report() 0 45 2
A ValidateTask.submission_fail() 0 6 1
A ValidateTask.on_failure() 0 12 1
B ValidateSubmission.create_validation_summary() 0 53 5

How to fix   Complexity   

Complexity

Complex classes like validation.tasks often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Oct  5 11:22:33 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
Useful stuff to deal with the validation process
9
10
"""
11
12
import json
13
import traceback
14
15
from collections import Counter
16
from celery.utils.log import get_task_logger
17
18
from django.conf import settings
19
from django.core.mail import send_mass_mail
20
21
from common.constants import (
22
    READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES, KNOWN_STATUSES)
23
from validation.helpers import construct_validation_message
24
from image.celery import app as celery_app, MyTask
25
from image_app.helpers import get_admin_emails
26
from image_app.models import Submission, Sample, Animal
27
from submissions.helpers import send_message
28
from validation.models import ValidationSummary
29
30
from .models import ValidationResult as ValidationResultModel
31
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError
32
33
# Module-level logger, built through the celery task logger helper
logger = get_task_logger(__name__)

# Map each status key to its name (ie {0: 'Waiting'}); every member of
# STATUSES is expected to carry a (key, name) pair as its value
key2status = dict(member.value for member in STATUSES)
38
39
40
# A class to deal with validation errors
41
class ValidationError(Exception):
    """Raised when the validation process itself can't be trusted (for
    example when an unsupported validation status is returned)"""
43
44
45
class ValidateSubmission(object):
    """
    An helper class for submission task, useful to pass parameters like
    submission data between tasks.

    It keeps together the submission object, the ruleset used to validate
    it and the counters (messages and statuses, for animals and samples)
    which are filled while validating each model.
    """

    def __init__(self, submission_obj, ruleset):
        """
        Args:
            submission_obj: the submission object under validation
            ruleset: the object used to validate records; this class calls
                its ``check_usi_structure`` and ``validate`` methods
        """

        # track submission object
        self.submission_obj = submission_obj

        # track ruleset
        self.ruleset = ruleset

        # collect all unique messages for samples and animals
        self.messages_animals = Counter()
        self.messages_samples = Counter()

        # track global statuses for animals and samples
        # Don't set keys: if you take a key which doesn't exists, you will
        # get 0 instead of key errors. This is how Counter differ from a
        # default dictionary object
        self.statuses_animals = Counter()
        self.statuses_samples = Counter()

    def check_valid_statuses(self):
        """Check if validation return with an unsupported status message

        Returns:
            bool: False (after logging an error) as soon as a tracked
            status is not in KNOWN_STATUSES, True otherwise
        """

        # animals are inspected before samples
        for model_statuses in (self.statuses_animals, self.statuses_samples):
            for key in model_statuses:
                if key not in KNOWN_STATUSES:
                    logger.error(
                        "Unsupported status '%s' from validation" % key)
                    return False

        # if I arrive here, all validation statuses are handled
        return True

    def __has_key_in_rules(self, key):
        """Generic function to test errors in validation rules

        Returns:
            bool: True if at least one animal or sample was counted with
            the ``key`` status
        """

        # counters return 0 for missing keys, so no KeyError here
        return (
            self.statuses_animals[key] > 0 or
            self.statuses_samples[key] > 0)

    def has_errors_in_rules(self):
        """Return True if there is any errors in validation rules"""

        return self.__has_key_in_rules('Error')

    def has_warnings_in_rules(self):
        """Return True if there is any warnings in validation rules"""

        return self.__has_key_in_rules('Warning')

    def validate_model(self, model):
        """
        Validate a single model and track its results in the counters

        Args:
            model (Sample/Animal): a Sample or Animal object

        Raises:
            TypeError: if model is neither a Sample nor an Animal
        """

        logger.debug("Validating %s" % (model))

        # this could be animal or sample
        if isinstance(model, Sample):
            model_statuses = self.statuses_samples

        elif isinstance(model, Animal):
            model_statuses = self.statuses_animals

        else:
            # without this guard model_statuses would be undefined and the
            # failure would show up later as an UnboundLocalError
            raise TypeError(
                "Can't validate %r: expected a Sample or an Animal" % (
                    model, ))

        # get data in biosample format
        data = model.to_biosample()

        # input is a list object
        usi_result = self.ruleset.check_usi_structure([data])

        # if I have errors here, JSON isn't valid: this is not an error
        # on user's data but on InjectTool itself
        if usi_result.get_overall_status() != 'Pass':
            # update statuses (update counters), mark model and return
            self.update_statuses(model_statuses, model, usi_result)

            # It make no sense continue validation since JSON is wrong
            return

        # no check_duplicates: it checks against alias (that is a pk)
        # HINT: improve check_duplicates or implement database constraints

        # check against image metadata
        ruleset_result = self.ruleset.validate(data)

        # update status and track data in a overall variable
        self.update_statuses(model_statuses, model, ruleset_result)

    # inspired from validation.deal_with_validation_results
    def update_statuses(self, model_statuses, model, result):
        """
        Update validation summary counter and then mark model with an
        appropriate status (READY for Pass and Warning, NEED_REVISION for
        the remaining statuses)

        Args:
            model_statuses (Counter): a counter object for animal or sample
            validation statuses
            model (Sample/Animal): a Sample or Animal object
            result (ValidationResultRecord): a validation result for a record
        """

        # get overall status (ie Pass, Error)
        overall = result.get_overall_status()

        # set model as valid even if has some warnings
        if overall in ["Pass", "Warning"]:
            self.mark_model(model, result, READY)

        else:
            model_statuses.update(['Issues'])
            self.mark_model(model, result, NEED_REVISION)

        # update a collections.Counter objects by key
        model_statuses.update([overall])
        model_statuses.update(['Known'])

    def mark_model(self, model, result, status):
        """Set status to a model and instantiate a ValidationResult obj

        Args:
            model (Sample/Animal): a Sample or Animal object
            result (ValidationResultRecord): a validation result for a record
            status (int): the status to set on ``model.name`` (READY or
            NEED_REVISION when called by update_statuses)
        """

        messages = result.get_messages()

        # get comparable messages for batch update
        comparable_messages = [
            result_set.get_comparable_str()
            for result_set in result.result_set]

        overall_status = result.get_overall_status()

        # Save all messages for validation summary: counters count every
        # occurrence of each message
        if isinstance(model, Sample):
            self.messages_samples.update(comparable_messages)

        elif isinstance(model, Animal):
            self.messages_animals.update(comparable_messages)

        # get a validation result model or create a new one
        if hasattr(model.name, 'validationresult'):
            validationresult = model.name.validationresult

        else:
            validationresult = ValidationResultModel()
            model.name.validationresult = validationresult

        # setting validation tool results and save
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # ok, don't update Name statuses for submitted objects which
        # already are in biosamples and pass validation
        if model.name.status == COMPLETED and status == READY:
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK" % (
                    model, key2status[model.name.status]))

        else:
            logger.debug(
                "Marking %s with '%s' status (%s)" % (
                    model, key2status[status], messages))

            # update model status and save
            model.name.status = status
            model.name.save()

    def create_validation_summary(self):
        """
        This function will create ValidationSummary object that will be used
        on validation_summary view

        One summary is created (or updated) for animals and one for samples,
        using the counters collected during validation
        """

        # model type with its messages and statuses counters
        counters = (
            ('animal', self.messages_animals, self.statuses_animals),
            ('sample', self.messages_samples, self.statuses_samples),
        )

        for model_type, messages, model_statuses in counters:
            summary_obj, created = ValidationSummary.objects.get_or_create(
                submission=self.submission_obj, type=model_type)

            if created:
                logger.debug(
                    "Created %s validationSummary for %s" % (
                        model_type, self.submission_obj))

            # reset all_count
            summary_obj.reset_all_count()

            summary_obj.submission = self.submission_obj

            # they are counter object, so no Keyerror and returns 0
            summary_obj.pass_count = model_statuses['Pass']
            summary_obj.warning_count = model_statuses['Warning']
            summary_obj.error_count = model_statuses['Error']
            summary_obj.issues_count = model_statuses['Issues']
            summary_obj.validation_known_count = model_statuses['Known']

            # one entry for each distinct message with its occurrences
            summary_obj.messages = [
                {'message': message, 'count': count}
                for message, count in messages.items()]

            summary_obj.type = model_type
            summary_obj.save()

        logger.debug(
            "Results for submission %s: animals - %s, samples - %s" % (
                self.submission_obj,
                dict(self.statuses_animals),
                dict(self.statuses_samples))
        )
274
275
276
class ValidateTask(MyTask):
    """Celery task which validates the animals and samples of a submission
    and marks the submission according to the validation results"""

    name = "Validate Submission"
    description = """Validate submission data against IMAGE rules"""

    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
    # A task is not instantiated for every request, but is registered in
    # the task registry as a global instance. This means that the __init__
    # constructor will only be called once per process, and that the
    # task class is semantically closer to an Actor. if you have a task and
    # you route every request to the same process, then it will keep state
    # between requests. This can also be useful to cache resources, For
    # example, a base Task class that caches a database connection

    # extract a generic send_message for all modules which need it
    def send_message(self, submission_obj):
        """
        Update submission.status and submission message using django
        channels

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
        """

        send_message(
            submission_obj,
            validation_message=construct_validation_message(submission_obj))

    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Generic report for updating submission objects and send email after
        an exception is called

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # mark submission with its status and send the message: same steps
        # used for every other submission update
        self.__mark_submission(submission_obj, message, status)

        # get exception info (the exception currently being handled)
        einfo = traceback.format_exc()

        # send a mail to the user with the stacktrace (einfo)
        email_subject = "Error in IMAGE Validation: %s" % (message)
        email_message = (
            "Something goes wrong with validation. Please report "
            "this to InjectTool team\n\n %s" % str(einfo))

        submission_obj.owner.email_user(
            email_subject,
            email_message,
        )

        # TODO: should this be a common.helpers method?
        if notify_admins:
            # submit mail to admins
            datatuple = (
                email_subject,
                email_message,
                settings.DEFAULT_FROM_EMAIL,
                get_admin_emails())

            send_mass_mail((datatuple, ))

    # Override default on failure method
    # This is not a failed validation for a wrong value, this is an
    # error in task that mean an error in coding
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """
        Mark the submission with ERROR and notify owner and admins

        Args:
            exc (Exception): the exception raised by the task
            task_id (str): the id of the failed task
            args (tuple): task positional arguments: the first one is used
            as the submission primary key
            kwargs (dict): task keyword arguments (unused)
            einfo: exception information provided by the caller (unused:
            the report formats the current traceback by itself)
        """

        logger.error('{0!r} failed: {1!r}'.format(task_id, exc))

        # define message
        message = "Unknown error in validation - %s" % str(exc)

        # get submission object
        submission_obj = Submission.objects.get(pk=args[0])

        # call generic report which update submission and send email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        # returns None: this task will have the ERROR status

    # TODO: define a method to inform user for error in validation (Task run
    # with success but errors in data)

    def temporary_error_report(self, exc, submission_obj):
        """
        Deal with known issues in validation task. Notify the user using
        email and set status as LOADED in order to recall this task

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        logger.error("Error in validation: %s" % exc)

        message = "Errors in EBI API endpoints. Please try again later"
        logger.error(message)

        # call generic report which update submission and send email
        self.__generic_error_report(submission_obj, LOADED, message)

        return "success"

    def ruleset_error_report(self, exc, submission_obj):
        """
        Deal with ruleset issue in validation task. Notify the user using
        email and set status as ERROR, since he can't do anything without
        admin intervention

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        logger.error("Error ruleset: %s" % exc)

        message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")
        logger.error(message)

        # call generic report which update submission and send email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        return "success"

    def run(self, submission_id):
        """a function to perform validation steps

        Args:
            submission_id (int): the primary key of the submission to
            validate

        Return
            str: "success" when the task is correctly managed (even if
            validation found errors in metadata)

        Raises:
            ValidationError: if validation returns an unsupported status
        """

        logger.info("Validate Submission started")

        # get submission object
        submission_obj = Submission.objects.get(pk=submission_id)

        # read rules when task starts. Model issues when starting
        # OntologyCache at start
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # get a submission data helper instance
        validate_submission = ValidateSubmission(submission_obj, self.ruleset)

        try:
            for animal in Animal.objects.filter(
                    name__submission=submission_obj).order_by('id'):
                validate_submission.validate_model(animal)

            for sample in Sample.objects.filter(
                    name__submission=submission_obj).order_by('id'):
                validate_submission.validate_model(sample)

        # TODO: errors in validation should raise custom exception
        except json.decoder.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except Exception as exc:
            raise self.retry(exc=exc)

        # if error messages changes in IMAGE-ValidationTool, all this
        # stuff isn't valid and I throw an exception
        statuses_are_valid = validate_submission.check_valid_statuses()

        # create validation summary: this is done whatever the validation
        # outcome is, so do it once before choosing the submission status
        validate_submission.create_validation_summary()

        if not statuses_are_valid:
            message = (
                "Unsupported validation status for submission %s" % (
                    submission_obj))

            # debug: print error in log
            logger.error(message)

            # mark submission with ERROR (this is not related to user data)
            # calling the appropriate method passing ERROR as status
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since is an InjectTool issue
            raise ValidationError(message)

        # set a proper value for status (READY or NEED_REVISION)
        # If I will found any error, I will return a message and I will
        # set NEED_REVISION
        elif validate_submission.has_errors_in_rules():
            message = (
                "Error in metadata. Need revisions before submit")

            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, message)

            logger.warning(
                "Error in metadata for submission %s" % (submission_obj))

        # WOW: I can submit those data
        elif validate_submission.has_warnings_in_rules():
            message = "Submission validated with some warnings"

            # mark submission with READY status
            self.submission_ready(submission_obj, message)

            logger.info(
                "Submission %s validated with some warning" % (submission_obj))

        else:
            message = "Submission validated with success"

            # mark submission with READY status
            self.submission_ready(submission_obj, message)

            logger.info(
                "Submission %s validated with success" % (submission_obj))

        logger.info("Validate Submission completed")

        return "success"

    def __mark_submission(self, submission_obj, message, status):
        """Mark submission with status and message, save it and then send
        the message"""

        submission_obj.status = status
        submission_obj.message = message
        submission_obj.save()

        self.send_message(submission_obj)

    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """Mark a submission with NEED_REVISION status (or with the
        provided status)"""

        # override message
        message = ("Validation got errors: %s" % (message))
        self.__mark_submission(submission_obj, message, status)

    def submission_ready(self, submission_obj, message):
        """Mark a submission with READY status"""

        self.__mark_submission(submission_obj, message, READY)
546
547
548
# register explicitly tasks
549
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
550
# NOTE: this is a module-level statement, so the task is registered as
# soon as this module is imported
celery_app.tasks.register(ValidateTask)
551