Completed
Branch master (206998)
by Paolo
04:19
created

validation.tasks.ValidateSubmission.mark_model()   C

Complexity

Conditions 10

Size

Total Lines 56
Code Lines 32

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 32
dl 0
loc 56
rs 5.9999
c 0
b 0
f 0
cc 10
nop 4

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

Complexity

Complex classes like validation.tasks.ValidateSubmission.mark_model() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Oct  5 11:22:33 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
Useful stuff to deal with validation process
9
10
"""
11
12
import json
13
import traceback
14
15
from collections import Counter
16
from celery.utils.log import get_task_logger
17
18
from django.conf import settings
19
from django.core.mail import send_mass_mail
20
21
from common.constants import (
22
    READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES, KNOWN_STATUSES)
23
from validation.helpers import construct_validation_message
24
from image.celery import app as celery_app, MyTask
25
from image_app.helpers import get_admin_emails
26
from image_app.models import Submission, Sample, Animal
27
from submissions.helpers import send_message
28
from validation.models import ValidationSummary
29
30
from .models import ValidationResult as ValidationResultModel
31
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError
32
33
# Module-level task logger, named after this module
logger = get_task_logger(__name__)

# Lookup table built from the ``value`` of every STATUSES member. It is used
# below as ``key2status[<status>]`` to print a status in log messages
# (ie {0: 'Waiting'})
key2status = dict(member.value for member in STATUSES)
38
39
40
# A class to deal with validation errors
41
class ValidationError(Exception):
    """Exception raised for errors in the validation process."""
43
44
45
class ValidateSubmission(object):
    """
    A helper class for the submission task, useful to pass parameters like
    submission data between tasks.

    It validates single Animal/Sample objects against a ruleset, stores a
    validation result for each of them and keeps per-type counters which
    are finally written into ValidationSummary objects.
    """

    def __init__(self, submission_obj, ruleset):
        """
        Args:
            submission_obj: the submission whose data are validated
            ruleset: the object used for validation; it needs to provide
                ``check_usi_structure()`` and ``validate()``
        """

        # track submission object
        self.submission_obj = submission_obj

        # track ruleset
        self.ruleset = ruleset

        # collect all unique messages for samples and animals
        self.messages_animals = Counter()
        self.messages_samples = Counter()

        # track global statuses for animals and samples
        # Don't set keys: if you take a key which doesn't exist, you will
        # get 0 instead of a KeyError. This is how a Counter differs from a
        # plain dictionary object
        self.statuses_animals = Counter()
        self.statuses_samples = Counter()

    def check_valid_statuses(self):
        """Check if validation returned an unsupported status message

        Returns:
            bool: False (after logging an error) as soon as a tracked status
            is not in KNOWN_STATUSES, True otherwise
        """

        # animals are checked first, then samples
        for statuses in (self.statuses_animals, self.statuses_samples):
            for key in statuses:
                if key not in KNOWN_STATUSES:
                    logger.error(
                        "Unsupported status '%s' from validation", key)
                    return False

        # if I arrive here, all validation statuses are handled
        return True

    def __has_key_in_rules(self, key):
        """Generic function to test if a status was tracked at least once
        for animals or for samples"""

        # Counter returns 0 for a missing key, without adding it
        return (
            self.statuses_animals[key] > 0 or
            self.statuses_samples[key] > 0)

    def has_errors_in_rules(self):
        """Return True if there are any errors in validation rules"""

        return self.__has_key_in_rules('Error')

    def has_warnings_in_rules(self):
        """Return True if there are any warnings in validation rules"""

        return self.__has_key_in_rules('Warning')

    def has_errors_in_json(self):
        """Return True if there is any error in JSON"""

        return self.__has_key_in_rules('JSON')

    def validate_model(self, model):
        """Validate a single model object and track its results

        Args:
            model: the Sample or Animal instance to validate

        Raises:
            TypeError: if model is neither a Sample nor an Animal
        """

        logger.debug("Validating %s", model)

        # this could be an animal or a sample
        if isinstance(model, Sample):
            model_statuses = self.statuses_samples

        elif isinstance(model, Animal):
            model_statuses = self.statuses_animals

        else:
            # fail immediately: there is no counter to update for any
            # other type
            raise TypeError(
                "Can't validate %r: expected a Sample or an Animal" % (
                    model,))

        # get data in biosample format
        data = model.to_biosample()

        # input is a list object
        usi_result = self.ruleset.check_usi_structure([data])

        # if I have errors here, JSON isn't valid: this is not an error
        # on user's data but on InjectTool itself
        if len(usi_result) > 0:
            # update counter for JSON
            model_statuses.update({'JSON': len(usi_result)})
            model_statuses.update(['Issues', 'Known'])

            # update model results
            self.mark_model(model, usi_result, NEED_REVISION)

            # It makes no sense to continue validation since JSON is wrong
            return

        # no check_duplicates: it checks against alias (that is a pk)
        # HINT: improve check_duplicates or implement database constraints

        # check against image metadata
        ruleset_result = self.ruleset.validate(data)

        # update status and track data in an overall variable
        self.update_statuses(model_statuses, model, ruleset_result)

    # inspired from validation.deal_with_validation_results
    def update_statuses(self, model_statuses, model, result):
        """Mark the model relying on the overall validation status and
        update the counter received as ``model_statuses``"""

        # get overall status (ie Pass, Error)
        overall = result.get_overall_status()

        # set model as valid even if it has some warnings
        if overall in ["Pass", "Warning"]:
            self.mark_model(model, result, READY)

        else:
            model_statuses['Issues'] += 1
            self.mark_model(model, result, NEED_REVISION)

        # count this overall status and the validated object
        model_statuses[overall] += 1
        model_statuses['Known'] += 1

    @staticmethod
    def _unpack_result(result):
        """Return (messages, comparable messages, overall status) of a
        validation result"""

        # a list is what validate_model passes when the JSON structure
        # check fails
        if isinstance(result, list):
            return result, result, "Wrong JSON structure"

        messages = result.get_messages()

        # get comparable messages for batch update
        comparable_messages = [
            result_set.get_comparable_str()
            for result_set in result.result_set]

        return messages, comparable_messages, result.get_overall_status()

    def _track_messages(self, model, comparable_messages):
        """Count every message in the counter matching the model type"""

        # Counter.update with a list adds 1 for every element
        if isinstance(model, Sample):
            self.messages_samples.update(comparable_messages)

        elif isinstance(model, Animal):
            self.messages_animals.update(comparable_messages)

    @staticmethod
    def _get_validationresult(model):
        """Get the validation result tied to model.name or attach a new
        one"""

        if hasattr(model.name, 'validationresult'):
            return model.name.validationresult

        validationresult = ValidationResultModel()
        model.name.validationresult = validationresult

        return validationresult

    def mark_model(self, model, result, status):
        """Set status to a model and instantiate a ValidationResult obj

        Args:
            model: the validated Sample or Animal instance
            result: a list of messages or an object providing
                ``get_messages()``, ``result_set`` and
                ``get_overall_status()``
            status: the status to set on ``model.name``
        """

        messages, comparable_messages, overall_status = (
            self._unpack_result(result))

        # save all messages for validation summary
        self._track_messages(model, comparable_messages)

        # setting validation tool results and save
        validationresult = self._get_validationresult(model)
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # ok, don't update Name statuses for submitted objects which
        # already are in biosamples and pass validation
        if model.name.status == COMPLETED and status == READY:
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK",
                model, key2status[model.name.status])
            return

        logger.debug(
            "Marking %s with '%s' status (%s)",
            model, key2status[status], messages)

        # update model status and save
        model.name.status = status
        model.name.save()

    def create_validation_summary(self):
        """
        This function will create the ValidationSummary objects that will
        be used on validation_summary view
        """

        # the counters to read for each model type
        per_type = (
            ('animal', self.messages_animals, self.statuses_animals),
            ('sample', self.messages_samples, self.statuses_samples),
        )

        for model_type, messages, model_statuses in per_type:
            summary_obj, created = ValidationSummary.objects.get_or_create(
                submission=self.submission_obj, type=model_type)

            if created:
                logger.debug(
                    "Created %s validationSummary for %s",
                    model_type, self.submission_obj)

            # reset all_count
            summary_obj.reset_all_count()

            summary_obj.submission = self.submission_obj

            # they are counter objects, so no KeyError and returns 0
            summary_obj.pass_count = model_statuses['Pass']
            summary_obj.warning_count = model_statuses['Warning']
            summary_obj.error_count = model_statuses['Error']
            summary_obj.json_count = model_statuses['JSON']
            summary_obj.issues_count = model_statuses['Issues']
            summary_obj.validation_known_count = model_statuses['Known']

            summary_obj.messages = [
                {'message': message, 'count': count}
                for message, count in messages.items()]
            summary_obj.type = model_type
            summary_obj.save()

        logger.debug(
            "Results for submission %s: animals - %s, samples - %s",
            self.submission_obj,
            dict(self.statuses_animals),
            dict(self.statuses_samples))
278
279
280
class ValidateTask(MyTask):
    """Task which validates the animals and the samples of a submission
    and then updates the submission status and message"""

    name = "Validate Submission"
    description = """Validate submission data against IMAGE rules"""

    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
    # A task is not instantiated for every request, but is registered in
    # the task registry as a global instance. This means that the __init__
    # constructor will only be called once per process, and that the
    # task class is semantically closer to an Actor. If you have a task and
    # you route every request to the same process, then it will keep state
    # between requests. This can also be useful to cache resources, for
    # example, a base Task class that caches a database connection

    # TODO: extract a generic send_message for all modules which need it
    def send_message(self, submission_obj):
        """
        Update submission.status and submission message using django
        channels

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
        """

        # this is the module level send_message helper, not this method
        send_message(
            submission_obj,
            validation_message=construct_validation_message(submission_obj))

    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Generic report for updating submission objects and sending email
        after an exception is raised

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # mark submission with its status
        submission_obj.status = status
        submission_obj.message = message
        submission_obj.save()

        self.send_message(submission_obj)

        # get exception info: this is the traceback of the exception
        # currently being handled
        einfo = traceback.format_exc()

        # send a mail to the user with the stacktrace (einfo)
        email_subject = "Error in IMAGE Validation: %s" % (message)
        email_message = (
            "Something goes wrong with validation. Please report "
            "this to InjectTool team\n\n %s" % str(einfo))

        submission_obj.owner.email_user(
            email_subject,
            email_message,
        )

        # TODO: should this be a common.helpers method?
        if notify_admins:
            # submit mail to admins
            datatuple = (
                email_subject,
                email_message,
                settings.DEFAULT_FROM_EMAIL,
                get_admin_emails())

            send_mass_mail((datatuple, ))

    # Override default on failure method
    # This is not a failed validation for a wrong value, this is an
    # error in task that means an error in coding
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """Mark the submission with ERROR and notify user and admins

        Args:
            exc (Exception): the exception which made the task fail
            task_id (str): the id of the failed task
            args (tuple): positional arguments the task was called with
            kwargs (dict): keyword arguments the task was called with
            einfo: exception information (not used here)
        """

        logger.error('{0!r} failed: {1!r}'.format(task_id, exc))

        # define message
        message = "Unknown error in validation - %s" % str(exc)

        # the submission id is the only argument of run(): it could have
        # been passed by position or by keyword
        submission_id = args[0] if args else kwargs['submission_id']

        # get submission object
        submission_obj = Submission.objects.get(pk=submission_id)

        # call generic report which updates submission and sends email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        # returns None: this task will have the ERROR status

    # TODO: define a method to inform user for error in validation (Task run
    # with success but errors in data)

    def temporary_error_report(self, exc, submission_obj):
        """
        Deal with known issues in validation task. Notify the user using
        email and set status as LOADED in order to recall this task

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        logger.error("Error in validation: %s", exc)

        message = "Errors in EBI API endpoints. Please try again later"
        logger.error(message)

        # call generic report which updates submission and sends email
        self.__generic_error_report(submission_obj, LOADED, message)

        return "success"

    def ruleset_error_report(self, exc, submission_obj):
        """
        Deal with ruleset issue in validation task. Notify the user using
        email and set status as ERROR, since he can't do anything without
        admin intervention

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        logger.error("Error ruleset: %s", exc)

        message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")
        logger.error(message)

        # call generic report which updates submission and sends email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        return "success"

    def run(self, submission_id):
        """A function to perform validation steps

        Args:
            submission_id: the primary key of the submission to validate

        Returns:
            str: "success" when the task is correctly managed

        Raises:
            ValidationError: if validation tracked an unsupported status
        """

        logger.info("Validate Submission started")

        # get submission object
        submission_obj = Submission.objects.get(pk=submission_id)

        # read rules when task starts. Model issues when starting
        # OntologyCache at start
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # get a submission data helper instance
        validate_submission = ValidateSubmission(submission_obj, self.ruleset)

        try:
            # animals are validated first, then samples
            for model_class in (Animal, Sample):
                queryset = model_class.objects.filter(
                    name__submission=submission_obj).order_by('id')

                for model in queryset:
                    validate_submission.validate_model(model)

        # TODO: errors in validation should raise custom exception
        except json.decoder.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except Exception as exc:
            raise self.retry(exc=exc)

        # if error messages change in IMAGE-ValidationTool, all this
        # stuff isn't valid and I throw an exception
        if not validate_submission.check_valid_statuses():
            message = (
                "Unsupported validation status for submission %s" % (
                    submission_obj))

            # debug: print error in log
            logger.error(message)

            # create validation summary
            validate_submission.create_validation_summary()

            # mark submission with ERROR (this is not related to user data)
            # calling the appropriate method passing ERROR as status
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since it is an InjectTool issue
            raise ValidationError(message)

        # every remaining outcome needs a validation summary: create it
        # once, before choosing the submission status
        validate_submission.create_validation_summary()

        # If I have any error in JSON it is a problem of InjectTool
        if validate_submission.has_errors_in_json():
            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, "Wrong JSON structure")

            logger.warning(
                "Wrong JSON structure for submission %s", submission_obj)

        # set a proper value for status (READY or NEED_REVISION)
        # If I find any error, I will return a message and I will
        # set NEED_REVISION
        elif validate_submission.has_errors_in_rules():
            message = (
                "Error in metadata. Need revisions before submit")

            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, message)

            logger.warning(
                "Error in metadata for submission %s", submission_obj)

        # WOW: I can submit those data
        elif validate_submission.has_warnings_in_rules():
            message = "Submission validated with some warnings"

            # mark submission with READY status
            self.submission_ready(submission_obj, message)

            logger.info(
                "Submission %s validated with some warning", submission_obj)

        else:
            message = "Submission validated with success"

            # mark submission with READY status
            self.submission_ready(submission_obj, message)

            logger.info(
                "Submission %s validated with success", submission_obj)

        logger.info("Validate Submission completed")

        return "success"

    def __mark_submission(self, submission_obj, message, status):
        """Mark submission with status and message, then send a message
        for it"""

        submission_obj.status = status
        submission_obj.message = message
        submission_obj.save()

        self.send_message(submission_obj)

    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """Mark a submission as failed, with NEED_REVISION status unless a
        different status is provided"""

        # override message
        message = ("Validation got errors: %s" % (message))
        self.__mark_submission(submission_obj, message, status)

    def submission_ready(self, submission_obj, message):
        """Mark a submission with READY status"""

        self.__mark_submission(submission_obj, message, READY)
562
563
564
# register explicitly tasks
565
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
566
# add the ValidateTask class to celery_app's task registry when this module
# is imported
celery_app.tasks.register(ValidateTask)
567