Completed
Push — master ( c0c2a1...e4f1e8 )
by Paolo
01:28 queued 01:04
created

validation.tasks   B

Complexity

Total Complexity 50

Size/Duplication

Total Lines 561
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 50
eloc 297
dl 0
loc 561
rs 8.4
c 0
b 0
f 0

14 Methods

Rating   Name   Duplication   Size   Complexity  
A ValidateTask.temporary_error_report() 0 23 1
A ValidateTask.send_message() 0 20 1
A ValidateTask.ruleset_error_report() 0 26 1
A ValidateTask.__generic_error_report() 0 44 2
A ValidateTask.on_failure() 0 12 1
D ValidateTask.mark_model() 0 60 12
F ValidateTask.run() 0 169 15
A ValidateTask.has_warnings_in_rules() 0 7 2
B ValidateTask.create_validation_summary() 0 46 6
A ValidateTask.update_statuses() 0 16 3
A ValidateTask.has_errors_in_json() 0 4 1
A ValidateTask.has_errors_in_rules() 0 7 2
A ValidateTask.submission_fail() 0 7 1
A ValidateTask.validate_model() 0 31 2

How to fix   Complexity   

Complexity

Complex classes like validation.tasks often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Oct  5 11:22:33 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
Useful stuff to deal with validation process
9
10
"""
11
12
import json
13
import traceback
14
import asyncio
15
16
from collections import Counter
17
from celery.utils.log import get_task_logger
18
19
from django.conf import settings
20
from django.core.mail import send_mass_mail
21
from django.core.exceptions import ObjectDoesNotExist
22
23
from common.constants import (
24
    READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES, KNOWN_STATUSES)
25
from common.helpers import send_message_to_websocket
26
from validation.helpers import construct_validation_message
27
from image.celery import app as celery_app, MyTask
28
from image_app.helpers import get_admin_emails
29
from image_app.models import Submission, Sample, Animal
30
from validation.models import ValidationSummary
31
32
from .models import ValidationResult as ValidationResultModel
33
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError
34
35
# Get an instance of a logger
36
logger = get_task_logger(__name__)

# map each status value to its name (ie {0: 'Waiting'})
key2status = dict(member.value for member in STATUSES)
40
41
42
# A class to deal with validation errors
43
class ValidationError(Exception):
    """Raised by the validation task when the collected validation
    statuses are not the expected ones"""
45
46
47
class ValidateTask(MyTask):
48
    name = "Validate Submission"
49
    description = """Validate submission data against IMAGE rules"""
50
51
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
52
    # A task is not instantiated for every request, but is registered in
53
    # the task registry as a global instance. This means that the __init__
54
    # constructor will only be called once per process, and that the
55
    # task class is semantically closer to an Actor. if you have a task and
56
    # you route every request to the same process, then it will keep state
57
    # between requests. This can also be useful to cache resources, For
58
    # example, a base Task class that caches a database connection
59
60
    def send_message(self, status, submission_obj):
        """
        Notify the submission status, its message and its validation
        message through the websocket helper (django channels)

        Args:
            status (int): a :py:class:`common.constants.STATUSES` object
            submission_obj (image_app.models.Submission): an UID submission
            object
        """

        # take the loop first, as the message is built only if a loop exists
        loop = asyncio.get_event_loop()

        payload = {
            'message': STATUSES.get_value_display(status),
            'notification_message': submission_obj.message,
            'validation_message': construct_validation_message(
                submission_obj),
        }

        # block until the message has been delivered
        loop.run_until_complete(
            send_message_to_websocket(payload, submission_obj.pk))
82
83
    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Update a submission after an exception, notify it through
        :py:meth:`send_message` and mail the current traceback to the
        submission owner (and optionally to the admins)

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # track status and message in the submission itself
        submission_obj.status = status
        submission_obj.message = message
        submission_obj.save()

        self.send_message(status, submission_obj)

        # the formatted traceback of the exception currently handled
        stacktrace = traceback.format_exc()

        # the same subject and body are used for owner and admins
        subject = "Error in IMAGE Validation: %s" % (message)
        body = (
            "Something goes wrong with validation. Please report "
            "this to InjectTool team\n\n %s" % str(stacktrace))

        submission_obj.owner.email_user(subject, body)

        if not notify_admins:
            return

        # one single message addressed to all the admins
        admin_mail = (
            subject,
            body,
            settings.DEFAULT_FROM_EMAIL,
            get_admin_emails())

        send_mass_mail((admin_mail, ))
127
128
    # Override default on failure method
129
    # This is not a failed validation for a wrong value, this is an
130
    # error in task that mean an error in coding
131
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """
        Handle an unexpected failure of this task: log it, mark the
        submission with ERROR and notify both owner and admins

        Args:
            exc (Exception): the exception which made the task fail
            task_id (str): the id of the failed task
            args (list): positional arguments the task was called with
            kwargs (dict): keyword arguments the task was called with
            einfo: exception information provided by the caller (unused
            here, the report formats the traceback on its own)
        """

        logger.error('{0!r} failed: {1!r}'.format(task_id, exc))

        # define message
        message = "Unknown error in validation - %s" % str(exc)

        # the submission id is the only argument of run(): it can be
        # provided as positional or as keyword, so support both cases
        if args:
            submission_id = args[0]
        else:
            submission_id = kwargs['submission_id']

        # get submission object
        submission_obj = Submission.objects.get(pk=submission_id)

        # call generic report which update submission and send email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        # returns None: this task will have the ERROR status
145
146
    # TODO: define a method to inform user for error in validation (Task run
147
    # with success but errors in data)
148
149
    def temporary_error_report(self, exc, submission_obj):
        """
        Manage a known (temporary) issue of the validation task. The user
        is notified by email and the submission is set as LOADED, in order
        to call this task again later

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        user_message = "Errors in EBI API endpoints. Please try again later"

        # log both the original exception and the message for the user
        logger.error("Error in validation: %s" % exc)
        logger.error(user_message)

        # update submission and send email (admins are not notified)
        self.__generic_error_report(submission_obj, LOADED, user_message)

        return "success"
172
173
    def ruleset_error_report(self, exc, submission_obj):
        """
        Manage a ruleset issue of the validation task. The user is notified
        by email and the submission is set as ERROR, since nothing can be
        done without admin intervention (admins are notified too)

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        user_message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")

        # log both the original exception and the message for the user
        logger.error("Error ruleset: %s" % exc)
        logger.error(user_message)

        # update submission, send email to the owner and to the admins
        self.__generic_error_report(
            submission_obj, ERROR, user_message, notify_admins=True)

        return "success"
199
200
    def run(self, submission_id):
        """
        Validate all animals and samples of a submission against the
        ruleset, then update the submission status and its validation
        summary accordingly

        Args:
            submission_id (int): the primary key of the
            :py:class:`image_app.models.Submission` to validate

        Return
            str: "success" when the task is correctly managed (even when
            data need revision)

        Raises:
            ValidationError: if the collected status keys differ from
            KNOWN_STATUSES
        """

        logger.info("Validate Submission started")

        # start from scratch with the unique messages of samples and animals
        self.messages_samples = dict()
        self.messages_animals = dict()

        submission_obj = Submission.objects.get(pk=submission_id)

        # the ruleset is read when the task starts: issues with the
        # ontology cache or with the ruleset are reported and end the task
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # global statuses for animals and samples, all starting from 0
        tracked_keys = (
            'Pass', 'Warning', 'Error', 'JSON', 'Issues', 'Known')
        animal_counts = Counter(dict.fromkeys(tracked_keys, 0))
        sample_counts = Counter(dict.fromkeys(tracked_keys, 0))

        def log_results():
            # debug: the overall counters of this submission
            logger.debug(
                "Results for submission %s: animals - %s, samples - %s" % (
                    submission_id, animal_counts, sample_counts)
            )

        try:
            animals = Animal.objects.filter(
                name__submission=submission_obj).order_by('id')

            for animal in animals:
                self.validate_model(animal, animal_counts)

            samples = Sample.objects.filter(
                name__submission=submission_obj).order_by('id')

            for sample in samples:
                self.validate_model(sample, sample_counts)

        # TODO: errors in validation should raise custom exception
        except json.decoder.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except Exception as exc:
            raise self.retry(exc=exc)

        # the keys collected by the two counters
        animal_keys = sorted(animal_counts.keys())
        sample_keys = sorted(sample_counts.keys())

        # if the statuses change in the validation tool, the logic below
        # is no longer valid and an exception is thrown
        keys_as_expected = (
            animal_keys == KNOWN_STATUSES and sample_keys == KNOWN_STATUSES)

        if not keys_as_expected:
            message = (
                "Error in statuses for submission %s: animals - %s, "
                "samples - %s" % (submission_obj, animal_keys, sample_keys))

            logger.error(message)

            # this is not related to user data: submission is marked as ERROR
            self.create_validation_summary(
                submission_obj, animal_counts, sample_counts)
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since is an InjectTool issue
            raise ValidationError(message)

        both_counts = (animal_counts, sample_counts)

        # errors in JSON are an InjectTool problem: NEED_REVISION
        if any(self.has_errors_in_json(counts) for counts in both_counts):
            self.create_validation_summary(
                submission_obj, animal_counts, sample_counts)
            self.submission_fail(submission_obj, "Wrong JSON structure")

            logger.warning(
                "Wrong JSON structure for submission %s" % (submission_obj))

            log_results()

        # errors against the ruleset: NEED_REVISION
        elif any(self.has_errors_in_rules(counts) for counts in both_counts):
            message = (
                "Error in metadata. Need revisions before submit")

            self.create_validation_summary(
                submission_obj, animal_counts, sample_counts)
            self.submission_fail(submission_obj, message)

            logger.warning(
                "Error in metadata for submission %s" % (submission_obj))

            log_results()

        # data can be submitted (READY), with or without warnings
        else:
            if any(self.has_warnings_in_rules(counts)
                   for counts in both_counts):
                ready_message = "Submission validated with some warnings"
                ready_log = "Submission %s validated with some warning"

            else:
                ready_message = "Submission validated with success"
                ready_log = "Submission %s validated with success"

            submission_obj.status = READY
            submission_obj.message = ready_message
            submission_obj.save()
            self.create_validation_summary(
                submission_obj, animal_counts, sample_counts)

            # send message with channel
            self.send_message(READY, submission_obj)

            logger.info(ready_log % (submission_obj))

            log_results()

        logger.info("Validate Submission completed")

        return "success"
369
370
    def validate_model(self, model, submission_statuses):
        """
        Validate a single object: check its structure first and, only if
        the structure is fine, check it against the ruleset

        Args:
            model: the object to validate (it needs to provide a
            ``to_biosample`` method and a ``name`` relation)
            submission_statuses (collections.Counter): the overall
            statuses, updated in place
        """

        logger.debug("Validating %s" % (model))

        # data in biosample format
        biosample_data = model.to_biosample()

        # the structure check wants a list object
        structure_errors = self.ruleset.check_usi_structure([biosample_data])
        n_structure_errors = len(structure_errors)

        if n_structure_errors == 0:
            # no check_duplicates: it checks against alias (that is a pk)
            # HINT: improve check_duplicates or implement database
            # constraints

            # structure is fine: check against image metadata and track
            # the result in the overall statuses
            ruleset_result = self.ruleset.validate(biosample_data)
            self.update_statuses(submission_statuses, model, ruleset_result)
            return

        # JSON isn't valid: this is not an error on user's data but on
        # InjectTool itself. It makes no sense to go on with validation
        submission_statuses.update({'JSON': n_structure_errors})
        submission_statuses['Issues'] += 1
        submission_statuses['Known'] += 1

        self.mark_model(model, structure_errors, NEED_REVISION)
401
402
    # inspired from validation.deal_with_validation_results
403
    def update_statuses(self, submission_statuses, model, result):
        """
        Mark a model relying on its validation result and track the
        result in the overall statuses

        Args:
            submission_statuses (collections.Counter): the overall
            statuses, updated in place
            model: the validated object
            result: the validation result, providing ``get_overall_status``
        """

        # the overall status (ie Pass, Error)
        overall = result.get_overall_status()

        if overall not in ("Pass", "Warning"):
            submission_statuses['Issues'] += 1
            self.mark_model(model, result, NEED_REVISION)

        else:
            # a model is valid even if it has some warnings, however a
            # warning is tracked as an issue
            self.mark_model(model, result, READY)

            if overall == 'Warning':
                submission_statuses['Issues'] += 1

        # count this overall status and this validated object
        submission_statuses[overall] += 1
        submission_statuses['Known'] += 1
419
420
    def has_errors_in_rules(self, submission_statuses):
        """Return True if the "Error" count is different from 0"""

        return submission_statuses["Error"] != 0
427
428
    def has_warnings_in_rules(self, submission_statuses):
        """Return True if the "Warning" count is different from 0"""

        return submission_statuses["Warning"] != 0
435
436
    def has_errors_in_json(self, submission_statuses):
        """Return True if the "JSON" count is greater than 0"""

        json_errors = submission_statuses["JSON"]

        return json_errors > 0
440
441
    def mark_model(self, model, result, status):
        """
        Track the validation result of a model: count its messages for
        the validation summary, save a ValidationResult object and update
        the status of the model name

        Args:
            model: the validated object (a Sample or an Animal)
            result: a list of messages (wrong JSON structure) or a
            validation result object
            status (int): the status to set in the model name
        """

        if not isinstance(result, list):
            messages = result.get_messages()

            # comparable messages are required for batch update
            comparable_messages = [
                item.get_comparable_str() for item in result.result_set]

            overall_status = result.get_overall_status()

        else:
            # a list of messages comes from the structure check
            messages = comparable_messages = result
            overall_status = "Wrong JSON structure"

        # choose where the messages for the validation summary are stored
        if isinstance(model, Sample):
            tally_name = 'messages_samples'

        elif isinstance(model, Animal):
            tally_name = 'messages_animals'

        else:
            tally_name = None

        if tally_name is not None:
            # the attribute might not exist when doing tests
            if not hasattr(self, tally_name):
                setattr(self, tally_name, dict())

            tally = getattr(self, tally_name)

            for text in comparable_messages:
                tally[text] = tally.get(text, 0) + 1

        name = model.name

        # get a validation result model or create a new one
        if hasattr(name, 'validationresult'):
            validationresult = name.validationresult

        else:
            validationresult = ValidationResultModel()
            name.validationresult = validationresult

        # set validation tool results and save
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # don't update Name statuses for submitted objects which
        # already are in biosamples and pass validation
        if name.status == COMPLETED and status == READY:
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK" % (
                    model, key2status[name.status]))
            return

        logger.debug(
            "Marking %s with '%s' status (%s)" % (
                model, key2status[status], messages))

        # update model status and save
        name.status = status
        name.save()
501
502
    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """
        Mark a submission as failed (NEED_REVISION by default) and notify
        it through :py:meth:`send_message`

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            message (str): the reason of the failure
            status (int): the status to set in the submission
        """

        failure_message = "Validation got errors: %s" % (message)

        submission_obj.message = failure_message
        submission_obj.status = status
        submission_obj.save()

        self.send_message(status, submission_obj)
509
510
    def create_validation_summary(self, submission_obj,
                                  submission_statuses_animals,
                                  submission_statuses_samples):
        """
        This function will create (or update) one ValidationSummary object
        for animals and one for samples, that will be used on
        validation_summary view

        Args:
            submission_obj: submission ref which has gone through validation
            submission_statuses_animals: Counter with statuses for animals
            submission_statuses_samples: Counter with statuses for samples
        """

        # messages_animals and messages_samples might not exist when this
        # method is called alone (ie doing tests): rely on an empty dict
        per_type = (
            ('animal',
             getattr(self, 'messages_animals', dict()),
             submission_statuses_animals),
            ('sample',
             getattr(self, 'messages_samples', dict()),
             submission_statuses_samples),
        )

        for model_type, messages, submission_statuses in per_type:
            # update the summary of this type, or create a new one
            try:
                validation_summary = ValidationSummary.objects.get(
                    submission=submission_obj, type=model_type)

            except ObjectDoesNotExist:
                validation_summary = ValidationSummary()

            validation_summary.submission = submission_obj
            validation_summary.type = model_type

            # a missing key counts as 0
            validation_summary.pass_count = submission_statuses.get('Pass', 0)
            validation_summary.warning_count = submission_statuses.get(
                'Warning', 0)
            validation_summary.error_count = submission_statuses.get(
                'Error', 0)
            validation_summary.json_count = submission_statuses.get('JSON', 0)
            validation_summary.issues_count = submission_statuses.get(
                'Issues', 0)
            validation_summary.validation_known_count = submission_statuses.get(
                'Known', 0)

            # each unique message with the times it was found
            validation_summary.messages = [
                {'message': message, 'count': count}
                for message, count in messages.items()]

            validation_summary.save()
556
557
558
# register explicitly tasks
559
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
560
# add the ValidateTask class to the task registry of the celery application
celery_app.tasks.register(ValidateTask)
561