Passed
Pull Request — master (#30)
by Paolo
01:17
created

validation.tasks.ValidateTask.update_statuses()   A

Complexity

Conditions 2

Size

Total Lines 13
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 13
rs 10
c 0
b 0
f 0
cc 2
nop 4
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Oct  5 11:22:33 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
Useful stuff to deal with validation process
9
10
"""
11
12
import json
13
import traceback
14
import asyncio
15
16
from collections import Counter
17
from celery.utils.log import get_task_logger
18
19
from django.conf import settings
20
from django.core.mail import send_mass_mail
21
from django.core.exceptions import ObjectDoesNotExist
22
23
from common.constants import (
24
    READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES)
25
from common.helpers import send_message_to_websocket, \
26
    construct_validation_message
27
from image.celery import app as celery_app, MyTask
28
from image_app.helpers import get_admin_emails
29
from image_app.models import Submission, Sample, Animal, ValidationSummary
30
31
from .models import ValidationResult as ValidationResultModel
32
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError
33
34
# Module-level logger provided by celery for task logging
logger = get_task_logger(__name__)

# Lookup table from a status key to its name (ie {0: 'Waiting'}), built from
# the pair stored as value in each STATUSES member
key2status = dict(member.value for member in STATUSES)
39
40
41
class ValidationError(Exception):
    """A class to deal with validation errors"""
44
45
46
class ValidateTask(MyTask):
47
    name = "Validate Submission"
48
    description = """Validate submission data against IMAGE rules"""
49
50
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
51
    # A task is not instantiated for every request, but is registered in
52
    # the task registry as a global instance. This means that the __init__
53
    # constructor will only be called once per process, and that the
54
    # task class is semantically closer to an Actor. if you have a task and
55
    # you route every request to the same process, then it will keep state
56
    # between requests. This can also be useful to cache resources, For
57
    # example, a base Task class that caches a database connection
58
59
    def send_message(self, status, submission_obj):
        """
        Send the submission status, its message and its validation message
        to the websocket using django channels

        Args:
            status (int): a :py:class:`common.constants.STATUSES` object
            submission_obj (image_app.models.Submission): an UID submission
            object
        """

        event_loop = asyncio.get_event_loop()

        # what will be delivered through the websocket
        payload = {
            'message': STATUSES.get_value_display(status),
            'notification_message': submission_obj.message,
            'validation_message': construct_validation_message(
                submission_obj),
        }

        # the message is addressed using the submission primary key
        awaitable = send_message_to_websocket(payload, submission_obj.pk)

        # wait until the message has been sent
        event_loop.run_until_complete(awaitable)
81
82
    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Common report applied after an exception: update the submission
        object, notify it through websocket and send emails

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # track status and message in the submission, then notify them
        submission_obj.status, submission_obj.message = status, message
        submission_obj.save()
        self.send_message(status, submission_obj)

        # stacktrace of the exception currently being handled
        stacktrace = traceback.format_exc()

        subject = "Error in IMAGE Validation: %s" % (message)
        body = (
            "Something goes wrong with validation. Please report "
            "this to InjectTool team\n\n %s" % str(stacktrace))

        # the submission owner always receives the mail
        submission_obj.owner.email_user(subject, body)

        if not notify_admins:
            return

        # the same mail is sent to the admins
        send_mass_mail((
            (subject, body, settings.DEFAULT_FROM_EMAIL, get_admin_emails()),
        ))
126
127
    # Override default on failure method
128
    # This is not a failed validation for a wrong value, this is an
129
    # error in task that mean an error in coding
130
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """
        Handle an unexpected failure of the task (an error in code, not a
        failed validation): mark the submission with the ERROR status and
        send a mail to the owner and to the admins

        Args:
            exc (Exception): the exception raised by the task
            task_id (str): the id of the failed task
            args (tuple): positional arguments of the failed task
            kwargs (dict): keyword arguments of the failed task
            einfo: exception information received from celery (not used:
            the stacktrace is read while building the report)
        """
        logger.error('{0!r} failed: {1!r}'.format(task_id, exc))

        # define message
        message = "Unknown error in validation - %s" % str(exc)

        # the submission id is the only argument of run(): it can be
        # provided both as a positional or as a keyword argument
        if args:
            submission_id = args[0]
        else:
            submission_id = kwargs['submission_id']

        # get submission object
        submission_obj = Submission.objects.get(pk=submission_id)

        # call generic report which update submission and send email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        # returns None: this task will have the ERROR status
144
145
    # TODO: define a method to inform user for error in validation (Task run
146
    # with success but errors in data)
147
148
    def temporary_error_report(self, exc, submission_obj):
        """
        Manage a known issue in validation task. The submission is marked
        with the LOADED status and its owner is notified by email, with a
        message asking to try again later

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        message = "Errors in EBI API endpoints. Please try again later"

        # log the real cause first, then the message shown to the user
        logger.error("Error in validation: %s" % exc)
        logger.error(message)

        # update submission and send email (admins are not notified)
        self.__generic_error_report(
            submission_obj, status=LOADED, message=message)

        return "success"
171
172
    def ruleset_error_report(self, exc, submission_obj):
        """
        Manage a ruleset issue in validation task. The submission is marked
        with the ERROR status, and both the owner and the admins are
        notified by email, since the user can't do anything without admin
        intervention

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")

        # log the real cause first, then the message shown to the user
        logger.error("Error ruleset: %s" % exc)
        logger.error(message)

        # update submission and send email to the owner and to the admins
        self.__generic_error_report(
            submission_obj, status=ERROR, message=message, notify_admins=True)

        return "success"
198
199
    def run(self, submission_id):
        """
        Perform validation steps on every animal and sample of a submission,
        then update the submission status and its validation summary

        Args:
            submission_id (int): the primary key of the submission to
            validate

        Return
            str: "success" when the task ends in a managed way (even if
            errors were found in data or a known issue was reported)

        Raises:
            ValidationError: if the collected status names are not the
            expected ones (Error, JSON, Pass, Warning)
        """

        logger.info("Validate Submission started")

        # collect all unique messages for samples and animals. They are
        # reset at every call since they are stored in the task instance
        self.messages_samples = dict()
        self.messages_animals = dict()

        # get submission object
        submission_obj = Submission.objects.get(pk=submission_id)

        # read rules when task starts. Model issues when starting
        # OntologyCache at start
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # track global statuses for animals and samples. All the expected
        # keys are declared here, so they are defined even with no records
        submission_statuses_animals = Counter(
            {'Pass': 0,
             'Warning': 0,
             'Error': 0,
             'JSON': 0})

        submission_statuses_samples = Counter(
            {'Pass': 0,
             'Warning': 0,
             'Error': 0,
             'JSON': 0})

        try:
            for animal in Animal.objects.filter(
                    name__submission=submission_obj).order_by('id'):
                self.validate_model(animal, submission_statuses_animals)

            for sample in Sample.objects.filter(
                    name__submission=submission_obj).order_by('id'):
                self.validate_model(sample, submission_statuses_samples)

        # TODO: errors in validation should raise custom exception
        except json.decoder.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        # any other exception: ask celery to retry this task
        except Exception as exc:
            raise self.retry(exc=exc)

        # test for keys in submission_statuses
        statuses_animals = sorted(submission_statuses_animals.keys())
        statuses_samples = sorted(submission_statuses_samples.keys())

        # if error messages changes in IMAGE-ValidationTool, all this
        # stuff isn't valid and I throw an exception
        if statuses_animals != ['Error', 'JSON', 'Pass', 'Warning'] or \
                statuses_samples != ['Error', 'JSON', 'Pass', 'Warning']:
            message = "Error in statuses for submission %s: animals - %s, " \
                      "samples - %s" % (submission_obj, statuses_animals,
                                        statuses_samples)

            # debug: print error in log
            logger.error(message)

            # mark submission with ERROR (this is not related to user data)
            # calling the appropriate method passing ERROR as status
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since is an InjectTool issue
            raise ValidationError(message)

        # NOTE(review): in the two failing branches below the websocket
        # message is sent (by submission_fail) before the validation summary
        # is created, while in the READY branches it is sent after - confirm
        # whether construct_validation_message relies on the summary

        # If I have any error in JSON is a problem of injectool
        if self.has_errors_in_json(submission_statuses_animals) or \
                self.has_errors_in_json(submission_statuses_samples):
            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, "Wrong JSON structure")
            self.create_validation_summary(submission_obj,
                                           submission_statuses_animals,
                                           submission_statuses_samples)

            # debug
            logger.warning(
                "Wrong JSON structure for submission %s" % (submission_obj))

            logger.debug(
                "Results for submission %s: animals - %s, samples - %s" % (
                    submission_id, submission_statuses_animals,
                    submission_statuses_samples)
            )

        # set a proper value for status (READY or NEED_REVISION)
        # If I find any error, I will set a message and NEED_REVISION
        elif self.has_errors_in_rules(submission_statuses_animals) or \
                self.has_errors_in_rules(submission_statuses_samples):
            message = (
                "Error in metadata. Need revisions before submit")

            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, message)
            self.create_validation_summary(submission_obj,
                                           submission_statuses_animals,
                                           submission_statuses_samples)

            logger.warning(
                "Error in metadata for submission %s" % (submission_obj))

            logger.debug(
                "Results for submission %s: animals - %s, samples - %s" % (
                    submission_id, submission_statuses_animals,
                    submission_statuses_samples)
            )

        # WOW: I can submit those data (warnings don't prevent READY)
        elif self.has_warnings_in_rules(submission_statuses_animals) or \
                self.has_warnings_in_rules(submission_statuses_samples):
            submission_obj.status = READY
            submission_obj.message = "Submission validated with some warnings"
            submission_obj.save()
            self.create_validation_summary(submission_obj,
                                           submission_statuses_animals,
                                           submission_statuses_samples)

            # send message with channel
            self.send_message(READY, submission_obj)

            logger.info(
                "Submission %s validated with some warning" % (submission_obj))

            logger.debug(
                "Results for submission %s: animals - %s, samples - %s" % (
                    submission_id, submission_statuses_animals,
                    submission_statuses_samples)
            )

        # no errors and no warnings at all
        else:
            submission_obj.status = READY
            submission_obj.message = "Submission validated with success"
            submission_obj.save()
            self.create_validation_summary(submission_obj,
                                           submission_statuses_animals,
                                           submission_statuses_samples)

            # send message with channel
            self.send_message(READY, submission_obj)

            logger.info(
                "Submission %s validated with success" % (submission_obj))

            logger.debug(
                "Results for submission %s: animals - %s, samples - %s" % (
                    submission_id, submission_statuses_animals,
                    submission_statuses_samples)
            )

        logger.info("Validate Submission completed")

        return "success"
360
361
    def validate_model(self, model, submission_statuses):
        """
        Validate a single model: check its JSON structure first and, only
        if the structure is fine, check it against the ruleset

        Args:
            model: the object to validate, providing a to_biosample() method
            submission_statuses (collections.Counter): the overall statuses
            for this type of model, updated in place
        """

        logger.debug("Validating %s" % (model))

        # get data in biosample format
        biosample_data = model.to_biosample()

        # the structure check requires a list object as input
        structure_errors = self.ruleset.check_usi_structure([biosample_data])
        n_errors = len(structure_errors)

        if n_errors > 0:
            # JSON isn't valid: this is not an error on user's data but
            # on InjectTool itself. Track errors and stop here, since it
            # makes no sense to continue validation with a wrong JSON
            submission_statuses.update({'JSON': n_errors})
            self.mark_model(model, structure_errors, NEED_REVISION)

        else:
            # no check_duplicates: it checks against alias (that is a pk)
            # HINT: improve check_duplicates or implement database
            # constraints

            # check against image metadata, then update status and track
            # data in the overall variable
            ruleset_result = self.ruleset.validate(biosample_data)
            self.update_statuses(submission_statuses, model, ruleset_result)
390
391
    # inspired from validation.deal_with_validation_results
392
    def update_statuses(self, submission_statuses, model, result):
        """
        Mark a model relying on its validation result and count its overall
        status

        Args:
            submission_statuses (collections.Counter): the overall statuses,
            updated in place
            model: the validated object
            result: the validation result, providing get_overall_status()
        """

        # get overall status (ie Pass, Error)
        overall = result.get_overall_status()

        # a model is valid even if it has some warnings
        is_valid = overall in ("Pass", "Warning")
        self.mark_model(model, result, READY if is_valid else NEED_REVISION)

        # count one more occurrence of this status
        submission_statuses.update((overall,))
405
406
    def has_errors_in_rules(self, submission_statuses):
        """Return True if there is any errors"""

        return submission_statuses["Error"] != 0
413
414
    def has_warnings_in_rules(self, submission_statuses):
        """Return True if there is any warnings"""

        return submission_statuses["Warning"] != 0
421
422
    def has_errors_in_json(self, submission_statuses):
        """Return True if there is any error in JSON"""

        json_errors = submission_statuses["JSON"]
        return json_errors > 0
426
427
    def mark_model(self, model, result, status):
        """
        Set status to a model and store its validation results

        Args:
            model: the validated object (a Sample or an Animal are also
            tracked for the validation summary)
            result: a list of messages (when JSON structure is wrong) or a
            validation result providing get_messages(), result_set and
            get_overall_status()
            status (int): the status to set to the name of the model
        """

        if not isinstance(result, list):
            messages = result.get_messages()

            # get comparable messages for batch update
            comparable_messages = [
                item.get_comparable_str() for item in result.result_set]
            overall_status = result.get_overall_status()

        else:
            # a list means errors found while checking JSON structure
            messages = comparable_messages = result
            overall_status = "Wrong JSON structure"

        # Save all messages for validation summary
        if isinstance(model, Sample):
            tally_name = 'messages_samples'
        elif isinstance(model, Animal):
            tally_name = 'messages_animals'
        else:
            tally_name = None

        if tally_name is not None:
            # the dictionary might not exist when doing tests
            if not hasattr(self, tally_name):
                setattr(self, tally_name, dict())

            tally = getattr(self, tally_name)

            for text in comparable_messages:
                tally[text] = tally.get(text, 0) + 1

        name_obj = model.name

        # create a new validation result model or get the existing one
        if not hasattr(name_obj, 'validationresult'):
            validationresult = ValidationResultModel()
            name_obj.validationresult = validationresult

        else:
            validationresult = name_obj.validationresult

        # setting validation tool results and save
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # ok, don't update Name statuses for submitted objects which
        # already are in biosamples and pass validation
        if name_obj.status == COMPLETED and status == READY:
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK" % (
                    model, key2status[name_obj.status]))
            return

        logger.debug(
            "Marking %s with '%s' status (%s)" % (
                model, key2status[status], messages))

        # update model status and save
        name_obj.status = status
        name_obj.save()
487
488
    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """
        Mark a submission as failed and notify it through websocket

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            message (str): the reason of the failure
            status (int): the status to set (NEED_REVISION by default)
        """

        submission_obj.message = "Validation got errors: %s" % (message)
        submission_obj.status = status
        submission_obj.save()

        self.send_message(status, submission_obj)
495
496
    def create_validation_summary(self, submission_obj,
                                  submission_statuses_animals,
                                  submission_statuses_samples):
        """
        This function will create (or update) the ValidationSummary objects
        that will be used on validation_summary view: one for animals and
        one for samples

        Args:
            submission_obj: submission ref which has gone through validation
            submission_statuses_animals: Counter with statuses for animals
            submission_statuses_samples: Counter with statuses for samples
        """

        # model type, attribute collecting its messages, statuses counter
        summaries = (
            ('animal', 'messages_animals', submission_statuses_animals),
            ('sample', 'messages_samples', submission_statuses_samples),
        )

        for model_type, messages_attr, submission_statuses in summaries:
            # update the summary of this type, if it was already created
            try:
                validation_summary = submission_obj.validationsummary_set.get(
                    type=model_type
                )
            except ObjectDoesNotExist:
                validation_summary = ValidationSummary()

            # collected messages might not exist when doing tests (this is
            # the same assumption made when collecting them)
            messages = getattr(self, messages_attr, dict())

            validation_summary.submission = submission_obj
            validation_summary.pass_count = submission_statuses.get('Pass', 0)
            validation_summary.warning_count = submission_statuses.get(
                'Warning', 0)
            validation_summary.error_count = submission_statuses.get(
                'Error', 0)
            validation_summary.json_count = submission_statuses.get('JSON', 0)

            # every unique message is stored with its occurrences
            validation_summary.messages = [
                {'message': message, 'count': count}
                for message, count in messages.items()
            ]
            validation_summary.type = model_type
            validation_summary.save()
539
540
541
# register explicitly tasks
542
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
543
celery_app.tasks.register(ValidateTask)
544