Completed
Pull Request — master (#30)
by
unknown
13:01
created

ValidateTask.__generic_error_report()   A

Complexity

Conditions 2

Size

Total Lines 44
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 21
dl 0
loc 44
rs 9.376
c 0
b 0
f 0
cc 2
nop 5
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Oct  5 11:22:33 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
Useful stuff to deal with the validation process
9
10
"""
11
12
import json
13
import traceback
14
import asyncio
15
16
from collections import Counter
17
from celery.utils.log import get_task_logger
18
19
from django.conf import settings
20
from django.core.mail import send_mass_mail
21
from django.core.exceptions import ObjectDoesNotExist
22
23
from common.constants import (
24
    READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES)
25
from common.helpers import send_message_to_websocket
26
from validation.helpers import construct_validation_message
27
from image.celery import app as celery_app, MyTask
28
from image_app.helpers import get_admin_emails
29
from image_app.models import Submission, Sample, Animal
30
from validation.models import ValidationSummary
31
32
from .models import ValidationResult as ValidationResultModel
33
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError
34
35
# Get an instance of a logger
logger = get_task_logger(__name__)

# map every status key to its name (ie {0: 'Waiting'})
key2status = dict(x.value for x in STATUSES)
40
41
42
# A class to deal with validation errors
43
class ValidationError(Exception):
    """Exception raised when the validation task finds an inconsistency"""
45
46
47
class ValidateTask(MyTask):
48
    name = "Validate Submission"
49
    description = """Validate submission data against IMAGE rules"""
50
51
    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
52
    # A task is not instantiated for every request, but is registered in
53
    # the task registry as a global instance. This means that the __init__
54
    # constructor will only be called once per process, and that the
55
    # task class is semantically closer to an Actor. if you have a task and
56
    # you route every request to the same process, then it will keep state
57
    # between requests. This can also be useful to cache resources, For
58
    # example, a base Task class that caches a database connection
59
60
    def send_message(self, status, submission_obj):
        """
        Notify the submission status and messages using django channels

        Args:
            status (int): a :py:class:`common.constants.STATUSES` object
            submission_obj (image_app.models.Submission): an UID submission
            object
        """

        # the event loop is requested first, before building the payload
        loop = asyncio.get_event_loop()

        payload = {
            'message': STATUSES.get_value_display(status),
            'notification_message': submission_obj.message,
            'validation_message': construct_validation_message(
                submission_obj),
        }

        # block until the message has been handed to the websocket helper
        loop.run_until_complete(
            send_message_to_websocket(payload, submission_obj.pk))
82
83
    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Track an error on the submission object, notify it through the
        websocket and send an email with the current traceback

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # track status and message in the submission
        submission_obj.status = status
        submission_obj.message = message
        submission_obj.save()

        self.send_message(status, submission_obj)

        # formatted traceback of the exception currently being handled
        stacktrace = traceback.format_exc()

        subject = "Error in IMAGE Validation: %s" % (message)
        body = (
            "Something goes wrong with validation. Please report "
            "this to InjectTool team\n\n %s" % str(stacktrace))

        # the submission owner always receives the report
        submission_obj.owner.email_user(subject, body)

        if not notify_admins:
            return

        # same mail, this time to the admins
        admin_mail = (
            subject, body, settings.DEFAULT_FROM_EMAIL, get_admin_emails())

        send_mass_mail((admin_mail, ))
127
128
    # Override default on failure method
129
    # This is not a failed validation for a wrong value, this is an
130
    # error in task that mean an error in coding
131
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """
        Handle a failure of the task itself: log it, mark the submission
        with the ERROR status and send mails to the owner and the admins

        Args:
            exc (Exception): the exception which made the task fail
            task_id (str): the id of the failed task
            args (list): the task positional arguments; the first one is
            used as the submission primary key
            kwargs (dict): the task keyword arguments (not used here)
            einfo: the exception information (not used here)
        """

        logger.error('{0!r} failed: {1!r}'.format(task_id, exc))

        message = "Unknown error in validation - %s" % str(exc)

        # the submission id is the first argument given to the task
        submission_id = args[0]
        submission_obj = Submission.objects.get(pk=submission_id)

        # update the submission and send emails
        self.__generic_error_report(
            submission_obj,
            ERROR,
            message,
            notify_admins=True)

        # returns None: this task will have the ERROR status
145
146
    # TODO: define a method to inform user for error in validation (Task run
147
    # with success but errors in data)
148
149
    def temporary_error_report(self, exc, submission_obj):
        """
        Manage known issues of the validation task. The user is notified by
        email and the submission gets the LOADED status, so that the
        validation can be called again

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        message = "Errors in EBI API endpoints. Please try again later"

        # log the real exception first, then the message for the user
        logger.error("Error in validation: %s" % exc)
        logger.error(message)

        # update the submission and send the email (no admins notification)
        self.__generic_error_report(submission_obj, LOADED, message)

        return "success"
172
173
    def ruleset_error_report(self, exc, submission_obj):
        """
        Manage ruleset issues of the validation task. The user is notified
        by email and the submission gets the ERROR status, since nothing can
        be done without the admins (who receive the email too)

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Return
            str: "success" since this task is correctly managed
        """

        message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")

        # log the real exception first, then the message for the user
        logger.error("Error ruleset: %s" % exc)
        logger.error(message)

        # update the submission and send emails, admins included
        self.__generic_error_report(
            submission_obj,
            ERROR,
            message,
            notify_admins=True)

        return "success"
199
200
    def run(self, submission_id):
        """
        Validate all animals and samples of a submission, then update the
        submission status and its validation summary accordingly

        Args:
            submission_id (int): the primary key of the
            :py:class:`image_app.models.Submission` to validate

        Return
            str: "success" when the task completes, or the value returned
            by the error report methods for managed errors

        Raises:
            ValidationError: if the collected status keys differ from the
            expected ones (an InjectTool issue, not a user data issue)
        """

        logger.info("Validate Submission started")

        # start from scratch with unique messages for samples and animals
        self.messages_samples = dict()
        self.messages_animals = dict()

        submission_obj = Submission.objects.get(pk=submission_id)

        # rules are read when the task starts: loading them may fail
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # global statuses tracked for animals and samples, all set to 0
        tracked_statuses = ('Pass', 'Warning', 'Error', 'JSON')

        submission_statuses_animals = Counter(
            dict.fromkeys(tracked_statuses, 0))

        submission_statuses_samples = Counter(
            dict.fromkeys(tracked_statuses, 0))

        try:
            animals = Animal.objects.filter(
                name__submission=submission_obj).order_by('id')

            for animal in animals:
                self.validate_model(animal, submission_statuses_animals)

            samples = Sample.objects.filter(
                name__submission=submission_obj).order_by('id')

            for sample in samples:
                self.validate_model(sample, submission_statuses_samples)

        # TODO: errors in validation should raise custom exception
        except json.decoder.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except Exception as exc:
            # any other exception: ask for a new execution of this task
            raise self.retry(exc=exc)

        # the keys I found have to be the ones I'm tracking
        expected_keys = sorted(tracked_statuses)
        statuses_animals = sorted(submission_statuses_animals)
        statuses_samples = sorted(submission_statuses_samples)

        # if statuses change in IMAGE-ValidationTool, all the following
        # stuff isn't valid and an exception is thrown
        if statuses_animals != expected_keys or \
                statuses_samples != expected_keys:
            message = (
                "Error in statuses for submission %s: animals - %s, "
                "samples - %s" % (
                    submission_obj, statuses_animals, statuses_samples))

            logger.error(message)

            # mark submission with ERROR (this is not related to user data)
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since is an InjectTool issue
            raise ValidationError(message)

        def found(check):
            """Apply check to animals first, then to samples if needed"""

            return check(submission_statuses_animals) or \
                check(submission_statuses_samples)

        # errors in JSON are a problem of InjectTool
        if found(self.has_errors_in_json):
            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, "Wrong JSON structure")
            self.create_validation_summary(
                submission_obj,
                submission_statuses_animals,
                submission_statuses_samples)

            logger.warning(
                "Wrong JSON structure for submission %s" % (submission_obj))

        # errors in metadata: the user needs to revise its data
        elif found(self.has_errors_in_rules):
            message = (
                "Error in metadata. Need revisions before submit")

            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, message)
            self.create_validation_summary(
                submission_obj,
                submission_statuses_animals,
                submission_statuses_samples)

            logger.warning(
                "Error in metadata for submission %s" % (submission_obj))

        # WOW: those data can be submitted (with or without warnings)
        else:
            if found(self.has_warnings_in_rules):
                ready_message = "Submission validated with some warnings"
                log_template = "Submission %s validated with some warning"

            else:
                ready_message = "Submission validated with success"
                log_template = "Submission %s validated with success"

            submission_obj.status = READY
            submission_obj.message = ready_message
            submission_obj.save()
            self.create_validation_summary(
                submission_obj,
                submission_statuses_animals,
                submission_statuses_samples)

            # send message with channel
            self.send_message(READY, submission_obj)

            logger.info(log_template % (submission_obj))

        # whatever the outcome, track the collected results
        logger.debug(
            "Results for submission %s: animals - %s, samples - %s" % (
                submission_id, submission_statuses_animals,
                submission_statuses_samples)
        )

        logger.info("Validate Submission completed")

        return "success"
361
362
    def validate_model(self, model, submission_statuses):
        """
        Validate a single object: check its JSON structure first and, only
        if the structure is fine, check it against the loaded ruleset

        Args:
            model: the object to validate; it has to provide a
            ``to_biosample`` method
            submission_statuses (collections.Counter): the statuses
            collected so far, updated in place
        """

        logger.debug("Validating %s" % (model))

        # get data in biosample format
        data = model.to_biosample()

        # the structure check wants a list object
        usi_result = self.ruleset.check_usi_structure([data])

        if len(usi_result) == 0:
            # no check_duplicates: it checks against alias (that is a pk)
            # HINT: improve check_duplicates or implement database
            # constraints

            # check against image metadata, then update status and track
            # data in the overall variable
            ruleset_result = self.ruleset.validate(data)
            self.update_statuses(submission_statuses, model, ruleset_result)
            return

        # errors here mean that JSON isn't valid: this is not an error
        # on user's data but on InjectTool itself. Count them as JSON
        submission_statuses.update({'JSON': len(usi_result)})

        # It makes no sense to continue validation since JSON is wrong:
        # update model results and stop here
        self.mark_model(model, usi_result, NEED_REVISION)
391
392
    # inspired from validation.deal_with_validation_results
393
    def update_statuses(self, submission_statuses, model, result):
        """
        Mark a model relying on its validation result and count the overall
        status in the collected statuses

        Args:
            submission_statuses (collections.Counter): the statuses
            collected so far, updated in place
            model: the validated object
            result: the validation result; it has to provide a
            ``get_overall_status`` method
        """

        # get overall status (ie Pass, Error)
        overall = result.get_overall_status()

        # a model is valid even if it has some warnings
        is_valid = overall in ("Pass", "Warning")
        self.mark_model(model, result, READY if is_valid else NEED_REVISION)

        # count one more occurrence of this overall status
        submission_statuses.update({overall})
406
407
    def has_errors_in_rules(self, submission_statuses):
        """Return True if the "Error" count is not zero"""

        return submission_statuses["Error"] != 0
414
415
    def has_warnings_in_rules(self, submission_statuses):
        """Return True if the "Warning" count is not zero"""

        return submission_statuses["Warning"] != 0
422
423
    def has_errors_in_json(self, submission_statuses):
        """Return True if the "JSON" count is greater than zero"""

        json_errors = submission_statuses["JSON"]

        return json_errors > 0
427
428
    def mark_model(self, model, result, status):
        """
        Store the validation result of a model, count its messages for the
        validation summary and update the status of its name

        Args:
            model: the validated object (counted only if it is a Sample or
            an Animal)
            result: a list of messages (wrong JSON structure) or a
            validation result object
            status (int): the status to set on the model name
        """

        if isinstance(result, list):
            # a list of messages comes from the JSON structure check
            messages = comparable_messages = result
            overall_status = "Wrong JSON structure"

        else:
            messages = result.get_messages()

            # get comparable messages for batch update
            comparable_messages = [
                result_set.get_comparable_str()
                for result_set in result.result_set]

            overall_status = result.get_overall_status()

        # Choose where to save messages for validation summary
        if isinstance(model, Sample):
            counts_attr = 'messages_samples'

        elif isinstance(model, Animal):
            counts_attr = 'messages_animals'

        else:
            counts_attr = None

        if counts_attr is not None:
            # the attribute might not exist when doing tests
            if not hasattr(self, counts_attr):
                setattr(self, counts_attr, dict())

            counts = getattr(self, counts_attr)

            for message in comparable_messages:
                counts[message] = counts.get(message, 0) + 1

        name = model.name

        # get a validation result model or create a new one
        if hasattr(name, 'validationresult'):
            validationresult = name.validationresult

        else:
            validationresult = ValidationResultModel()
            name.validationresult = validationresult

        # set validation tool results and save
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # ok, don't update Name statuses for submitted objects which
        # already are in biosamples and pass validation
        if name.status == COMPLETED and status == READY:
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK" % (
                    model, key2status[name.status]))
            return

        logger.debug(
            "Marking %s with '%s' status (%s)" % (
                model, key2status[status], messages))

        # update model status and save
        name.status = status
        name.save()
488
489
    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """
        Mark a submission as failed (NEED_REVISION status by default) and
        notify it

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            message (str): the reason of the failure
            status (int): the status to set on the submission
        """

        failure_message = "Validation got errors: %s" % (message)

        submission_obj.status = status
        submission_obj.message = failure_message
        submission_obj.save()

        # send message with channel
        self.send_message(status, submission_obj)
496
497
    def create_validation_summary(self, submission_obj,
                                  submission_statuses_animals,
                                  submission_statuses_samples):
        """
        This function will create (or update) the ValidationSummary objects
        that will be used on validation_summary view: one for animals and
        one for samples

        Args:
            submission_obj: submission ref which has gone through validation
            submission_statuses_animals: Counter with statuses for animals
            submission_statuses_samples: Counter with statuses for samples
        """

        # for each type: where its messages are collected and its statuses
        per_type = (
            ('animal', 'messages_animals', submission_statuses_animals),
            ('sample', 'messages_samples', submission_statuses_samples),
        )

        for model_type, messages_attr, submission_statuses in per_type:
            # reuse the summary of a previous validation, if any
            try:
                validation_summary = submission_obj.validationsummary_set.get(
                    type=model_type
                )

            except ObjectDoesNotExist:
                validation_summary = ValidationSummary()

            # messages might not exist if no model of this type was marked
            # (ie when doing tests): consider them as empty
            messages = getattr(self, messages_attr, dict())

            validation_summary.submission = submission_obj
            validation_summary.pass_count = submission_statuses.get('Pass', 0)
            validation_summary.warning_count = submission_statuses.get(
                'Warning', 0)
            validation_summary.error_count = submission_statuses.get(
                'Error', 0)
            validation_summary.json_count = submission_statuses.get('JSON', 0)

            # one item for each unique message, with its occurrences
            validation_summary.messages = [
                {'message': message, 'count': count}
                for message, count in messages.items()]

            validation_summary.type = model_type
            validation_summary.save()
540
541
542
# explicitly register the task
543
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
544
celery_app.tasks.register(ValidateTask)
545