Passed
Pull Request — master (#35)
by Paolo
02:08
created

ValidateSubmission.__has_key_in_rules()   A

Complexity

Conditions 3

Size

Total Lines 9
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 9
rs 10
c 0
b 0
f 0
cc 3
nop 2
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Oct  5 11:22:33 2018
5
6
@author: Paolo Cozzi <[email protected]>
7
8
Useful stuff to deal with the validation process
9
10
"""
11
12
import json
13
import traceback
14
import asyncio
15
16
from collections import Counter
17
from celery.utils.log import get_task_logger
18
19
from django.conf import settings
20
from django.core.mail import send_mass_mail
21
22
from common.constants import (
23
    READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES, KNOWN_STATUSES)
24
from common.helpers import send_message_to_websocket
25
from validation.helpers import construct_validation_message
26
from image.celery import app as celery_app, MyTask
27
from image_app.helpers import get_admin_emails
28
from image_app.models import Submission, Sample, Animal
29
from validation.models import ValidationSummary
30
31
from .models import ValidationResult as ValidationResultModel
32
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError
33
34
# Module-level logger, obtained through celery's task logger helper
logger = get_task_logger(__name__)

# Build a lookup dictionary out of the STATUSES values
# (ie {0: 'Waiting'})
key2status = dict(x.value for x in STATUSES)
39
40
41
# A class to deal with validation errors
class ValidationError(Exception):
    """Raised when the validation process itself can't be trusted.

    In this module it is raised when validation returns a status which is
    not supported, i.e. a problem which is not related to user data.
    """
44
45
46
class ValidateSubmission(object):
    """
    A helper class for the submission task, useful to pass parameters like
    submission data between tasks.

    It validates single animals/samples against a ruleset, stores the
    outcome on each record and collects per-type counters which are later
    saved in the validation summaries.
    """

    def __init__(self, submission_obj, ruleset):
        """
        Args:
            submission_obj: the submission being validated
            ruleset: the object providing ``check_usi_structure`` and
                ``validate`` used to check each record
        """

        # track submission object
        self.submission_obj = submission_obj

        # track ruleset
        self.ruleset = ruleset

        # collect all unique messages for samples and animals
        self.messages_animals = Counter()
        self.messages_samples = Counter()

        # track global statuses for animals and samples
        # Don't set keys: if you take a key which doesn't exist, you will
        # get 0 instead of a KeyError. This is how a Counter differs from a
        # default dictionary object
        self.statuses_animals = Counter()
        self.statuses_samples = Counter()

    def check_valid_statuses(self):
        """Check if validation returned an unsupported status message.

        Returns:
            bool: False (after logging the first offending status) if any
            collected status is not in KNOWN_STATUSES, True otherwise
        """

        # animals are inspected before samples
        for model_statuses in (self.statuses_animals, self.statuses_samples):
            for key in model_statuses:
                if key not in KNOWN_STATUSES:
                    logger.error(
                        "Unsupported status '%s' from validation", key)
                    return False

        # if I arrive here, all validation statuses are handled
        return True

    def __has_key_in_rules(self, key):
        """Generic function to test errors in validation rules.

        Args:
            key (str): the status to look for (ie 'Error')

        Returns:
            bool: True if at least one animal or sample was counted
            with this status
        """

        # reading a missing key from a Counter returns 0 and doesn't add it
        return (
            self.statuses_animals[key] > 0 or
            self.statuses_samples[key] > 0)

    def has_errors_in_rules(self):
        """Return True if there are any errors in validation rules"""

        return self.__has_key_in_rules('Error')

    def has_warnings_in_rules(self):
        """Return True if there are any warnings in validation rules"""

        return self.__has_key_in_rules('Warning')

    def has_errors_in_json(self):
        """Return True if there is any error in JSON"""

        return self.__has_key_in_rules('JSON')

    def validate_model(self, model):
        """Validate a single animal or sample and track its outcome.

        Args:
            model: a Sample or an Animal instance

        Raises:
            TypeError: if model is neither a Sample nor an Animal
        """

        logger.debug("Validating %s", model)

        # this could be animal or sample
        if isinstance(model, Sample):
            model_statuses = self.statuses_samples

        elif isinstance(model, Animal):
            model_statuses = self.statuses_animals

        else:
            # fail immediately instead of hitting an unbound variable
            # after having already queried the ruleset
            raise TypeError(
                "Can't validate %r: expected a Sample or an Animal" % (
                    model,))

        # get data in biosample format
        data = model.to_biosample()

        # input is a list object
        usi_result = self.ruleset.check_usi_structure([data])

        # if I have errors here, JSON isn't valid: this is not an error
        # on user's data but on InjectTool itself
        if usi_result:
            # update counter for JSON (one for each message returned)
            model_statuses.update({'JSON': len(usi_result)})
            model_statuses.update(['Issues', 'Known'])

            # update model results
            self.mark_model(model, usi_result, NEED_REVISION)

            # It makes no sense to continue validation since JSON is wrong
            return

        # no check_duplicates: it checks against alias (that is a pk)
        # HINT: improve check_duplicates or implement database constraints

        # check against image metadata
        ruleset_result = self.ruleset.validate(data)

        # update status and track data in an overall variable
        self.update_statuses(model_statuses, model, ruleset_result)

    # inspired from validation.deal_with_validation_results
    def update_statuses(self, model_statuses, model, result):
        """Mark a model relying on its validation result and count it.

        Args:
            model_statuses (collections.Counter): the counter to update
                (animals or samples statuses)
            model: the validated Sample or Animal instance
            result: the object returned by the ruleset validation
        """

        # get overall status (ie Pass, Error)
        overall = result.get_overall_status()

        # set model as valid even if it has some warnings
        if overall in ["Pass", "Warning"]:
            self.mark_model(model, result, READY)

        else:
            model_statuses['Issues'] += 1
            self.mark_model(model, result, NEED_REVISION)

        # count the overall status and the validated record
        model_statuses[overall] += 1
        model_statuses['Known'] += 1

    def mark_model(self, model, result, status):
        """Set status to a model and instantiate a ValidationResult obj.

        Args:
            model: the validated Sample or Animal instance
            result: a list of messages (wrong JSON structure) or the
                object returned by the ruleset validation
            status (int): the status to set on model.name
        """

        if isinstance(result, list):
            # a plain list comes from the JSON structure check
            messages = result
            comparable_messages = result
            overall_status = "Wrong JSON structure"

        else:
            messages = result.get_messages()

            # get comparable messages for batch update
            comparable_messages = [
                result_set.get_comparable_str()
                for result_set in result.result_set]
            overall_status = result.get_overall_status()

        # Save all messages for validation summary (counter objects
        # count one occurrence for each item of the list)
        if isinstance(model, Sample):
            self.messages_samples.update(comparable_messages)

        elif isinstance(model, Animal):
            self.messages_animals.update(comparable_messages)

        # get a validation result model or create a new one
        if hasattr(model.name, 'validationresult'):
            validationresult = model.name.validationresult

        else:
            validationresult = ValidationResultModel()
            model.name.validationresult = validationresult

        # setting validationtool results and save
        validationresult.messages = messages
        validationresult.status = overall_status
        validationresult.save()

        # ok, don't update Name statuses for submitted objects which
        # already are in biosamples and pass validation
        if model.name.status == COMPLETED and status == READY:
            logger.debug(
                "Ignoring %s: status was '%s' and validation is OK",
                model, key2status[model.name.status])

        else:
            logger.debug(
                "Marking %s with '%s' status (%s)",
                model, key2status[status], messages)

            # update model status and save
            model.name.status = status
            model.name.save()

    def create_validation_summary(self):
        """
        This function will create ValidationSummary object that will be used
        on validation_summary view
        """

        # one summary for each type, with its own messages and statuses
        sources = (
            ('animal', self.messages_animals, self.statuses_animals),
            ('sample', self.messages_samples, self.statuses_samples),
        )

        for model_type, messages, model_statuses in sources:
            summary_obj, created = ValidationSummary.objects.get_or_create(
                submission=self.submission_obj, type=model_type)

            if created:
                logger.debug(
                    "Created %s validationSummary for %s",
                    model_type, self.submission_obj)

            # reset all_count
            summary_obj.reset_all_count()

            summary_obj.submission = self.submission_obj

            # they are counter objects, so no KeyError: a missing key is 0
            summary_obj.pass_count = model_statuses['Pass']
            summary_obj.warning_count = model_statuses['Warning']
            summary_obj.error_count = model_statuses['Error']
            summary_obj.json_count = model_statuses['JSON']
            summary_obj.issues_count = model_statuses['Issues']
            summary_obj.validation_known_count = model_statuses['Known']

            summary_obj.messages = [
                {'message': message, 'count': count}
                for message, count in messages.items()]
            summary_obj.type = model_type
            summary_obj.save()

        logger.debug(
            "Results for submission %s: animals - %s, samples - %s",
            self.submission_obj,
            dict(self.statuses_animals),
            dict(self.statuses_samples))
279
280
281
class ValidateTask(MyTask):
    """Celery task which validates the animals and samples of a submission
    and updates the submission status relying on the outcome."""

    name = "Validate Submission"
    description = """Validate submission data against IMAGE rules"""

    # http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation
    # A task is not instantiated for every request, but is registered in
    # the task registry as a global instance. This means that the __init__
    # constructor will only be called once per process, and that the
    # task class is semantically closer to an Actor. if you have a task and
    # you route every request to the same process, then it will keep state
    # between requests. This can also be useful to cache resources, For
    # example, a base Task class that caches a database connection

    # TODO: extract a generic send_message for all modules which need it
    def send_message(self, status, submission_obj):
        """
        Update submission.status and submission message using django
        channels

        Args:
            status (int): a :py:class:`common.constants.STATUSES` object
            submission_obj (image_app.models.Submission): an UID submission
            object
        """

        payload = {
            'message': STATUSES.get_value_display(status),
            'notification_message': submission_obj.message,
            'validation_message': construct_validation_message(
                submission_obj)
        }

        # block until the message has been sent through the websocket
        asyncio.get_event_loop().run_until_complete(
            send_message_to_websocket(payload, submission_obj.pk))

    def __generic_error_report(
            self, submission_obj, status, message, notify_admins=False):
        """
        Generic report for updating submission objects and send email after
        an exception is called

        Args:
            submission_obj (image_app.models.Submission): an UID submission
            object
            status (int): a :py:class:`common.constants.STATUSES` object
            message (str): a text object
            notify_admins (bool): send mail to the admins or not
        """

        # mark submission with its status and notify it
        self.__mark_submission(submission_obj, message, status)

        # get info on the exception currently being handled
        einfo = traceback.format_exc()

        # send a mail to the user with the stacktrace (einfo)
        email_subject = "Error in IMAGE Validation: %s" % (message)
        email_message = (
            "Something goes wrong with validation. Please report "
            "this to InjectTool team\n\n %s" % str(einfo))

        submission_obj.owner.email_user(
            email_subject,
            email_message,
        )

        # TODO: should this be a common.helpers method?
        if notify_admins:
            # submit mail to admins
            datatuple = (
                email_subject,
                email_message,
                settings.DEFAULT_FROM_EMAIL,
                get_admin_emails())

            send_mass_mail((datatuple, ))

    # Override default on failure method
    # This is not a failed validation for a wrong value, this is an
    # error in task that means an error in coding
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """Mark the submission with ERROR and notify owner and admins.

        Args:
            exc (Exception): the exception raised by the task
            task_id (str): the id of the failed task
            args (tuple): task positional arguments; the first one is
                used as the submission primary key
            kwargs (dict): task keyword arguments (unused)
            einfo: exception info received from the caller (unused)
        """

        logger.error('{0!r} failed: {1!r}'.format(task_id, exc))

        # define message
        message = "Unknown error in validation - %s" % str(exc)

        # get submission object
        submission_obj = Submission.objects.get(pk=args[0])

        # call generic report which updates submission and sends email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        # returns None: this task will have the ERROR status

    # TODO: define a method to inform user for error in validation (Task run
    # with success but errors in data)

    def temporary_error_report(self, exc, submission_obj):
        """
        Deal with known issues in validation task. Notify the user using
        email and set status as LOADED in order to recall this task

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Returns:
            str: "success" since this task is correctly managed
        """

        logger.error("Error in validation: %s", exc)

        message = "Errors in EBI API endpoints. Please try again later"
        logger.error(message)

        # call generic report which updates submission and sends email
        self.__generic_error_report(submission_obj, LOADED, message)

        return "success"

    def ruleset_error_report(self, exc, submission_obj):
        """
        Deal with ruleset issue in validation task. Notify the user using
        email and set status as ERROR, since he can't do anything without
        admin intervention

        Args:
            exc (Exception): an py:exc`Exception` object
            submission_obj (image_app.models.Submission): an UID submission
            object

        Returns:
            str: "success" since this task is correctly managed
        """

        logger.error("Error ruleset: %s", exc)

        message = (
            "Error in IMAGE-metadata ruleset. Please inform InjectTool team")
        logger.error(message)

        # call generic report which updates submission and sends email
        self.__generic_error_report(
            submission_obj, ERROR, message, notify_admins=True)

        return "success"

    def run(self, submission_id):
        """A function to perform validation steps.

        Args:
            submission_id: the primary key of the Submission to validate

        Returns:
            str: "success" when the task is correctly managed (even if
            data need revision)

        Raises:
            ValidationError: if validation returns an unsupported status
        """

        logger.info("Validate Submission started")

        # get submission object
        submission_obj = Submission.objects.get(pk=submission_id)

        # read rules when task starts. Model issues when starting
        # OntologyCache at start
        try:
            self.ruleset = MetaDataValidation()

        except OntologyCacheError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except RulesetError as exc:
            return self.ruleset_error_report(exc, submission_obj)

        # get a submission data helper instance
        validate_submission = ValidateSubmission(submission_obj, self.ruleset)

        try:
            for animal in Animal.objects.filter(
                    name__submission=submission_obj).order_by('id'):
                validate_submission.validate_model(animal)

            for sample in Sample.objects.filter(
                    name__submission=submission_obj).order_by('id'):
                validate_submission.validate_model(sample)

        # TODO: errors in validation should raise custom exception
        except json.decoder.JSONDecodeError as exc:
            return self.temporary_error_report(exc, submission_obj)

        except Exception as exc:
            raise self.retry(exc=exc)

        # if error messages change in IMAGE-ValidationTool, all this
        # stuff isn't valid and I throw an exception
        statuses_are_valid = validate_submission.check_valid_statuses()

        # create validation summary: it is needed whatever the outcome is
        validate_submission.create_validation_summary()

        if not statuses_are_valid:
            message = (
                "Unsupported validation status for submission %s" % (
                    submission_obj,))

            # debug: print error in log
            logger.error(message)

            # mark submission with ERROR (this is not related to user data)
            # calling the appropriate method passing ERROR as status
            self.submission_fail(submission_obj, message, status=ERROR)

            # raise an exception since it is an InjectTool issue
            raise ValidationError(message)

        # If I have any error in JSON it is a problem of InjectTool
        if validate_submission.has_errors_in_json():
            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, "Wrong JSON structure")

            # debug
            logger.warning(
                "Wrong JSON structure for submission %s", submission_obj)

        # set a proper value for status (READY or NEED_REVISION)
        # If I find any error, I will return a message and I will
        # set NEED_REVISION
        elif validate_submission.has_errors_in_rules():
            message = (
                "Error in metadata. Need revisions before submit")

            # mark submission with NEED_REVISION
            self.submission_fail(submission_obj, message)

            logger.warning(
                "Error in metadata for submission %s", submission_obj)

        # WOW: I can submit those data
        elif validate_submission.has_warnings_in_rules():
            message = "Submission validated with some warnings"

            # mark submission with READY status
            self.submission_ready(submission_obj, message)

            logger.info(
                "Submission %s validated with some warning", submission_obj)

        else:
            message = "Submission validated with success"

            # mark submission with READY status
            self.submission_ready(submission_obj, message)

            logger.info(
                "Submission %s validated with success", submission_obj)

        logger.info("Validate Submission completed")

        return "success"

    def __mark_submission(self, submission_obj, message, status):
        """Mark submission with status and message, then notify it
        through send_message"""

        submission_obj.status = status
        submission_obj.message = message
        submission_obj.save()

        self.send_message(status, submission_obj)

    def submission_fail(self, submission_obj, message, status=NEED_REVISION):
        """Mark a submission as failed (NEED_REVISION status by default)"""

        # override message
        message = ("Validation got errors: %s" % (message))
        self.__mark_submission(submission_obj, message, status)

    def submission_ready(self, submission_obj, message):
        """Mark a submission with READY status"""

        self.__mark_submission(submission_obj, message, READY)
572
573
574
# register explicitly tasks
575
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
576
# NOTE(review): presumably required because class-based tasks are not
# registered automatically by celery (see the issue linked above) - confirm
celery_app.tasks.register(ValidateTask)
577