1
|
|
|
#!/usr/bin/env python3 |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
""" |
4
|
|
|
Created on Fri Oct 5 11:22:33 2018 |
5
|
|
|
|
6
|
|
|
@author: Paolo Cozzi <[email protected]> |
7
|
|
|
|
8
|
|
|
Useful stuff to deal with the validation process |
9
|
|
|
|
10
|
|
|
""" |
11
|
|
|
|
12
|
|
|
import json |
13
|
|
|
import traceback |
14
|
|
|
import asyncio |
15
|
|
|
|
16
|
|
|
from collections import Counter |
17
|
|
|
from celery.utils.log import get_task_logger |
18
|
|
|
|
19
|
|
|
from django.conf import settings |
20
|
|
|
from django.core.mail import send_mass_mail |
21
|
|
|
|
22
|
|
|
from common.constants import ( |
23
|
|
|
READY, ERROR, LOADED, NEED_REVISION, COMPLETED, STATUSES) |
24
|
|
|
from common.helpers import send_message_to_websocket |
25
|
|
|
from image.celery import app as celery_app, MyTask |
26
|
|
|
from image_app.helpers import get_admin_emails |
27
|
|
|
from image_app.models import Submission, Sample, Animal |
28
|
|
|
|
29
|
|
|
from .models import ValidationResult as ValidationResultModel |
30
|
|
|
from .helpers import MetaDataValidation, OntologyCacheError, RulesetError |
31
|
|
|
|
32
|
|
|
# Module level logger, obtained through celery task logging utilities
logger = get_task_logger(__name__)

# dictionary from status key to status name (ie {0: 'Waiting'})
key2status = dict(status.value for status in STATUSES)
37
|
|
|
|
38
|
|
|
|
39
|
|
|
# A class to deal with validation errors |
40
|
|
|
class ValidationError(Exception):
    """Raised when validation results can't be interpreted by the task"""
42
|
|
|
|
43
|
|
|
|
44
|
|
|
class ValidateTask(MyTask): |
45
|
|
|
name = "Validate Submission" |
46
|
|
|
description = """Validate submission data against IMAGE rules""" |
47
|
|
|
|
48
|
|
|
# http://docs.celeryproject.org/en/latest/userguide/tasks.html#instantiation |
49
|
|
|
# A task is not instantiated for every request, but is registered in |
50
|
|
|
# the task registry as a global instance. This means that the __init__ |
51
|
|
|
# constructor will only be called once per process, and that the |
52
|
|
|
# task class is semantically closer to an Actor. if you have a task and |
53
|
|
|
# you route every request to the same process, then it will keep state |
54
|
|
|
# between requests. This can also be useful to cache resources, For |
55
|
|
|
# example, a base Task class that caches a database connection |
56
|
|
|
|
57
|
|
|
def send_message(self, status, submission_obj):
    """
    Notify the websocket about the submission status and message

    Args:
        status (int): a :py:class:`common.constants.STATUSES` object
        submission_obj (image_app.models.Submission): an UID submission
        object
    """

    # get the loop first, then build what has to be sent
    loop = asyncio.get_event_loop()

    payload = {
        'message': STATUSES.get_value_display(status),
        'notification_message': submission_obj.message
    }

    # the submission primary key is passed along with the payload
    notification = send_message_to_websocket(payload, submission_obj.pk)

    # block until the notification has been delivered
    loop.run_until_complete(notification)
77
|
|
|
|
78
|
|
|
def __generic_error_report(
        self, submission_obj, status, message, notify_admins=False):
    """
    Update a submission after an exception, then notify by email the
    owner and (optionally) the admins

    Args:
        submission_obj (image_app.models.Submission): an UID submission
        object
        status (int): a :py:class:`common.constants.STATUSES` object
        message (str): a text object
        notify_admins (bool): send mail to the admins or not
    """

    # track status and message in the submission itself
    submission_obj.status = status
    submission_obj.message = message
    submission_obj.save()

    # notify the new status through websocket
    self.send_message(status, submission_obj)

    # formatted traceback of the exception currently being handled
    stacktrace = traceback.format_exc()

    subject = "Error in IMAGE Validation: %s" % (message)
    body = (
        "Something goes wrong with validation. Please report "
        "this to InjectTool team\n\n %s" % str(stacktrace))

    # the owner of the submission always receives the mail
    submission_obj.owner.email_user(subject, body)

    if not notify_admins:
        return

    # same mail for the admins
    send_mass_mail((
        (subject, body, settings.DEFAULT_FROM_EMAIL, get_admin_emails()),
    ))
122
|
|
|
|
123
|
|
|
# Override default on failure method |
124
|
|
|
# This is not a failed validation for a wrong value, this is an |
125
|
|
|
# error in task that mean an error in coding |
126
|
|
|
def on_failure(self, exc, task_id, args, kwargs, einfo):
    """
    Mark the submission with ERROR and notify owner and admins when the
    task fails with an unmanaged exception

    Args:
        exc (Exception): the exception raised by the task
        task_id (str): the id of the failed task
        args (tuple): positional arguments the task was called with
        kwargs (dict): keyword arguments the task was called with
        einfo: exception information provided by celery (not used here)
    """

    logger.error('{0!r} failed: {1!r}'.format(task_id, exc))

    # define message
    message = "Unknown error in validation - %s" % str(exc)

    # the submission id is the only argument of run(): it is in args when
    # the task was called positionally, in kwargs otherwise
    submission_id = args[0] if args else kwargs['submission_id']

    # get submission object
    submission_obj = Submission.objects.get(pk=submission_id)

    # call generic report which update submission and send email
    self.__generic_error_report(
        submission_obj, ERROR, message, notify_admins=True)

    # returns None: this task will have the ERROR status
140
|
|
|
|
141
|
|
|
# TODO: define a method to inform user for error in validation (Task run |
142
|
|
|
# with success but errors in data) |
143
|
|
|
|
144
|
|
|
def temporary_error_report(self, exc, submission_obj):
    """
    Manage a known issue of the validation task: the owner is notified
    by email and the submission is marked as LOADED

    Args:
        exc (Exception): the exception which was caught
        submission_obj (image_app.models.Submission): an UID submission
        object

    Returns:
        str: "success" since this task is correctly managed
    """

    logger.error("Error in validation: %s", exc)

    user_message = "Errors in EBI API endpoints. Please try again later"
    logger.error(user_message)

    # update submission and send email (admins are not notified)
    self.__generic_error_report(submission_obj, LOADED, user_message)

    return "success"
167
|
|
|
|
168
|
|
|
def ruleset_error_report(self, exc, submission_obj):
    """
    Manage a ruleset issue of the validation task: owner and admins are
    notified by email and the submission is marked as ERROR, since the
    user can't do anything without admin intervention

    Args:
        exc (Exception): the exception which was caught
        submission_obj (image_app.models.Submission): an UID submission
        object

    Returns:
        str: "success" since this task is correctly managed
    """

    logger.error("Error ruleset: %s", exc)

    user_message = (
        "Error in IMAGE-metadata ruleset. Please inform InjectTool team")
    logger.error(user_message)

    # update submission and send email to the owner and to the admins
    self.__generic_error_report(
        submission_obj, ERROR, user_message, notify_admins=True)

    return "success"
194
|
|
|
|
195
|
|
|
def run(self, submission_id):
    """
    Validate all animals and samples of a submission and update the
    submission status accordingly

    Args:
        submission_id (int): the primary key of the submission

    Returns:
        str: "success" when the task ends without unmanaged errors

    Raises:
        ValidationError: when the collected statuses are not the
        expected ones
    """

    logger.info("Validate Submission started")

    # get submission object
    submission_obj = Submission.objects.get(pk=submission_id)

    # rules are read when the task starts: known issues are managed here
    try:
        self.ruleset = MetaDataValidation()

    except OntologyCacheError as exc:
        return self.temporary_error_report(exc, submission_obj)

    except RulesetError as exc:
        return self.ruleset_error_report(exc, submission_obj)

    # track global statuses, each one starting from 0
    submission_statuses = Counter(
        dict.fromkeys(('Pass', 'Warning', 'Error', 'JSON'), 0))

    try:
        # animals first, then samples, both sorted by id
        for model_class in (Animal, Sample):
            records = model_class.objects.filter(
                name__submission=submission_obj).order_by('id')

            for record in records:
                self.validate_model(record, submission_statuses)

    # TODO: errors in validation should raise custom exception
    except json.decoder.JSONDecodeError as exc:
        return self.temporary_error_report(exc, submission_obj)

    except Exception as exc:
        raise self.retry(exc=exc)

    # test for keys in submission_statuses
    observed = sorted(submission_statuses)

    # if status names change in IMAGE-ValidationTool, the checks below
    # aren't valid anymore: mark the submission and raise an exception
    if observed != ['Error', 'JSON', 'Pass', 'Warning']:
        message = "Error in statuses for submission %s: %s" % (
            submission_obj, observed)

        logger.error(message)

        # this is not related to user data: ERROR is passed as status
        self.submission_fail(submission_obj, message, status=ERROR)

        # raise an exception since is an InjectTool issue
        raise ValidationError(message)

    if self.has_errors_in_json(submission_statuses):
        # errors in JSON are an InjectTool problem: the submission is
        # marked with the submission_fail default status
        self.submission_fail(submission_obj, "Wrong JSON structure")

        logger.warning(
            "Wrong JSON structure for submission %s" % (submission_obj))

    elif self.has_errors_in_rules(submission_statuses):
        # errors in metadata: the user needs to revise data
        self.submission_fail(
            submission_obj,
            "Error in metadata. Need revisions before submit")

        logger.warning(
            "Error in metadata for submission %s" % (submission_obj))

    else:
        # data can be submitted, with or without warnings
        if self.has_warnings_in_rules(submission_statuses):
            user_message = "Submission validated with some warnings"
            log_template = "Submission %s validated with some warning"

        else:
            user_message = "Submission validated with success"
            log_template = "Submission %s validated with success"

        submission_obj.status = READY
        submission_obj.message = user_message
        submission_obj.save()

        # send message with channel
        self.send_message(READY, submission_obj)

        logger.info(log_template % (submission_obj))

    # every outcome logs the collected statuses
    logger.debug("Results for submission %s: %s" % (
        submission_id, submission_statuses))

    logger.info("Validate Submission completed")

    return "success"
316
|
|
|
|
317
|
|
|
def validate_model(self, model, submission_statuses):
    """
    Validate a single object, first for its JSON structure and then
    against the ruleset, tracking results in submission_statuses

    Args:
        model: an object providing to_biosample() (animals and samples
        are passed by run())
        submission_statuses (collections.Counter): the overall counter
        of validation statuses
    """

    logger.debug("Validating %s" % (model))

    # get data in biosample format
    biosample_record = model.to_biosample()

    # the structure check takes a list as input
    structure_errors = self.ruleset.check_usi_structure([biosample_record])
    json_errors = len(structure_errors)

    if json_errors > 0:
        # JSON isn't valid: this is not an error on user's data but on
        # InjectTool itself. It makes no sense to validate against rules
        submission_statuses.update(JSON=json_errors)
        self.mark_model(model, structure_errors, NEED_REVISION)

    else:
        # no check_duplicates: it checks against alias (that is a pk)
        # HINT: improve check_duplicates or implement database constraints

        # check against image metadata and track the outcome
        ruleset_result = self.ruleset.validate(biosample_record)
        self.update_statuses(submission_statuses, model, ruleset_result)
346
|
|
|
|
347
|
|
|
# inspired from validation.deal_with_validation_results |
348
|
|
|
def update_statuses(self, submission_statuses, model, result):
    """
    Mark a model relying on its validation result and count the overall
    status in submission_statuses

    Args:
        submission_statuses (collections.Counter): the overall counter
        of validation statuses
        model: the validated object
        result: a validation result providing get_overall_status()
    """

    # get overall status (ie Pass, Error)
    overall = result.get_overall_status()

    # a model is valid even if it has some warnings
    model_status = (
        READY if overall in ("Pass", "Warning") else NEED_REVISION)
    self.mark_model(model, result, model_status)

    # one more occurrence of this status
    submission_statuses[overall] += 1
361
|
|
|
|
362
|
|
|
def has_errors_in_rules(self, submission_statuses):
    """Return True if the "Error" count is different from 0

    Args:
        submission_statuses (collections.Counter): the overall counter
        of validation statuses
    """

    return submission_statuses["Error"] != 0
369
|
|
|
|
370
|
|
|
def has_warnings_in_rules(self, submission_statuses):
    """Return True if the "Warning" count is different from 0

    Args:
        submission_statuses (collections.Counter): the overall counter
        of validation statuses
    """

    return submission_statuses["Warning"] != 0
377
|
|
|
|
378
|
|
|
def has_errors_in_json(self, submission_statuses):
    """Return True if the "JSON" count is greater than 0"""

    json_errors = submission_statuses["JSON"]
    return json_errors > 0
382
|
|
|
|
383
|
|
|
def mark_model(self, model, result, status):
    """
    Store validation messages for a model and update the status of its
    name

    Args:
        model: the validated object (its ``name`` attribute is updated)
        result: a list of messages (wrong JSON structure) or an object
        providing get_messages() and get_overall_status()
        status (int): the status to set, a key of key2status
    """

    # a plain list comes from the JSON structure check
    if isinstance(result, list):
        messages, overall_status = result, "Wrong JSON structure"

    else:
        messages = result.get_messages()
        overall_status = result.get_overall_status()

    name_obj = model.name

    # get a validation result model or create a new one
    try:
        validationresult = name_obj.validationresult

    except AttributeError:
        validationresult = ValidationResultModel()
        name_obj.validationresult = validationresult

    # setting validation tool results and save
    validationresult.messages = messages
    validationresult.status = overall_status
    validationresult.save()

    # don't update the status of COMPLETED names when validation is OK
    if name_obj.status == COMPLETED and status == READY:
        logger.debug(
            "Ignoring %s: status was '%s' and validation is OK" % (
                model, key2status[name_obj.status]))
        return

    logger.debug(
        "Marking %s with '%s' status (%s)" % (
            model, key2status[status], messages))

    # update name status and save
    name_obj.status = status
    name_obj.save()
422
|
|
|
|
423
|
|
|
def submission_fail(self, submission_obj, message, status=NEED_REVISION):
    """
    Mark a submission as failed and notify it through websocket

    Args:
        submission_obj (image_app.models.Submission): an UID submission
        object
        message (str): the reason of the failure
        status (int): the status to set (NEED_REVISION when omitted)
    """

    failure_message = "Validation got errors: %s" % (message)

    submission_obj.status = status
    submission_obj.message = failure_message
    submission_obj.save()

    # send message with channel
    self.send_message(status, submission_obj)
432
|
|
|
|
433
|
|
|
|
434
|
|
|
# class based tasks need to be registered explicitly, see
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
celery_app.tasks.register(ValidateTask)
437
|
|
|
|