1
|
|
|
#!/usr/bin/env python3 |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
""" |
4
|
|
|
Created on Tue Feb 19 16:15:35 2019 |
5
|
|
|
|
6
|
|
|
@author: Paolo Cozzi <[email protected]> |
7
|
|
|
""" |
8
|
|
|
|
9
|
|
|
import json |
10
|
|
|
import logging |
11
|
|
|
import requests |
12
|
|
|
|
13
|
|
|
from django.core.exceptions import ObjectDoesNotExist |
14
|
|
|
|
15
|
|
|
from image_validation import validation, ValidationResult |
16
|
|
|
from image_validation.static_parameters import ruleset_filename as \ |
17
|
|
|
IMAGE_RULESET |
18
|
|
|
|
19
|
|
|
from common.constants import BIOSAMPLE_URL |
20
|
|
|
from biosample.helpers import parse_image_alias, get_model_object |
21
|
|
|
from validation.models import ValidationSummary |
22
|
|
|
|
23
|
|
|
# Get an instance of a logger |
24
|
|
|
logger = logging.getLogger(__name__) |
25
|
|
|
|
26
|
|
|
|
27
|
|
|
# a class to deal with temporary issues from EBI servers |
28
|
|
|
class OntologyCacheError(Exception):
    """Signal a temporary problem with EBI servers.

    Raised in place of low level errors coming from
    image_validation.use_ontology.OntologyCache objects.
    """
31
|
|
|
|
32
|
|
|
|
33
|
|
|
# a class to deal with errors in ruleset (that are not user errors but |
34
|
|
|
# errors within InjectTool and image_validation library) |
35
|
|
|
class RulesetError(Exception):
    """Signal an error found in the validation ruleset itself."""
37
|
|
|
|
38
|
|
|
|
39
|
|
|
class MetaDataValidation:
    """A class to deal with IMAGE-ValidationTool ruleset objects

    On construction the ruleset is read through ``image_validation`` and
    checked; the loaded ruleset is then used by :meth:`validate`.
    """

    # the object returned by validation.read_in_ruleset(), set by
    # read_in_ruleset()
    ruleset = None

    # seconds to wait for BioSamples in check_biosample_id_target();
    # without a timeout requests.get() may block forever
    request_timeout = 30

    def __init__(self, ruleset_filename=IMAGE_RULESET):
        """
        Read the ruleset and check it

        Args:
            ruleset_filename: the ruleset file passed to
                validation.read_in_ruleset (default: the
                image_validation.static_parameters.ruleset_filename)

        Raises:
            OntologyCacheError: if the ruleset can't be read (see
                read_in_ruleset)
            RulesetError: if validation.check_ruleset reports errors
        """
        self.read_in_ruleset(ruleset_filename)

        # check validation rules
        ruleset_errors = self.check_ruleset()

        if ruleset_errors:
            raise RulesetError(
                "Error with ruleset: %s" % "; ".join(ruleset_errors))

    def read_in_ruleset(self, ruleset_filename):
        """
        Read the ruleset and store it in ``self.ruleset``

        Args:
            ruleset_filename: the ruleset file passed to
                validation.read_in_ruleset

        Raises:
            OntologyCacheError: if a json.JSONDecodeError is raised while
                reading the ruleset. The original error is logged and
                chained as the cause
        """
        try:
            self.ruleset = validation.read_in_ruleset(ruleset_filename)

        except json.JSONDecodeError as message:
            logger.error(
                "Error with 'https://www.ebi.ac.uk/ols/api/': %s",
                str(message))

            # keep the decode error as explicit cause of the new exception
            raise OntologyCacheError(
                "Issue with 'https://www.ebi.ac.uk/ols/api/'") from message

    def check_usi_structure(self, record: object) -> object:
        """Check data against USI rules"""

        # this function need its input as a list
        return validation.check_usi_structure(record)

    def check_ruleset(self):
        """Check ruleset: return what validation.check_ruleset reports for
        the loaded ruleset"""

        return validation.check_ruleset(self.ruleset)

    def check_duplicates(self, record):
        """Check duplicates in data"""

        return validation.check_duplicates(record)

    def check_biosample_id_target(
            self, biosample_id, record_id, record_result):

        """
        Check if a target biosample_id exists or not. If it is present, ok.
        Otherwise a ValidationResultColumn with a warning is added to
        record_result

        Args:
            biosample_id (str): the desired biosample id
            record_id (str): is the name of the object in the original data
                source
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            ValidationResult.ValidationResultRecord: an updated
            image_validation object

        Raises:
            requests.exceptions.RequestException: network errors (timeouts
                included) are not handled here and are propagated to the
                caller
        """

        url = f"{BIOSAMPLE_URL}/{biosample_id}"

        # never wait forever for a BioSamples reply
        response = requests.get(url, timeout=self.request_timeout)
        status = response.status_code

        # every status different from 200 is reported as a warning
        if status != 200:
            record_result.add_validation_result_column(
                ValidationResult.ValidationResultColumn(
                    "Warning",
                    f"Fail to retrieve record {biosample_id} from "
                    f"BioSamples as required in the relationship",
                    record_id,
                    'sampleRelationships'))

        return record_result

    def check_relationship(self, record, record_result):
        """
        Check relationship for an Animal/Sample record and return a list
        of dictionaries (to_biosample() objects) of related object

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            list: a list of dictionaries of related objects
            ValidationResult.ValidationResultRecord: an updated
            image_validation object
        """

        # get relationship from a to_biosample() dictionary object
        relationships = record.get('sampleRelationships', [])

        # as described in image_validation.Submission.Submission
        # same as record["title"], is the original name of the object id DS
        record_id = record['attributes']["Data source ID"][0]['value']

        # related objects (from UID goes here)
        related = []

        for relationship in relationships:
            if 'accession' in relationship:
                target = relationship['accession']

                # check biosample target and update record_result if necessary
                record_result = self.check_biosample_id_target(
                    target, record_id, record_result)

            # HINT: should I check aliases? they came from PK and are related
            # in the same submission. I can't have a sample without an animal
            # since animal is a foreign key of sample (which doesn't tolerate
            # NULL). Even mother and father are related through keys. If
            # missing, no information about mother and father could be
            # determined
            else:
                # could be a parent relationship for an animal, or the animal
                # where this sample comes from
                target = relationship['alias']

                # test for object existence in db. Use biosample.helpers
                # method to derive a model object from database, then get
                # its related data
                try:
                    material_obj = get_model_object(
                        *parse_image_alias(target))
                    related.append(material_obj.to_biosample())

                except ObjectDoesNotExist:
                    record_result.add_validation_result_column(
                        ValidationResult.ValidationResultColumn(
                            "Error",
                            f"Could not locate the referenced record {target}",
                            record_id, 'sampleRelationships'))

        return related, record_result

    def validate(self, record):
        """
        Check attributes for record by calling image_validation methods

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object

        Returns:
            ValidationResult.ValidationResultRecord: an image_validation
            object
        """

        # this validated in general way
        result = self.ruleset.validate(record)

        # as defined in image_validation.Submission, I will skip further
        # validation check
        if result.get_overall_status() == "Error":
            logger.warning(
                "record: %s has errors. Skipping context validation",
                record["title"])

        else:
            # context validation evaluate relationships. Get them
            related, result = self.check_relationship(record, result)

            # this validate context (attributes that depends on another one)
            result = validation.context_validation(record, result, related)

        return result
207
|
|
|
|
208
|
|
|
|
209
|
|
|
def construct_validation_message(submission):
    """
    Collect the figures needed to compose a validation message

    Args:
        submission (image_app.models.Submission): submission to get data from

    Returns:
        dict: total, unknown and issues counts for animals and samples, or
        None when ObjectDoesNotExist is raised while collecting them
    """
    try:
        animal_summary = ValidationSummary.objects.get(
            submission=submission, type='animal')
        sample_summary = ValidationSummary.objects.get(
            submission=submission, type='sample')

        return {
            # Number of animal and samples
            'animals': animal_summary.all_count,
            'samples': sample_summary.all_count,

            # Number of unknown validations
            'animal_unkn': animal_summary.get_unknown_count(),
            'sample_unkn': sample_summary.get_unknown_count(),

            # Number of problem validations
            'animal_issues': animal_summary.issues_count,
            'sample_issues': sample_summary.issues_count,
        }

    except ObjectDoesNotExist:
        return None
249
|
|
|
|