Passed
Pull Request — master (#35)
by Paolo
02:58
created

validation.helpers.create_validation_summary_object()   A

Complexity

Conditions 1

Size

Total Lines 12
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 12
rs 10
c 0
b 0
f 0
cc 1
nop 3
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Tue Feb 19 16:15:35 2019
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import json
10
import logging
11
import requests
12
13
from django.core.exceptions import ObjectDoesNotExist
14
15
from image_validation import validation, ValidationResult
16
from image_validation.static_parameters import ruleset_filename as \
17
    IMAGE_RULESET
18
19
from common.constants import BIOSAMPLE_URL
20
from biosample.helpers import parse_image_alias, get_model_object
21
from validation.models import ValidationSummary
22
23
# Get an instance of a logger
24
logger = logging.getLogger(__name__)
25
26
27
# a class to deal with temporary issues from EBI servers
28
class OntologyCacheError(Exception):
    """Signal a temporary issue with EBI servers or with
    image_validation.use_ontology.OntologyCache objects"""
31
32
33
# a class to deal with errors in ruleset (that are not user errors but
34
# errors within InjectTool and image_validation library)
35
class RulesetError(Exception):
    """Identifies errors in ruleset"""
37
38
39
class MetaDataValidation():
    """A class to deal with IMAGE-ValidationTool ruleset objects"""

    # the ruleset object loaded by read_in_ruleset(); None until then
    ruleset = None

    # seconds to wait for a BioSamples reply: without a timeout requests.get
    # can block forever on a stalled connection
    biosample_timeout = 30

    def __init__(self, ruleset_filename=IMAGE_RULESET):
        """
        Load a ruleset and check it

        Args:
            ruleset_filename (str): the ruleset file to read (default: the
                one provided by image_validation.static_parameters)

        Raises:
            OntologyCacheError: if the ruleset can't be read (see
                read_in_ruleset)
            RulesetError: if check_ruleset reports any error
        """

        self.read_in_ruleset(ruleset_filename)

        # check validation rules
        ruleset_errors = self.check_ruleset()

        if ruleset_errors:
            raise RulesetError(
                "Error with ruleset: %s" % "; ".join(ruleset_errors))

    def read_in_ruleset(self, ruleset_filename):
        """
        Read ruleset from file and track it in self.ruleset

        Args:
            ruleset_filename (str): the ruleset file to read

        Raises:
            OntologyCacheError: when image_validation raises a
                json.JSONDecodeError while reading the ruleset
        """

        try:
            self.ruleset = validation.read_in_ruleset(ruleset_filename)

        except json.JSONDecodeError as message:
            # NOTE(review): presumably this happens when the ontology
            # service replies with a non JSON payload - confirm against
            # image_validation
            logger.error(
                "Error with 'https://www.ebi.ac.uk/ols/api/': %s", message)

            # keep the original exception as the explicit cause
            raise OntologyCacheError(
                "Issue with 'https://www.ebi.ac.uk/ols/api/'") from message

    def check_usi_structure(self, record: object) -> object:
        """Check data against USI rules"""

        # this function need its input as a list
        return validation.check_usi_structure(record)

    def check_ruleset(self):
        """Check ruleset"""

        return validation.check_ruleset(self.ruleset)

    def check_duplicates(self, record):
        """Check duplicates in data"""

        return validation.check_duplicates(record)

    def check_biosample_id_target(
            self, biosample_id, record_id, record_result):

        """
        Check if a target biosample_id exists or not. If it is present, ok.
        Otherwise a ValidationResultColumn with a warning is added to
        record_result

        Args:
            biosample_id (str): the desired biosample id
            record_id (str): is the name of the object in the original data
                source
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            ValidationResult.ValidationResultRecord: an updated
            image_validation object

        Raises:
            requests.exceptions.RequestException: network errors (timeout
                included) are not handled here and reach the caller
        """

        url = f"{BIOSAMPLE_URL}/{biosample_id}"
        response = requests.get(url, timeout=self.biosample_timeout)

        # every status different from 200 is reported as a warning
        if response.status_code != 200:
            record_result.add_validation_result_column(
                ValidationResult.ValidationResultColumn(
                    "Warning",
                    f"Fail to retrieve record {biosample_id} from "
                    f"BioSamples as required in the relationship",
                    record_id,
                    'sampleRelationships'))

        return record_result

    def check_relationship(self, record, record_result):
        """
        Check relationship for an Animal/Sample record and return a list
        of dictionaries (to_biosample() objects) of related object

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            list: a list of dictionaries of related objects
            ValidationResult.ValidationResultRecord: an updated
            image_validation object
        """

        # get relationship from a to_biosample() dictionary object
        relationships = record.get('sampleRelationships', [])

        # as described in image_validation.Submission.Submission
        # same as record["title"], is the original name of the object id DS
        record_id = record['attributes']["Data source ID"][0]['value']

        # related objects (from UID goes here)
        related = []

        for relationship in relationships:
            if 'accession' in relationship:
                target = relationship['accession']

                # check biosample target and update record_result if necessary
                record_result = self.check_biosample_id_target(
                    target, record_id, record_result)

            # HINT: should I check aliases? they came from PK and are related
            # in the same submission. I can't have a sample without an animal
            # since animal is a foreign key of sample (which doesn't tolerate
            # NULL). Even mother and father are related through keys. If
            # missing, no information about mother and father could be
            # determined
            else:
                # could be a parent relationship for an animal, or the animal
                # where this sample comes from
                target = relationship['alias']

                # test for object existence in db. Use biosample.helpers
                # method to derive a model object from database, then get
                # its related data
                try:
                    material_obj = get_model_object(
                        *parse_image_alias(target))
                    related.append(material_obj.to_biosample())

                except ObjectDoesNotExist:
                    record_result.add_validation_result_column(
                        ValidationResult.ValidationResultColumn(
                            "Error",
                            f"Could not locate the referenced record {target}",
                            record_id, 'sampleRelationships'))

        return related, record_result

    def validate(self, record):
        """
        Check attributes for record by calling image_validation methods

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object

        Returns:
            ValidationResult.ValidationResultRecord: an image_validation
            object
        """

        # this validated in general way
        result = self.ruleset.validate(record)

        # as defined in image_validation.Submission, I will skip further
        # validation check
        if result.get_overall_status() == "Error":
            logger.warning(
                "record: %s has errors. Skipping context validation",
                record["title"])

        else:
            # context validation evaluate relationships. Get them
            related, result = self.check_relationship(record, result)

            # this validate context (attributes that depends on another one)
            result = validation.context_validation(record, result, related)

        return result
207
208
209
def construct_validation_message(submission):
    """
    Function will return dict with all the data required to construct
    validation message

    Args:
        submission (image_app.models.Submission) : submission to get data from

    Returns:
        dict: dictionary with all required data for validation message, or
        None if ObjectDoesNotExist is raised while collecting the data (ie
        the 'animal' or the 'sample' ValidationSummary is missing)
    """
    try:
        animal_summary = ValidationSummary.objects.get(
            submission=submission, type='animal')
        sample_summary = ValidationSummary.objects.get(
            submission=submission, type='sample')

        return {
            # Number of animal and samples
            'animals': animal_summary.all_count,
            'samples': sample_summary.all_count,

            # Number of unknown validations
            'animal_unkn': animal_summary.get_unknown_count(),
            'sample_unkn': sample_summary.get_unknown_count(),

            # Number of problem validations
            'animal_issues': animal_summary.issues_count,
            'sample_issues': sample_summary.issues_count,
        }

    except ObjectDoesNotExist:
        return None
249