Passed
Pull Request — master (#30)
by Paolo
01:17
created

validation.helpers.ValidationSummary.parse3()   A

Complexity

Conditions 2

Size

Total Lines 15
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 15
rs 9.9
c 0
b 0
f 0
cc 2
nop 3
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Tue Feb 19 16:15:35 2019
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import json
10
import logging
11
import requests
12
13
from django.db.models import Q
14
from django.core.exceptions import ObjectDoesNotExist
15
16
from image_validation import validation, ValidationResult
17
from image_validation.static_parameters import ruleset_filename as \
18
    IMAGE_RULESET
19
20
from common.constants import BIOSAMPLE_URL
21
from image_app.models import Name
22
from biosample.helpers import parse_image_alias, get_model_object
23
24
# Get an instance of a logger
25
logger = logging.getLogger(__name__)
26
27
28
# a class to deal with temporary issues from EBI servers
29
class OntologyCacheError(Exception):
    """Signal a temporary problem with the EBI servers.

    Raised when EBI services or the
    image_validation.use_ontology.OntologyCache objects relying on them
    misbehave, as opposed to a problem in the user's data.
    """
32
33
34
# a class to deal with errors in ruleset (that are not user errors but
35
# errors within InjectTool and image_validation library)
36
class RulesetError(Exception):
    """Identifies errors in the ruleset.

    These are not user errors: they point to a problem within InjectTool
    or the image_validation library.
    """
38
39
40
class MetaDataValidation:
    """A class to deal with IMAGE-ValidationTool ruleset objects.

    The ruleset is loaded and checked once at construction time; the
    instance can then be used to validate any number of
    ``to_biosample()`` dictionaries.
    """

    # class-level default; replaced on the instance by read_in_ruleset()
    ruleset = None

    # seconds to wait for BioSamples before giving up. Without a timeout
    # ``requests`` waits forever on an unresponsive server. Override on a
    # subclass or instance if a different limit is needed
    biosample_timeout = 30

    def __init__(self, ruleset_filename=IMAGE_RULESET):
        """Load the ruleset and verify it is consistent.

        Args:
            ruleset_filename (str): path of the ruleset file (defaults to
                the one provided by image_validation.static_parameters)

        Raises:
            OntologyCacheError: if reading the ruleset fails with a JSON
                decoding error (see :py:meth:`read_in_ruleset`)
            RulesetError: if :py:meth:`check_ruleset` reports any issue
        """
        self.read_in_ruleset(ruleset_filename)

        # check validation rules
        ruleset_errors = self.check_ruleset()

        if ruleset_errors:
            raise RulesetError(
                "Error with ruleset: %s" % "; ".join(ruleset_errors))

    def read_in_ruleset(self, ruleset_filename):
        """Read the ruleset from file and store it in ``self.ruleset``.

        Args:
            ruleset_filename (str): path of the ruleset file

        Raises:
            OntologyCacheError: if image_validation raises a
                ``json.JSONDecodeError`` while reading the ruleset
        """
        try:
            self.ruleset = validation.read_in_ruleset(ruleset_filename)

        except json.JSONDecodeError as message:
            # NOTE(review): a decoding failure is attributed to the OLS
            # service, presumably because the ruleset loader queries it
            # and got a malformed reply -- confirm against image_validation
            logger.error(
                "Error with 'https://www.ebi.ac.uk/ols/api/': %s", message)

            # keep the original exception as the explicit cause
            raise OntologyCacheError(
                "Issue with 'https://www.ebi.ac.uk/ols/api/'") from message

    def check_usi_structure(self, record: object) -> object:
        """Check data against USI rules.

        Args:
            record (list): the data to check; the underlying
                image_validation function needs its input as a list

        Returns:
            whatever ``validation.check_usi_structure`` returns
        """
        return validation.check_usi_structure(record)

    def check_ruleset(self):
        """Check the loaded ruleset.

        Returns:
            whatever ``validation.check_ruleset`` returns for
            ``self.ruleset``; :py:meth:`__init__` treats it as a
            collection of error strings
        """
        return validation.check_ruleset(self.ruleset)

    def check_duplicates(self, record):
        """Check duplicates in data.

        Args:
            record: the data forwarded to ``validation.check_duplicates``

        Returns:
            whatever ``validation.check_duplicates`` returns
        """
        return validation.check_duplicates(record)

    def check_biosample_id_target(
            self, biosample_id, record_id, record_result):
        """
        Check if a target biosample_id exists or not. If it is present, ok.
        Otherwise a ValidationResultColumn with a warning is added to
        record_result

        Args:
            biosample_id (str): the desired biosample id
            record_id (str): is the name of the object in the original data
                source
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            ValidationResult.ValidationResultRecord: an updated
            image_validation object

        Raises:
            requests.exceptions.RequestException: if BioSamples can't be
                reached, or doesn't answer within ``biosample_timeout``
                seconds
        """

        url = f"{BIOSAMPLE_URL}/{biosample_id}"
        response = requests.get(url, timeout=self.biosample_timeout)

        # any status different from 200 is reported as a warning
        if response.status_code != 200:
            record_result.add_validation_result_column(
                ValidationResult.ValidationResultColumn(
                    "Warning",
                    f"Fail to retrieve record {biosample_id} from "
                    f"BioSamples as required in the relationship",
                    record_id,
                    'sampleRelationships'))

        return record_result

    def check_relationship(self, record, record_result):
        """
        Check relationship for an Animal/Sample record and return a list
        of dictionaries (to_biosample() objects) of related object

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            tuple: a two-element tuple made of

            * list: a list of dictionaries of related objects
            * ValidationResult.ValidationResultRecord: an updated
              image_validation object

        Raises:
            KeyError: if a relationship has neither an ``accession`` nor an
                ``alias`` key
        """

        # get relationship from a to_biosample() dictionary object
        relationships = record.get('sampleRelationships', [])

        # as described in image_validation.Submission.Submission
        # same as record["title"], is the original name of the object id DS
        record_id = record['attributes']["Data source ID"][0]['value']

        # related objects (from UID goes here)
        related = []

        for relationship in relationships:
            if 'accession' in relationship:
                target = relationship['accession']

                # check biosample target and update record_result if necessary
                record_result = self.check_biosample_id_target(
                    target, record_id, record_result)

            # HINT: should I check aliases? they came from PK and are related
            # in the same submission. I can't have a sample without an animal
            # since animal is a foreign key of sample (which doesn't tolerate
            # NULL). Even mother and father are related through keys. If
            # missing, no information about mother and father could be
            # determined
            else:
                # could be a parent relationship for an animal, or the animal
                # where this sample comes from
                target = relationship['alias']

                # test for object existence in db. Use biosample.helpers
                # method to derive a model object from database, then get
                # its related data
                try:
                    material_obj = get_model_object(
                        *parse_image_alias(target))
                    related.append(material_obj.to_biosample())

                except ObjectDoesNotExist:
                    record_result.add_validation_result_column(
                        ValidationResult.ValidationResultColumn(
                            "Error",
                            f"Could not locate the referenced record {target}",
                            record_id, 'sampleRelationships'))

        return related, record_result

    def validate(self, record):
        """
        Check attributes for record by calling image_validation methods

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object

        Returns:
            ValidationResult.ValidationResultRecord: an image_validation
            object
        """

        # this validates in a general way
        result = self.ruleset.validate(record)

        # as defined in image_validation.Submission, I will skip further
        # validation check
        if result.get_overall_status() == "Error":
            logger.warning(
                "record: %s has errors. Skipping context validation",
                record["title"])

        else:
            # context validation evaluate relationships. Get them
            related, result = self.check_relationship(record, result)

            # this validate context (attributes that depends on another one)
            result = validation.context_validation(record, result, related)

        return result
208
209
210
class ValidationSummary:
211
    """A class to deal with error messages and submission"""
212
213
    def __init__(self, submission_obj):
214
        """Istantiate a report object from Submission"""
215
216
        # get all names belonging to this submission
217
        self.names = Name.objects.select_related(
218
                "validationresult",
219
                "animal",
220
                "sample").filter(
221
                    submission=submission_obj)
222
223
        # here I will have 5 queries, each one executed when calling count
224
        # or when iterating queryset
225
226
        # count animal and samples
227
        self.n_animals = self.names.filter(animal__isnull=False).count()
228
        self.n_samples = self.names.filter(sample__isnull=False).count()
229
230
        logger.debug("Got %s animal and %s samples in total" % (
231
            self.n_animals, self.n_samples))
232
233
        # count animal and samples with unknown validation
234
        self.n_animal_unknown = self.names.filter(
235
            animal__isnull=False, validationresult__isnull=True).count()
236
        self.n_sample_unknown = self.names.filter(
237
            sample__isnull=False, validationresult__isnull=True).count()
238
239
        logger.debug("Got %s animal and %s samples with unknown validation" % (
240
            self.n_animal_unknown, self.n_sample_unknown))
241
242
        # filter names which have errors
243
        self.errors = self.names.exclude(
244
            Q(validationresult__status="Pass") |
245
            Q(validationresult__isnull=True)
246
        )
247
248
        # count animal and samples with issues
249
        self.n_animal_issues = self.errors.filter(animal__isnull=False).count()
250
        self.n_sample_issues = self.errors.filter(sample__isnull=False).count()
251
252
        logger.debug("Got %s animal and %s samples with issues" % (
253
            self.n_animal_issues, self.n_sample_issues))
254