validation.helpers.ValidationSummary.__update_report() - Code Metrics - Inspection of "Validation summary" - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#30)

by Paolo

created 2019-06-12 16:31 UTC

validation.helpers.ValidationSummary.__update_report() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines	7
Code Lines	5

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	5
dl	0
loc	7
rs	10
c	0
b	0
f	0
cc	2
nop	3

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 19 16:15:35 2019

@author: Paolo Cozzi <[email protected]>
"""

import json
import logging
import requests

from django.db.models import Q
from django.core.exceptions import ObjectDoesNotExist

from image_validation import validation, ValidationResult
from image_validation.static_parameters import ruleset_filename as \
    IMAGE_RULESET

from common.constants import BIOSAMPLE_URL
from image_app.models import Name
from biosample.helpers import parse_image_alias, get_model_object

# Get an instance of a logger
logger = logging.getLogger(__name__)


# a class to deal with temporary issues from EBI servers
class OntologyCacheError(Exception):
    """Signal a temporary problem with the EBI servers.

    Raised when image_validation.use_ontology.OntologyCache objects can't
    be used because of an issue on the EBI side.
    """


# a class to deal with errors in ruleset (that are not user errors but
# errors within InjectTool and image_validation library)
class RulesetError(Exception):
    """Signal an error found in the validation ruleset itself."""


class MetaDataValidation:
    """A class to deal with IMAGE-ValidationTool ruleset objects.

    The ruleset is read and checked once, when the object is created. The
    remaining methods delegate to ``image_validation.validation`` functions
    and add the relationship checks performed before context validation.
    """

    # ruleset object returned by validation.read_in_ruleset()
    ruleset = None

    # seconds to wait for a BioSamples response. requests never times out
    # unless a timeout is given, so without it a stalled server would block
    # the validation forever
    biosample_timeout = 30

    def __init__(self, ruleset_filename=IMAGE_RULESET):
        """Read the ruleset from ``ruleset_filename`` and check it.

        Args:
            ruleset_filename (str): path of the ruleset file (defaults to
                the one shipped with image_validation)

        Raises:
            OntologyCacheError: if the ruleset can't be read (see
                :py:meth:`read_in_ruleset`)
            RulesetError: if ``check_ruleset`` reports any error
        """

        self.read_in_ruleset(ruleset_filename)

        # check validation rules
        ruleset_errors = self.check_ruleset()

        if ruleset_errors:
            raise RulesetError(
                "Error with ruleset: %s" % "; ".join(ruleset_errors))

    def read_in_ruleset(self, ruleset_filename):
        """Read the ruleset file and store it in ``self.ruleset``.

        Args:
            ruleset_filename (str): path of the ruleset file

        Raises:
            OntologyCacheError: if a ``json.JSONDecodeError`` is raised while
                reading the ruleset; it is reported as an issue with the
                EBI OLS API
        """

        try:
            self.ruleset = validation.read_in_ruleset(ruleset_filename)

        except json.JSONDecodeError as message:
            logger.error(
                "Error with 'https://www.ebi.ac.uk/ols/api/': %s", message)

            # keep the decoding error as the explicit cause of this one
            raise OntologyCacheError(
                "Issue with 'https://www.ebi.ac.uk/ols/api/'") from message

    def check_usi_structure(self, record: object) -> object:
        """Check data against USI rules.

        Args:
            record: the data to check; the underlying function needs its
                input as a list

        Returns:
            whatever ``validation.check_usi_structure`` returns
        """

        return validation.check_usi_structure(record)

    def check_ruleset(self):
        """Check the loaded ruleset.

        Returns:
            the result of ``validation.check_ruleset`` on ``self.ruleset``
            (joined as a list of strings by ``__init__`` when not empty)
        """

        return validation.check_ruleset(self.ruleset)

    def check_duplicates(self, record):
        """Check duplicates in data.

        Args:
            record: the data to check

        Returns:
            whatever ``validation.check_duplicates`` returns
        """

        return validation.check_duplicates(record)

    def check_biosample_id_target(
            self, biosample_id, record_id, record_result):

        """
        Check if a target biosample_id exists or not. If it is present, ok.
        Otherwise a ValidationResultColumn with a warning is added to
        record_result

        Args:
            biosample_id (str): the desired biosample id
            record_id (str): is the name of the object in the original data
                source
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            ValidationResult.ValidationResultRecord: an updated
            image_validation object

        Raises:
            requests.exceptions.RequestException: network errors, including
                a timeout after ``biosample_timeout`` seconds, are not
                handled here
        """

        url = f"{BIOSAMPLE_URL}/{biosample_id}"
        response = requests.get(url, timeout=self.biosample_timeout)

        # any status other than 200 is reported as a failed retrieval
        if response.status_code != 200:
            record_result.add_validation_result_column(
                ValidationResult.ValidationResultColumn(
                    "Warning",
                    f"Fail to retrieve record {biosample_id} from "
                    f"BioSamples as required in the relationship",
                    record_id,
                    'sampleRelationships'))

        return record_result

    def check_relationship(self, record, record_result):
        """
        Check relationship for an Animal/Sample record and return a list
        of dictionaries (to_biosample() objects) of related object

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            list: a list of dictionaries of related objects
            ValidationResult.ValidationResultRecord: an updated
            image_validation object
        """

        # get relationship from a to_biosample() dictionary object
        relationships = record.get('sampleRelationships', [])

        # as described in image_validation.Submission.Submission
        # same as record["title"], is the original name of the object id DS
        record_id = record['attributes']["Data source ID"][0]['value']

        # related objects (from UID goes here)
        related = []

        for relationship in relationships:
            if 'accession' in relationship:
                # the target is a biosample id: check it and update
                # record_result if necessary
                record_result = self.check_biosample_id_target(
                    relationship['accession'], record_id, record_result)
                continue

            # HINT: should I check aliases? they came from PK and are related
            # in the same submission. I can't have a sample without an animal
            # since animal is a foreign key of sample (which doesn't tolerate
            # NULL). Even mother and father are related through keys. If
            # missing, no information about mother and father could be
            # determined

            # could be a parent relationship for an animal, or the animal
            # where this sample comes from
            target = relationship['alias']

            # test for object existence in db. Use biosample.helpers
            # method to derive a model object from database, then get
            # its related data
            try:
                material_obj = get_model_object(
                    *parse_image_alias(target))
                related.append(material_obj.to_biosample())

            except ObjectDoesNotExist:
                record_result.add_validation_result_column(
                    ValidationResult.ValidationResultColumn(
                        "Error",
                        f"Could not locate the referenced record {target}",
                        record_id, 'sampleRelationships'))

        return related, record_result

    def validate(self, record):
        """
        Check attributes for record by calling image_validation methods

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object

        Returns:
            ValidationResult.ValidationResultRecord: an image_validation
            object
        """

        # this validated in general way
        result = self.ruleset.validate(record)

        # as defined in image_validation.Submission, I will skip further
        # validation check
        if result.get_overall_status() == "Error":
            logger.warning(
                "record: %s has errors. Skipping context validation",
                record["title"])

            return result

        # context validation evaluate relationships. Get them
        related, result = self.check_relationship(record, result)

        # this validate context (attributes that depends on another one)
        return validation.context_validation(record, result, related)


class ValidationSummary:
    """A class to deal with error messages and submission.

    After initialization the object exposes:

    * ``names``: the ``Name`` queryset of the submission
    * ``n_animals`` / ``n_samples``: names having an animal / a sample
    * ``n_animal_unknown`` / ``n_sample_unknown``: the same, but without a
      validation result
    * ``errors``: names having a validation result whose status is not
      "Pass"
    * ``n_animal_issues`` / ``n_sample_issues``: animals / samples in
      ``errors``
    """

    def __init__(self, submission_obj):
        """Instantiate a report object from Submission.

        Args:
            submission_obj: the submission used to filter ``Name`` objects
        """

        # get all names belonging to this submission
        self.names = Name.objects.select_related(
                "validationresult",
                "animal",
                "sample").filter(
                    submission=submission_obj)

        # querysets are lazy: a query is executed for each count() call
        # below, or when a queryset is iterated

        # count animal and samples
        self.n_animals = self.names.filter(animal__isnull=False).count()
        self.n_samples = self.names.filter(sample__isnull=False).count()

        # values are passed as logger arguments, so messages are formatted
        # only when the debug level is enabled
        logger.debug(
            "Got %s animal and %s samples in total",
            self.n_animals, self.n_samples)

        # count animal and samples with unknown validation
        self.n_animal_unknown = self.names.filter(
            animal__isnull=False, validationresult__isnull=True).count()
        self.n_sample_unknown = self.names.filter(
            sample__isnull=False, validationresult__isnull=True).count()

        logger.debug(
            "Got %s animal and %s samples with unknown validation",
            self.n_animal_unknown, self.n_sample_unknown)

        # filter names which have errors: drop the ones which passed the
        # validation and the ones never validated
        self.errors = self.names.exclude(
            Q(validationresult__status="Pass") |
            Q(validationresult__isnull=True)
        )

        # count animal and samples with issues
        self.n_animal_issues = self.errors.filter(animal__isnull=False).count()
        self.n_sample_issues = self.errors.filter(sample__isnull=False).count()

        logger.debug(
            "Got %s animal and %s samples with issues",
            self.n_animal_issues, self.n_sample_issues)


1			#!/usr/bin/env python3
2			# -- coding: utf-8 --
3			"""
4			Created on Tue Feb 19 16:15:35 2019
5
6			@author: Paolo Cozzi <[email protected]>
7			"""
8
9			import json
10			import logging
11			import requests
12
13			from django.db.models import Q
14			from django.core.exceptions import ObjectDoesNotExist
15
16			from image_validation import validation, ValidationResult
17			from image_validation.static_parameters import ruleset_filename as \
18			IMAGE_RULESET
19
20			from common.constants import BIOSAMPLE_URL
21			from image_app.models import Name
22			from biosample.helpers import parse_image_alias, get_model_object
23
24			# Get an instance of a logger
25			logger = logging.getLogger(__name__)
26
27
28			# a class to deal with temporary issues from EBI servers
29			class OntologyCacheError(Exception):
30			"""Identifies temporary issues with EBI servers and
31			image_validation.use_ontology.OntologyCache objects"""
32
33
34			# a class to deal with errors in ruleset (that are not user errors but
35			# errors within InjectTool and image_validation library)
36			class RulesetError(Exception):
37			"""Indentifies errors in ruleset"""
38
39
40			class MetaDataValidation():
41			"""A class to deal with IMAGE-ValidationTool ruleset objects"""
42
43			ruleset = None
44
45			def __init__(self, ruleset_filename=IMAGE_RULESET):
46			self.read_in_ruleset(ruleset_filename)
47
48			# check validation rules
49			ruleset_errors = self.check_ruleset()
50
51			if ruleset_errors != []:
52			raise RulesetError(
53			"Error with ruleset: %s" % "; ".join(ruleset_errors))
54
55			def read_in_ruleset(self, ruleset_filename):
56			try:
57			self.ruleset = validation.read_in_ruleset(ruleset_filename)
58
59			except json.JSONDecodeError as message:
60			logger.error(
61			"Error with 'https://www.ebi.ac.uk/ols/api/': %s" % (
62			str(message)))
63
64			raise OntologyCacheError(
65			"Issue with 'https://www.ebi.ac.uk/ols/api/'")
66
67			def check_usi_structure(self, record: object) -> object:
68			"""Check data against USI rules"""
69
70			# this function need its input as a list
71			return validation.check_usi_structure(record)
72
73			def check_ruleset(self):
74			"""Check ruleset"""
75
76			return validation.check_ruleset(self.ruleset)
77
78			def check_duplicates(self, record):
79			"""Check duplicates in data"""
80
81			return validation.check_duplicates(record)
82
83			def check_biosample_id_target(
84			self, biosample_id, record_id, record_result):
85
86			"""
87			Check if a target biosample_id exists or not. If it is present, ok.
88			Otherwise a ValidationResultColumn with a warning
89
90			Args:
91			biosample_id (str): the desidered biosample id
92			record_id (str): is the name of the object in the original data
93			source
94			record_result (ValidationResult.ValidationResultRecord):
95			an image_validation result object
96
97			Returns:
98			ValidationResult.ValidationResultRecord: an updated
99			image_validation object
100			"""
101
102			url = f"{BIOSAMPLE_URL}/{biosample_id}"
103			response = requests.get(url)
104			status = response.status_code
105			if status != 200:
106			record_result.add_validation_result_column(
107			ValidationResult.ValidationResultColumn(
108			"Warning",
109			f"Fail to retrieve record {biosample_id} from "
110			f"BioSamples as required in the relationship",
111			record_id,
112			'sampleRelationships'))
113
114			return record_result
115
116			def check_relationship(self, record, record_result):
117			"""
118			Check relationship for an Animal/Sample record and return a list
119			of dictionaries (to_biosample() objects) of related object
120
121			Args:
122			record (dict): An Animal/Sample.to_biosample() dictionary object
123			record_result (ValidationResult.ValidationResultRecord):
124			an image_validation result object
125
126			Returns:
127			list: a list of dictionaries of relate objects
128			ValidationResult.ValidationResultRecord: an updated
129			image_validation object
130			"""
131
132			# get relationship from a to_biosample() dictionary object
133			relationships = record.get('sampleRelationships', [])
134
135			# as described in image_validation.Submission.Submission
136			# same as record["title"], is the original name of the object id DS
137			record_id = record['attributes']["Data source ID"][0]['value']
138
139			# related objects (from UID goes here)
140			related = []
141
142			for relationship in relationships:
143			if 'accession' in relationship:
144			target = relationship['accession']
145
146			# check biosample target and update record_result if necessary
147			record_result = self.check_biosample_id_target(
148			target, record_id, record_result)
149
150			# HINT: should I check aliases? they came from PK and are related
151			# in the same submission. I can't have a sample without an animal
152			# since animal is a foreign key of sample (which doesn't tolerate
153			# NULL). Even mother and father are related through keys. If
154			# missing, no information about mother and father could be
155			# determined
156			else:
157			# could be a parent relationship for an animal, or the animal
158			# where this sample comes from
159			target = relationship['alias']
160
161			# test for object existence in db. Use biosample.helpers
162			# method to derive a model object from database, then get
163			# its related data
164			try:
165			material_obj = get_model_object(
166			*parse_image_alias(target))
167			related.append(material_obj.to_biosample())
168
169			except ObjectDoesNotExist:
170			record_result.add_validation_result_column(
171			ValidationResult.ValidationResultColumn(
172			"Error",
173			f"Could not locate the referenced record {target}",
174			record_id, 'sampleRelationships'))
175
176			return related, record_result
177
178			def validate(self, record):
179			"""
180			Check attributes for record by calling image_validation methods
181
182			Args:
183			record (dict): An Animal/Sample.to_biosample() dictionary object
184
185			Returns:
186			ValidationResult.ValidationResultRecord: an image_validation
187			object
188			"""
189
190			# this validated in general way
191			result = self.ruleset.validate(record)
192
193			# as defined in image_valdiation.Submission, I will skip further
194			# validation check
195			if result.get_overall_status() == "Error":
196			logger.warning(
197			"record: %s has errors. Skipping context validation" % (
198			record["title"]))
199
200			else:
201			# context validation evaluate relationships. Get them
202			related, result = self.check_relationship(record, result)
203
204			# this validate context (attributes that depends on another one)
205			result = validation.context_validation(record, result, related)
206
207			return result
208
209
210			class ValidationSummary:
211			"""A class to deal with error messages and submission"""
212
213			def __init__(self, submission_obj):
214			"""Istantiate a report object from Submission"""
215
216			# get all names belonging to this submission
217			self.names = Name.objects.select_related(
218			"validationresult",
219			"animal",
220			"sample").filter(
221			submission=submission_obj)
222
223			# here I will have 5 queries, each one executed when calling count
224			# or when iterating queryset
225
226			# count animal and samples
227			self.n_animals = self.names.filter(animal__isnull=False).count()
228			self.n_samples = self.names.filter(sample__isnull=False).count()
229
230			logger.debug("Got %s animal and %s samples in total" % (
231			self.n_animals, self.n_samples))
232
233			# count animal and samples with unknown validation
234			self.n_animal_unknown = self.names.filter(
235			animal__isnull=False, validationresult__isnull=True).count()
236			self.n_sample_unknown = self.names.filter(
237			sample__isnull=False, validationresult__isnull=True).count()
238
239			logger.debug("Got %s animal and %s samples with unknown validation" % (
240			self.n_animal_unknown, self.n_sample_unknown))
241
242			# filter names which have errors
243			self.errors = self.names.exclude(
244			Q(validationresult__status="Pass") |
245			Q(validationresult__isnull=True)
246			)
247
248			# count animal and samples with issues
249			self.n_animal_issues = self.errors.filter(animal__isnull=False).count()
250			self.n_sample_issues = self.errors.filter(sample__isnull=False).count()
251
252			logger.debug("Got %s animal and %s samples with issues" % (
253			self.n_animal_issues, self.n_sample_issues))
254

cnr-ibba / IMAGE-InjectTool

Pull Request — master (#30)

validation.helpers.ValidationSummary.__update_report() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like