excel.helpers.fill_uid.upload_template() - Code Metrics - Inspection of ":sparkles: import from Template file" - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#40)

by Paolo

created 2019-07-08 13:21 UTC

excel.helpers.fill_uid.upload_template() B

↳ Parent: excel.helpers.fill_uid

Complexity

Conditions

Size

Total Lines	87
Code Lines	47

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	47
dl	0
loc	87
rs	7.8012
c	0
b	0
f	0
cc	6
nop	1

How to fix Long Method

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jul  5 16:37:48 2019

@author: Paolo Cozzi <[email protected]>
"""

import logging

from common.constants import (
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
from common.helpers import image_timedelta
from image_app.models import (
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Name, Animal,
    Sample)
from submissions.helpers import send_message
from validation.helpers import construct_validation_message
from validation.models import ValidationSummary

from .exceptions import ExcelImportError
from .exceltemplate import ExcelTemplate

# Module-level logger shared by every helper below; it is named after this
# module (``__name__``)
logger = logging.getLogger(__name__)


def fill_uid_breeds(submission_obj, template):
    """Make sure a DictBreed exists for every breed record of a template.

    For each record returned by ``template.get_breed_records()`` the species
    is resolved (by label first, then by synonym), the breed country is
    fetched or created, and finally the breed itself is fetched or created.

    Args:
        submission_obj: the submission being loaded; the label of its
            ``gene_bank_country`` is the language used for synonym lookups.
        template: the template reader providing ``get_breed_records()``.
    """

    def report(instance, is_new):
        # new dictionary entries are worth an info line, known ones only debug
        if is_new:
            logger.info("Created %s" % instance)
            return

        logger.debug("Found %s" % instance)

    logger.info("fill_uid_breeds() started")

    # language used to translate species synonyms
    language = submission_obj.gene_bank_country.label

    for record in template.get_breed_records():
        # TODO: move this in a helper module (image_app.helpers?)
        # species are expected as latin names, but a common name may be
        # found in the translation tables
        try:
            specie = DictSpecie.objects.get(label=record.species)

        except DictSpecie.DoesNotExist:
            logger.info("Search %s in synonyms" % (record.species))

            # not a known label: a synonym for this language should exist
            specie = DictSpecie.get_by_synonym(
                synonym=record.species,
                language=language)

        # the breed country is usually the submission one, but breeds from
        # other countries can be stored too, so unknown countries are created
        country, country_is_new = DictCountry.objects.get_or_create(
            label=record.efabis_breed_country)
        report(country, country_is_new)

        breed, breed_is_new = DictBreed.objects.get_or_create(
            supplied_breed=record.supplied_breed,
            specie=specie,
            country=country)
        report(breed, breed_is_new)

    logger.info("fill_uid_breeds() completed")


def fill_uid_names(submission_obj, template):
    """Fill the Name table with the identifiers found in a template.

    A Name is fetched or created for every animal record first, then for
    every sample record; each one is tied to ``submission_obj`` and to its
    owner.

    Args:
        submission_obj: the submission the names belong to.
        template: the template reader providing ``get_animal_records()`` and
            ``get_sample_records()``.
    """

    logger.info("called fill_uid_names()")

    owner = submission_obj.owner

    # animal identifiers
    for animal_record in template.get_animal_records():
        animal_name, is_new = Name.objects.get_or_create(
            name=animal_record.animal_id_in_data_source,
            submission=submission_obj,
            owner=owner)

        template_msg = (
            "Created animal name %s" if is_new else "Found animal name %s")
        logger.debug(template_msg % animal_name)

    # sample identifiers
    for sample_record in template.get_sample_records():
        sample_name, is_new = Name.objects.get_or_create(
            name=sample_record.sample_id_in_data_source,
            submission=submission_obj,
            owner=owner)

        template_msg = (
            "Created sample name %s" if is_new else "Found sample name %s")
        logger.debug(template_msg % sample_name)

    logger.info("fill_uid_names() completed")


def fill_uid_animals(submission_obj, template):
    """Create or update an Animal for every animal record of a template.

    For each record returned by ``template.get_animal_records()`` the sex,
    species, breed, names (animal, father, mother) and birth location
    accuracy are resolved, then the Animal bound to the animal Name is
    created or updated. Finally the "animal" ValidationSummary of the
    submission is fetched or created and its counts are reset.

    Args:
        submission_obj: the submission being loaded; the label of its
            ``gene_bank_country`` is the language used for synonym lookups.
        template: the template reader providing ``get_animal_records()`` and
            ``get_breed_records()``.

    Raises:
        ExcelImportError: when the breed records don't identify exactly one
            breed for the breed/species pair of an animal.

    Note:
        The ``.get()`` lookups on the dictionary and Name tables are not
        guarded here: their exceptions propagate to the caller.
    """
    logger.info("called fill_uid_animals()")

    # get submission language (needed to resolve species synonyms)
    language = submission_obj.gene_bank_country.label

    # breed records don't change while animals are processed: read them once
    # instead of asking the template again for every animal
    breed_records = list(template.get_breed_records())

    # iterate among excel template
    for record in template.get_animal_records():
        # determine sex. Check for values
        sex = DictSex.objects.get(label__iexact=record.sex)

        # get specie in the same way fill_uid_breeds() does: breeds are
        # stored with the specie resolved through synonyms, so the same
        # fallback is required here to find them again
        try:
            specie = DictSpecie.objects.get(label=record.species)

        except DictSpecie.DoesNotExist:
            logger.info("Search %s in synonyms", record.species)
            specie = DictSpecie.get_by_synonym(
                synonym=record.species,
                language=language)

        # find the breed records describing this animal's breed
        breeds = [breed for breed in breed_records
                  if breed.supplied_breed == record.breed and
                  breed.species == record.species]

        # breed is supposed to be unique, from UID constraints. However
        # the same breed name could be placed for two countries. In that
        # case, a unique breed can't be derived from users data
        if len(breeds) != 1:
            raise ExcelImportError(
                "Can't determine a unique breed for '%s:%s' from user data" %
                (record.breed, record.species))

        # get a country for this breed
        country = DictCountry.objects.get(
            label=breeds[0].efabis_breed_country)

        # ok get a real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=record.breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # parents are optional
        father, mother = None, None

        # get name for this animal and for mother and father
        logger.debug(
            "Getting %s as my name", record.animal_id_in_data_source)

        name = Name.objects.get(
            name=record.animal_id_in_data_source,
            submission=submission_obj)

        if record.father_id_in_data_source:
            logger.debug(
                "Getting %s as father", record.father_id_in_data_source)

            father = Name.objects.get(
                name=record.father_id_in_data_source,
                submission=submission_obj)

        if record.mother_id_in_data_source:
            logger.debug(
                "Getting %s as mother", record.mother_id_in_data_source)

            mother = Name.objects.get(
                name=record.mother_id_in_data_source,
                submission=submission_obj)

        # now get accuracy
        accuracy = ACCURACIES.get_value_by_desc(
            record.birth_location_accuracy)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_animal_id,
            'description': record.animal_description,
            'breed': breed,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': record.birth_date,
            'birth_location': record.birth_location,
            'birth_location_latitude': record.birth_location_latitude,
            'birth_location_longitude': record.birth_location_longitude,
            'birth_location_accuracy': accuracy,
            'owner': submission_obj.owner,
        }

        animal, created = Animal.objects.update_or_create(
            name=name,
            defaults=defaults)

        if created:
            logger.debug("Created %s", animal)

        else:
            logger.debug("Updating %s", animal)

    # create a validation summary object and set all_count
    validation_summary, created = ValidationSummary.objects.get_or_create(
        submission=submission_obj, type="animal")

    if created:
        logger.debug(
            "ValidationSummary animal created for submission %s",
            submission_obj)

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_animals() completed")


def fill_uid_samples(submission_obj, template):
    """Create or update a Sample for every sample record of a template.

    For each record returned by ``template.get_sample_records()`` the sample
    Name, the Animal it was collected from, the organism part, the animal
    age at collection and the accuracy/storage values are resolved, then the
    Sample bound to the sample Name is created or updated. Finally the
    "sample" ValidationSummary of the submission is fetched or created and
    its counts are reset.

    Args:
        submission_obj: the submission being loaded.
        template: the template reader providing ``get_sample_records()``.

    Raises:
        ExcelImportError: when a record provides its own animal age at
            collection, since parsing that value is not implemented yet.

    Note:
        The ``.get()`` lookups on Name and Animal are not guarded here:
        their exceptions propagate to the caller.
    """
    logger.info("called fill_uid_samples()")

    # iterate among excel template
    for record in template.get_sample_records():
        # get name for this sample
        name = Name.objects.get(
            name=record.sample_id_in_data_source,
            submission=submission_obj,
            owner=submission_obj.owner)

        # get animal by reading record
        animal = Animal.objects.get(
            name__name=record.animal_id_in_data_source,
            name__submission=submission_obj)

        # get a organism part.
        # NOTE(review): organism parts are said to need lowercase labels,
        # but the label is stored exactly as provided -- confirm
        organism_part, created = DictUberon.objects.get_or_create(
            label=record.organism_part
        )

        if created:
            logger.info("Created %s", organism_part)

        else:
            logger.debug("Found %s", organism_part)

        # TODO: get developmental_stage and physiological_stage terms

        # animal age could be present or not
        if record.animal_age_at_collection:
            # TODO: parse the provided value. Until then fail loudly:
            # going on would leave age and units undefined for the first
            # record, or silently reuse the values of the previous sample
            raise ExcelImportError(
                "Can't import sample '%s': parsing a provided animal age "
                "at collection ('%s') is not supported yet" % (
                    record.sample_id_in_data_source,
                    record.animal_age_at_collection))

        # derive animal age at collection
        animal_age_at_collection, time_units = image_timedelta(
            record.collection_date, animal.birth_date)

        # now get accuracy
        accuracy = ACCURACIES.get_value_by_desc(
            record.collection_place_accuracy)

        # now get storage and storage processing
        # TODO; check those values in excel columns
        storage = SAMPLE_STORAGE.get_value_by_desc(
            record.sample_storage)

        storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            record.sample_storage_processing)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_sample_id,
            'description': record.sample_description,
            'animal': animal,
            'protocol': record.specimen_collection_protocol,
            'collection_date': record.collection_date,
            'collection_place_latitude': record.collection_place_latitude,
            'collection_place_longitude': record.collection_place_longitude,
            'collection_place': record.collection_place,
            'collection_place_accuracy': accuracy,
            'organism_part': organism_part,
            # 'developmental_stage': None,
            # 'physiological_stage': None,
            'animal_age_at_collection': animal_age_at_collection,
            'animal_age_at_collection_units': time_units,
            'availability': record.availability,
            'storage': storage,
            'storage_processing': storage_processing,
            # TODO: this is a time unit column
            'preparation_interval': record.sampling_to_preparation_interval,
            'owner': submission_obj.owner,
        }

        sample, created = Sample.objects.update_or_create(
            name=name,
            defaults=defaults)

        if created:
            logger.debug("Created %s", sample)

        else:
            logger.debug("Updating %s", sample)

    # create a validation summary object and set all_count
    validation_summary, created = ValidationSummary.objects.get_or_create(
        submission=submission_obj, type="sample")

    if created:
        logger.debug(
            "ValidationSummary sample created for submission %s",
            submission_obj)

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_samples() completed")


def _check_template_terms(reader, submission_obj):
    """Raise ExcelImportError on the first failing template check.

    Sex terms, species and accuracy levels are checked in this order.
    """
    # TODO: identical to CRBanim. Move to a mixin
    sex_ok, missing = reader.check_sex()

    if not sex_ok:
        raise ExcelImportError(
            "Not all Sex terms are loaded into database: "
            "check for %s in your dataset" % (missing))

    species_ok, missing = reader.check_species(
        submission_obj.gene_bank_country)

    if not species_ok:
        raise ExcelImportError(
            "Some species are not loaded in UID database: "
            "%s" % (missing))

    accuracies_ok, missing = reader.check_accuracies()

    if not accuracies_ok:
        raise ExcelImportError(
            "Not all accuracy levels are defined in database: "
            "check for %s in your dataset" % (missing))


def upload_template(submission_obj):
    """Import the Excel template uploaded with a submission.

    The uploaded file is read, its sex/species/accuracy terms are checked
    and then breeds, names, animals and samples are loaded, in this order.
    The outcome is stored in the submission status and message, and an
    asynchronous message is sent in both cases.

    Args:
        submission_obj: the submission whose uploaded file is imported.

    Returns:
        bool: True when data were loaded (status set to LOADED), False when
        a check or a loading step raised (status set to ERROR).
    """
    logger.info("Importing from Excel template file")

    # read submission data from the uploaded file (this is the full path in
    # docker container). Reading happens outside the guarded section below
    template_path = submission_obj.get_uploaded_file_path()
    reader = ExcelTemplate()
    reader.read_file(template_path)

    try:
        # check terms in a similar way as cryoweb does
        _check_template_terms(reader, submission_obj)

        # loading order: breeds, names, animals, samples
        fill_uid_breeds(submission_obj, reader)
        fill_uid_names(submission_obj, reader)
        fill_uid_animals(submission_obj, reader)
        fill_uid_samples(submission_obj, reader)

    except Exception as exc:
        failure = "Error in importing data: %s" % (str(exc))

        # track the failure in database
        submission_obj.status = ERROR
        submission_obj.message = failure
        submission_obj.save()

        # send async message
        send_message(submission_obj)

        logger.error("Error in importing from Template: %s" % (exc))
        logger.exception(exc)

        return False

    # no exception raised: track the success in database
    submission_obj.message = (
        "Template import completed for submission: %s" % (
            submission_obj.id))
    submission_obj.status = LOADED
    submission_obj.save()

    # send async message
    send_message(
        submission_obj,
        validation_message=construct_validation_message(submission_obj))

    logger.info("Import from Template is complete")

    return True


1			#!/usr/bin/env python3
2			# -- coding: utf-8 --
3			"""
4			Created on Fri Jul 5 16:37:48 2019
5
6			@author: Paolo Cozzi <[email protected]>
7			"""
8
9			import logging
10
11			from common.constants import (
12			ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
13			from common.helpers import image_timedelta
14			from image_app.models import (
15			DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Name, Animal,
16			Sample)
17			from submissions.helpers import send_message
18			from validation.helpers import construct_validation_message
19			from validation.models import ValidationSummary
20
21			from .exceptions import ExcelImportError
22			from .exceltemplate import ExcelTemplate
23
24			# Get an instance of a logger
25			logger = logging.getLogger(__name__)
26
27
28			def fill_uid_breeds(submission_obj, template):
29			"""Fill DictBreed from a excel record"""
30
31			logger.info("fill_uid_breeds() started")
32
33			# ok get languages from submission (useful for translation)
34			language = submission_obj.gene_bank_country.label
35
36			# iterate among excel template
37			for record in template.get_breed_records():
38			# TODO: move this in a helper module (image_app.helpers?)
39			# get a DictSpecie object. Species are in latin names, but I can
40			# find also a common name in translation tables
41			try:
42			specie = DictSpecie.objects.get(label=record.species)
43
44			except DictSpecie.DoesNotExist:
45			logger.info("Search %s in synonyms" % (record.species))
46			# search for language synonym (if I arrived here a synonym should
47			# exists)
48			specie = DictSpecie.get_by_synonym(
49			synonym=record.species,
50			language=language)
51
52			# get country for breeds. Ideally will be the same of submission,
53			# however, it could be possible to store data from other contries
54			country, created = DictCountry.objects.get_or_create(
55			label=record.efabis_breed_country)
56
57			# I could create a country from a v_breed_specie instance. That's
58			# ok, maybe I could have a lot of breed from different countries and
59			# a few organizations submitting them
60			if created:
61			logger.info("Created %s" % country)
62
63			else:
64			logger.debug("Found %s" % country)
65
66			breed, created = DictBreed.objects.get_or_create(
67			supplied_breed=record.supplied_breed,
68			specie=specie,
69			country=country)
70
71			if created:
72			logger.info("Created %s" % breed)
73
74			else:
75			logger.debug("Found %s" % breed)
76
77			logger.info("fill_uid_breeds() completed")
78
79
80			def fill_uid_names(submission_obj, template):
81			"""fill Names table from crbanim record"""
82
83			# debug
84			logger.info("called fill_uid_names()")
85
86			# iterate among excel template
87			for record in template.get_animal_records():
88			# in the same record I have the sample identifier and animal identifier
89			# a name record for animal
90			animal_name, created = Name.objects.get_or_create(
91			name=record.animal_id_in_data_source,
92			submission=submission_obj,
93			owner=submission_obj.owner)
94
95			if created:
96			logger.debug("Created animal name %s" % animal_name)
97
98			else:
99			logger.debug("Found animal name %s" % animal_name)
100
101			# iterate among excel template
102			for record in template.get_sample_records():
103			# name record for sample
104			sample_name, created = Name.objects.get_or_create(
105			name=record.sample_id_in_data_source,
106			submission=submission_obj,
107			owner=submission_obj.owner)
108
109			if created:
110			logger.debug("Created sample name %s" % sample_name)
111
112			else:
113			logger.debug("Found sample name %s" % sample_name)
114
115			logger.info("fill_uid_names() completed")
116
117
118			def fill_uid_animals(submission_obj, template):
119			# debug
120			logger.info("called fill_uid_animals()")
121
122			# get submission language
123			language = submission_obj.gene_bank_country.label
124
125			# iterate among excel template
126			for record in template.get_animal_records():
127			# determine sex. Check for values
128			sex = DictSex.objects.get(label__iexact=record.sex)
129
130			# get specie
131			specie = DictSpecie.objects.get(label=record.species)
132
133			# how I can get breed from my data?
134			breeds = [breed for breed in template.get_breed_records()
135			if breed.supplied_breed == record.breed and
136			breed.species == record.species]
137
138			# breed is supposed to be unique, from UID constraints. However
139			# I could place the same breed name for two countries. In that case,
140			# I cant derive a unique breed from users data
141			if len(breeds) != 1:
142			raise ExcelImportError(
143			"Can't determine a unique breed for '%s:%s' from user data" %
144			(record.breed, record.species))
145
146			# get a country for this breed
147			country = DictCountry.objects.get(
148			label=breeds[0].efabis_breed_country)
149
150			# ok get a real dictbreed object
151			breed = DictBreed.objects.get(
152			supplied_breed=record.breed,
153			specie=specie,
154			country=country)
155
156			logger.debug("Selected breed is %s" % (breed))
157
158			# define names
159			name, mother, father = None, None, None
160
161			# get name for this animal and for mother and father
162			logger.debug("Getting %s as my name" % (
163			record.animal_id_in_data_source))
164
165			name = Name.objects.get(
166			name=record.animal_id_in_data_source,
167			submission=submission_obj)
168
169			if record.father_id_in_data_source:
170			logger.debug("Getting %s as father" % (
171			record.father_id_in_data_source))
172
173			father = Name.objects.get(
174			name=record.father_id_in_data_source,
175			submission=submission_obj)
176
177			if record.mother_id_in_data_source:
178			logger.debug("Getting %s as mother" % (
179			record.mother_id_in_data_source))
180
181			mother = Name.objects.get(
182			name=record.mother_id_in_data_source,
183			submission=submission_obj)
184
185			# now get accuracy
186			accuracy = ACCURACIES.get_value_by_desc(
187			record.birth_location_accuracy)
188
189			# create a new object. Using defaults to avoid collisions when
190			# updating data
191			defaults = {
192			'alternative_id': record.alternative_animal_id,
193			'description': record.animal_description,
194			'breed': breed,
195			'sex': sex,
196			'father': father,
197			'mother': mother,
198			'birth_date': record.birth_date,
199			'birth_location': record.birth_location,
200			'birth_location_latitude': record.birth_location_latitude,
201			'birth_location_longitude': record.birth_location_longitude,
202			'birth_location_accuracy': accuracy,
203			'owner': submission_obj.owner
204			}
205
206			animal, created = Animal.objects.update_or_create(
207			name=name,
208			defaults=defaults)
209
210			if created:
211			logger.debug("Created %s" % animal)
212
213			else:
214			logger.debug("Updating %s" % animal)
215
216			# create a validation summary object and set all_count
217			validation_summary, created = ValidationSummary.objects.get_or_create(
218			submission=submission_obj, type="animal")
219
220			if created:
221			logger.debug(
222			"ValidationSummary animal created for submission %s" %
223			submission_obj)
224
225			# reset counts
226			validation_summary.reset_all_count()
227
228			# debug
229			logger.info("fill_uid_animals() completed")
230
231
232			def fill_uid_samples(submission_obj, template):
233			# debug
234			logger.info("called fill_uid_samples()")
235
236			# iterate among excel template
237			for record in template.get_sample_records():
238			# get name for this sample
239			name = Name.objects.get(
240			name=record.sample_id_in_data_source,
241			submission=submission_obj,
242			owner=submission_obj.owner)
243
244			# get animal by reading record
245			animal = Animal.objects.get(
246			name__name=record.animal_id_in_data_source,
247			name__submission=submission_obj)
248
249			# get a organism part. Organism parts need to be in lowercases
250			organism_part, created = DictUberon.objects.get_or_create(
251			label=record.organism_part
252			)
253
254			if created:
255			logger.info("Created %s" % organism_part)
256
257			else:
258			logger.debug("Found %s" % organism_part)
259
260			# TODO: get developmental_stage and physiological_stage terms
261
262			# animal age could be present or not
263			if record.animal_age_at_collection:
264			# TODO: do something
265			pass
266
267			else:
268			# derive animal age at collection
269			animal_age_at_collection, time_units = image_timedelta(
270			record.collection_date, animal.birth_date)
271
272			# now get accuracy
273			accuracy = ACCURACIES.get_value_by_desc(
274			record.collection_place_accuracy)
275
276			# now get storage and storage processing
277			# TODO; check those values in excel columns
278			storage = SAMPLE_STORAGE.get_value_by_desc(
279			record.sample_storage)
280
281			storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
282			record.sample_storage_processing)
283
284			# create a new object. Using defaults to avoid collisions when
285			# updating data
286			defaults = {
287			'alternative_id': record.alternative_sample_id,
288			'description': record.sample_description,
289			'animal': animal,
290			'protocol': record.specimen_collection_protocol,
291			'collection_date': record.collection_date,
292			'collection_place_latitude': record.collection_place_latitude,
293			'collection_place_longitude': record.collection_place_longitude,
294			'collection_place': record.collection_place,
295			'collection_place_accuracy': accuracy,
296			'organism_part': organism_part,
297			# 'developmental_stage': None,
298			# 'physiological_stage': None,
299			'animal_age_at_collection': animal_age_at_collection,
			0 ignored issues – show introduced 2019-07-05 14:49 UTC by Report Bug Copy Issue Report The variable `animal_age_at_collection` does not seem to be defined for all execution paths. Loading history...
300			'animal_age_at_collection_units': time_units,
			0 ignored issues – show introduced 2019-07-05 14:49 UTC by Report Bug Copy Issue Report The variable `time_units` does not seem to be defined for all execution paths. Loading history...
301			'availability': record.availability,
302			'storage': storage,
303			'storage_processing': storage_processing,
304			# TODO: this is a time unit column
305			'preparation_interval': record.sampling_to_preparation_interval,
306			'owner': submission_obj.owner,
307			}
308
309			sample, created = Sample.objects.update_or_create(
310			name=name,
311			defaults=defaults)
312
313			if created:
314			logger.debug("Created %s" % sample)
315
316			else:
317			logger.debug("Updating %s" % sample)
318
319			# create a validation summary object and set all_count
320			validation_summary, created = ValidationSummary.objects.get_or_create(
321			submission=submission_obj, type="sample")
322
323			if created:
324			logger.debug(
325			"ValidationSummary animal created for submission %s" %
326			submission_obj)
327
328			# reset counts
329			validation_summary.reset_all_count()
330
331			# debug
332			logger.info("fill_uid_samples() completed")
333
334
335			def upload_template(submission_obj):
336			# debug
337			logger.info("Importing from Excel template file")
338
339			# this is the full path in docker container
340			fullpath = submission_obj.get_uploaded_file_path()
341
342			# read submission data
343			reader = ExcelTemplate()
344			reader.read_file(fullpath)
345
346			# start data loading
347			try:
348			# check for species and sex in a similar way as cryoweb does
349			# TODO: identical to CRBanim. Move to a mixin
350			check, not_found = reader.check_sex()
351
352			if not check:
353			message = (
354			"Not all Sex terms are loaded into database: "
355			"check for %s in your dataset" % (not_found))
356
357			raise ExcelImportError(message)
358
359			check, not_found = reader.check_species(
360			submission_obj.gene_bank_country)
361
362			if not check:
363			raise ExcelImportError(
364			"Some species are not loaded in UID database: "
365			"%s" % (not_found))
366
367			check, not_found = reader.check_accuracies()
368
369			if not check:
370			message = (
371			"Not all accuracy levels are defined in database: "
372			"check for %s in your dataset" % (not_found))
373
374			raise ExcelImportError(message)
375
376			# BREEDS
377			fill_uid_breeds(submission_obj, reader)
378
379			# NAME
380			fill_uid_names(submission_obj, reader)
381
382			# ANIMALS
383			fill_uid_animals(submission_obj, reader)
384
385			# SAMPLES
386			fill_uid_samples(submission_obj, reader)
387
388			except Exception as exc:
389			# set message:
390			message = "Error in importing data: %s" % (str(exc))
391
392			# save a message in database
393			submission_obj.status = ERROR
394			submission_obj.message = message
395			submission_obj.save()
396
397			# send async message
398			send_message(submission_obj)
399
400			# debug
401			logger.error("Error in importing from Template: %s" % (exc))
402			logger.exception(exc)
403
404			return False
405
406			else:
407			message = "Template import completed for submission: %s" % (
408			submission_obj.id)
409
410			submission_obj.message = message
411			submission_obj.status = LOADED
412			submission_obj.save()
413
414			# send async message
415			send_message(
416			submission_obj,
417			validation_message=construct_validation_message(submission_obj))
418
419			logger.info("Import from Template is complete")
420
421			return True
422

cnr-ibba / IMAGE-InjectTool

Pull Request — master (#40)

excel.helpers.fill_uid.upload_template() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like