excel.helpers.fill_uid.upload_template() - Code Metrics - Inspection of ":twisted_rightwards_arrows: Merge pull request #59..." - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( eecf08...313cfe )

by Paolo

created 2019-10-03 15:07 UTC

excel.helpers.fill_uid.upload_template() A

↳ Parent: excel.helpers.fill_uid

Complexity

Conditions

Size

Total Lines	62
Code Lines	30

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	30
dl	0
loc	62
rs	9.16
c	0
b	0
f	0
cc	3
nop	1

How to fix Long Method

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jul  5 16:37:48 2019

@author: Paolo Cozzi <[email protected]>
"""

import logging

from common.constants import (
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
from common.helpers import image_timedelta, parse_image_timedelta
from image_app.helpers import get_or_create_obj, update_or_create_obj
from image_app.models import (
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Name, Animal,
    Sample, DictDevelStage, DictPhysioStage)
from submissions.helpers import send_message
from validation.helpers import construct_validation_message
from validation.models import ValidationSummary

from .exceptions import ExcelImportError
from .exceltemplate import ExcelTemplateReader

# Get an instance of a logger
logger = logging.getLogger(__name__)


def fill_uid_breeds(submission_obj, template):
    """Register every breed listed in the template as a DictBreed.

    Args:
        submission_obj: submission being imported; the label of its
            ``gene_bank_country`` is used as language for species lookup.
        template: reader object exposing ``get_breed_records()``.
    """

    logger.info("fill_uid_breeds() started")

    # the gene bank country label drives the species synonym lookup
    lang_label = submission_obj.gene_bank_country.label

    for breed_row in template.get_breed_records():
        # species could be given with a latin name or with a common name
        # found in translation tables: resolve it checking synonyms
        species_obj = DictSpecie.get_specie_check_synonyms(
            species_label=breed_row.species, language=lang_label)

        # the breed country is read from the record itself, since it may
        # differ from the submission country
        breed_country = DictCountry.objects.get(
            label=breed_row.efabis_breed_country)

        lookup = {
            'supplied_breed': breed_row.supplied_breed,
            'specie': species_obj,
            'country': breed_country,
        }

        get_or_create_obj(DictBreed, **lookup)

    logger.info("fill_uid_breeds() completed")


def fill_uid_names(submission_obj, template):
    """Create a Name entry for each animal and each sample of the template.

    Animal records are processed first, then sample records. Every Name is
    bound to the submission and to the submission owner.

    Args:
        submission_obj: submission being imported.
        template: reader object exposing ``get_animal_records()`` and
            ``get_sample_records()``.
    """

    # debug
    logger.info("called fill_uid_names()")

    def register(identifier):
        # same lookup for animals and samples: only the identifier differs
        get_or_create_obj(
            Name,
            name=identifier,
            submission=submission_obj,
            owner=submission_obj.owner)

    # names for animals
    for animal_row in template.get_animal_records():
        register(animal_row.animal_id_in_data_source)

    # names for samples
    for sample_row in template.get_sample_records():
        register(sample_row.sample_id_in_data_source)

    logger.info("fill_uid_names() completed")


def fill_uid_animals(submission_obj, template):
    """Create or update an Animal for every animal record of the template.

    For each record the sex, species, breed and Name objects are fetched
    from the database (they are expected to exist already), then the
    Animal bound to that Name is created or updated. Finally the "animal"
    ValidationSummary of the submission is fetched or created and its
    counts are reset.

    Args:
        submission_obj: submission being imported; supplies the owner and
            the language (``gene_bank_country.label``) for species lookup.
        template: reader object exposing ``get_animal_records()`` and
            ``get_breed_records()``.

    Raises:
        ExcelImportError: when the (breed, species) pair of an animal does
            not match exactly one record of the breed sheet.
    """
    # debug
    logger.info("called fill_uid_animals()")

    # get language
    language = submission_obj.gene_bank_country.label

    # The breed sheet doesn't change while animals are processed: read it
    # once and index it by (supplied_breed, species) instead of reading and
    # scanning it again for every animal
    breeds_by_key = {}

    for breed_record in template.get_breed_records():
        key = (breed_record.supplied_breed, breed_record.species)
        breeds_by_key.setdefault(key, []).append(breed_record)

    # iterate among excel template
    for record in template.get_animal_records():
        # determine sex (case insensitive match)
        sex = DictSex.objects.get(label__iexact=record.sex)

        # get specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=record.species, language=language)

        logger.debug("Found '%s' as specie", specie)

        # breed records matching this animal
        breeds = breeds_by_key.get((record.breed, record.species), [])

        # breed is supposed to be unique, from UID constraints. However
        # the same breed name could be placed in two countries. In that
        # case, a unique breed can't be derived from user data
        if len(breeds) != 1:
            raise ExcelImportError(
                "Can't determine a unique breed for '%s:%s' from user data" %
                (record.breed, record.species))

        # get a country for this breed
        country = DictCountry.objects.get(
            label=breeds[0].efabis_breed_country)

        # ok get a real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=record.breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # parents are optional: they stay None when not declared
        mother, father = None, None

        # get name for this animal and for mother and father
        logger.debug(
            "Getting %s as my name", record.animal_id_in_data_source)

        name = Name.objects.get(
            name=record.animal_id_in_data_source,
            submission=submission_obj)

        if record.father_id_in_data_source:
            logger.debug(
                "Getting %s as father", record.father_id_in_data_source)

            father = Name.objects.get(
                name=record.father_id_in_data_source,
                submission=submission_obj)

        if record.mother_id_in_data_source:
            logger.debug(
                "Getting %s as mother", record.mother_id_in_data_source)

            mother = Name.objects.get(
                name=record.mother_id_in_data_source,
                submission=submission_obj)

        # now get accuracy
        accuracy = ACCURACIES.get_value_by_desc(
            record.birth_location_accuracy)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_animal_id,
            'description': record.animal_description,
            'breed': breed,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': record.birth_date,
            'birth_location': record.birth_location,
            'birth_location_latitude': record.birth_location_latitude,
            'birth_location_longitude': record.birth_location_longitude,
            'birth_location_accuracy': accuracy,
            'owner': submission_obj.owner
        }

        # creating or updating an object
        update_or_create_obj(
            Animal,
            name=name,
            defaults=defaults)

    # create a validation summary object and set all_count
    validation_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="animal")

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_animals() completed")


def fill_uid_samples(submission_obj, template):
    """Create or update a Sample for every sample record of the template.

    The Name of the sample and the Animal it derives from are fetched from
    the database; dictionary terms (organism part, developmental and
    physiological stage) are fetched or created. Finally the "sample"
    ValidationSummary of the submission is fetched or created and its
    counts are reset.

    Args:
        submission_obj: submission being imported; supplies the owner.
        template: reader object exposing ``get_sample_records()``.
    """
    # debug
    logger.info("called fill_uid_samples()")

    for row in template.get_sample_records():
        # the Name for this sample
        sample_name = Name.objects.get(
            name=row.sample_id_in_data_source,
            submission=submission_obj,
            owner=submission_obj.owner)

        # the animal this sample was collected from
        source_animal = Animal.objects.get(
            name__name=row.animal_id_in_data_source,
            name__submission=submission_obj)

        # organism part term
        uberon_term = get_or_create_obj(DictUberon, label=row.organism_part)

        # developmental and physiological stages are not mandatory
        devel_term = (
            get_or_create_obj(DictDevelStage, label=row.developmental_stage)
            if row.developmental_stage else None)

        physio_term = (
            get_or_create_obj(DictPhysioStage, label=row.physiological_stage)
            if row.physiological_stage else None)

        # animal age: derive it from dates when the column is empty
        if not row.animal_age_at_collection:
            age_value, age_units = image_timedelta(
                row.collection_date, source_animal.birth_date)

        else:
            age_value, age_units = parse_image_timedelta(
                row.animal_age_at_collection)

        # sampling to preparation interval is optional too
        if row.sampling_to_preparation_interval:
            prep_value, prep_units = parse_image_timedelta(
                row.sampling_to_preparation_interval)

        else:
            prep_value, prep_units = None, None

        # translate descriptions into the stored values
        place_accuracy = ACCURACIES.get_value_by_desc(
            row.collection_place_accuracy)

        # TODO; check those values in excel columns
        storage_value = SAMPLE_STORAGE.get_value_by_desc(row.sample_storage)

        processing_value = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            row.sample_storage_processing)

        # everything but the name goes in defaults, to avoid collisions
        # when updating data
        sample_fields = dict(
            alternative_id=row.alternative_sample_id,
            description=row.sample_description,
            animal=source_animal,
            protocol=row.specimen_collection_protocol,
            collection_date=row.collection_date,
            collection_place_latitude=row.collection_place_latitude,
            collection_place_longitude=row.collection_place_longitude,
            collection_place=row.collection_place,
            collection_place_accuracy=place_accuracy,
            organism_part=uberon_term,
            developmental_stage=devel_term,
            physiological_stage=physio_term,
            animal_age_at_collection=age_value,
            animal_age_at_collection_units=age_units,
            availability=row.availability,
            storage=storage_value,
            storage_processing=processing_value,
            preparation_interval=prep_value,
            preparation_interval_units=prep_units,
            owner=submission_obj.owner,
        )

        update_or_create_obj(Sample, name=sample_name, defaults=sample_fields)

    # get (or create) the validation summary for samples and reset counts
    get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="sample").reset_all_count()

    # debug
    logger.info("fill_uid_samples() completed")


def check_UID(submission_obj, reader):
    """Run the reader consistency checks, stopping at the first failure.

    Checks are executed in this order: sex, species, species of the animal
    sheet, countries, accuracies.

    Args:
        submission_obj: submission being imported; its
            ``gene_bank_country`` is passed to ``reader.check_species()``.
        reader: object exposing the ``check_*`` methods, each one
            returning a ``(check, not_found)`` pair.

    Raises:
        ExcelImportError: when a check returns a falsy ``check``; the
            message reports the ``not_found`` value.
    """
    # check for species and sex in a similar way as cryoweb does
    # TODO: identical to CRBanim. Move to a mixin

    # each probe is wrapped in a lambda, so nothing is evaluated before
    # the previous check has passed
    checks = (
        (lambda: reader.check_sex(),
         "Not all Sex terms are loaded into database: "
         "check for '%s' in your dataset"),
        (lambda: reader.check_species(submission_obj.gene_bank_country),
         "Some species are not loaded into database: "
         "check for '%s' in your dataset"),
        (lambda: reader.check_species_in_animal_sheet(),
         "Some species are not defined in breed sheet: "
         "check for '%s' in your dataset"),
        (lambda: reader.check_countries(),
         "Those countries are not loaded in database: "
         "check for '%s' in your dataset"),
        (lambda: reader.check_accuracies(),
         "Not all accuracy levels are defined in database: "
         "check for '%s' in your dataset"),
    )

    for probe, template in checks:
        passed, missing = probe()

        if not passed:
            raise ExcelImportError(template % (missing))


def upload_template(submission_obj):
    """Import the Excel template uploaded with a submission.

    The uploaded file is read, then data are checked and loaded (breeds,
    names, animals, samples). The submission status and message are
    updated and an async message is sent in both outcomes.

    Args:
        submission_obj: the submission whose uploaded file is imported.

    Returns:
        bool: True when the import completes, False when an exception is
        raised while checking or loading data. Errors raised while
        reading the file are not handled here.
    """
    # debug
    logger.info("Importing from Excel template file")

    # this is the full path in docker container
    template_path = submission_obj.get_uploaded_file_path()

    # read submission data
    reader = ExcelTemplateReader()
    reader.read_file(template_path)

    try:
        # checks first (like cryoweb does), then breeds, names, animals
        # and samples, in this order
        steps = (
            check_UID,
            fill_uid_breeds,
            fill_uid_names,
            fill_uid_animals,
            fill_uid_samples,
        )

        for step in steps:
            step(submission_obj, reader)

    except Exception as exc:
        failure = "Error in importing data: %s" % (str(exc))

        # track the failure in database
        submission_obj.status = ERROR
        submission_obj.message = failure
        submission_obj.save()

        # send async message
        send_message(submission_obj)

        # debug
        logger.error("Error in importing from Template: %s" % (exc))
        logger.exception(exc)

        return False

    # no exception raised: mark submission as loaded
    submission_obj.message = (
        "Template import completed for submission: %s" % (
            submission_obj.id))
    submission_obj.status = LOADED
    submission_obj.save()

    # send async message
    send_message(
        submission_obj,
        validation_message=construct_validation_message(submission_obj))

    logger.info("Import from Template is complete")

    return True


1			#!/usr/bin/env python3
2			# -- coding: utf-8 --
3			"""
4			Created on Fri Jul 5 16:37:48 2019
5
6			@author: Paolo Cozzi <[email protected]>
7			"""
8
9			import logging
10
11			from common.constants import (
12			ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
13			from common.helpers import image_timedelta, parse_image_timedelta
14			from image_app.helpers import get_or_create_obj, update_or_create_obj
15			from image_app.models import (
16			DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Name, Animal,
17			Sample, DictDevelStage, DictPhysioStage)
18			from submissions.helpers import send_message
19			from validation.helpers import construct_validation_message
20			from validation.models import ValidationSummary
21
22			from .exceptions import ExcelImportError
23			from .exceltemplate import ExcelTemplateReader
24
25			# Get an instance of a logger
26			logger = logging.getLogger(__name__)
27
28
29			def fill_uid_breeds(submission_obj, template):
30			"""Fill DictBreed from a excel record"""
31
32			logger.info("fill_uid_breeds() started")
33
34			# ok get languages from submission (useful for translation)
35			language = submission_obj.gene_bank_country.label
36
37			# iterate among excel template
38			for record in template.get_breed_records():
39			# get a DictSpecie object. Species are in latin names, but I can
40			# find also a common name in translation tables
41			specie = DictSpecie.get_specie_check_synonyms(
42			species_label=record.species,
43			language=language)
44
45			# get country for breeds. Ideally will be the same of submission,
46			# however, it could be possible to store data from other contries
47			country = DictCountry.objects.get(label=record.efabis_breed_country)
48
49			get_or_create_obj(
50			DictBreed,
51			supplied_breed=record.supplied_breed,
52			specie=specie,
53			country=country)
54
55			logger.info("fill_uid_breeds() completed")
56
57
58			def fill_uid_names(submission_obj, template):
59			"""fill Names table from crbanim record"""
60
61			# debug
62			logger.info("called fill_uid_names()")
63
64			# iterate among excel template
65			for record in template.get_animal_records():
66			# in the same record I have the sample identifier and animal identifier
67			# a name record for animal
68			get_or_create_obj(
69			Name,
70			name=record.animal_id_in_data_source,
71			submission=submission_obj,
72			owner=submission_obj.owner)
73
74			# iterate among excel template
75			for record in template.get_sample_records():
76			# name record for sample
77			get_or_create_obj(
78			Name,
79			name=record.sample_id_in_data_source,
80			submission=submission_obj,
81			owner=submission_obj.owner)
82
83			logger.info("fill_uid_names() completed")
84
85
86			def fill_uid_animals(submission_obj, template):
87			# debug
88			logger.info("called fill_uid_animals()")
89
90			# get language
91			language = submission_obj.gene_bank_country.label
92
93			# iterate among excel template
94			for record in template.get_animal_records():
95			# determine sex. Check for values
96			sex = DictSex.objects.get(label__iexact=record.sex)
97
98			# get specie (mind synonyms)
99			specie = DictSpecie.get_specie_check_synonyms(
100			species_label=record.species, language=language)
101
102			logger.debug("Found '%s' as specie" % (specie))
103
104			# how I can get breed from my data?
105			breeds = [breed for breed in template.get_breed_records()
106			if breed.supplied_breed == record.breed and
107			breed.species == record.species]
108
109			# breed is supposed to be unique, from UID constraints. However
110			# I could place the same breed name for two countries. In that case,
111			# I cant derive a unique breed from users data
112			if len(breeds) != 1:
113			raise ExcelImportError(
114			"Can't determine a unique breed for '%s:%s' from user data" %
115			(record.breed, record.species))
116
117			# get a country for this breed
118			country = DictCountry.objects.get(
119			label=breeds[0].efabis_breed_country)
120
121			# ok get a real dictbreed object
122			breed = DictBreed.objects.get(
123			supplied_breed=record.breed,
124			specie=specie,
125			country=country)
126
127			logger.debug("Selected breed is %s" % (breed))
128
129			# define names
130			name, mother, father = None, None, None
131
132			# get name for this animal and for mother and father
133			logger.debug("Getting %s as my name" % (
134			record.animal_id_in_data_source))
135
136			name = Name.objects.get(
137			name=record.animal_id_in_data_source,
138			submission=submission_obj)
139
140			if record.father_id_in_data_source:
141			logger.debug("Getting %s as father" % (
142			record.father_id_in_data_source))
143
144			father = Name.objects.get(
145			name=record.father_id_in_data_source,
146			submission=submission_obj)
147
148			if record.mother_id_in_data_source:
149			logger.debug("Getting %s as mother" % (
150			record.mother_id_in_data_source))
151
152			mother = Name.objects.get(
153			name=record.mother_id_in_data_source,
154			submission=submission_obj)
155
156			# now get accuracy
157			accuracy = ACCURACIES.get_value_by_desc(
158			record.birth_location_accuracy)
159
160			# create a new object. Using defaults to avoid collisions when
161			# updating data
162			defaults = {
163			'alternative_id': record.alternative_animal_id,
164			'description': record.animal_description,
165			'breed': breed,
166			'sex': sex,
167			'father': father,
168			'mother': mother,
169			'birth_date': record.birth_date,
170			'birth_location': record.birth_location,
171			'birth_location_latitude': record.birth_location_latitude,
172			'birth_location_longitude': record.birth_location_longitude,
173			'birth_location_accuracy': accuracy,
174			'owner': submission_obj.owner
175			}
176
177			# creating or updating an object
178			update_or_create_obj(
179			Animal,
180			name=name,
181			defaults=defaults)
182
183			# create a validation summary object and set all_count
184			validation_summary = get_or_create_obj(
185			ValidationSummary,
186			submission=submission_obj,
187			type="animal")
188
189			# reset counts
190			validation_summary.reset_all_count()
191
192			# debug
193			logger.info("fill_uid_animals() completed")
194
195
196			def fill_uid_samples(submission_obj, template):
197			# debug
198			logger.info("called fill_uid_samples()")
199
200			# iterate among excel template
201			for record in template.get_sample_records():
202			# get name for this sample
203			name = Name.objects.get(
204			name=record.sample_id_in_data_source,
205			submission=submission_obj,
206			owner=submission_obj.owner)
207
208			# get animal by reading record
209			animal = Animal.objects.get(
210			name__name=record.animal_id_in_data_source,
211			name__submission=submission_obj)
212
213			# get a organism part. Organism parts need to be in lowercases
214			organism_part = get_or_create_obj(
215			DictUberon,
216			label=record.organism_part
217			)
218
219			# get developmental_stage and physiological_stage terms
220			# they are not mandatory
221			devel_stage, physio_stage = None, None
222
223			if record.developmental_stage:
224			devel_stage = get_or_create_obj(
225			DictDevelStage,
226			label=record.developmental_stage
227			)
228
229			if record.physiological_stage:
230			physio_stage = get_or_create_obj(
231			DictPhysioStage,
232			label=record.physiological_stage
233			)
234
235			# animal age could be present or not
236			if record.animal_age_at_collection:
237			animal_age_at_collection, time_units = parse_image_timedelta(
238			record.animal_age_at_collection)
239
240			else:
241			# derive animal age at collection
242			animal_age_at_collection, time_units = image_timedelta(
243			record.collection_date, animal.birth_date)
244
245			# another time column
246			preparation_interval, preparation_interval_units = None, None
247
248			if record.sampling_to_preparation_interval:
249			preparation_interval, preparation_interval_units = \
250			parse_image_timedelta(record.sampling_to_preparation_interval)
251
252			# now get accuracy
253			accuracy = ACCURACIES.get_value_by_desc(
254			record.collection_place_accuracy)
255
256			# now get storage and storage processing
257			# TODO; check those values in excel columns
258			storage = SAMPLE_STORAGE.get_value_by_desc(
259			record.sample_storage)
260
261			storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
262			record.sample_storage_processing)
263
264			# create a new object. Using defaults to avoid collisions when
265			# updating data
266			defaults = {
267			'alternative_id': record.alternative_sample_id,
268			'description': record.sample_description,
269			'animal': animal,
270			'protocol': record.specimen_collection_protocol,
271			'collection_date': record.collection_date,
272			'collection_place_latitude': record.collection_place_latitude,
273			'collection_place_longitude': record.collection_place_longitude,
274			'collection_place': record.collection_place,
275			'collection_place_accuracy': accuracy,
276			'organism_part': organism_part,
277			'developmental_stage': devel_stage,
278			'physiological_stage': physio_stage,
279			'animal_age_at_collection': animal_age_at_collection,
280			'animal_age_at_collection_units': time_units,
281			'availability': record.availability,
282			'storage': storage,
283			'storage_processing': storage_processing,
284			'preparation_interval': preparation_interval,
285			'preparation_interval_units': preparation_interval_units,
286			'owner': submission_obj.owner,
287			}
288
289			update_or_create_obj(
290			Sample,
291			name=name,
292			defaults=defaults)
293
294			# create a validation summary object and set all_count
295			validation_summary = get_or_create_obj(
296			ValidationSummary,
297			submission=submission_obj,
298			type="sample")
299
300			# reset counts
301			validation_summary.reset_all_count()
302
303			# debug
304			logger.info("fill_uid_samples() completed")
305
306
307			def check_UID(submission_obj, reader):
308			# check for species and sex in a similar way as cryoweb does
309			# TODO: identical to CRBanim. Move to a mixin
310			check, not_found = reader.check_sex()
311
312			# check sex
313			if not check:
314			message = (
315			"Not all Sex terms are loaded into database: "
316			"check for '%s' in your dataset" % (not_found))
317
318			raise ExcelImportError(message)
319
320			check, not_found = reader.check_species(
321			submission_obj.gene_bank_country)
322
323			# check species and related
324			if not check:
325			raise ExcelImportError(
326			"Some species are not loaded into database: "
327			"check for '%s' in your dataset" % (not_found))
328
329			check, not_found = reader.check_species_in_animal_sheet()
330
331			if not check:
332			raise ExcelImportError(
333			"Some species are not defined in breed sheet: "
334			"check for '%s' in your dataset" % (not_found))
335
336			# check countries
337			check, not_found = reader.check_countries()
338
339			if not check:
340			raise ExcelImportError(
341			"Those countries are not loaded in database: "
342			"check for '%s' in your dataset" % (not_found))
343
344			# check accuracies
345			check, not_found = reader.check_accuracies()
346
347			if not check:
348			message = (
349			"Not all accuracy levels are defined in database: "
350			"check for '%s' in your dataset" % (not_found))
351
352			raise ExcelImportError(message)
353
354
355			def upload_template(submission_obj):
356			# debug
357			logger.info("Importing from Excel template file")
358
359			# this is the full path in docker container
360			fullpath = submission_obj.get_uploaded_file_path()
361
362			# read submission data
363			reader = ExcelTemplateReader()
364			reader.read_file(fullpath)
365
366			# start data loading
367			try:
368			# check UID data like cryoweb does
369			check_UID(submission_obj, reader)
370
371			# BREEDS
372			fill_uid_breeds(submission_obj, reader)
373
374			# NAME
375			fill_uid_names(submission_obj, reader)
376
377			# ANIMALS
378			fill_uid_animals(submission_obj, reader)
379
380			# SAMPLES
381			fill_uid_samples(submission_obj, reader)
382
383			except Exception as exc:
384			# set message:
385			message = "Error in importing data: %s" % (str(exc))
386
387			# save a message in database
388			submission_obj.status = ERROR
389			submission_obj.message = message
390			submission_obj.save()
391
392			# send async message
393			send_message(submission_obj)
394
395			# debug
396			logger.error("Error in importing from Template: %s" % (exc))
397			logger.exception(exc)
398
399			return False
400
401			else:
402			message = "Template import completed for submission: %s" % (
403			submission_obj.id)
404
405			submission_obj.message = message
406			submission_obj.status = LOADED
407			submission_obj.save()
408
409			# send async message
410			send_message(
411			submission_obj,
412			validation_message=construct_validation_message(submission_obj))
413
414			logger.info("Import from Template is complete")
415
416			return True
417

cnr-ibba / IMAGE-InjectTool

Push — master ( eecf08...313cfe )

excel.helpers.fill_uid.upload_template() A

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like