crbanim.helpers.fill_uid_sample() - Code Metrics - Inspection of ":sparkles: import from Template file" - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#40)

by Paolo

created 2019-07-05 14:47 UTC

crbanim.helpers.fill_uid_sample() B

↳ Parent: crbanim.helpers

Complexity

Conditions

Size

Total Lines	60
Code Lines	34

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	34
dl	0
loc	60
rs	8.5973
c	0
b	0
f	0
cc	5
nop	4

How to fix Long Method

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 21 15:37:16 2019

@author: Paolo Cozzi <[email protected]>
"""

import io
import csv
import logging
import pycountry

from collections import defaultdict, namedtuple

from django.utils.dateparse import parse_date

from common.constants import LOADED, ERROR, MISSING, SAMPLE_STORAGE
from common.helpers import image_timedelta
from image_app.helpers import FileDataSourceMixin
from image_app.models import (
    DictSpecie, DictSex, DictCountry, DictBreed, Name, Animal, Sample,
    DictUberon, Publication)
from submissions.helpers import send_message
from validation.helpers import construct_validation_message
from validation.models import ValidationSummary

# Get an instance of a logger
logger = logging.getLogger(__name__)


# Error type raised while importing a CRBAnim file
class CRBAnimImportError(Exception):
    """Signal that data from a CRBAnim file can't be imported."""


class CRBAnimReader(FileDataSourceMixin):
    """Read a CRBAnim CSV file and expose its records and column values.

    After :meth:`read_file` has been called:

    * ``filename`` is the path that was read;
    * ``dialect`` is the CSV dialect sniffed from the file;
    * ``header`` is the list of column names (first row of the file);
    * ``data`` is a list of namedtuples, one per row, whose fields are the
      header columns. Empty cells and ``\\N`` cells are stored as ``None``;
    * ``items`` maps each column name to the set of values found in it.
    """

    # columns that must be in the header for a file to be accepted by
    # is_valid()
    mandatory_columns = [
        'sex',
        'species_latin_name',
        'country_of_origin',
        'breed_name',
        'animal_ID',
        'sample_bibliographic_references',
        'sample_identifier',
        'animal_birth_date',
        'sample_storage_temperature',
        'sample_type_name',
        'body_part_name',
        'sampling_date',
        'sampling_protocol_url',
        'sample_availability',
        'EBI_Biosample_identifier',
    ]

    def __init__(self):
        self.data = None
        self.header = None
        self.dialect = None
        self.items = None
        self.filename = None

    @classmethod
    def get_dialect(cls, chunk):
        """Determine dialect of a CSV from a chunk of text"""

        return csv.Sniffer().sniff(chunk)

    @classmethod
    def is_valid(cls, chunk):
        """Check that a chunk of a CRBanim file has all the mandatory columns.

        The first row of ``chunk`` is read as the header.

        Returns a ``(bool, list)`` tuple: ``(True, [])`` if every column of
        ``mandatory_columns`` is in the header, otherwise ``False`` and the
        list of the missing columns.
        """

        dialect = cls.get_dialect(chunk)

        # read the header from a file-like object built on chunk
        reader = csv.reader(io.StringIO(chunk), dialect)
        header = next(reader)

        not_found = [
            column for column in cls.mandatory_columns
            if column not in header]

        if not not_found:
            logger.debug("This seems to be a valid CRBanim file")
            return True, []

        logger.error("Couldn't not find mandatory CRBanim columns %s" % (
            not_found))
        return False, not_found

    def read_file(self, filename):
        """Read a CRBAnim file and set its content to instance attributes.

        Sets ``filename``, ``dialect``, ``header``, ``data`` and ``items``.
        Empty and ``\\N`` cells are replaced with ``None``, an ``unknown``
        sex (case insensitive) is replaced with ``record of unknown sex``
        and completely empty rows are skipped.
        """

        with open(filename, newline='') as handle:
            # initialize data
            self.filename = filename
            self.data = []

            # get dialect from the beginning of the file
            chunk = handle.read(2048)
            self.dialect = self.get_dialect(chunk)

            # restart reading from the beginning
            handle.seek(0)

            # read csv file
            reader = csv.reader(handle, self.dialect)
            self.header = next(reader)

            # find sex index column
            sex_idx = self.header.index('sex')

            # create a namedtuple object
            Data = namedtuple("Data", self.header)

            # add records to data
            for record in reader:
                # csv.reader returns an empty list for a blank line: there
                # is nothing to import from it
                if not record:
                    continue

                # replace all "\\N" and empty occurences with None
                record = [None if col in ("\\N", "") else col
                          for col in record]

                # 'unknown' sex should be replaced with 'record of unknown
                # sex'. The cell could be None after the replacement above
                sex = record[sex_idx]

                if sex is not None and sex.lower() == 'unknown':
                    logger.debug(
                        "Changing '%s' with '%s'" % (
                            sex, 'record of unknown sex'))
                    record[sex_idx] = 'record of unknown sex'

                self.data.append(Data._make(record))

        self.items = self.eval_columns()

    def eval_columns(self):
        """Return a dictionary with the set of values of each column"""

        target_columns = self.header

        items = defaultdict(list)

        # records have the same fields order of header
        for line in self.data:
            for column, value in zip(target_columns, line):
                items[column].append(value)

        # now get a set of object (an empty set for a file with no records)
        for column in target_columns:
            items[column] = set(items[column])

        return items

    def print_line(self, num):
        """Log (debug level) the record at index ``num`` with its column
        names"""

        for column, value in zip(self.header, self.data[num]):
            logger.debug("%s: %s" % (column, value))

    def filter_by_column_values(self, column, values, ignorecase=False):
        """Yield the records whose ``column`` value is in ``values``.

        With ``ignorecase=True`` both ``values`` and the record value are
        compared in lowercase. Discarded records are logged at debug level.
        """

        if ignorecase is True:
            # lower values
            values = [value.lower() for value in values]

        for line in self.data:
            value = getattr(line, column)

            # a missing value (None) can't be lowered
            if ignorecase is True and value is not None:
                value = value.lower()

            if value in values:
                yield line

            else:
                logger.debug("Filtering: %s" % (str(line)))

    # a function to detect if crbanim species are in UID database or not
    def check_species(self, country):
        """Check if all species are defined in UID DictSpecies"""

        # CRBAnim usually have species in the form required for UID
        # However sometimes there could be a common name, not a DictSpecie one
        column = 'species_latin_name'
        item_set = self.items[column]

        # call FileDataSourceMixin.check_species
        return super().check_species(column, item_set, country)

    # check that dict sex table contains data
    def check_sex(self):
        """Check that the sex values of this file are in the DictSex table.

        Returns the result of ``check_items`` called on lowercase values.
        """

        column = 'sex'

        # NOTE(review): a missing sex is forwarded as None instead of failing
        # on None.lower(); confirm that check_items reports it as not found
        item_set = [
            item.lower() if item is not None else item
            for item in self.items[column]]

        # call FileDataSourceMixin.check_items
        return self.check_items(item_set, DictSex, column)


def fill_uid_breed(record, language):
    """Get or create the DictBreed described by a crbanim record.

    ``record`` provides ``species_latin_name``, ``country_of_origin`` (an
    ISO 3166 alpha-2 code) and ``breed_name``. ``language`` is used to search
    the species among synonyms when no DictSpecie has such label.

    Returns a DictBreed instance.

    Raises CRBAnimImportError if ``country_of_origin`` isn't a country code
    known to pycountry.
    """

    # get a DictSpecie object. Species are in latin names, but I can
    # find also a common name in translation tables
    try:
        specie = DictSpecie.objects.get(label=record.species_latin_name)

    except DictSpecie.DoesNotExist:
        logger.info("Search %s in synonyms" % (record.species_latin_name))
        # search for language synonym (if I arrived here a synonym should
        # exists)
        specie = DictSpecie.get_by_synonym(
            synonym=record.species_latin_name,
            language=language)

    # get country name using pycountry. Depending on pycountry version, an
    # unknown code returns None or raises KeyError: in both cases give a
    # meaningful error instead of failing while reading the country name
    try:
        country_entry = pycountry.countries.get(
            alpha_2=record.country_of_origin)

    except KeyError:
        country_entry = None

    if country_entry is None:
        raise CRBAnimImportError(
            "Unknown country_of_origin '%s' for breed '%s'" % (
                record.country_of_origin, record.breed_name))

    # get country for breeds. Ideally will be the same of submission,
    # however, it could be possible to store data from other contries
    country, created = DictCountry.objects.get_or_create(
        label=country_entry.name)

    # I could create a country here. That's ok, maybe I could have a lot
    # of breed from different countries and a few organizations
    # submitting them
    if created:
        logger.info("Created %s" % country)

    else:
        logger.debug("Found %s" % country)

    breed, created = DictBreed.objects.get_or_create(
        supplied_breed=record.breed_name,
        specie=specie,
        country=country)

    if created:
        logger.info("Created %s" % breed)

    else:
        logger.debug("Found %s" % breed)

    # return a DictBreed object
    return breed


def fill_uid_names(record, submission):
    """Get or create the Name objects for the animal and the sample of a
    crbanim record.

    Both names belong to ``submission`` and to its owner. When the record has
    a bibliographic reference, the matching Publication (searched by doi) is
    got or created and used for the sample name.

    Returns an ``(animal_name, sample_name)`` tuple of Name instances.
    """

    owner = submission.owner

    # the same record holds both animal and sample identifiers: start with
    # the animal one
    animal_name, is_new = Name.objects.get_or_create(
        name=record.animal_ID,
        submission=submission,
        owner=owner)

    template = "Created animal name %s" if is_new else "Found animal name %s"
    logger.debug(template % animal_name)

    # a publication is optional
    doi = record.sample_bibliographic_references
    publication = None

    if doi:
        publication, is_new = Publication.objects.get_or_create(doi=doi)

        if is_new:
            logger.debug("Created publication %s" % publication)

    # then the sample one, with its publication (if any)
    sample_name, is_new = Name.objects.get_or_create(
        name=record.sample_identifier,
        submission=submission,
        owner=owner,
        publication=publication)

    template = "Created sample name %s" if is_new else "Found sample name %s"
    logger.debug(template % sample_name)

    return animal_name, sample_name


def fill_uid_animal(record, animal_name, breed, submission, animals):
    """Create or update the Animal of a crbanim record.

    ``animals`` is a dictionary of the Animal objects already processed,
    indexed by name: an animal found in it is returned as it is, otherwise
    the Animal related to ``animal_name`` is created or updated and then
    added to ``animals``.

    Returns an Animal instance.
    """

    # HINT: does CRBAnim models mother and father?

    key = animal_name.name

    # the same animal could be in many records: process it only once
    if key in animals:
        logger.debug(
            "Ignoring %s: already created or updated" % (animal_name))

        return animals[key]

    # values go in defaults to avoid collisions when updating data
    # HINT: CRBanim has less attribute than cryoweb
    animal, is_new = Animal.objects.update_or_create(
        name=animal_name,
        defaults={
            # HINT: is a duplication of name. Can this be non-mandatory?
            'alternative_id': key,
            'breed': breed,
            # sex is searched ignoring case
            'sex': DictSex.objects.get(label__iexact=record.sex),
            'birth_date': record.animal_birth_date,
            # there's no birth_location for animal in CRBAnim
            'birth_location_accuracy': MISSING,
            'owner': submission.owner,
        })

    template = "Created animal %s" if is_new else "Updating animal %s"
    logger.debug(template % animal)

    # remember this animal, it's needed to relate samples
    animals[key] = animal

    return animal


def find_storage_type(record):
    """Determine the sample storage of a crbanim record.

    ``record.sample_storage_temperature`` is translated into a storage
    description, which is then converted with
    ``SAMPLE_STORAGE.get_value_by_desc``.

    Returns the converted value, or None (after logging a warning) if the
    temperature isn't in the mapping.
    """

    # NOTE(review): '-30°C' is mapped to the -20 degrees freezer; confirm
    # this is intended
    mapping = {
        '-196°C': 'frozen, liquid nitrogen',
        '-20°C': 'frozen, -20 degrees Celsius freezer',
        '-30°C': 'frozen, -20 degrees Celsius freezer',
        '-80°C': 'frozen, -80 degrees Celsius freezer'}

    temperature = record.sample_storage_temperature
    description = mapping.get(temperature)

    if description is None:
        # use the module logger like the rest of this file, not the root
        # logger
        logger.warning("Couldn't find %s in storage types mapping" % (
            temperature))

        return None

    # get ENUM conversion
    return SAMPLE_STORAGE.get_value_by_desc(description)


def _get_organism_part_label(record):
    """Return the lowercase organism part label of a crbanim record"""

    body_part_name = (record.body_part_name or "").lower()

    # sylvain has proposed to apply the following decision rule: use the
    # body part unless it is missing, unknown or not relevant
    if body_part_name not in ("", "unknown", "not relevant"):
        return body_part_name

    # otherwise rely on sample type
    if not record.sample_type_name:
        raise CRBAnimImportError(
            "Can't determine the organism part of sample '%s': no "
            "body_part_name nor sample_type_name" % (
                record.sample_identifier))

    return record.sample_type_name.lower()


def _get_organism_part(label):
    """Get or create the DictUberon object with such label"""

    organism_part, created = DictUberon.objects.get_or_create(
        label=label
    )

    if created:
        logger.info("Created uberon %s" % organism_part)

    else:
        logger.debug("Found uberon %s" % organism_part)

    return organism_part


def _get_age_at_collection(record):
    """Return animal age at collection and its time units, as given by
    image_timedelta"""

    # NOTE(review): parse_date is called on the raw record values as before;
    # confirm that both dates are always provided
    animal_birth_date = parse_date(record.animal_birth_date)
    sampling_date = parse_date(record.sampling_date)

    return image_timedelta(sampling_date, animal_birth_date)


def fill_uid_sample(record, sample_name, animal, submission):
    """Create or update the Sample of a crbanim record.

    The organism part is the (lowercase) body part name, or the sample type
    name when the body part is missing, "unknown" or "not relevant".

    ``sample_name`` is the Name object of this sample, ``animal`` the Animal
    it comes from and ``submission`` provides the owner.

    Returns a Sample instance.

    Raises CRBAnimImportError if the record has neither a usable body part
    name nor a sample type name.
    """

    # get a organism part. Organism parts need to be in lowercases
    organism_part = _get_organism_part(_get_organism_part_label(record))

    # calculate animal age at collection
    animal_age_at_collection, time_units = _get_age_at_collection(record)

    # create a new object. Using defaults to avoid collisions when
    # updating data
    defaults = {
        # HINT: is a duplication of name. Can this be non-mandatory?
        'alternative_id': sample_name.name,
        'collection_date': record.sampling_date,
        'protocol': record.sampling_protocol_url,
        'organism_part': organism_part,
        'animal': animal,
        'owner': submission.owner,
        'storage': find_storage_type(record),
        'availability': record.sample_availability,
        'animal_age_at_collection': animal_age_at_collection,
        'animal_age_at_collection_units': time_units
    }

    sample, created = Sample.objects.update_or_create(
        name=sample_name,
        defaults=defaults)

    if created:
        logger.debug("Created sample %s" % sample)

    else:
        logger.debug("Updating sample %s" % sample)

    return sample


def process_record(record, submission, animals, language):
    """Load a single crbanim record: breed, names, animal and sample.

    Records which already have an EBI BioSample identifier are skipped with
    a warning. ``animals`` is the dictionary of already processed animals
    shared between calls, ``language`` is used to resolve species synonyms.
    """

    # Peter mail 26/02/19 18:30: sameAs BioSamples will probably be needed
    # for the IMAGE project and the inject tool should be able to create
    # them, but such cases will be tackled after the main part of the tool
    # is working.
    # HINT: for the moment, records with a biosample id are ignored
    already_in_biosample = record.EBI_Biosample_identifier is not None

    if already_in_biosample:
        logger.warning("Ignoring %s: already in biosample!" % str(record))
        return None

    # breed first, then the two names
    breed = fill_uid_breed(record, language)
    animal_name, sample_name = fill_uid_names(record, submission)

    # the animal is needed by its sample
    fill_uid_sample(
        record,
        sample_name,
        fill_uid_animal(record, animal_name, breed, submission, animals),
        submission)


def upload_crbanim(submission):
    """Import the CRBAnim file uploaded with a submission.

    The file is read, its sex and species terms are checked, every record is
    processed and the animal and sample ValidationSummary of the submission
    are got or created and their counts reset.

    On success the submission status is set to LOADED; on any error
    (including a file which can't be read or parsed) it is set to ERROR with
    the error as message. In both cases the submission is saved and a
    message is sent.

    Returns True on success, False otherwise.
    """

    # debug
    logger.info("Importing from CRB-Anim file")

    # this is the full path in docker container
    fullpath = submission.get_uploaded_file_path()

    # start data loading
    try:
        # read submission data. This is done inside the try block since a
        # malformed file has to mark the submission as failed like any other
        # import error
        reader = CRBAnimReader()
        reader.read_file(fullpath)

        # check for species and sex in a similar way as cryoweb does
        check, not_found = reader.check_sex()

        if not check:
            message = (
                "Not all Sex terms are loaded into database: "
                "check for %s in your dataset" % (not_found))

            raise CRBAnimImportError(message)

        check, not_found = reader.check_species(submission.gene_bank_country)

        if not check:
            raise CRBAnimImportError(
                "Some species are not loaded in UID database: "
                "%s" % (not_found))

        # ok get languages from submission (useful for translation)
        # HINT: no traslations implemented, at the moment
        language = submission.gene_bank_country.label

        # a dictionary in which store animal data
        animals = {}

        for record in reader.data:
            process_record(record, submission, animals, language)

        # after processing records, initialize validationsummary objects
        # (animal first, then sample) and reset their counts
        for summary_type in ("animal", "sample"):
            summary, created = ValidationSummary.objects.get_or_create(
                submission=submission, type=summary_type)

            if created:
                logger.debug(
                    "ValidationSummary %s created for submission %s" % (
                        summary_type, submission))

            # reset counts
            summary.reset_all_count()

    except Exception as exc:
        # set message:
        message = "Error in importing data: %s" % (str(exc))

        # save a message in database
        submission.status = ERROR
        submission.message = message
        submission.save()

        # send async message
        send_message(submission)

        # debug
        logger.error("error in importing from crbanim: %s" % (exc))
        logger.exception(exc)

        return False

    else:
        message = "CRBAnim import completed for submission: %s" % (
            submission.id)

        submission.message = message
        submission.status = LOADED
        submission.save()

        # send async message
        send_message(
            submission,
            validation_message=construct_validation_message(submission))

    logger.info("Import from CRBAnim is complete")

    return True


1			#!/usr/bin/env python3
2			# -- coding: utf-8 --
3			"""
4			Created on Thu Feb 21 15:37:16 2019
5
6			@author: Paolo Cozzi <[email protected]>
7			"""
8
9			import io
10			import csv
11			import logging
12			import pycountry
13
14			from collections import defaultdict, namedtuple
15
16			from django.utils.dateparse import parse_date
17
18			from common.constants import LOADED, ERROR, MISSING, SAMPLE_STORAGE
19			from common.helpers import image_timedelta
20			from image_app.helpers import FileDataSourceMixin
21			from image_app.models import (
22			DictSpecie, DictSex, DictCountry, DictBreed, Name, Animal, Sample,
23			DictUberon, Publication)
24			from submissions.helpers import send_message
25			from validation.helpers import construct_validation_message
26			from validation.models import ValidationSummary
27
28			# Get an instance of a logger
29			logger = logging.getLogger(__name__)
30
31
32			# A class to deal with cryoweb import errors
33			class CRBAnimImportError(Exception):
34			pass
35
36
37			class CRBAnimReader(FileDataSourceMixin):
38			mandatory_columns = [
39			'sex',
40			'species_latin_name',
41			'country_of_origin',
42			'breed_name',
43			'animal_ID',
44			'sample_bibliographic_references',
45			'sample_identifier',
46			'animal_birth_date',
47			'sample_storage_temperature',
48			'sample_type_name',
49			'body_part_name',
50			'sampling_date',
51			'sampling_protocol_url',
52			'sample_availability',
53			'EBI_Biosample_identifier',
54			]
55
56			def __init__(self):
57			self.data = None
58			self.header = None
59			self.dialect = None
60			self.items = None
61			self.filename = None
62
63			@classmethod
64			def get_dialect(cls, chunk):
65			"""Determine dialect of a CSV from a chunk"""
66
67			return csv.Sniffer().sniff(chunk)
68
69			@classmethod
70			def is_valid(cls, chunk):
71			"""Try to determine if CRBanim has at least the required columns
72			or not"""
73
74			dialect = cls.get_dialect(chunk)
75
76			# get a handle from a string
77			handle = io.StringIO(chunk)
78
79			# read chunk
80			reader = csv.reader(handle, dialect)
81			header = next(reader)
82
83			not_found = []
84
85			for column in cls.mandatory_columns:
86			if column not in header:
87			not_found.append(column)
88
89			if len(not_found) == 0:
90			logger.debug("This seems to be a valid CRBanim file")
91			return True, []
92
93			else:
94			logger.error("Couldn't not find mandatory CRBanim columns %s" % (
95			not_found))
96			return False, not_found
97
98			def read_file(self, filename):
99			"""Read crb anim files and set tit to class attribute"""
100
101			with open(filename, newline='') as handle:
102			# initialize data
103			self.filename = filename
104			self.data = []
105
106			# get dialect
107			chunk = handle.read(2048)
108			self.dialect = self.get_dialect(chunk)
109
110			# restart filename from the beginning
111			handle.seek(0)
112
113			# read csv file
114			reader = csv.reader(handle, self.dialect)
115			self.header = next(reader)
116
117			# find sex index column
118			sex_idx = self.header.index('sex')
119
120			# create a namedtuple object
121			Data = namedtuple("Data", self.header)
122
123			# add records to data
124			for record in reader:
125			# replace all "\\N" occurences in a list
126			record = [None if col in ["\\N", ""]
127			else col for col in record]
128
129			# 'unknown' sex should be replaced with 'record of unknown sex'
130			if record[sex_idx].lower() == 'unknown':
131			logger.debug(
132			"Changing '%s' with '%s'" % (
133			record[sex_idx], 'record of unknown sex'))
134			record[sex_idx] = 'record of unknown sex'
135
136			record = Data._make(record)
137			self.data.append(record)
138
139			self.items = self.eval_columns()
140
141			def eval_columns(self):
142			"""define a set from column data"""
143
144			# target_columns = ['sex', 'species_latin_name', 'breed_name']
145			target_columns = self.header
146
147			items = defaultdict(list)
148
149			for line in self.data:
150			for column in target_columns:
151			idx = self.header.index(column)
152			items[column].append(line[idx])
153
154			# now get a set of object
155			for column in target_columns:
156			items[column] = set(items[column])
157
158			return items
159
160			def print_line(self, num):
161			"""print a record with its column names"""
162
163			for i, column in enumerate(self.header):
164			logger.debug("%s: %s" % (column, self.data[num][i]))
165
166			def filter_by_column_values(self, column, values, ignorecase=False):
167			if ignorecase is True:
168			# lower values
169			values = [value.lower() for value in values]
170
171			for line in self.data:
172			# search for case insensitive value (lower attrib in lower values)
173			if ignorecase is True:
174			if getattr(line, column).lower() in values:
175			yield line
176
177			else:
178			logger.debug("Filtering: %s" % (str(line)))
179
180			else:
181			if getattr(line, column) in values:
182			yield line
183
184			else:
185			logger.debug("Filtering: %s" % (str(line)))
186
187			# ignore case or not
188
189			# cicle for line
190
191			# a function to detect if crbanim species are in UID database or not
192			def check_species(self, country):
193			"""Check if all species are defined in UID DictSpecies"""
194
195			# CRBAnim usually have species in the form required for UID
196			# However sometimes there could be a common name, not a DictSpecie one
197			column = 'species_latin_name'
198			item_set = self.items[column]
199
200			# call FileDataSourceMixin.check_species
201			return super().check_species(column, item_set, country)
202
203			# check that dict sex table contains data
204			def check_sex(self):
205			"""check that dict sex table contains data"""
206
207			# item.sex are in uppercase
208			column = 'sex'
209			item_set = [item.lower() for item in self.items[column]]
210
211			# call FileDataSourceMixin.check_items
212			return self.check_items(item_set, DictSex, column)
213
214
215			def fill_uid_breed(record, language):
216			"""Fill DictBreed from a crbanim record"""
217
218			# get a DictSpecie object. Species are in latin names, but I can
219			# find also a common name in translation tables
220			try:
221			specie = DictSpecie.objects.get(label=record.species_latin_name)
222
223			except DictSpecie.DoesNotExist:
224			logger.info("Search %s in synonyms" % (record.species_latin_name))
225			# search for language synonym (if I arrived here a synonym should
226			# exists)
227			specie = DictSpecie.get_by_synonym(
228			synonym=record.species_latin_name,
229			language=language)
230
231			# get country name using pycountries
232			country_name = pycountry.countries.get(
233			alpha_2=record.country_of_origin).name
234
235			# get country for breeds. Ideally will be the same of submission,
236			# however, it could be possible to store data from other contries
237			country, created = DictCountry.objects.get_or_create(
238			label=country_name)
239
240			# I could create a country from a v_breed_specie instance. That's
241			# ok, maybe I could have a lot of breed from different countries and
242			# a few organizations submitting them
243			if created:
244			logger.info("Created %s" % country)
245
246			else:
247			logger.debug("Found %s" % country)
248
249			breed, created = DictBreed.objects.get_or_create(
250			supplied_breed=record.breed_name,
251			specie=specie,
252			country=country)
253
254			if created:
255			logger.info("Created %s" % breed)
256
257			else:
258			logger.debug("Found %s" % breed)
259
260			# return a DictBreed object
261			return breed
262
263
264			def fill_uid_names(record, submission):
265			"""fill Names table from crbanim record"""
266
267			# in the same record I have the sample identifier and animal identifier
268			# a name record for animal
269			animal_name, created = Name.objects.get_or_create(
270			name=record.animal_ID,
271			submission=submission,
272			owner=submission.owner)
273
274			if created:
275			logger.debug("Created animal name %s" % animal_name)
276
277			else:
278			logger.debug("Found animal name %s" % animal_name)
279
280			# get a publication (if present)
281			publication = None
282
283			if record.sample_bibliographic_references:
284			publication, created = Publication.objects.get_or_create(
285			doi=record.sample_bibliographic_references)
286
287			if created:
288			logger.debug("Created publication %s" % publication)
289
290			# name record for sample
291			sample_name, created = Name.objects.get_or_create(
292			name=record.sample_identifier,
293			submission=submission,
294			owner=submission.owner,
295			publication=publication)
296
297			if created:
298			logger.debug("Created sample name %s" % sample_name)
299
300			else:
301			logger.debug("Found sample name %s" % sample_name)
302
303			# returning 2 Name instances
304			return animal_name, sample_name
305
306
307			def fill_uid_animal(record, animal_name, breed, submission, animals):
308			"""Helper function to fill animal data in UID animal table"""
309
310			# HINT: does CRBAnim models mother and father?
311
312			# check if such animal is already beed updated
313			if animal_name.name in animals:
314			logger.debug(
315			"Ignoring %s: already created or updated" % (animal_name))
316
317			# return an animal object
318			animal = animals[animal_name.name]
319
320			else:
321			# determine sex. Check for values
322			sex = DictSex.objects.get(label__iexact=record.sex)
323
324			# there's no birth_location for animal in CRBAnim
325			accuracy = MISSING
326
327			# create a new object. Using defaults to avoid collisions when
328			# updating data
329			# HINT: CRBanim has less attribute than cryoweb
330			defaults = {
331			# HINT: is a duplication of name. Can this be non-mandatory?
332			'alternative_id': animal_name.name,
333			'breed': breed,
334			'sex': sex,
335			'birth_date': record.animal_birth_date,
336			'birth_location_accuracy': accuracy,
337			'owner': submission.owner
338			}
339
340			# HINT: I could have the same animal again and again. Should I update
341			# every times?
342			animal, created = Animal.objects.update_or_create(
343			name=animal_name,
344			defaults=defaults)
345
346			if created:
347			logger.debug("Created animal %s" % animal)
348
349			else:
350			logger.debug("Updating animal %s" % animal)
351
352			# track this animal in dictionary
353			animals[animal_name.name] = animal
354
355			# I need to track animal to relate the sample
356			return animal
357
358
359			def find_storage_type(record):
360			"""Determine a sample storage relying on a dictionary"""
361
362			mapping = {
363			'-196°C': 'frozen, liquid nitrogen',
364			'-20°C': 'frozen, -20 degrees Celsius freezer',
365			'-30°C': 'frozen, -20 degrees Celsius freezer',
366			'-80°C': 'frozen, -80 degrees Celsius freezer'}
367
368			if record.sample_storage_temperature in mapping:
369			# get ENUM conversion
370			storage = SAMPLE_STORAGE.get_value_by_desc(
371			mapping[record.sample_storage_temperature])
372
373			return storage
374
375			else:
376			logging.warning("Couldn't find %s in storage types mapping" % (
377			record.sample_storage_temperature))
378
379			return None
380
381
382			def fill_uid_sample(record, sample_name, animal, submission):
383			"""Helper function to fill animal data in UID sample table"""
384
385			# name and animal name come from parameters
386			organism_part_label = None
387			sample_type_name = record.sample_type_name.lower()
388			body_part_name = record.body_part_name.lower()
389
390			# sylvain has proposed to apply the following decision rule:
391			if body_part_name != "unknown" and body_part_name != "not relevant":
392			organism_part_label = body_part_name
393
394			else:
395			organism_part_label = sample_type_name
396
397			# get a organism part. Organism parts need to be in lowercases
398			organism_part, created = DictUberon.objects.get_or_create(
399			label=organism_part_label
400			)
401
402			if created:
403			logger.info("Created uberon %s" % organism_part)
404
405			else:
406			logger.debug("Found uberon %s" % organism_part)
407
408			# calculate animal age at collection
409			animal_birth_date = parse_date(record.animal_birth_date)
410			sampling_date = parse_date(record.sampling_date)
411			animal_age_at_collection, time_units = image_timedelta(
412			sampling_date, animal_birth_date)
413
414			# create a new object. Using defaults to avoid collisions when
415			# updating data
416			defaults = {
417			# HINT: is a duplication of name. Can this be non-mandatory?
418			'alternative_id': sample_name.name,
419			'collection_date': record.sampling_date,
420			'protocol': record.sampling_protocol_url,
421			'organism_part': organism_part,
422			'animal': animal,
423			# 'description': v_vessel.comment,
424			'owner': submission.owner,
425			'storage': find_storage_type(record),
426			'availability': record.sample_availability,
427			'animal_age_at_collection': animal_age_at_collection,
428			'animal_age_at_collection_units': time_units
429			}
430
431			sample, created = Sample.objects.update_or_create(
432			name=sample_name,
433			defaults=defaults)
434
435			if created:
436			logger.debug("Created sample %s" % sample)
437
438			else:
439			logger.debug("Updating sample %s" % sample)
440
441			return sample
442
443
444			def process_record(record, submission, animals, language):
445			# Peter mail 26/02/19 18:30: I agree that it sounds like we will
446			# need to create sameAs BioSamples for the IMAGE project, and it makes
447			# sense that the inject tool is able to do this. It may be that we
448			# tackle these cases after getting the main part of the inject tool
449			# functioning and hold or ignore these existing BioSamples for now.
450			# HINT: record with a biosample id should be ignored, for the moment
451			if record.EBI_Biosample_identifier is not None:
452			logger.warning("Ignoring %s: already in biosample!" % str(record))
453			return
454
455			# filling breeds
456			breed = fill_uid_breed(record, language)
457
458			# filling name tables
459			animal_name, sample_name = fill_uid_names(record, submission)
460
461			# fill animal
462			animal = fill_uid_animal(record, animal_name, breed, submission, animals)
463
464			# fill sample
465			fill_uid_sample(record, sample_name, animal, submission)
466
467
468			def upload_crbanim(submission):
469			# debug
470			logger.info("Importing from CRB-Anim file")
471
472			# this is the full path in docker container
473			fullpath = submission.get_uploaded_file_path()
474
475			# read submission data
476			reader = CRBAnimReader()
477			reader.read_file(fullpath)
478
479			# start data loading
480			try:
481			# check for species and sex in a similar way as cryoweb does
482			check, not_found = reader.check_sex()
483
484			if not check:
485			message = (
486			"Not all Sex terms are loaded into database: "
487			"check for %s in your dataset" % (not_found))
488
489			raise CRBAnimImportError(message)
490
491			check, not_found = reader.check_species(submission.gene_bank_country)
492
493			if not check:
494			raise CRBAnimImportError(
495			"Some species are not loaded in UID database: "
496			"%s" % (not_found))
497
498			# ok get languages from submission (useful for translation)
499			# HINT: no traslations implemented, at the moment
500			language = submission.gene_bank_country.label
501
502			# a dictionary in which store animal data
503			animals = {}
504
505			for record in reader.data:
506			process_record(record, submission, animals, language)
507
508			# after processing records, initilize validationsummary objects
509			# create a validation summary object and set all_count
510			vs_animal, created = ValidationSummary.objects.get_or_create(
511			submission=submission, type="animal")
512
513			if created:
514			logger.debug(
515			"ValidationSummary animal created for "
516			"submission %s" % submission)
517
518			# reset counts
519			vs_animal.reset_all_count()
520
521			vs_sample, created = ValidationSummary.objects.get_or_create(
522			submission=submission, type="sample")
523
524			if created:
525			logger.debug(
526			"ValidationSummary sample created for "
527			"submission %s" % submission)
528
529			# reset counts
530			vs_sample.reset_all_count()
531
532			except Exception as exc:
533			# set message:
534			message = "Error in importing data: %s" % (str(exc))
535
536			# save a message in database
537			submission.status = ERROR
538			submission.message = message
539			submission.save()
540
541			# send async message
542			send_message(submission)
543
544			# debug
545			logger.error("error in importing from crbanim: %s" % (exc))
546			logger.exception(exc)
547
548			return False
549
550			else:
551			message = "CRBAnim import completed for submission: %s" % (
552			submission.id)
553
554			submission.message = message
555			submission.status = LOADED
556			submission.save()
557
558			# send async message
559			send_message(
560			submission,
561			validation_message=construct_validation_message(submission))
562
563			logger.info("Import from CRBAnim is complete")
564
565			return True
566

cnr-ibba / IMAGE-InjectTool

Pull Request — master (#40)

crbanim.helpers.fill_uid_sample() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like