crbanim.helpers - Code Metrics - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

crbanim.helpers B
last analyzed 2023-06-12 09:55 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	524
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	46
eloc	276
dl	0
loc	524
rs	8.72
c	0
b	0
f	0

10 Methods

Rating	Name	Size	Complexity
A	CRBAnimReader.print_line()	5	2
A	CRBAnimReader.is_valid()	28	4
A	CRBAnimReader.get_dialect()	5	1
A	CRBAnimReader.check_sex()	9	1
B	CRBAnimReader.read_file()	42	5
A	CRBAnimReader.check_species()	10	1
A	CRBAnimReader.__init__()	6	1
A	CRBAnimReader.eval_columns()	18	4
B	CRBAnimReader.filter_by_column_values()	20	6
A	CRBAnimReader.check_countries()	12	1

8 Functions

Rating	Name	Size	Complexity
A	fill_uid_breed()	25	1
A	check_UID()	27	4
A	find_storage_type()	21	2
A	fill_uid_animal()	46	2
A	process_record()	19	2
B	upload_crbanim()	78	4
A	sanitize_url()	4	1
B	fill_uid_sample()	60	4

How to fix Complexity

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 21 15:37:16 2019

@author: Paolo Cozzi <[email protected]>
"""

import io
import csv
import urllib
import logging
import pycountry

from collections import defaultdict, namedtuple

from django.utils.dateparse import parse_date

from common.constants import LOADED, ERROR, MISSING, SAMPLE_STORAGE
from common.helpers import image_timedelta
from uid.helpers import (
    FileDataSourceMixin, get_or_create_obj, update_or_create_obj)
from uid.models import (
    DictSpecie, DictSex, DictCountry, DictBreed, Animal, Sample,
    DictUberon, Publication)
from submissions.helpers import send_message
from validation.helpers import construct_validation_message
from validation.models import ValidationSummary

# Get an instance of a logger
logger = logging.getLogger(__name__)


# A class to deal with CRBAnim import errors
class CRBAnimImportError(Exception):
    """Raised when CRBAnim data can't be imported into the UID"""


class CRBAnimReader(FileDataSourceMixin):
    """Read a CRBAnim CSV file and check its content against UID tables.

    After :meth:`read_file` has been called the instance exposes:

    * ``filename``: the path that was read
    * ``dialect``: the CSV dialect sniffed from the beginning of the file
    * ``header``: the list of column names (first row of the file)
    * ``data``: a list of ``Data`` namedtuples, one for each data row
    * ``items``: for each column, the set of distinct values found
    """

    # columns that need to be in the header of a CRBAnim file (see is_valid)
    mandatory_columns = [
        'sex',
        'species_latin_name',
        'country_of_origin',
        'breed_name',
        'animal_ID',
        'sample_bibliographic_references',
        'sample_identifier',
        'animal_birth_date',
        'sample_storage_temperature',
        'sample_type_name',
        'body_part_name',
        'sampling_date',
        'sampling_protocol_url',
        'sample_availability',
        'EBI_Biosample_identifier',
    ]

    # cell contents which are read as a missing value (None)
    _missing_values = ("\\N", "")

    def __init__(self):
        self.data = None
        self.header = None
        self.dialect = None
        self.items = None
        self.filename = None

    @classmethod
    def get_dialect(cls, chunk):
        """Determine dialect of a CSV from a chunk"""

        return csv.Sniffer().sniff(chunk)

    @classmethod
    def is_valid(cls, chunk):
        """Try to determine if CRBanim has at least the required columns
        or not

        Args:
            chunk (str): the beginning of a CRBAnim file (header included)

        Returns:
            tuple: ``(True, [])`` when every mandatory column is in the
            header, ``(False, not_found)`` otherwise, where ``not_found``
            is the list of the missing mandatory columns
        """

        dialect = cls.get_dialect(chunk)

        # read the header from the chunk, using a handle from a string
        reader = csv.reader(io.StringIO(chunk), dialect)
        header = next(reader)

        not_found = [
            column for column in cls.mandatory_columns
            if column not in header]

        if not not_found:
            logger.debug("This seems to be a valid CRBanim file")
            return True, []

        logger.error(
            "Couldn't not find mandatory CRBanim columns %s", not_found)
        return False, not_found

    def read_file(self, filename):
        """Read a CRBAnim file and set its content as instance attributes

        Cells equal to ``\\N`` or to the empty string are stored as None;
        a sex equal to 'unknown' (case insensitive) is stored as
        'record of unknown sex'. Blank lines are skipped.

        Args:
            filename (str): the path of the CSV file to read
        """

        with open(filename, newline='') as handle:
            # initialize data
            self.filename = filename
            self.data = []

            # get dialect from the beginning of the file
            chunk = handle.read(2048)
            self.dialect = self.get_dialect(chunk)

            # restart reading from the beginning
            handle.seek(0)

            # read csv file
            reader = csv.reader(handle, self.dialect)
            self.header = next(reader)

            # find sex index column
            sex_idx = self.header.index('sex')

            # create a namedtuple object
            Data = namedtuple("Data", self.header)

            # add records to data
            for record in reader:
                # csv.reader returns an empty list for a blank line:
                # there's nothing to import from it
                if not record:
                    continue

                # replace all missing values with None
                record = [
                    None if col in self._missing_values else col
                    for col in record]

                # 'unknown' sex should be replaced with 'record of unknown
                # sex'. A missing sex is None here, so check it before
                # calling a string method
                sex = record[sex_idx]

                if sex is not None and sex.lower() == 'unknown':
                    logger.debug(
                        "Changing '%s' with '%s'",
                        sex, 'record of unknown sex')
                    record[sex_idx] = 'record of unknown sex'

                self.data.append(Data._make(record))

        self.items = self.eval_columns()

    def eval_columns(self):
        """Collect the distinct values of each column

        Returns:
            collections.defaultdict: column name -> set of the values
            found for such column in ``self.data``
        """

        items = defaultdict(list)

        for column in self.header:
            # resolve column position once, not for every line
            idx = self.header.index(column)
            items[column] = {line[idx] for line in self.data}

        return items

    def print_line(self, num):
        """print a record with its column names"""

        for i, column in enumerate(self.header):
            logger.debug("%s: %s", column, self.data[num][i])

    def filter_by_column_values(self, column, values, ignorecase=False):
        """Yield the records having one of ``values`` in ``column``

        Args:
            column (str): the name of the column to inspect
            values (iterable): the accepted values
            ignorecase (bool): compare values ignoring case. In this case
                a missing (None) attribute never matches

        Yields:
            Data: the matching records. Discarded ones are logged
        """

        if ignorecase is True:
            # lower values
            values = [value.lower() for value in values]

        for line in self.data:
            attribute = getattr(line, column)

            # search for case insensitive value (lower attrib in lower
            # values). Missing values have no lower(): leave them as they
            # are, they can't be in lowered values
            if ignorecase is True and attribute is not None:
                attribute = attribute.lower()

            if attribute in values:
                yield line

            else:
                logger.debug("Filtering: %s", line)

    # a function to detect if crbanim species are in UID database or not
    def check_species(self, country):
        """Check if all species are defined in UID DictSpecies"""

        # CRBAnim usually have species in the form required for UID
        # However sometimes there could be a common name, not a DictSpecie one
        column = 'species_latin_name'
        item_set = self.items[column]

        # call FileDataSourceMixin.check_species
        return super().check_species(column, item_set, country)

    # check that dict sex table contains data
    def check_sex(self):
        """check that dict sex table contains data

        Returns:
            the result of ``check_items``, which is unpacked as a
            ``(check, not_found)`` pair by :func:`check_UID`
        """

        # sex values are searched in lowercase. A missing sex (None) is
        # forwarded as it is instead of failing on lower()
        # NOTE(review): confirm that check_items reports None as not found
        column = 'sex'
        item_set = [
            item.lower() if item is not None else None
            for item in self.items[column]]

        # call FileDataSourceMixin.check_items
        return self.check_items(item_set, DictSex, column)

    def check_countries(self):
        """Check that all CRBAnim countries are present in database

        Country codes are translated into country names using pycountry.
        A code that pycountry can't resolve is checked as it is, instead
        of failing while reading the name of a missing country.

        Returns:
            the result of ``check_items``, which is unpacked as a
            ``(check, not_found)`` pair by :func:`check_UID`
        """

        def get_label(country_of_origin):
            # a missing value can't be translated
            # NOTE(review): confirm that check_items reports None as
            # not found
            if country_of_origin is None:
                return None

            try:
                country = pycountry.countries.get(
                    alpha_2=country_of_origin)

            except LookupError:
                # some pycountry versions raise for unknown codes
                country = None

            if country is None:
                # presumably the raw code is not a DictCountry label, so
                # check_items will list it among not found items
                return country_of_origin

            return country.name

        column = "country_of_origin"
        item_set = [get_label(item) for item in self.items[column]]

        # call FileDataSourceMixin.check_items
        return self.check_items(item_set, DictCountry, column)


def fill_uid_breed(record, language):
    """Get or create the DictBreed described by a crbanim record

    Args:
        record: a CRBAnim record (species_latin_name, country_of_origin
            and breed_name attributes are read)
        language: the language used while searching species synonyms

    Returns:
        the object returned by get_or_create_obj for DictBreed
    """

    # species are in latin names, however a common name could be found
    # in translation tables
    specie = DictSpecie.get_specie_check_synonyms(
        species_label=record.species_latin_name,
        language=language)

    # translate the two letters country code in a name with pycountry
    origin = pycountry.countries.get(alpha_2=record.country_of_origin)

    # the breed country is ideally the same of the submission, however
    # data from other countries could be stored
    country = DictCountry.objects.get(label=origin.name)

    return get_or_create_obj(
        DictBreed,
        supplied_breed=record.breed_name,
        specie=specie,
        country=country)


def fill_uid_animal(record, breed, submission, animals):
    """Create or update the UID animal described by a crbanim record

    Args:
        record: a CRBAnim record (animal_ID, sex and animal_birth_date
            attributes are read)
        breed: the breed of the animal
        submission: the submission which the animal belongs to
        animals (dict): animal_ID -> animal object for the animals already
            processed. It's updated in place

    Returns:
        the animal object, required to relate samples to it
    """

    # HINT: does CRBAnim models mother and father?

    animal_id = record.animal_ID

    # the same animal could be found many times (once for each sample):
    # by tracking it in the dictionary, it's written only once
    if animal_id in animals:
        logger.debug(
            "Ignoring %s: already created or updated", animal_id)

        return animals[animal_id]

    # determine sex, ignoring case
    sex = DictSex.objects.get(label__iexact=record.sex)

    # attributes are passed as defaults to avoid collisions when updating
    # data. HINT: CRBanim has less attribute than cryoweb
    attributes = {
        # HINT: is a duplication of name. Can this be non-mandatory?
        'alternative_id': animal_id,
        'sex': sex,
        'birth_date': record.animal_birth_date,
        # there's no birth_location for animal in CRBAnim
        'birth_location_accuracy': MISSING,
    }

    animal = update_or_create_obj(
        Animal,
        name=animal_id,
        breed=breed,
        owner=submission.owner,
        submission=submission,
        defaults=attributes)

    # remember this animal for the next records
    animals[animal_id] = animal

    return animal


def find_storage_type(record):
    """Determine a sample storage relying on a dictionary

    Args:
        record: a CRBAnim record (sample_storage_temperature is read)

    Returns:
        the SAMPLE_STORAGE value matching the storage temperature, or None
        (after logging a warning) if temperature is not in the mapping
    """

    # storage temperature -> SAMPLE_STORAGE description
    mapping = {
        '-196°C': 'frozen, liquid nitrogen',
        '-20°C': 'frozen, -20 degrees Celsius freezer',
        '-30°C': 'frozen, -20 degrees Celsius freezer',
        '-80°C': 'frozen, -80 degrees Celsius freezer'}

    description = mapping.get(record.sample_storage_temperature)

    if description is None:
        # use the module logger (not the root one) like the rest of
        # this module
        logger.warning(
            "Couldn't find %s in storage types mapping",
            record.sample_storage_temperature)

        return None

    # get ENUM conversion
    return SAMPLE_STORAGE.get_value_by_desc(description)


def sanitize_url(url):
    """Quote URLs for accession

    The characters ``:/#?=`` (in addition to the ones never quoted by
    urllib) are left as they are.

    Args:
        url (str): the URL to quote, or None for a missing value

    Returns:
        str: the quoted URL, or None if url is None
    """

    # 'import urllib' alone doesn't ensure that the urllib.parse submodule
    # is loaded: import what is needed explicitly
    from urllib.parse import quote

    # missing values are read as None: there's nothing to quote
    if url is None:
        return None

    return quote(url, ':/#?=')


def fill_uid_sample(record, animal, submission):
    """Helper function to fill sample data in UID sample table

    Args:
        record: a CRBAnim record
        animal: the animal which the sample was collected from
        submission: the submission which the sample belongs to

    Returns:
        the object returned by update_or_create_obj for Sample

    Raises:
        CRBAnimImportError: if both body_part_name and sample_type_name
            are missing, since no organism part can be determined
    """

    # Organism parts need to be in lowercases. Missing values are None:
    # treat them like empty strings
    body_part_name = (record.body_part_name or "").lower()
    sample_type_name = (record.sample_type_name or "").lower()

    # sylvain has proposed to apply the following decision rule: use the
    # body part when is meaningful, the sample type otherwise
    if body_part_name not in ("", "unknown", "not relevant"):
        organism_part_label = body_part_name

    else:
        organism_part_label = sample_type_name

    if not organism_part_label:
        raise CRBAnimImportError(
            "Can't determine the organism part of sample '%s': both "
            "body_part_name and sample_type_name are missing" % (
                record.sample_identifier))

    # get a organism part
    organism_part = get_or_create_obj(
        DictUberon,
        label=organism_part_label
    )

    # calculate animal age at collection. parse_date can't deal with a
    # missing date (None), while it returns None for a badly formatted
    # string: so image_timedelta is given None in both cases
    # NOTE(review): confirm that image_timedelta deals with None dates
    animal_birth_date = None
    sampling_date = None

    if record.animal_birth_date is not None:
        animal_birth_date = parse_date(record.animal_birth_date)

    if record.sampling_date is not None:
        sampling_date = parse_date(record.sampling_date)

    animal_age_at_collection, time_units = image_timedelta(
        sampling_date, animal_birth_date)

    # get a publication (if present)
    publication = None

    if record.sample_bibliographic_references:
        publication = get_or_create_obj(
            Publication,
            doi=record.sample_bibliographic_references)

    # a missing availability can't be quoted
    availability = record.sample_availability

    if availability is not None:
        availability = sanitize_url(availability)

    # create a new object. Using defaults to avoid collisions when
    # updating data
    defaults = {
        # HINT: is a duplication of name. Can this be non-mandatory?
        'alternative_id': record.sample_identifier,
        'collection_date': record.sampling_date,
        'protocol': record.sampling_protocol_url,
        'organism_part': organism_part,
        'storage': find_storage_type(record),
        'availability': availability,
        'animal_age_at_collection': animal_age_at_collection,
        'animal_age_at_collection_units': time_units,
        'publication': publication,
    }

    sample = update_or_create_obj(
        Sample,
        name=record.sample_identifier,
        animal=animal,
        owner=submission.owner,
        submission=submission,
        defaults=defaults)

    return sample


def process_record(record, submission, animals, language):
    """Import breed, animal and sample from a single CRBAnim record

    Records with an EBI_Biosample_identifier are logged and ignored.
    Peter mail 26/02/19 18:30: sameAs BioSamples will probably be needed
    for the IMAGE project and the inject tool should be able to create
    them; this could be tackled after getting the main part of the inject
    tool functioning, holding or ignoring existing BioSamples for now.

    Args:
        record: a CRBAnim record
        submission: the submission which data belong to
        animals (dict): the animals already processed (updated in place)
        language: the language used while searching species synonyms
    """

    # HINT: record with a biosample id should be ignored, for the moment
    already_in_biosample = record.EBI_Biosample_identifier is not None

    if already_in_biosample:
        logger.warning("Ignoring %s: already in biosample!", str(record))
        return None

    # breed is required by animal, which is required by sample
    breed = fill_uid_breed(record, language)
    animal = fill_uid_animal(record, breed, submission, animals)
    fill_uid_sample(record, animal, submission)

    return None


def check_UID(submission, reader):
    """Check that sex, countries and species of a file are in UID

    Checks are done in a similar way as cryoweb does, in the order
    sex, countries, species: the first failing check stops the others.

    Args:
        submission: its gene_bank_country is given to the species check
        reader: an object providing check_sex(), check_countries() and
            check_species(country), each one returning a
            ``(check, not_found)`` pair

    Raises:
        CRBAnimImportError: when a check fails. The message lists the
            terms which were not found
    """

    # each check is called only when its turn comes
    checks = (
        (lambda: reader.check_sex(),
         "Not all Sex terms are loaded into database: "
         "check for '%s' in your dataset"),
        (lambda: reader.check_countries(),
         "Not all countries are loaded into database: "
         "check for '%s' in your dataset"),
        (lambda: reader.check_species(submission.gene_bank_country),
         "Some species are not loaded in UID database: "
         "check for '%s' in your dataset"),
    )

    for do_check, template in checks:
        passed, not_found = do_check()

        if not passed:
            raise CRBAnimImportError(template % (not_found))


def upload_crbanim(submission):
    """Import the CRBAnim file of a submission into UID

    The uploaded file is read and checked against UID dictionary tables,
    then each record is processed and validation summary counts are
    reset. The submission status and message are updated and a message is
    sent in both cases (success or failure).

    Args:
        submission: the submission whose uploaded file has to be imported

    Returns:
        bool: True if data were imported, False if an error occurred
        (file reading errors included)
    """

    # debug
    logger.info("Importing from CRB-Anim file")

    # this is the full path in docker container
    fullpath = submission.get_uploaded_file_path()

    # start data loading
    try:
        # read submission data. This is done inside the try block since
        # a file that can't be parsed is an import error like the others:
        # it needs to be notified through the submission
        reader = CRBAnimReader()
        reader.read_file(fullpath)

        # check UID data like cryoweb does
        check_UID(submission, reader)

        # ok get languages from submission (useful for translation)
        # HINT: no translations implemented, at the moment
        language = submission.gene_bank_country.label

        # a dictionary in which store animal data
        animals = {}

        for record in reader.data:
            process_record(record, submission, animals, language)

        # after processing records, initialize validationsummary objects
        # (animal first, then sample) and reset their counts
        for summary_type in ("animal", "sample"):
            summary = get_or_create_obj(
                ValidationSummary,
                submission=submission,
                type=summary_type)

            summary.reset_all_count()

    except Exception as exc:
        # set message:
        message = "Error in importing data: %s" % (str(exc))

        # save a message in database
        submission.status = ERROR
        submission.message = message
        submission.save()

        # send async message
        send_message(submission)

        # debug
        logger.error("error in importing from crbanim: %s" % (exc))
        logger.exception(exc)

        return False

    else:
        message = "CRBAnim import completed for submission: %s" % (
            submission.id)

        submission.message = message
        submission.status = LOADED
        submission.save()

        # send async message
        send_message(
            submission,
            validation_message=construct_validation_message(submission))

    logger.info("Import from CRBAnim is complete")

    return True


1			#!/usr/bin/env python3
2			# -- coding: utf-8 --
3			"""
4			Created on Thu Feb 21 15:37:16 2019
5
6			@author: Paolo Cozzi <[email protected]>
7			"""
8
9			import io
10			import csv
11			import urllib
12			import logging
13			import pycountry
14
15			from collections import defaultdict, namedtuple
16
17			from django.utils.dateparse import parse_date
18
19			from common.constants import LOADED, ERROR, MISSING, SAMPLE_STORAGE
20			from common.helpers import image_timedelta
21			from uid.helpers import (
22			FileDataSourceMixin, get_or_create_obj, update_or_create_obj)
23			from uid.models import (
24			DictSpecie, DictSex, DictCountry, DictBreed, Animal, Sample,
25			DictUberon, Publication)
26			from submissions.helpers import send_message
27			from validation.helpers import construct_validation_message
28			from validation.models import ValidationSummary
29
30			# Get an instance of a logger
31			logger = logging.getLogger(__name__)
32
33
34			# A class to deal with cryoweb import errors
35			class CRBAnimImportError(Exception):
36			pass
37
38
39			class CRBAnimReader(FileDataSourceMixin):
40			mandatory_columns = [
41			'sex',
42			'species_latin_name',
43			'country_of_origin',
44			'breed_name',
45			'animal_ID',
46			'sample_bibliographic_references',
47			'sample_identifier',
48			'animal_birth_date',
49			'sample_storage_temperature',
50			'sample_type_name',
51			'body_part_name',
52			'sampling_date',
53			'sampling_protocol_url',
54			'sample_availability',
55			'EBI_Biosample_identifier',
56			]
57
58			def __init__(self):
59			self.data = None
60			self.header = None
61			self.dialect = None
62			self.items = None
63			self.filename = None
64
65			@classmethod
66			def get_dialect(cls, chunk):
67			"""Determine dialect of a CSV from a chunk"""
68
69			return csv.Sniffer().sniff(chunk)
70
71			@classmethod
72			def is_valid(cls, chunk):
73			"""Try to determine if CRBanim has at least the required columns
74			or not"""
75
76			dialect = cls.get_dialect(chunk)
77
78			# get a handle from a string
79			handle = io.StringIO(chunk)
80
81			# read chunk
82			reader = csv.reader(handle, dialect)
83			header = next(reader)
84
85			not_found = []
86
87			for column in cls.mandatory_columns:
88			if column not in header:
89			not_found.append(column)
90
91			if len(not_found) == 0:
92			logger.debug("This seems to be a valid CRBanim file")
93			return True, []
94
95			else:
96			logger.error("Couldn't not find mandatory CRBanim columns %s" % (
97			not_found))
98			return False, not_found
99
100			def read_file(self, filename):
101			"""Read crb anim files and set tit to class attribute"""
102
103			with open(filename, newline='') as handle:
104			# initialize data
105			self.filename = filename
106			self.data = []
107
108			# get dialect
109			chunk = handle.read(2048)
110			self.dialect = self.get_dialect(chunk)
111
112			# restart filename from the beginning
113			handle.seek(0)
114
115			# read csv file
116			reader = csv.reader(handle, self.dialect)
117			self.header = next(reader)
118
119			# find sex index column
120			sex_idx = self.header.index('sex')
121
122			# create a namedtuple object
123			Data = namedtuple("Data", self.header)
124
125			# add records to data
126			for record in reader:
127			# replace all "\\N" occurences in a list
128			record = [None if col in ["\\N", ""]
129			else col for col in record]
130
131			# 'unknown' sex should be replaced with 'record of unknown sex'
132			if record[sex_idx].lower() == 'unknown':
133			logger.debug(
134			"Changing '%s' with '%s'" % (
135			record[sex_idx], 'record of unknown sex'))
136			record[sex_idx] = 'record of unknown sex'
137
138			record = Data._make(record)
139			self.data.append(record)
140
141			self.items = self.eval_columns()
142
143			def eval_columns(self):
144			"""define a set from column data"""
145
146			# target_columns = ['sex', 'species_latin_name', 'breed_name']
147			target_columns = self.header
148
149			items = defaultdict(list)
150
151			for line in self.data:
152			for column in target_columns:
153			idx = self.header.index(column)
154			items[column].append(line[idx])
155
156			# now get a set of object
157			for column in target_columns:
158			items[column] = set(items[column])
159
160			return items
161
162			def print_line(self, num):
163			"""print a record with its column names"""
164
165			for i, column in enumerate(self.header):
166			logger.debug("%s: %s" % (column, self.data[num][i]))
167
168			def filter_by_column_values(self, column, values, ignorecase=False):
169			if ignorecase is True:
170			# lower values
171			values = [value.lower() for value in values]
172
173			for line in self.data:
174			# search for case insensitive value (lower attrib in lower values)
175			if ignorecase is True:
176			if getattr(line, column).lower() in values:
177			yield line
178
179			else:
180			logger.debug("Filtering: %s" % (str(line)))
181
182			else:
183			if getattr(line, column) in values:
184			yield line
185
186			else:
187			logger.debug("Filtering: %s" % (str(line)))
188
189			# ignore case or not
190
191			# cicle for line
192
193			# a function to detect if crbanim species are in UID database or not
194			def check_species(self, country):
195			"""Check if all species are defined in UID DictSpecies"""
196
197			# CRBAnim usually have species in the form required for UID
198			# However sometimes there could be a common name, not a DictSpecie one
199			column = 'species_latin_name'
200			item_set = self.items[column]
201
202			# call FileDataSourceMixin.check_species
203			return super().check_species(column, item_set, country)
204
205			# check that dict sex table contains data
206			def check_sex(self):
207			"""check that dict sex table contains data"""
208
209			# item.sex are in uppercase
210			column = 'sex'
211			item_set = [item.lower() for item in self.items[column]]
212
213			# call FileDataSourceMixin.check_items
214			return self.check_items(item_set, DictSex, column)
215
216			def check_countries(self):
217			"""Check that all efabis countries are present in database"""
218
219			def get_label(country_of_origin):
220			return pycountry.countries.get(
221			alpha_2=country_of_origin).name
222
223			column = "country_of_origin"
224			item_set = [get_label(item) for item in self.items[column]]
225
226			# call FileDataSourceMixin.check_items
227			return self.check_items(item_set, DictCountry, column)
228
229
230			def fill_uid_breed(record, language):
231			"""Fill DictBreed from a crbanim record"""
232
233			# get a DictSpecie object. Species are in latin names, but I can
234			# find also a common name in translation tables
235			specie = DictSpecie.get_specie_check_synonyms(
236			species_label=record.species_latin_name,
237			language=language)
238
239			# get country name using pycountries
240			country_name = pycountry.countries.get(
241			alpha_2=record.country_of_origin).name
242
243			# get country for breeds. Ideally will be the same of submission,
244			# however, it could be possible to store data from other contries
245			country = DictCountry.objects.get(label=country_name)
246
247			breed = get_or_create_obj(
248			DictBreed,
249			supplied_breed=record.breed_name,
250			specie=specie,
251			country=country)
252
253			# return a DictBreed object
254			return breed
255
256
257			def fill_uid_animal(record, breed, submission, animals):
258			"""Helper function to fill animal data in UID animal table"""
259
260			# HINT: does CRBAnim models mother and father?
261
262			# check if such animal is already beed updated
263			if record.animal_ID in animals:
264			logger.debug(
265			"Ignoring %s: already created or updated" % (record.animal_ID))
266
267			# return an animal object
268			animal = animals[record.animal_ID]
269
270			else:
271			# determine sex. Check for values
272			sex = DictSex.objects.get(label__iexact=record.sex)
273
274			# there's no birth_location for animal in CRBAnim
275			accuracy = MISSING
276
277			# create a new object. Using defaults to avoid collisions when
278			# updating data
279			# HINT: CRBanim has less attribute than cryoweb
280			defaults = {
281			# HINT: is a duplication of name. Can this be non-mandatory?
282			'alternative_id': record.animal_ID,
283			'sex': sex,
284			'birth_date': record.animal_birth_date,
285			'birth_location_accuracy': accuracy,
286			}
287
288			# I could have the same animal again and again. by tracking it in a
289			# dictionary, I will change animal once
290			animal = update_or_create_obj(
291			Animal,
292			name=record.animal_ID,
293			breed=breed,
294			owner=submission.owner,
295			submission=submission,
296			defaults=defaults)
297
298			# track this animal in dictionary
299			animals[record.animal_ID] = animal
300
301			# I need to track animal to relate the sample
302			return animal
303
304
305			def find_storage_type(record):
306			"""Determine a sample storage relying on a dictionary"""
307
308			mapping = {
309			'-196°C': 'frozen, liquid nitrogen',
310			'-20°C': 'frozen, -20 degrees Celsius freezer',
311			'-30°C': 'frozen, -20 degrees Celsius freezer',
312			'-80°C': 'frozen, -80 degrees Celsius freezer'}
313
314			if record.sample_storage_temperature in mapping:
315			# get ENUM conversion
316			storage = SAMPLE_STORAGE.get_value_by_desc(
317			mapping[record.sample_storage_temperature])
318
319			return storage
320
321			else:
322			logging.warning("Couldn't find %s in storage types mapping" % (
323			record.sample_storage_temperature))
324
325			return None
326
327
328			def sanitize_url(url):
329			"""Quote URLs for accession"""
330
331			return urllib.parse.quote(url, ':/#?=')
332
333
334			def fill_uid_sample(record, animal, submission):
335			"""Helper function to fill animal data in UID sample table"""
336
337			# name and animal name come from parameters
338			organism_part_label = None
339			sample_type_name = record.sample_type_name.lower()
340			body_part_name = record.body_part_name.lower()
341
342			# sylvain has proposed to apply the following decision rule:
343			if body_part_name != "unknown" and body_part_name != "not relevant":
344			organism_part_label = body_part_name
345
346			else:
347			organism_part_label = sample_type_name
348
349			# get a organism part. Organism parts need to be in lowercases
350			organism_part = get_or_create_obj(
351			DictUberon,
352			label=organism_part_label
353			)
354
355			# calculate animal age at collection
356			animal_birth_date = parse_date(record.animal_birth_date)
357			sampling_date = parse_date(record.sampling_date)
358			animal_age_at_collection, time_units = image_timedelta(
359			sampling_date, animal_birth_date)
360
361			# get a publication (if present)
362			publication = None
363
364			if record.sample_bibliographic_references:
365			publication = get_or_create_obj(
366			Publication,
367			doi=record.sample_bibliographic_references)
368
369			# create a new object. Using defaults to avoid collisions when
370			# updating data
371			defaults = {
372			# HINT: is a duplication of name. Can this be non-mandatory?
373			'alternative_id': record.sample_identifier,
374			'collection_date': record.sampling_date,
375			'protocol': record.sampling_protocol_url,
376			'organism_part': organism_part,
377			# 'description': v_vessel.comment,
378			'storage': find_storage_type(record),
379			'availability': sanitize_url(record.sample_availability),
380			'animal_age_at_collection': animal_age_at_collection,
381			'animal_age_at_collection_units': time_units,
382			'publication': publication,
383			}
384
385			sample = update_or_create_obj(
386			Sample,
387			name=record.sample_identifier,
388			animal=animal,
389			owner=submission.owner,
390			submission=submission,
391			defaults=defaults)
392
393			return sample
394
395
396			def process_record(record, submission, animals, language):
397			# Peter mail 26/02/19 18:30: I agree that it sounds like we will
398			# need to create sameAs BioSamples for the IMAGE project, and it makes
399			# sense that the inject tool is able to do this. It may be that we
400			# tackle these cases after getting the main part of the inject tool
401			# functioning and hold or ignore these existing BioSamples for now.
402			# HINT: record with a biosample id should be ignored, for the moment
403			if record.EBI_Biosample_identifier is not None:
404			logger.warning("Ignoring %s: already in biosample!" % str(record))
405			return
406
407			# filling breeds
408			breed = fill_uid_breed(record, language)
409
410			# fill animal
411			animal = fill_uid_animal(record, breed, submission, animals)
412
413			# fill sample
414			fill_uid_sample(record, animal, submission)
415
416
417			def check_UID(submission, reader):
418			# check for species and sex in a similar way as cryoweb does
419			check, not_found = reader.check_sex()
420
421			if not check:
422			message = (
423			"Not all Sex terms are loaded into database: "
424			"check for '%s' in your dataset" % (not_found))
425
426			raise CRBAnimImportError(message)
427
428			# check for countries
429			check, not_found = reader.check_countries()
430
431			if not check:
432			message = (
433			"Not all countries are loaded into database: "
434			"check for '%s' in your dataset" % (not_found))
435
436			raise CRBAnimImportError(message)
437
438			check, not_found = reader.check_species(submission.gene_bank_country)
439
440			if not check:
441			raise CRBAnimImportError(
442			"Some species are not loaded in UID database: "
443			"check for '%s' in your dataset" % (not_found))
444
445
446			def upload_crbanim(submission):
447			# debug
448			logger.info("Importing from CRB-Anim file")
449
450			# this is the full path in docker container
451			fullpath = submission.get_uploaded_file_path()
452
453			# read submission data
454			reader = CRBAnimReader()
455			reader.read_file(fullpath)
456
457			# start data loading
458			try:
459			# check UID data like cryoweb does
460			check_UID(submission, reader)
461
462			# ok get languages from submission (useful for translation)
463			# HINT: no traslations implemented, at the moment
464			language = submission.gene_bank_country.label
465
466			# a dictionary in which store animal data
467			animals = {}
468
469			for record in reader.data:
470			process_record(record, submission, animals, language)
471
472			# after processing records, initilize validationsummary objects
473			# create a validation summary object and set all_count
474			vs_animal = get_or_create_obj(
475			ValidationSummary,
476			submission=submission,
477			type="animal")
478
479			# reset counts
480			vs_animal.reset_all_count()
481
482			vs_sample = get_or_create_obj(
483			ValidationSummary,
484			submission=submission,
485			type="sample")
486
487			# reset counts
488			vs_sample.reset_all_count()
489
490			except Exception as exc:
491			# set message:
492			message = "Error in importing data: %s" % (str(exc))
493
494			# save a message in database
495			submission.status = ERROR
496			submission.message = message
497			submission.save()
498
499			# send async message
500			send_message(submission)
501
502			# debug
503			logger.error("error in importing from crbanim: %s" % (exc))
504			logger.exception(exc)
505
506			return False
507
508			else:
509			message = "CRBAnim import completed for submission: %s" % (
510			submission.id)
511
512			submission.message = message
513			submission.status = LOADED
514			submission.save()
515
516			# send async message
517			send_message(
518			submission,
519			validation_message=construct_validation_message(submission))
520
521			logger.info("Import from CRBAnim is complete")
522
523			return True
524

cnr-ibba / IMAGE-InjectTool

crbanim.helpers B last analyzed 2023-06-12 09:55 UTC

Complexity

Size/Duplication

Importance

10 Methods

8 Functions

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like

crbanim.helpers B
last analyzed 2023-06-12 09:55 UTC