Passed
Pull Request — master (#40)
by Paolo
07:29 queued 44s
created

excel.helpers.fill_uid   A

Complexity

Total Complexity 34

Size/Duplication

Total Lines 458
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 34
eloc 250
dl 0
loc 458
rs 9.68
c 0
b 0
f 0

5 Functions

Rating   Name   Duplication   Size   Complexity  
B fill_uid_breeds() 0 50 5
B fill_uid_names() 0 36 5
C fill_uid_animals() 0 112 7
D fill_uid_samples() 0 137 11
B upload_template() 0 87 6
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3
"""
4
Created on Fri Jul  5 16:37:48 2019
5
6
@author: Paolo Cozzi <[email protected]>
7
"""
8
9
import logging
10
11
from common.constants import (
12
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
13
from common.helpers import image_timedelta, parse_image_timedelta
14
from image_app.models import (
15
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Name, Animal,
16
    Sample, DictDevelStage, DictPhysioStage)
17
from submissions.helpers import send_message
18
from validation.helpers import construct_validation_message
19
from validation.models import ValidationSummary
20
21
from .exceptions import ExcelImportError
22
from .exceltemplate import ExcelTemplateReader
23
24
# Get an instance of a logger
25
logger = logging.getLogger(__name__)
26
27
28
def fill_uid_breeds(submission_obj, template):
    """Fill DictBreed (and DictCountry) from the breed records of a template.

    Every record returned by ``template.get_breed_records()`` is resolved to
    a DictSpecie (by label first, then by synonym using the submission
    language); the breed country and the breed itself are then fetched from
    the database or created when missing.
    """

    logger.info("fill_uid_breeds() started")

    # the gene bank country label is the language used to resolve synonyms
    synonym_language = submission_obj.gene_bank_country.label

    for breed_record in template.get_breed_records():
        species_label = breed_record.species

        # TODO: move this in a helper module (image_app.helpers?)
        # Species are expected as latin names, but a common name could also
        # be found in the translation tables
        try:
            specie = DictSpecie.objects.get(label=species_label)

        except DictSpecie.DoesNotExist:
            logger.info("Search %s in synonyms", species_label)

            # if we get here, a synonym in the submission language should
            # exist
            specie = DictSpecie.get_by_synonym(
                synonym=species_label,
                language=synonym_language)

        # The breed country is ideally the submission one, however data from
        # other countries could be stored as well. Having many breeds from
        # different countries and few submitting organizations is fine.
        country, country_is_new = DictCountry.objects.get_or_create(
            label=breed_record.efabis_breed_country)

        if country_is_new:
            logger.info("Created %s", country)

        else:
            logger.debug("Found %s", country)

        breed, breed_is_new = DictBreed.objects.get_or_create(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=country)

        if breed_is_new:
            logger.info("Created %s", breed)

        else:
            logger.debug("Found %s", breed)

    logger.info("fill_uid_breeds() completed")
78
79
80
def fill_uid_names(submission_obj, template):
    """Fill the Name table from the animal and sample records of a template.

    A Name is fetched or created for each animal identifier returned by
    ``template.get_animal_records()`` and then for each sample identifier
    returned by ``template.get_sample_records()``. Every Name is bound to
    ``submission_obj`` and to its owner.
    """

    # debug
    logger.info("called fill_uid_names()")

    owner = submission_obj.owner

    # first pass: one name for each animal of the template
    for animal_record in template.get_animal_records():
        name_obj, is_new = Name.objects.get_or_create(
            name=animal_record.animal_id_in_data_source,
            submission=submission_obj,
            owner=owner)

        if is_new:
            logger.debug("Created animal name %s", name_obj)

        else:
            logger.debug("Found animal name %s", name_obj)

    # second pass: one name for each sample of the template
    for sample_record in template.get_sample_records():
        name_obj, is_new = Name.objects.get_or_create(
            name=sample_record.sample_id_in_data_source,
            submission=submission_obj,
            owner=owner)

        if is_new:
            logger.debug("Created sample name %s", name_obj)

        else:
            logger.debug("Found sample name %s", name_obj)

    logger.info("fill_uid_names() completed")
116
117
118
def fill_uid_animals(submission_obj, template):
    """Create or update Animal objects from the animal records of a template.

    For every record returned by ``template.get_animal_records()`` the sex,
    species, breed and the Name objects (animal, father, mother) are looked
    up in the database, then the Animal bound to the animal Name is created
    or updated. Finally the "animal" ValidationSummary of the submission is
    fetched (or created) and its counts are reset.

    Raises:
        ExcelImportError: when the (breed, species) pair of an animal record
            doesn't match exactly one breed record of the template.
    """

    # debug
    logger.info("called fill_uid_animals()")

    # get submission language (used to resolve species synonyms)
    language = submission_obj.gene_bank_country.label

    # Index the breed records once by (supplied_breed, species), instead of
    # scanning the whole breed sheet again for every animal record
    breed_records = {}

    for breed_record in template.get_breed_records():
        key = (breed_record.supplied_breed, breed_record.species)
        breed_records.setdefault(key, []).append(breed_record)

    # iterate among excel template
    for record in template.get_animal_records():
        # determine sex. Check for values
        sex = DictSex.objects.get(label__iexact=record.sex)

        # get specie. As in fill_uid_breeds(), species could be provided
        # with a common name: fall back to synonyms in submission language
        try:
            specie = DictSpecie.objects.get(label=record.species)

        except DictSpecie.DoesNotExist:
            logger.info("Search %s in synonyms", record.species)
            specie = DictSpecie.get_by_synonym(
                synonym=record.species,
                language=language)

        # breed records matching this animal in user data
        candidates = breed_records.get((record.breed, record.species), [])

        # breed is supposed to be unique, from UID constraints. However
        # the same breed name could be placed in two countries. In that
        # case, a unique breed can't be derived from user data
        if len(candidates) != 1:
            raise ExcelImportError(
                "Can't determine a unique breed for '%s:%s' from user data" %
                (record.breed, record.species))

        # get a country for this breed
        country = DictCountry.objects.get(
            label=candidates[0].efabis_breed_country)

        # ok get a real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=record.breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # get name for this animal
        logger.debug(
            "Getting %s as my name", record.animal_id_in_data_source)

        name = Name.objects.get(
            name=record.animal_id_in_data_source,
            submission=submission_obj)

        # parents are optional: they stay None when not provided
        father, mother = None, None

        if record.father_id_in_data_source:
            logger.debug(
                "Getting %s as father", record.father_id_in_data_source)

            father = Name.objects.get(
                name=record.father_id_in_data_source,
                submission=submission_obj)

        if record.mother_id_in_data_source:
            logger.debug(
                "Getting %s as mother", record.mother_id_in_data_source)

            mother = Name.objects.get(
                name=record.mother_id_in_data_source,
                submission=submission_obj)

        # map the accuracy description of the template to its value
        accuracy = ACCURACIES.get_value_by_desc(
            record.birth_location_accuracy)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_animal_id,
            'description': record.animal_description,
            'breed': breed,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': record.birth_date,
            'birth_location': record.birth_location,
            'birth_location_latitude': record.birth_location_latitude,
            'birth_location_longitude': record.birth_location_longitude,
            'birth_location_accuracy': accuracy,
            'owner': submission_obj.owner
        }

        animal, created = Animal.objects.update_or_create(
            name=name,
            defaults=defaults)

        if created:
            logger.debug("Created %s", animal)

        else:
            logger.debug("Updating %s", animal)

    # create a validation summary object and set all_count
    validation_summary, created = ValidationSummary.objects.get_or_create(
        submission=submission_obj, type="animal")

    if created:
        logger.debug(
            "ValidationSummary animal created for submission %s",
            submission_obj)

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_animals() completed")
230
231
232
def fill_uid_samples(submission_obj, template):
    """Create or update Sample objects from the sample records of a template.

    For every record returned by ``template.get_sample_records()`` the sample
    Name and the Animal it belongs to are read from the database, dictionary
    terms (organism part, developmental and physiological stages) are fetched
    or created, and the Sample bound to the Name is created or updated.
    Finally the "sample" ValidationSummary of the submission is fetched (or
    created) and its counts are reset.
    """

    # debug
    logger.info("called fill_uid_samples()")

    # iterate among excel template
    for record in template.get_sample_records():
        # get name for this sample
        name = Name.objects.get(
            name=record.sample_id_in_data_source,
            submission=submission_obj,
            owner=submission_obj.owner)

        # get animal by reading record
        animal = Animal.objects.get(
            name__name=record.animal_id_in_data_source,
            name__submission=submission_obj)

        # get a organism part. Organism parts need to be in lowercases
        # NOTE(review): no lowercasing is applied here; presumably the
        # template reader already provides normalized values - confirm
        organism_part = _get_or_create_term(DictUberon, record.organism_part)

        # developmental_stage and physiological_stage terms are not
        # mandatory: leave them as None when not provided
        devel_stage = None

        if record.developmental_stage:
            devel_stage = _get_or_create_term(
                DictDevelStage, record.developmental_stage)

        physio_stage = None

        if record.physiological_stage:
            physio_stage = _get_or_create_term(
                DictPhysioStage, record.physiological_stage)

        # animal age could be present or not
        if record.animal_age_at_collection:
            animal_age_at_collection, time_units = parse_image_timedelta(
                record.animal_age_at_collection)

        else:
            # derive animal age at collection from collection and birth dates
            animal_age_at_collection, time_units = image_timedelta(
                record.collection_date, animal.birth_date)

        # another (optional) time column
        preparation_interval, preparation_interval_units = None, None

        if record.sampling_to_preparation_interval:
            preparation_interval, preparation_interval_units = \
                parse_image_timedelta(record.sampling_to_preparation_interval)

        # map the accuracy description of the template to its value
        accuracy = ACCURACIES.get_value_by_desc(
            record.collection_place_accuracy)

        # now get storage and storage processing
        # TODO; check those values in excel columns
        storage = SAMPLE_STORAGE.get_value_by_desc(
            record.sample_storage)

        storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            record.sample_storage_processing)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_sample_id,
            'description': record.sample_description,
            'animal': animal,
            'protocol': record.specimen_collection_protocol,
            'collection_date': record.collection_date,
            'collection_place_latitude': record.collection_place_latitude,
            'collection_place_longitude': record.collection_place_longitude,
            'collection_place': record.collection_place,
            'collection_place_accuracy': accuracy,
            'organism_part': organism_part,
            'developmental_stage': devel_stage,
            'physiological_stage': physio_stage,
            'animal_age_at_collection': animal_age_at_collection,
            'animal_age_at_collection_units': time_units,
            'availability': record.availability,
            'storage': storage,
            'storage_processing': storage_processing,
            'preparation_interval': preparation_interval,
            'preparation_interval_units': preparation_interval_units,
            'owner': submission_obj.owner,
        }

        sample, created = Sample.objects.update_or_create(
            name=name,
            defaults=defaults)

        if created:
            logger.debug("Created %s", sample)

        else:
            logger.debug("Updating %s", sample)

    # create a validation summary object and set all_count
    validation_summary, created = ValidationSummary.objects.get_or_create(
        submission=submission_obj, type="sample")

    if created:
        # this summary tracks samples (the message used to say "animal")
        logger.debug(
            "ValidationSummary sample created for submission %s",
            submission_obj)

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_samples() completed")


def _get_or_create_term(model, label):
    """Return the ``model`` object having this ``label``, creating it if
    missing, and log whether it was created or found."""

    term, created = model.objects.get_or_create(label=label)

    if created:
        logger.info("Created %s", term)

    else:
        logger.debug("Found %s", term)

    return term
369
370
371
def upload_template(submission_obj):
    """Import the Excel template uploaded with a submission into UID tables.

    The uploaded file is read with ExcelTemplateReader, then sex, species and
    accuracy terms are checked before filling breeds, names, animals and
    samples (in this order).

    Any exception raised by checks or data loading is caught: the submission
    is saved with ERROR status and the error message, a message is sent and
    False is returned. Reading the file happens before the ``try`` block, so
    errors raised there propagate to the caller.

    Returns:
        bool: True when data are loaded (submission saved with LOADED
        status), False if an error occurred during import.
    """

    # debug
    logger.info("Importing from Excel template file")

    # this is the full path in docker container
    template_path = submission_obj.get_uploaded_file_path()

    # read submission data
    reader = ExcelTemplateReader()
    reader.read_file(template_path)

    # start data loading
    try:
        # check for species and sex in a similar way as cryoweb does
        # TODO: identical to CRBanim. Move to a mixin
        sex_ok, not_found = reader.check_sex()

        if not sex_ok:
            raise ExcelImportError(
                "Not all Sex terms are loaded into database: "
                "check for %s in your dataset" % (not_found))

        species_ok, not_found = reader.check_species(
            submission_obj.gene_bank_country)

        if not species_ok:
            raise ExcelImportError(
                "Some species are not loaded in UID database: "
                "%s" % (not_found))

        accuracies_ok, not_found = reader.check_accuracies()

        if not accuracies_ok:
            raise ExcelImportError(
                "Not all accuracy levels are defined in database: "
                "check for %s in your dataset" % (not_found))

        # load data in order: BREEDS, NAME, ANIMALS and then SAMPLES
        fill_uid_breeds(submission_obj, reader)
        fill_uid_names(submission_obj, reader)
        fill_uid_animals(submission_obj, reader)
        fill_uid_samples(submission_obj, reader)

    except Exception as exc:
        # save the error message in database
        submission_obj.status = ERROR
        submission_obj.message = "Error in importing data: %s" % (str(exc))
        submission_obj.save()

        # send async message
        send_message(submission_obj)

        # debug
        logger.error("Error in importing from Template: %s", exc)
        logger.exception(exc)

        return False

    # no errors: the failure branch has already returned
    submission_obj.message = (
        "Template import completed for submission: %s" % (
            submission_obj.id))
    submission_obj.status = LOADED
    submission_obj.save()

    # send async message, with validation info
    validation_message = construct_validation_message(submission_obj)

    send_message(
        submission_obj,
        validation_message=validation_message)

    logger.info("Import from Template is complete")

    return True
458