1
|
|
|
#!/usr/bin/env python3 |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
""" |
4
|
|
|
Created on Mon May 14 10:28:39 2018 |
5
|
|
|
|
6
|
|
|
@author: Paolo Cozzi <[email protected]> |
7
|
|
|
""" |
8
|
|
|
|
9
|
|
|
# --- import |
10
|
|
|
|
11
|
|
|
import logging |
12
|
|
|
import os |
13
|
|
|
import shlex |
14
|
|
|
import subprocess |
15
|
|
|
|
16
|
|
|
from decouple import AutoConfig |
17
|
|
|
|
18
|
|
|
from django.conf import settings |
19
|
|
|
|
20
|
|
|
from common.constants import LOADED, ERROR, MISSING, UNKNOWN |
21
|
|
|
from common.helpers import image_timedelta |
22
|
|
|
from uid.helpers import get_or_create_obj, update_or_create_obj |
23
|
|
|
from uid.models import ( |
24
|
|
|
Animal, DictBreed, DictCountry, DictSex, DictSpecie, Sample, |
25
|
|
|
Submission, DictUberon) |
26
|
|
|
from language.helpers import check_species_synonyms |
27
|
|
|
from submissions.helpers import send_message |
28
|
|
|
from validation.helpers import construct_validation_message |
29
|
|
|
from validation.models import ValidationSummary |
30
|
|
|
|
31
|
|
|
from .models import db_has_data as cryoweb_has_data |
32
|
|
|
from .models import VAnimal, VBreedsSpecies, VVessels |
33
|
|
|
|
34
|
|
|
# Module-level logger, named after this module through __name__
logger = logging.getLogger(__name__)
36
|
|
|
|
37
|
|
|
|
38
|
|
|
# --- check functions |
39
|
|
|
|
40
|
|
|
|
41
|
|
|
# a function to detect if cryoweb species have synonyms or not |
42
|
|
|
def check_species(country):
    """Verify that the species stored in cryoweb have a synonym.

    The species are read with ``VBreedsSpecies.get_all_species()`` and
    handed over to ``check_species_synonyms`` together with ``country`` and
    ``create=True``.

    :country: forwarded unchanged to ``check_species_synonyms``; the original
        notes describe it as a ``uid.models.DictCountry.label``
        (NOTE(review): confirm against the callers)
    :return: the value returned by ``check_species_synonyms``
    :raises CryoWebImportError: when cryoweb provides no species at all
    """

    # read every species through the cryoweb view
    cryoweb_species = VBreedsSpecies.get_all_species()

    # the database name is only needed to build messages
    cryoweb_db = settings.DATABASES['cryoweb']['NAME']

    # nothing to check: report it as an import error
    if not len(cryoweb_species):
        raise CryoWebImportError(
            "You have no species in %s database" % cryoweb_db)

    logger.debug("Got %s species from %s" % (cryoweb_species, cryoweb_db))

    # delegate the synonym check, asking for the creation of missing entries
    return check_species_synonyms(cryoweb_species, country, create=True)
63
|
|
|
|
64
|
|
|
|
65
|
|
|
# a function to test if I have all countries or not |
66
|
|
|
def check_countries():
    """Return the cryoweb countries which have no ``DictCountry`` object.

    Each value returned by ``VBreedsSpecies.get_all_countries()`` is searched
    by label in ``DictCountry``; the values without a match are collected, in
    the same order, in the returned list (empty when every country is found).
    """

    # keep only the labels for which no dictionary row exists
    return [
        label for label in VBreedsSpecies.get_all_countries()
        if not DictCountry.objects.filter(label=label).exists()
    ]
79
|
|
|
|
80
|
|
|
|
81
|
|
|
# a function specific for cryoweb import path to ensure that all required |
82
|
|
|
# fields in UID are present. There could be a function like this in others |
83
|
|
|
# import paths |
84
|
|
|
def check_UID(submission):
    """Ensure that UID is ready before importing data from cryoweb.

    Three checks are done, in this order: ``DictSex`` must contain data,
    ``check_species`` must succeed for ``submission.gene_bank_country`` and
    ``check_countries`` must not report any missing country.

    :submission: an object providing the ``gene_bank_country`` attribute
    :return: True when all the checks pass
    :raises CryoWebImportError: as soon as one of the checks fails
    """

    logger.debug("Checking UID")

    # check that dict sex table contains data. exists() asks the database
    # for a single row instead of loading the whole table to count it
    if not DictSex.objects.exists():
        raise CryoWebImportError("You have to upload DictSex data")

    # test for specie synonyms. check_species is called with create=True
    # semantics (see check_species), then a falsy result raises an exception
    if not check_species(submission.gene_bank_country):
        raise CryoWebImportError("Some species haven't a synonym!")

    # test for countries in UID
    countries_not_found = check_countries()

    if countries_not_found:
        # the one-element tuple keeps the formatting safe even if a tuple
        # were returned instead of a list
        raise CryoWebImportError(
            "Not all countries are loaded into database: "
            "check for '%s' in your dataset" % (countries_not_found,)
        )

    # return a status
    return True
110
|
|
|
|
111
|
|
|
|
112
|
|
|
# A class to deal with cryoweb import errors |
113
|
|
|
class CryoWebImportError(Exception):
    """Exception raised when something goes wrong in a cryoweb import."""
115
|
|
|
|
116
|
|
|
|
117
|
|
|
# --- Upload data into cryoweb database |
118
|
|
|
def upload_cryoweb(submission_id):
    """Import the file uploaded with a submission into the cryoweb database.

    The file located by ``submission.get_uploaded_file_path()`` is sent on
    the standard input of ``/usr/bin/psql`` (user ``cryoweb_insert_only``,
    host ``db``, ``ON_ERROR_STOP=1``). The password is read from the
    ``CRYOWEB_INSERT_ONLY_PW`` value through a decouple ``AutoConfig``
    searching in the ``image`` directory under ``settings.BASE_DIR``.

    :submission_id: the submission primary key
    :return: True when psql completes; False when running it fails for any
        reason, in which case the submission is saved with the ERROR status
        and ``send_message`` is called
    :raises CryoWebImportError: when cryoweb already has data (the
        submission is saved with the ERROR status before raising)
    """

    # define some useful variables
    database_name = settings.DATABASES['cryoweb']['NAME']

    # define a decouple config object
    config_dir = os.path.join(settings.BASE_DIR, 'image')
    config = AutoConfig(search_path=config_dir)

    # get a submission object
    submission = Submission.objects.get(pk=submission_id)

    # debug
    logger.info("Importing data into CryoWeb staging area")
    logger.debug("Got Submission %s", submission)

    # If cryoweb has data, update submission message and raise an exception:
    # maybe another process is running or there is another type of problem
    if cryoweb_has_data():
        logger.error("CryoWeb has data!")

        # update submission status
        submission.status = ERROR
        submission.message = "Error in importing data: Cryoweb has data"
        submission.save()

        # send async message
        send_message(submission)

        raise CryoWebImportError("Cryoweb has data!")

    # this is the full path in docker container
    fullpath = submission.get_uploaded_file_path()

    # define command line
    cmd_line = (
        "/usr/bin/psql -U {user} -h db {database} -v ON_ERROR_STOP=1".format(
            database=database_name, user='cryoweb_insert_only'))

    cmds = shlex.split(cmd_line)

    logger.debug("Executing: %s", " ".join(cmds))

    try:
        # the context manager closes the dump file whether psql succeeds or
        # not; opening it inside the try keeps a missing/unreadable file
        # reported in the same way as a psql failure
        with open(fullpath) as dump_file:
            result = subprocess.run(
                cmds,
                stdin=dump_file,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
                env={'PGPASSWORD': config('CRYOWEB_INSERT_ONLY_PW')},
                encoding='utf8'
            )

    except Exception as exc:
        # save a message in database
        submission.status = ERROR
        submission.message = (
            "Error in importing data: Is '%s' a valid CryoWeb"
            " dump file?" % (
                os.path.split(submission.uploaded_file.name)[-1]))
        submission.save()

        # send async message
        send_message(submission)

        # debug
        logger.error("error in calling upload_cryoweb: %s", exc)

        return False

    # one line of psql output is counted as one statement
    n_of_statements = len(result.stdout.split("\n"))
    logger.debug("%s statement executed", n_of_statements)

    # psql succeeded, but whatever it wrote on stderr is still reported
    if len(result.stderr) > 0:
        for line in result.stderr.split("\n"):
            logger.error(line)

    logger.info("{filename} uploaded into {database}".format(
        filename=submission.uploaded_file.name, database=database_name))

    return True
208
|
|
|
|
209
|
|
|
|
210
|
|
|
# --- Upload data from cryoweb to UID |
211
|
|
|
|
212
|
|
|
|
213
|
|
|
def fill_uid_breeds(submission):
    """Fill the UID ``DictBreed`` model using the ``VBreedsSpecies`` rows.

    For every row, the specie is resolved with ``DictSpecie.get_by_synonym``
    and the country with ``DictCountry.objects.get``; both are passed with
    the breed name to ``get_or_create_obj``.

    :submission: a submission instance; ``gene_bank_country.label`` is used
        as the language for resolving specie synonyms
    """

    logger.info("fill_uid_breeds() started")

    # the language comes from the country of the submission
    synonym_language = submission.gene_bank_country.label

    for row in VBreedsSpecies.objects.all():
        # translate the cryoweb specie into a dictionary object
        dict_specie = DictSpecie.get_by_synonym(
            synonym=row.ext_species,
            language=synonym_language)

        # the country is taken from the row itself, not from the submission:
        # it could differ from the submission one
        dict_country = DictCountry.objects.get(label=row.efabis_country)

        # the breed is created only when missing
        get_or_create_obj(
            DictBreed,
            supplied_breed=row.efabis_mcname,
            specie=dict_specie,
            country=dict_country)

    logger.info("fill_uid_breeds() completed")
241
|
|
|
|
242
|
|
|
|
243
|
|
|
def get_animal_specie_and_breed(v_animal, language):
    """Return the UID specie and breed of a cryoweb animal.

    :v_animal: an object providing the ``ext_species``, ``efabis_mcname`` and
        ``efabis_country`` attributes
    :language: the language passed to ``DictSpecie.get_by_synonym``
    :return: a ``(specie, breed)`` tuple
    """

    # the specie is found through the synonym dictionary
    specie = DictSpecie.get_by_synonym(
        synonym=v_animal.ext_species,
        language=language)

    logger.debug("Selected specie is %s" % (specie))

    # the country of the breed, as a dictionary object
    country = DictCountry.objects.get(label=v_animal.efabis_country)

    # name, specie and country are all used to select the breed
    breed = DictBreed.objects.get(
        supplied_breed=v_animal.efabis_mcname,
        specie=specie,
        country=country)

    logger.debug("Selected breed is %s" % (breed))

    return specie, breed
267
|
|
|
|
268
|
|
|
|
269
|
|
|
def fill_uid_animals(submission):
    """Fill the UID ``Animal`` model using the ``VAnimal`` rows.

    Each row is written with ``update_or_create_obj``; then the "animal"
    ``ValidationSummary`` of the submission is fetched (or created) and its
    counts are reset.

    :submission: a submission instance; its ``owner`` and
        ``gene_bank_country.label`` are read
    :raises CryoWebImportError: when a row has a sex other than 'm' or 'f'
    """

    logger.info("called fill_uid_animals()")

    # the language comes from the country of the submission
    language = submission.gene_bank_country.label

    # map cryoweb sex codes to the DictSex objects
    sex_by_code = {
        'm': DictSex.objects.get(label="male"),
        'f': DictSex.objects.get(label="female"),
    }

    for v_animal in VAnimal.objects.all():
        # only the breed is needed here
        _, breed = get_animal_specie_and_breed(v_animal, language)

        logger.debug("Getting %s as my name" % (v_animal.ext_animal))

        # parents are searched with the same breed and owner of the animal
        logger.debug("Getting %s as father" % (v_animal.ext_sire))

        # first() gives None when there is no such animal
        father = Animal.objects.filter(
            name=v_animal.ext_sire,
            breed=breed,
            owner=submission.owner).first()

        logger.debug("Getting %s as mother" % (v_animal.ext_dam))

        mother = Animal.objects.filter(
            name=v_animal.ext_dam,
            breed=breed,
            owner=submission.owner).first()

        # any code outside the table is an error
        sex = sex_by_code.get(v_animal.ext_sex)

        if sex is None:
            raise CryoWebImportError(
                "Unknown sex '%s' for '%s'" % (v_animal.ext_sex, v_animal))

        # accuracy is UNKNOWN only when both coordinates are set. As noted
        # by the original author, coordinates alone are not enough for
        # validation, since a textual birth location is also required
        if v_animal.latitude and v_animal.longitude:
            accuracy = UNKNOWN
        else:
            accuracy = MISSING

        # these values are updated on existing objects: they are kept out of
        # the lookup arguments to avoid collisions when updating data
        update_or_create_obj(
            Animal,
            name=v_animal.ext_animal,
            breed=breed,
            owner=submission.owner,
            submission=submission,
            defaults={
                'alternative_id': v_animal.db_animal,
                'sex': sex,
                'father': father,
                'mother': mother,
                'birth_date': v_animal.birth_dt,
                'birth_location_latitude': v_animal.latitude,
                'birth_location_longitude': v_animal.longitude,
                'birth_location_accuracy': accuracy,
                'description': v_animal.comment,
            })

    # get (or create) the animal summary of this submission and reset it
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="animal")

    summary.reset_all_count()

    logger.info("fill_uid_animals() completed")
360
|
|
|
|
361
|
|
|
|
362
|
|
|
def fill_uid_samples(submission):
    """Fill the UID ``Sample`` model using the ``VVessels`` rows.

    Each row is written with ``update_or_create_obj`` and linked to the
    ``Animal`` found by name, breed and owner; then the "sample"
    ``ValidationSummary`` of the submission is fetched (or created) and its
    counts are reset.

    :submission: a submission instance; its ``owner`` and
        ``gene_bank_country.label`` are read
    """

    logger.info("called fill_uid_samples()")

    # the language comes from the country of the submission
    language = submission.gene_bank_country.label

    for v_vessel in VVessels.objects.all():
        # the sample takes the name of the vessel
        sample_name = v_vessel.ext_vessel

        # the cryoweb animal this vessel belongs to
        v_animal = v_vessel.get_animal()

        # only the breed is needed here
        _, breed = get_animal_specie_and_breed(v_animal, language)

        # the UID animal must already exist
        animal = Animal.objects.get(
            name=v_animal.ext_animal,
            breed=breed,
            owner=submission.owner)

        # organism parts are stored in lowercase
        part_label = v_vessel.get_organism_part().lower()
        organism_part = get_or_create_obj(DictUberon, label=part_label)

        # age at collection, from production and birth dates. The original
        # notes say that image_timedelta deals with NULL values
        age, age_units = image_timedelta(
            v_vessel.production_dt, v_animal.birth_dt)

        # these values are updated on existing objects: they are kept out of
        # the lookup arguments to avoid collisions when updating data.
        # protocol and storage are not imported
        update_or_create_obj(
            Sample,
            name=sample_name,
            animal=animal,
            owner=submission.owner,
            submission=submission,
            defaults={
                'alternative_id': v_vessel.db_vessel,
                'collection_date': v_vessel.production_dt,
                'organism_part': organism_part,
                'description': v_vessel.comment,
                'animal_age_at_collection': age,
                'animal_age_at_collection_units': age_units,
            })

    # get (or create) the sample summary of this submission and reset it
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="sample")

    summary.reset_all_count()

    logger.info("fill_uid_samples() completed")
429
|
|
|
|
430
|
|
|
|
431
|
|
|
def cryoweb_import(submission):
    """Load the cryoweb staging data into UID for a submission.

    Runs ``check_UID`` and then fills breeds, animals and samples. The
    outcome is stored in the submission (status and message) and notified
    with ``send_message``.

    :submission: a submission instance
    :return: True when the import completes, False when any step raises
    """

    logger.info("Importing from cryoweb staging area")

    try:
        # an exception is raised here when UID is not initialized
        check_UID(submission)

        # the order matters: animals need breeds, samples need animals
        fill_uid_breeds(submission)
        fill_uid_animals(submission)
        fill_uid_samples(submission)

    except Exception as exc:
        # record the failure in the submission
        submission.status = ERROR
        submission.message = "Error in importing data: %s" % (str(exc))
        submission.save()

        # notify the failure
        send_message(submission)

        logger.error("error in importing from cryoweb: %s" % (exc))
        logger.exception(exc)

        return False

    # getting here means that every step succeeded
    submission.message = "Cryoweb import completed for submission: %s" % (
        submission.id)
    submission.status = LOADED
    submission.save()

    # notify the success, adding a validation message
    send_message(
        submission,
        validation_message=construct_validation_message(submission))

    logger.info("Import from staging area is complete")

    return True
483
|
|
|
|