1
|
|
|
#!/usr/bin/env python3 |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
""" |
4
|
|
|
Created on Mon May 14 10:28:39 2018 |
5
|
|
|
|
6
|
|
|
@author: Paolo Cozzi <[email protected]> |
7
|
|
|
""" |
8
|
|
|
|
9
|
|
|
# --- import |
10
|
|
|
|
11
|
|
|
import logging |
12
|
|
|
import os |
13
|
|
|
import shlex |
14
|
|
|
import subprocess |
15
|
|
|
|
16
|
|
|
from decouple import AutoConfig |
17
|
|
|
|
18
|
|
|
from django.conf import settings |
19
|
|
|
|
20
|
|
|
from common.constants import LOADED, ERROR, MISSING, UNKNOWN |
21
|
|
|
from common.helpers import image_timedelta |
22
|
|
|
from image_app.helpers import get_or_create_obj, update_or_create_obj |
23
|
|
|
from image_app.models import ( |
24
|
|
|
Animal, DictBreed, DictCountry, DictSex, DictSpecie, Name, Sample, |
25
|
|
|
Submission, DictUberon) |
26
|
|
|
from language.helpers import check_species_synonyms |
27
|
|
|
from submissions.helpers import send_message |
28
|
|
|
from validation.helpers import construct_validation_message |
29
|
|
|
from validation.models import ValidationSummary |
30
|
|
|
|
31
|
|
|
from .models import db_has_data as cryoweb_has_data |
32
|
|
|
from .models import VAnimal, VBreedsSpecies, VTransfer, VVessels |
33
|
|
|
|
34
|
|
|
# Get an instance of a logger |
35
|
|
|
logger = logging.getLogger(__name__) |
36
|
|
|
|
37
|
|
|
|
38
|
|
|
# --- check functions |
39
|
|
|
|
40
|
|
|
|
41
|
|
|
# a function to detect if cryoweb species have synonyms or not |
42
|
|
|
def check_species(country):
    """Check that every cryoweb species has a synonym for ``country``.

    Species are read through the ``VBreedsSpecies`` view and passed to
    ``check_species_synonyms`` with ``create=True``, so that synonyms
    which don't exist yet are created by that helper.

    :country: an image_app.models.DictCountry.label
    :returns: the value returned by ``check_species_synonyms``
    :raises CryoWebImportError: if the view returns no species at all
    """

    # every species known to the cryoweb staging database
    species = VBreedsSpecies.get_all_species()

    # only used to build log and error messages
    db_name = settings.DATABASES['cryoweb']['NAME']

    # an empty staging database can't be checked: stop here
    if not len(species):
        raise CryoWebImportError(
            "You have no species in %s database" % db_name)

    logger.debug("Got %s species from %s", species, db_name)

    # delegate the real check (and the creation of missing synonyms)
    return check_species_synonyms(species, country, create=True)
63
|
|
|
|
64
|
|
|
|
65
|
|
|
# a function to test if I have all countries or not |
66
|
|
|
def check_countries():
    """Return the efabis countries that have no ``DictCountry`` object.

    Countries are read through the ``VBreedsSpecies`` view; each one is
    looked up by ``label`` in ``DictCountry``.

    :returns: a list with the countries not found (empty if none is missing)
    """

    def is_missing(label):
        # True when no DictCountry row carries this label
        return not DictCountry.objects.filter(label=label).exists()

    return [
        label for label in VBreedsSpecies.get_all_countries()
        if is_missing(label)]
79
|
|
|
|
80
|
|
|
|
81
|
|
|
# a function specific for cryoweb import path to ensure that all required |
82
|
|
|
# fields in UID are present. There could be a function like this in others |
83
|
|
|
# import paths |
84
|
|
|
def check_UID(submission):
    """Ensure that UID is valid before data upload. Specific to the
    cryoweb import path.

    Three checks are performed, in this order:

    1. the ``DictSex`` table contains at least one row;
    2. ``check_species`` succeeds for ``submission.gene_bank_country``;
    3. ``check_countries`` reports no missing country.

    :submission: the submission being imported
    :returns: True when all checks pass
    :raises CryoWebImportError: as soon as one of the checks fails
    """

    logger.debug("Checking UID")

    # check that dict sex table contains data. exists() asks the database
    # for a single row instead of loading the whole table to count it
    if not DictSex.objects.exists():
        raise CryoWebImportError("You have to upload DictSex data")

    # test for specie synonyms in submission language or default one
    # otherwise, fill synonym table with new terms then throw exception
    if not check_species(submission.gene_bank_country):
        raise CryoWebImportError("Some species haven't a synonym!")

    # test for countries in UID
    countries_not_found = check_countries()

    if countries_not_found:
        raise CryoWebImportError(
            "Not all countries are loaded into database: "
            "check for '%s' in your dataset" % (countries_not_found)
        )

    # return a status
    return True
110
|
|
|
|
111
|
|
|
|
112
|
|
|
# A class to deal with cryoweb import errors |
113
|
|
|
class CryoWebImportError(Exception):
    """Raised when something goes wrong while importing cryoweb data"""
115
|
|
|
|
116
|
|
|
|
117
|
|
|
# --- Upload data into cryoweb database |
118
|
|
|
def upload_cryoweb(submission_id):
    """Imports backup into the cryoweb db

    This function uses the container's installation of psql to import a
    backup file into the "cryoweb" database. The imported backup file is
    the one returned by ``get_uploaded_file_path()`` for the requested
    submission.

    On failure the submission is set to ERROR, its message is updated and
    an async message is sent through ``send_message``.

    :submission_id: the submission primary key
    :returns: True if psql completed, False if opening the file or
        running psql raised an exception
    :raises CryoWebImportError: if the cryoweb database already has data
    """

    # define some useful variables
    database_name = settings.DATABASES['cryoweb']['NAME']

    # define a decouple config object
    config_dir = os.path.join(settings.BASE_DIR, 'image')
    config = AutoConfig(search_path=config_dir)

    # get a submission object
    submission = Submission.objects.get(pk=submission_id)

    # debug
    logger.info("Importing data into cryoweb staging area")
    logger.debug("Got Submission %s", submission)

    # If cryoweb has data, update submission message and return exception:
    # maybe another process is running or there is another type of problem
    if cryoweb_has_data():
        logger.error("Cryoweb has data!")

        # update submission status
        submission.status = ERROR
        submission.message = "Error in importing data: Cryoweb has data"
        submission.save()

        # send async message
        send_message(submission)

        raise CryoWebImportError("Cryoweb has data!")

    # this is the full path in docker container
    fullpath = submission.get_uploaded_file_path()

    # define command line
    cmd_line = "/usr/bin/psql -U {user} -h db {database}".format(
        database=database_name, user='cryoweb_insert_only')

    cmds = shlex.split(cmd_line)

    logger.debug("Executing: %s", " ".join(cmds))

    try:
        # the dump is fed to psql through stdin. The context manager
        # closes the file handle whether psql succeeds or not
        with open(fullpath) as dump_file:
            result = subprocess.run(
                cmds,
                stdin=dump_file,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                # a non-zero exit status raises CalledProcessError
                check=True,
                # psql runs with PGPASSWORD as its only environment variable
                env={'PGPASSWORD': config('CRYOWEB_INSERT_ONLY_PW')},
                encoding='utf8'
            )

    except Exception as exc:
        # save a message in database
        submission.status = ERROR
        submission.message = "Error in importing data: %s" % (str(exc))
        submission.save()

        # send async message
        send_message(submission)

        # debug
        logger.error("error in calling upload_cryoweb: %s", exc)

        return False

    # one output line per executed statement (trailing newline included)
    n_of_statements = len(result.stdout.split("\n"))
    logger.debug("%s statement executed" % n_of_statements)

    # psql may write on stderr even when it exits with 0: report it
    if result.stderr:
        for line in result.stderr.split("\n"):
            logger.error(line)

    logger.info("{filename} uploaded into {database}".format(
        filename=submission.uploaded_file.name, database=database_name))

    return True
204
|
|
|
|
205
|
|
|
|
206
|
|
|
# --- Upload data from cryoweb to UID |
207
|
|
|
|
208
|
|
|
|
209
|
|
|
def fill_uid_breeds(submission):
    """Fill UID DictBreed model reading from the VBreedsSpecies view.

    For each row of the view, the specie is resolved through its synonym
    in the submission language and the country through its label; then a
    ``DictBreed`` object is fetched or created with ``get_or_create_obj``.

    :submission: a submission instance
    """

    logger.info("fill_uid_breeds() started")

    # synonyms are searched using the gene bank country as language
    lang = submission.gene_bank_country.label

    for row in VBreedsSpecies.objects.all():
        # dictionary tables are required: DictSpecie is already filled
        breed_specie = DictSpecie.get_by_synonym(
            synonym=row.ext_species, language=lang)

        # ideally the same country of the submission, however a cryoweb
        # instance could store breeds coming from other countries
        breed_country = DictCountry.objects.get(label=row.efabis_country)

        # create breed obj if necessary
        breed_attrs = {
            'supplied_breed': row.efabis_mcname,
            'specie': breed_specie,
            'country': breed_country,
        }
        get_or_create_obj(DictBreed, **breed_attrs)

    logger.info("fill_uid_breeds() completed")
237
|
|
|
|
238
|
|
|
|
239
|
|
|
def fill_uid_names(submission):
    """Read the VTransfer view and fill the Name table.

    A ``Name`` object is fetched or created for the full name of every
    ``VTransfer`` row, bound to the submission and to its owner.

    :submission: a submission instance
    """

    logger.info("called fill_uid_names()")

    for transfer in VTransfer.objects.all():
        # names are taken as they are, no manipulation: two identical
        # names end up in the same object, so there are no duplicates.
        # HINT: The ramon example will be a issue in validation step
        fullname = transfer.get_fullname()

        get_or_create_obj(
            Name,
            name=fullname,
            submission=submission,
            owner=submission.owner)

    logger.info("fill_uid_names() completed")
257
|
|
|
|
258
|
|
|
|
259
|
|
|
def fill_uid_animals(submission):
    """Helper function to fill animal data in UID animal table

    For every row of the ``VAnimal`` view the breed, the names of the
    animal, of its father and of its mother and the sex are resolved, then
    an ``Animal`` object is created or updated with
    ``update_or_create_obj``. At the end the "animal" ``ValidationSummary``
    of the submission is fetched (or created) and its counts are reset.

    Breeds and names are read with ``objects.get``, so they need to be in
    the database already (see ``fill_uid_breeds`` and ``fill_uid_names``).

    :submission: a submission instance
    :raises CryoWebImportError: if an animal has a sex different from
        'm' or 'f'
    """

    # debug
    logger.info("called fill_uid_animals()")

    # get submission language
    language = submission.gene_bank_country.label

    # get male and female DictSex objects from database
    male = DictSex.objects.get(label="male")
    female = DictSex.objects.get(label="female")

    # cycle over animals
    for v_animal in VAnimal.objects.all():
        # get specie translated by dictionary
        specie = DictSpecie.get_by_synonym(
            synonym=v_animal.ext_species,
            language=language)

        # get a country object
        country = DictCountry.objects.get(label=v_animal.efabis_country)

        # a breed could be specie/country specific
        breed = DictBreed.objects.get(
            supplied_breed=v_animal.efabis_mcname,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s", breed)

        # get name for this animal and for mother and father
        logger.debug("Getting %s as my name", v_animal.ext_animal)
        name = Name.objects.get(
            name=v_animal.ext_animal, submission=submission)

        logger.debug("Getting %s as father", v_animal.ext_sire)
        father = Name.objects.get(
            name=v_animal.ext_sire, submission=submission)

        logger.debug("Getting %s as mother", v_animal.ext_dam)
        mother = Name.objects.get(
            name=v_animal.ext_dam, submission=submission)

        # determine sex. Check for values
        if v_animal.ext_sex == 'm':
            sex = male

        elif v_animal.ext_sex == 'f':
            sex = female

        else:
            raise CryoWebImportError(
                "Unknown sex '%s' for '%s'" % (v_animal.ext_sex, v_animal))

        # checking accuracy. A coordinate is missing only when it is NULL
        # (or empty): 0 is a valid latitude/longitude, so values can't be
        # tested for truthiness
        has_coordinates = all(
            value not in (None, "")
            for value in (v_animal.latitude, v_animal.longitude))

        accuracy = UNKNOWN if has_coordinates else MISSING

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': v_animal.db_animal,
            'breed': breed,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': v_animal.birth_dt,
            'birth_location_latitude': v_animal.latitude,
            'birth_location_longitude': v_animal.longitude,
            'birth_location_accuracy': accuracy,
            'description': v_animal.comment,
            'owner': submission.owner
        }

        # Update or create animal obj
        update_or_create_obj(
            Animal,
            name=name,
            defaults=defaults)

    # create a validation summary object and set all_count
    validation_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="animal")

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_animals() completed")
357
|
|
|
|
358
|
|
|
|
359
|
|
|
def fill_uid_samples(submission):
    """Helper function to fill sample data in UID sample table

    For every row of the ``VVessels`` view a ``Name`` is fetched or
    created, the animal the sample belongs to is read from ``Animal``,
    then a ``Sample`` object is created or updated with
    ``update_or_create_obj``. At the end the "sample" ``ValidationSummary``
    of the submission is fetched (or created) and its counts are reset.

    :submission: a submission instance
    """

    logger.info("called fill_uid_samples()")

    for vessel in VVessels.objects.all():
        # the sample name is not in the Name table yet: insert it
        sample_name = get_or_create_obj(
            Name,
            name=vessel.ext_vessel,
            submission=submission,
            owner=submission.owner)

        # the animal is searched through its name in this submission
        sample_animal = Animal.objects.get(
            name__name=vessel.ext_animal,
            name__submission=submission)

        # organism parts need to be in lowercase
        part_label = vessel.get_organism_part().lower()
        organism_part = get_or_create_obj(DictUberon, label=part_label)

        # the cryoweb animal gives access to the birth date
        cryoweb_animal = VAnimal.objects.get(db_animal=vessel.db_animal)

        # derive animal age at collection. This function deals with
        # NULL values
        age, age_units = image_timedelta(
            vessel.production_dt, cryoweb_animal.birth_dt)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        sample_defaults = dict(
            alternative_id=vessel.db_vessel,
            collection_date=vessel.production_dt,
            organism_part=organism_part,
            animal=sample_animal,
            description=vessel.comment,
            owner=submission.owner,
            animal_age_at_collection=age,
            animal_age_at_collection_units=age_units,
        )

        update_or_create_obj(
            Sample,
            name=sample_name,
            defaults=sample_defaults)

    # create a validation summary object, then reset its counts
    summary = get_or_create_obj(
        ValidationSummary,
        submission=submission,
        type="sample")

    summary.reset_all_count()

    logger.info("fill_uid_samples() completed")
422
|
|
|
|
423
|
|
|
|
424
|
|
|
def cryoweb_import(submission):
    """Import data from cryoweb stage database into UID

    The UID is checked, then breeds, names, animals and samples are
    loaded in this order. The submission is set to LOADED on success and
    to ERROR if any step raises an exception; in both cases the message
    is updated and an async message is sent.

    :submission: a submission instance
    :returns: True if the import was completed, False otherwise
    """

    logger.info("Importing from cryoweb staging area")

    # order matters: the UID check comes first (it raises if the database
    # is not initialized), animals read the breeds and the names loaded
    # before them and samples read the animals
    steps = (
        check_UID,
        fill_uid_breeds,
        fill_uid_names,
        fill_uid_animals,
        fill_uid_samples,
    )

    try:
        for step in steps:
            step(submission)

    except Exception as exc:
        # save a message in database
        submission.message = "Error in importing data: %s" % (str(exc))
        submission.status = ERROR
        submission.save()

        # send async message
        send_message(submission)

        # report the error and its traceback
        logger.error("error in importing from cryoweb: %s", exc)
        logger.exception(exc)

        return False

    # every step was completed: mark the submission as loaded
    submission.message = "Cryoweb import completed for submission: %s" % (
        submission.id)
    submission.status = LOADED
    submission.save()

    send_message(
        submission,
        validation_message=construct_validation_message(submission))

    logger.info("Import from staging area is complete")

    return True
479
|
|
|
|