uid.management.commands.initializedb.fill_SpeciesAndSynonyms() - Code Metrics - Inspection of ":bookmark: Bump version: 0.9.0.dev0 → 0.9.0" - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( a65260...307dd7 )

by Paolo

created 2019-11-15 09:20 UTC

fill_SpeciesAndSynonyms() A

↳ Parent: uid.management.commands.initializedb

Complexity

Conditions

Size

Total Lines	48
Code Lines	35

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	35
dl	0
loc	48
rs	9.0399
c	0
b	0
f	0
cc	4
nop	0

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 25 15:28:05 2018

@author: Paolo Cozzi <[email protected]>

This django command script needs to be called after database initialization.
The aim is to fill tables like ontology tables (roles, sex) in order to upload
data from cryoweb database (or xls template files, or ...)
"""

import collections
import csv
import io
import logging
import os

from image_validation.use_ontology import get_general_breed_by_species

from django.core.management import BaseCommand

from common.constants import OBO_URL, CURATED
from uid.helpers import get_or_create_obj, update_or_create_obj
from uid.models import (
    DictCountry, DictRole, DictSex, DictSpecie, Ontology, Organization,
    DictUberon)
from language.models import SpecieSynonym

# Get an instance of a logger
logger = logging.getLogger(__name__)


def fill_ontology():
    """Load the embedded ontology table through update_or_create_obj.

    The table is a ';'-separated text whose first row is the header. Header
    names are lower-cased and spaces are replaced by underscores, so that
    they can be used as field names (library_name, library_uri, comment).
    Each following row is sent to update_or_create_obj, keyed by its
    library_name, with the whole row as defaults.
    """
    table = """Library name;Library URI;Comment
PATO;{obo_url};Phenotype And Trait Ontology
LBO;{obo_url};Livestock Breed Ontology
EFO;http://www.ebi.ac.uk/efo;Experimental Factor Ontology
OBI;{obo_url};Ontology for Biomedical Investigations
NCBITaxon;{obo_url};NCBI Taxonomy
UBERON;{obo_url};cross-species ontology covering anatomical structures in animals
GAZ;{obo_url};A gazetteer constructed on ontological principles
NCIT;{obo_url};NCI Thesaurus OBO Edition
""".format(obo_url=OBO_URL)

    rows = csv.reader(io.StringIO(table), delimiter=";")

    # the first row holds column titles: turn them into valid identifiers
    fields = [title.lower().replace(" ", "_") for title in next(rows)]
    Record = collections.namedtuple('Data', fields)

    for values in rows:
        record = Record._make(values)

        # library_name is the lookup key, all the columns are the new values
        update_or_create_obj(
            Ontology,
            library_name=record.library_name,
            defaults=record._asdict())


def fill_DictSex():
    """Pass the three DictSex entries (label and term) to get_or_create_obj"""

    # (label, term) couples, processed in this order
    sexes = (
        ('male', 'PATO_0000384'),
        ('female', 'PATO_0000383'),
        ('record of unknown sex', 'OBI_0000858'),
    )

    for label, term in sexes:
        get_or_create_obj(DictSex, label=label, term=term)


# a function to fill up DictRoles
# TODO: need I fill tables with descendants terms?
def fill_DictRoles():
    """Request the 'submitter' DictRole and return the helper's result"""

    # the value returned by get_or_create_obj is handed back to the caller
    return get_or_create_obj(
        DictRole,
        label='submitter',
        term='EFO_0001741')


# a function to fill up only species
def fill_Species():
    """Populate species table with the curated (label, term) couples"""

    curated_species = (
        ('Crassostrea gigas', 'NCBITaxon_29159'),
        ('Equus asinus', 'NCBITaxon_9793'),
        ('Oncorhynchus mykiss', 'NCBITaxon_8022'),
        ('Canis lupus familiaris', 'NCBITaxon_9615'),
    )

    # every species listed here is recorded with the CURATED confidence
    for label, term in curated_species:
        get_or_create_obj(
            DictSpecie,
            confidence=CURATED,
            label=label,
            term=term)


# a function to fill up dictspecie and speciesynonym
def fill_SpeciesAndSynonyms():
    """Populate cryoweb dictionary tables

    For each cryoweb common name, request the DictSpecie with the matching
    scientific name, refresh its general breed (label and term) when
    get_general_breed_by_species returns a result with a different label,
    then request the SpecieSynonym linking the common name to the species.
    """

    # insert countries: the returned object is used as the synonyms' language
    language = fill_Countries()

    # those are cryoweb DE species and synonyms
    cryoweb = {
        'Cattle': 'Bos taurus',
        'Chicken': 'Gallus gallus',
        'Deer': 'Cervidae',
        'Duck (domestic)': 'Anas platyrhynchos',
        'Goat': 'Capra hircus',
        'Goose (domestic)': 'Anser anser',
        'Horse': 'Equus caballus',
        'Pig': 'Sus scrofa',
        'Rabbit': 'Oryctolagus cuniculus',
        'Sheep': 'Ovis aries',
        'Turkey': 'Meleagris gallopavo',
        'Rainbow trout': 'Oncorhynchus mykiss',
        'Goose': 'Anser anser',
        'Dog': 'Canis lupus familiaris',
    }

    for word, specie in cryoweb.items():
        dictspecie = get_or_create_obj(
            DictSpecie,
            label=specie)

        # update with general specie
        result = get_general_breed_by_species(specie)

        # an empty result means that there is no general breed to record;
        # testing truthiness also skips a None result instead of failing
        # while subscripting it
        if result:
            general_breed_label = result['text']
            # split the full part and get the last piece
            general_breed_term = result['ontologyTerms'].split("/")[-1]

            # save only when the stored label differs from the received one
            if dictspecie.general_breed_label != general_breed_label:
                dictspecie.general_breed_label = general_breed_label
                dictspecie.general_breed_term = general_breed_term
                dictspecie.save()
                # let logging format the message only if it is emitted
                logger.info("Added general breed: %s", general_breed_label)

        get_or_create_obj(
            SpecieSynonym,
            dictspecie=dictspecie,
            language=language,
            word=word)


def fill_Countries():
    """Fill countries and return the default country (for languages)"""

    # (label, term) couples: the first one is the default country for the
    # default language, the second is a country difficult to annotate
    # with zooma
    curated_countries = (
        ('United Kingdom', 'NCIT_C17233'),
        ('Colombia', 'NCIT_C16449'),
    )

    countries = [
        get_or_create_obj(
            DictCountry,
            label=label,
            term=term,
            confidence=CURATED)
        for label, term in curated_countries
    ]

    # the first entry (United Kingdom) is the default for translations
    return countries[0]


def fill_OrganismParts():
    """Fill organism parts with manually curated terms"""

    # (label, term) couples recorded with the CURATED confidence
    curated_parts = [('strand of hair', "UBERON_0001037")]

    for part_label, part_term in curated_parts:
        get_or_create_obj(
            DictUberon,
            label=part_label,
            term=part_term,
            confidence=CURATED
        )


def standardize_institute_name(original):
    """Return a nicely cased version of an all-uppercase institute name.

    Only names containing at least one space and written entirely in
    uppercase are modified; every other name is returned unchanged (a name
    without spaces is likely an acronym such as EBI or IBBA).

    Each space-separated component of a modified name is rewritten as:

    * lowercase, if it is a known particle (de, la, of, and, ...);
    * uppercase, if it is a known acronym (INRA, FAO);
    * title case, otherwise.

    Args:
        original (str): the institute name as read from the data source

    Returns:
        str: the standardized institute name
    """
    # particles which need to be written in lowercase
    lowercase_words = {'de', 'la', 'of', 'and', 'y', 'fuer', 'del', 'l'}

    # acronyms which need to stay in uppercase
    acronyms = {'INRA', 'FAO'}

    # a single word, or a name which is not fully uppercase, is kept as it is
    if " " not in original or original.upper() != original:
        return original

    def fix_case(component):
        # particles have the precedence over acronyms, then title case
        if component.lower() in lowercase_words:
            return component.lower()

        if component.upper() in acronyms:
            return component.upper()

        return component.title()

    # splitting and joining on the same single space preserves repeated spaces
    return ' '.join(fix_case(component) for component in original.split(' '))


def fill_Organization():
    """Fill organization table

    Read the ';'-separated organization_list.csv file, located in the same
    directory as this module, whose columns are id, name and country. For
    each row, request the DictCountry with the row's country as label, then
    request the Organization with the standardized name, the role returned
    by fill_DictRoles and that country.
    """

    base_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(base_dir, "organization_list.csv")

    Data = collections.namedtuple('Data', "id name country")

    # get a role
    role = fill_DictRoles()

    # open data file: the context manager closes it even if a row fails
    with open(filename) as handle:
        reader = csv.reader(handle, delimiter=";")

        for row in map(Data._make, reader):
            # get a country object
            country = get_or_create_obj(
                DictCountry,
                label=row.country)

            # HINT: could be better to fix organization names in
            # organization_list?
            get_or_create_obj(
                Organization,
                name=standardize_institute_name(row.name),
                role=role,
                country=country)


class Command(BaseCommand):
    """Management command which runs all the fill_* functions in order"""

    help = 'Fill database tables like roles, sex, etc'

    def handle(self, *args, **options):
        """Call every fill function; args and options are not used"""

        # the functions are called following the order of this sequence
        steps = (
            fill_ontology,            # ontology libraries
            fill_DictSex,             # sex tables
            fill_DictRoles,           # DictRoles table
            fill_Species,             # custom species
            fill_SpeciesAndSynonyms,  # synonyms
            fill_Organization,        # organizations
            fill_OrganismParts,       # organisms
        )

        for step in steps:
            step()


1			#!/usr/bin/env python3
2			# -*- coding: utf-8 -*-
3			"""
4			Created on Thu Jan 25 15:28:05 2018
5
6			@author: Paolo Cozzi <[email protected]>
7
8			This django command script need to be called after database initialization.
9			The aim is to fill tables like ontology tables (roles, sex) in order to upload
10			data from cryoweb database (or xls template files, or ...)
11			"""
12
13			import collections
14			import csv
15			import io
16			import logging
17			import os
18
19			from image_validation.use_ontology import get_general_breed_by_species
20
21			from django.core.management import BaseCommand
22
23			from common.constants import OBO_URL, CURATED
24			from uid.helpers import get_or_create_obj, update_or_create_obj
25			from uid.models import (
26			DictCountry, DictRole, DictSex, DictSpecie, Ontology, Organization,
27			DictUberon)
28			from language.models import SpecieSynonym
29
30			# Get an instance of a logger
31			logger = logging.getLogger(__name__)
32
33
34			def fill_ontology():
35			data = """Library name;Library URI;Comment
36			PATO;{obo_url};Phenotype And Trait Ontology
37			LBO;{obo_url};Livestock Breed Ontology
38			EFO;http://www.ebi.ac.uk/efo;Experimental Factor Ontology
39			OBI;{obo_url};Ontology for Biomedical Investigations
40			NCBITaxon;{obo_url};NCBI Taxonomy
41			UBERON;{obo_url};cross-species ontology covering anatomical structures in animals
42			GAZ;{obo_url};A gazetteer constructed on ontological principles
43			NCIT;{obo_url};NCI Thesaurus OBO Edition
44			""".format(obo_url=OBO_URL)
45
46			handle = io.StringIO(data)
47			reader = csv.reader(handle, delimiter=";")
48
49			header = next(reader)
50
51			# fix header
52			header = [col.lower().replace(" ", "_") for col in header]
53
54			Data = collections.namedtuple('Data', header)
55
56			for row in map(Data._make, reader):
57			# update objects
58			update_or_create_obj(
59			Ontology,
60			library_name=row.library_name,
61			defaults=row._asdict())
62
63
64			def fill_DictSex():
65			# define three DictSex objects
66			get_or_create_obj(
67			DictSex,
68			label='male',
69			term='PATO_0000384')
70
71			get_or_create_obj(
72			DictSex,
73			label='female',
74			term='PATO_0000383')
75
76			get_or_create_obj(
77			DictSex,
78			label='record of unknown sex',
79			term='OBI_0000858')
80
81
82			# a function to fill up DictRoles
83			# TODO: need I fill tables with descendants terms?
84			def fill_DictRoles():
85			# define a submitter role
86			role = get_or_create_obj(
87			DictRole,
88			label='submitter',
89			term='EFO_0001741')
90
91			return role
92
93
94			# a function to fill up only species
95			def fill_Species():
96			"""Populate species table"""
97
98			data = [
99			{'confidence': CURATED, 'label': 'Crassostrea gigas',
100			'term': 'NCBITaxon_29159'},
101			{'confidence': CURATED, 'label': 'Equus asinus',
102			'term': 'NCBITaxon_9793'},
103			{'confidence': CURATED, 'label': 'Oncorhynchus mykiss',
104			'term': 'NCBITaxon_8022'},
105			{'confidence': CURATED, 'label': 'Canis lupus familiaris',
106			'term': 'NCBITaxon_9615'}]
107
108			for specie in data:
109			get_or_create_obj(DictSpecie, **specie)
110
111
112			# a function to fill up dictspecie and speciesynonym
113			def fill_SpeciesAndSynonyms():
114			"""Populate cryoweb dictionary tables"""
115
116			# insert country and get the default language
117			language = fill_Countries()
118
119			# those are cryoweb DE species an synonyms
120			cryoweb = {
121			'Cattle': 'Bos taurus',
122			'Chicken': 'Gallus gallus',
123			'Deer': 'Cervidae',
124			'Duck (domestic)': 'Anas platyrhynchos',
125			'Goat': 'Capra hircus',
126			'Goose (domestic)': 'Anser anser',
127			'Horse': 'Equus caballus',
128			'Pig': 'Sus scrofa',
129			'Rabbit': 'Oryctolagus cuniculus',
130			'Sheep': 'Ovis aries',
131			'Turkey': 'Meleagris gallopavo',
132			'Rainbow trout': 'Oncorhynchus mykiss',
133			'Goose': 'Anser anser',
134			'Dog': 'Canis lupus familiaris',
135			}
136
137			for word, specie in cryoweb.items():
138			dictspecie = get_or_create_obj(
139			DictSpecie,
140			label=specie)
141
142			# update with general specie
143			result = get_general_breed_by_species(specie)
144
145			if result != {}:
146			general_breed_label = result['text']
147			# split the full part and get the last piece
148			general_breed_term = result['ontologyTerms'].split("/")[-1]
149
150			if dictspecie.general_breed_label != general_breed_label:
151			dictspecie.general_breed_label = general_breed_label
152			dictspecie.general_breed_term = general_breed_term
153			dictspecie.save()
154			logger.info("Added general breed: %s" % (general_breed_label))
155
156			get_or_create_obj(
157			SpecieSynonym,
158			dictspecie=dictspecie,
159			language=language,
160			word=word)
161
162
163			def fill_Countries():
164			"""Fill countries and return the default country (for languages)"""
165
166			# define the default country for the default language
167			united_kingdom = get_or_create_obj(
168			DictCountry,
169			label='United Kingdom',
170			term='NCIT_C17233',
171			confidence=CURATED)
172
173			# add a country difficult to annotate with zooma
174			get_or_create_obj(
175			DictCountry,
176			label='Colombia',
177			term='NCIT_C16449',
178			confidence=CURATED)
179
180			# I will return default language for translations
181			return united_kingdom
182
183
184			def fill_OrganismParts():
185			"""Fill organism parts with manually curated terms"""
186
187			data = {'strand of hair': "UBERON_0001037"}
188
189			for label, term in data.items():
190			get_or_create_obj(
191			DictUberon,
192			label=label,
193			term=term,
194			confidence=CURATED
195			)
196
197
198			def standardize_institute_name(original):
199			special = {
200			'de': 1,
201			'la': 1,
202			'of': 1,
203			'and': 1,
204			'y': 1,
205			'fuer': 1,
206			'del': 1,
207			'l': 1,
208			'INRA': 1,
209			'FAO': 1
210			}
211
212			# search space in original (instutute name) if no space is found
213			# it is like that institute name will be EBI or IBBA, and will be
214			# treated as it is
215			if original.find(" ") > -1:
216			if original.upper() == original:
217			components = original.split(' ')
218			# We capitalize the first letter of each component except the first
219			# one with the 'title' method and join them together.
220			result = ''
221			for component in components:
222			result = result + ' '
223			if component.lower() in special:
224			result = result + component.lower()
225			elif component.upper() in special:
226			result = result + component.upper()
227			else:
228			result = result + component.title()
229			result = result[1:]
230			return result
231			return original
232
233
234			def fill_Organization():
235			"""Fill organization table"""
236
237			base_dir = os.path.dirname(os.path.abspath(__file__))
238			filename = os.path.join(base_dir, "organization_list.csv")
239
240			# open data file
241			handle = open(filename)
242			reader = csv.reader(handle, delimiter=";")
243			Data = collections.namedtuple('Data', "id name country")
244
245			# get a role
246			role = fill_DictRoles()
247
248			for row in map(Data._make, reader):
249			# get a country object
250			country = get_or_create_obj(
251			DictCountry,
252			label=row.country)
253
254			# HINT: could be better to fix organization names in organization_list?
255			get_or_create_obj(
256			Organization,
257			name=standardize_institute_name(row.name),
258			role=role,
259			country=country)
260
261			handle.close()
262
263
264			class Command(BaseCommand):
265			help = 'Fill database tables like roles, sex, etc'
266
267			def handle(self, *args, **options):
268			# call commands and fill tables.
269			fill_ontology()
270
271			# Fill sex tables
272			fill_DictSex()
273
274			# fill DictRoles table
275			fill_DictRoles()
276
277			# import custom species
278			fill_Species()
279
280			# import synonyms
281			fill_SpeciesAndSynonyms()
282
283			# import organizations
284			fill_Organization()
285
286			# import organisms
287			fill_OrganismParts()
288

cnr-ibba / IMAGE-InjectTool

Push — master ( a65260...307dd7 )

fill_SpeciesAndSynonyms() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like