Passed
Pull Request — master (#540)
by
unknown
03:31
created

annif.util.apply_param_parse_config()   A

Complexity

Conditions 1

Size

Total Lines 6
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nop 2
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
"""Utility functions for Annif"""
2
3
import glob
4
import os
5
import os.path
6
from shutil import rmtree
7
import tempfile
8
import numpy as np
9
from annif import logger
10
from annif.suggestion import VectorSuggestionResult
11
12
13 View Code Duplication
def atomic_save(obj, dirname, filename, method=None):
    """Save an object into a directory by way of a temporary file.

    A temporary file whose name is derived from ``filename`` is created in
    ``dirname`` and the object is written to it. Afterwards every path that
    starts with the temporary file name (the file itself plus anything the
    save routine wrote next to it under the same name prefix) is moved to
    the corresponding final name.

    Args:
        obj: the object to save; it must have a ``.save(path)`` method
            unless ``method`` is given.
        dirname: directory holding both the temporary and the final file.
        filename: final file name inside ``dirname``.
        method: optional callable, invoked as ``method(obj, path)`` instead
            of ``obj.save(path)``.
    """

    prefix, suffix = os.path.splitext(filename)
    tempfd, tempfilename = tempfile.mkstemp(
        prefix=prefix, suffix=suffix, dir=dirname)
    # Only the path is needed: the save routine receives the file name,
    # not the open descriptor.
    os.close(tempfd)
    logger.debug('saving %s to temporary file %s', str(obj)[:90], tempfilename)
    if method is not None:
        method(obj, tempfilename)
    else:
        obj.save(tempfilename)

    target = os.path.join(dirname, filename)
    temp_basename = os.path.basename(tempfilename)
    # Escape the path so glob metacharacters in it are matched literally.
    for fn in glob.glob(glob.escape(tempfilename) + '*'):
        # Whatever follows the temporary name is carried over to the target
        # (only the leading occurrence of the temporary name is swapped).
        extra = os.path.basename(fn)[len(temp_basename):]
        newname = target + extra
        logger.debug('renaming temporary file %s to %s', fn, newname)
        # os.replace overwrites an existing destination on all platforms.
        os.replace(fn, newname)
34
35
36 View Code Duplication
def atomic_save_folder(obj, dirname, method=None):
    """Save an object into a directory by way of a temporary directory.

    A temporary directory is created next to ``dirname`` (in its parent
    directory) and the object is written into it. Afterwards every path
    that starts with the temporary directory name is moved to the
    corresponding name based on ``dirname``; an existing directory at the
    destination is removed first.

    Args:
        obj: the object to save; it must have a ``.save(path)`` method
            unless ``method`` is given.
        dirname: the final directory.
        method: optional callable, invoked as ``method(obj, path)`` instead
            of ``obj.save(path)``.
    """

    tldir = os.path.dirname(dirname.rstrip('/'))
    # exist_ok must be a real boolean: the parent path is an empty string
    # for a single-component dirname, which used to disable it.
    os.makedirs(dirname, exist_ok=True)
    # Keep the TemporaryDirectory object referenced until the function
    # returns so that it is not cleaned up before the move below.
    tempdir = tempfile.TemporaryDirectory(dir=tldir)
    temp_dir_name = tempdir.name
    target_pth = dirname
    logger.debug(
        'saving %s to temporary file %s', str(obj)[:90],
        temp_dir_name)
    if method is not None:
        method(obj, temp_dir_name)
    else:
        obj.save(temp_dir_name)

    temp_basename = os.path.basename(temp_dir_name)
    # Escape the path so glob metacharacters in it are matched literally.
    for fn in glob.glob(glob.escape(temp_dir_name) + '*'):
        # Whatever follows the temporary name is carried over to the target
        # (only the leading occurrence of the temporary name is swapped).
        extra = os.path.basename(fn)[len(temp_basename):]
        newname = target_pth + extra
        logger.debug('renaming temporary file %s to %s', fn, newname)
        if os.path.isdir(newname):
            # A non-empty directory cannot be overwritten by os.replace.
            rmtree(newname)
        os.replace(fn, newname)
62
63
64
def cleanup_uri(uri):
    """Remove the surrounding angle brackets from a URI, if any.

    The brackets are stripped only when the string both starts with '<'
    and ends with '>'; otherwise the argument is returned unchanged.
    """
    if uri.startswith('<') and uri.endswith('>'):
        return uri[1:-1]
    return uri
69
70
71
def merge_hits(weighted_hits, subject_index):
    """Merge hits from multiple sources into a single result.

    Args:
        weighted_hits: a sequence of objects that each provide a ``weight``
            attribute and a ``hits`` attribute with an
            ``as_vector(subject_index)`` method (WeightedSuggestion
            objects).
        subject_index: passed to ``as_vector()`` of every hit set; a
            SubjectIndex is needed to convert between subject IDs and
            URIs.

    Returns:
        A VectorSuggestionResult wrapping the weighted average of the
        score vectors.
    """

    weights = [whit.weight for whit in weighted_hits]
    scores = [whit.hits.as_vector(subject_index) for whit in weighted_hits]
    # Average across sources (axis 0), keeping one value per vector position.
    result = np.average(scores, axis=0, weights=weights)
    return VectorSuggestionResult(result)
80
81
82 View Code Duplication
def parse_sources(sourcedef):
    """Parse a source definition such as 'src1:1.0,src2' into a list of
    tuples (src_id, weight).

    Sources are separated by commas; an optional weight follows the source
    id after a colon and defaults to 1.0. The returned weights are
    normalized so that they sum to 1. Whitespace around source ids is
    ignored and empty entries (e.g. caused by a trailing comma or an empty
    definition) are skipped.

    Raises:
        ValueError: if a weight cannot be converted to a float.
        ZeroDivisionError: if the weights of the sources sum to zero.
    """

    sources = []
    totalweight = 0.0
    for srcdef in sourcedef.split(','):
        srcval = srcdef.split(':')
        src_id = srcval[0].strip()
        if not src_id:
            # nothing names a source here, e.g. 'src1,' or ''
            continue
        weight = float(srcval[1]) if len(srcval) > 1 else 1.0
        sources.append((src_id, weight))
        totalweight += weight
    return [(srcid, weight / totalweight) for srcid, weight in sources]
98
99
100 View Code Duplication
def parse_args(param_string):
    """Parse a string of comma separated arguments such as '42,43,key=abc'
    into a list of positional args ['42', '43'] and a dict of keyword args
    {'key': 'abc'}.

    All values are returned as strings; no type conversion is done. A
    parameter is split at its first '=' only, so a keyword value may itself
    contain '=' characters. An empty or missing parameter string yields
    an empty list and an empty dict.
    """

    if not param_string:
        return [], {}
    posargs = []
    kwargs = {}
    for p_string in param_string.split(','):
        key, separator, value = p_string.partition('=')
        if separator:
            kwargs[key] = value
        else:
            posargs.append(p_string)
    return posargs, kwargs
116
117
118
def apply_param_parse_config(configs, params):
    """Apply a parsing configuration to a parameter dict.

    Args:
        configs: mapping from parameter name to a callable that converts
            the value of that parameter.
        params: mapping from parameter name to its raw value.

    Returns:
        A new dict with the converted values of those parameters that have
        an entry in ``configs`` and whose value is not None; all other
        parameters are left out.
    """
    return {
        param: configs[param](val)
        for param, val in params.items()
        if param in configs and val is not None}
124
125
126
def boolean(val):
    """Convert the given value to a boolean True/False value, if it isn't
    already.

    The value is converted to a string and compared case-insensitively:
    '1', 'yes', 'true' and 'on' are True, everything else is False.
    """

    return str(val).lower() in ('1', 'yes', 'true', 'on')
132
133
134
def identity(x):
    """Identity function: return the given argument unchanged."""
    return x
137
138
139
def metric_code(metric):
    """Convert a human-readable metric name into an alphanumeric string.

    Spaces are replaced with underscores and parentheses are removed; all
    other characters are kept as they are.
    """
    return metric.translate(str.maketrans(' ', '_', '()'))
142