Passed
Pull Request — master (#496)
by
unknown
02:00
created

annif.util.parse_args()   A

Complexity

Conditions 5

Size

Total Lines 16
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 13
nop 1
dl 0
loc 16
rs 9.2833
c 0
b 0
f 0
1
"""Utility functions for Annif"""
2
3
import glob
4
import os
5
import os.path
6
import tempfile
7
import numpy as np
8
from annif import logger
9
from annif.suggestion import VectorSuggestionResult
10
11
12
def atomic_save(obj, dirname, filename, method=None):
    """Save the given object (which must have a .save() method, unless the
    method parameter is given) into the given directory with the given
    filename, using a temporary file and renaming the temporary file to the
    final name.

    If method is given it is called as method(obj, path); otherwise
    obj.save(path) is called. Every file whose name starts with the
    temporary file name (the save may create several) is renamed so that
    the temporary name is replaced by the final name and any trailing part
    is kept. If saving raises, the temporary files are removed and the
    exception is re-raised."""

    prefix, suffix = os.path.splitext(filename)
    tempfd, tempfilename = tempfile.mkstemp(
        prefix=prefix, suffix=suffix, dir=dirname)
    os.close(tempfd)
    # escape glob metacharacters so that a directory or file name containing
    # e.g. '[' or '*' is still matched literally
    temp_pattern = glob.escape(tempfilename) + '*'
    logger.debug('saving %s to temporary file %s', str(obj)[:90], tempfilename)
    try:
        if method is not None:
            method(obj, tempfilename)
        else:
            obj.save(tempfilename)
    except BaseException:
        # don't leave half-written temporary files behind
        for fn in glob.glob(temp_pattern):
            try:
                os.remove(fn)
            except OSError:
                logger.debug('could not remove temporary file %s', fn)
        raise
    finalname = os.path.join(dirname, filename)
    for fn in glob.glob(temp_pattern):
        # keep whatever the save appended after the temporary file name
        newname = finalname + fn[len(tempfilename):]
        logger.debug('renaming temporary file %s to %s', fn, newname)
        # unlike os.rename, os.replace overwrites an existing target on
        # every platform
        os.replace(fn, newname)
31
32
33
def cleanup_uri(uri):
    """Remove angle brackets from a URI, if any.

    The brackets are stripped only when the string both starts with '<' and
    ends with '>'; any other string is returned unchanged."""
    is_bracketed = uri.startswith('<') and uri.endswith('>')
    return uri[1:-1] if is_bracketed else uri
38
39
40
def merge_hits(weighted_hits, subject_index):
    """Merge hits from multiple sources. Input is a sequence of
    WeightedSuggestion objects. A SubjectIndex is needed to convert between
    subject IDs and URIs. Returns a SuggestionResult object.

    Each input's hits are converted with hits.as_vector(subject_index) and
    the vectors are combined into a weighted average using each input's
    weight; the average is wrapped in a VectorSuggestionResult."""

    # materialize once: the input is traversed twice below, which would
    # silently yield nothing the second time for a one-shot iterable
    weighted_hits = list(weighted_hits)
    weights = [whit.weight for whit in weighted_hits]
    scores = [whit.hits.as_vector(subject_index) for whit in weighted_hits]
    result = np.average(scores, axis=0, weights=weights)
    return VectorSuggestionResult(result)
49
50
51
def parse_sources(sourcedef):
    """Parse a source definition such as 'src1:1.0,src2' into a sequence of
    tuples (src_id, weight).

    A source without an explicit weight gets the weight 1.0. The returned
    weights are normalized by dividing each one by the sum of all weights.
    Whitespace around source ids is ignored.

    Raises ValueError if a weight cannot be converted to a float, and
    ZeroDivisionError if the weights sum to zero."""

    sources = []
    for srcdef in sourcedef.strip().split(','):
        # anything after a second ':' is ignored
        src_id, *rest = srcdef.split(':')
        weight = float(rest[0]) if rest else 1.0
        sources.append((src_id.strip(), weight))
    totalweight = sum(weight for _, weight in sources)
    return [(src_id, weight / totalweight) for src_id, weight in sources]
67
68
69
def parse_args(param_string):
    """Parse a string of comma separated arguments such as '42,43,key=abc'
    into a list of positional args ['42', '43'] and a dict of keyword args
    {'key': 'abc'}.

    All values are returned as strings; no type conversion is done. An
    argument is split at its first '=' only, so a keyword value may itself
    contain '=' characters. An empty or None param_string gives ([], {})."""

    if not param_string:
        return [], {}
    posargs = []
    kwargs = {}
    for p_string in param_string.split(','):
        key, sep, value = p_string.partition('=')
        if sep:
            kwargs[key] = value
        else:
            posargs.append(p_string)
    return posargs, kwargs
85
86
87
def boolean(val):
    """Convert the given value to a boolean True/False value, if it isn't
    already. True values are '1', 'yes', 'true', and 'on' (case
    insensitive), everything else is False.

    The value is converted with str() before the comparison, so True and
    the integer 1 are also true values."""

    true_values = ('1', 'yes', 'true', 'on')
    return str(val).lower() in true_values
93
94
95
def identity(x):
    """Identity function: return the given argument unchanged"""
    return x
98