Passed
Pull Request — master (#673)
by Juho
02:55
created

annif.util.DuplicateFilter.filter()   A

Complexity

Conditions 2

Size

Total Lines 6
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nop 2
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
"""Utility functions for Annif"""
2
3
import glob
4
import logging
5
import os
6
import os.path
7
import tempfile
8
9
import numpy as np
10
11
from annif import logger
12
from annif.suggestion import VectorSuggestionResult
13
14
15
class DuplicateFilter(logging.Filter):
    """Filter out log messages that have already been displayed.

    A record is treated as a duplicate when its module, level, message and
    arguments all match those of a record this filter has already accepted.
    """

    def __init__(self):
        # initialize the logging.Filter base state as well (name, nlen)
        super().__init__()
        # hashes of the records that have been let through so far
        self.logged = set()

    def filter(self, record):
        """Return True the first time a record is seen and False on repeats."""
        try:
            current_log = hash(
                (record.module, record.levelno, record.msg, record.args)
            )
        except TypeError:
            # msg or args may hold unhashable objects (e.g. a dict or a list);
            # compare by their textual form instead of failing the log call
            current_log = hash(
                (record.module, record.levelno, str(record.msg), repr(record.args))
            )
        if current_log in self.logged:
            return False
        self.logged.add(current_log)
        return True
27
28
29
def atomic_save(obj, dirname, filename, method=None):
    """Save the given object (which must have a .save() method, unless the
    method parameter is given) into the given directory with the given
    filename, using a temporary file and renaming the temporary file to the
    final name.

    If method is given, it is called as method(obj, path) instead of
    obj.save(path). Every file whose name starts with the temporary file name
    is renamed, so savers that write additional sibling files are handled too.
    If saving raises an exception, the temporary files are removed and the
    exception is re-raised."""

    prefix, suffix = os.path.splitext(filename)
    tempfd, tempfilename = tempfile.mkstemp(prefix=prefix, suffix=suffix, dir=dirname)
    # only the unique name is needed; the saver is handed the path, not the fd
    os.close(tempfd)
    logger.debug("saving %s to temporary file %s", str(obj)[:90], tempfilename)
    # escape the path so that glob metacharacters in it are matched literally
    temp_pattern = glob.escape(tempfilename) + "*"
    try:
        if method is not None:
            method(obj, tempfilename)
        else:
            obj.save(tempfilename)
    except Exception:
        # do not leave stray temporary files behind after a failed save
        for leftover in glob.glob(temp_pattern):
            try:
                os.remove(leftover)
            except OSError:
                logger.debug("could not remove temporary file %s", leftover)
        raise
    for fn in glob.glob(temp_pattern):
        newname = fn.replace(tempfilename, os.path.join(dirname, filename))
        logger.debug("renaming temporary file %s to %s", fn, newname)
        # os.replace overwrites an existing target on all platforms
        os.replace(fn, newname)
47
48
49
def cleanup_uri(uri):
    """Return the URI without its enclosing angle brackets; a URI that is not
    wrapped in both '<' and '>' is returned as it is"""
    is_bracketed = uri.startswith("<") and uri.endswith(">")
    return uri[1:-1] if is_bracketed else uri
54
55
56
def merge_hits(weighted_hits, size):
    """Combine the hits of several sources into a single result.

    weighted_hits is a sequence of WeightedSuggestion objects and size is the
    length of the subject vector that each of them is expanded to. The vectors
    are averaged using the weight of each source and the outcome is returned
    wrapped in a VectorSuggestionResult object."""

    source_weights = [entry.weight for entry in weighted_hits]
    score_vectors = [entry.hits.as_vector(size) for entry in weighted_hits]
    averaged = np.average(score_vectors, weights=source_weights, axis=0)
    return VectorSuggestionResult(averaged)
65
66
67
def parse_sources(sourcedef):
    """Turn a source definition like 'src1:1.0,src2' into a list of
    (src_id, weight) tuples. A source without an explicit weight gets 1.0,
    and the returned weights are scaled so that they sum up to 1."""

    weighted = []
    weight_sum = 0.0
    for chunk in sourcedef.strip().split(","):
        fields = chunk.strip().split(":")
        # anything after a second colon is ignored
        weight = float(fields[1]) if len(fields) > 1 else 1.0
        weighted.append((fields[0], weight))
        weight_sum += weight
    return [(name, value / weight_sum) for name, value in weighted]
83
84
85
def parse_args(param_string):
    """Parse a string of comma separated arguments such as '42,43,key=abc' into
    a list of positional args ['42', '43'] and a dict of keyword args
    {'key': 'abc'}.

    All values are returned as strings; no type conversion is performed. An
    argument is split at its first '=' only, so a keyword value may itself
    contain '=' characters ('expr=a=b' gives {'expr': 'a=b'}). An empty or
    None param_string gives ([], {})."""

    if not param_string:
        return [], {}
    posargs = []
    kwargs = {}
    for p_string in param_string.split(","):
        key, separator, value = p_string.partition("=")
        if separator:
            kwargs[key] = value
        else:
            # no '=' present: a positional argument
            posargs.append(p_string)
    return posargs, kwargs
101
102
103
def boolean(val):
    """Interpret the given value as a boolean. The value is turned into a
    string and compared case insensitively: '1', 'yes', 'true' and 'on' give
    True, anything else gives False."""

    normalized = str(val).lower()
    return normalized in {"1", "yes", "true", "on"}
109
110
111
def identity(x):
    """Return the argument exactly as it was passed in"""
    return x
114
115
116
def metric_code(metric):
    """Turn a human-readable metric name into an alphanumeric string: spaces
    become underscores and parentheses are removed"""
    table = str.maketrans({" ": "_", "(": None, ")": None})
    return metric.translate(table)
119