Passed
Push — master ( b05dd5...9ea6eb )
by Konstantin
02:45
created

ocrd_utils.config   A

Complexity

Total Complexity 26

Size/Duplication

Total Lines 168
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 26
eloc 110
dl 0
loc 168
rs 10
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
A OcrdEnvConfig.has_default() 0 4 2
A OcrdEnvConfig.describe() 0 4 2
A OcrdEnvVariable.describe() 0 13 5
A OcrdEnvConfig.add() 0 3 1
A OcrdEnvConfig.raw_value() 0 4 2
A OcrdEnvVariable.__init__() 0 21 2
A OcrdEnvConfig.is_set() 0 4 2
A OcrdEnvVariable.__str__() 0 2 1
B OcrdEnvConfig.__getattr__() 0 14 6
A OcrdEnvConfig.__init__() 0 2 1

1 Function

Rating   Name   Duplication   Size   Complexity  
A _ocrd_download_timeout_parser() 0 7 2
1
"""
2
Most behavior of OCR-D is controlled via command-line flags or keyword args.
3
Some behavior is global or too cumbersome to handle via explicit code and
4
better solved by using environment variables.
5
6
OcrdEnvConfig is a base class to make this more streamlined, to be subclassed
7
in the `ocrd` package for the actual values
8
"""
9
10
from os import environ
11
from pathlib import Path
12
from textwrap import fill, indent
13
14
class OcrdEnvVariable():
    """
    Declaration of a single environment variable: its name, a description,
    how to validate and parse its raw value, and an optional default.
    """

    def __init__(self, name, description, parser=str, validator=lambda val: True, default=(False, None)):
        """
        An environment variable for use in OCR-D.

        Args:
            name (str): Name of the environment variable
            description (str): Description of what the variable is used for.

        Keyword Args:
            parser (callable): Function to transform the raw (string) value to whatever is needed.
            validator (callable): Function to validate that the raw (string) value is parseable.
            default (tuple(bool, any)): 2-tuple, first element is a bool whether there is a default
                value defined and second element contains that default value, which can be a callable
                for deferred evaluation
        """
        self.name = name
        self.description = description
        self.parser = parser
        self.validator = validator
        # The default is passed as a (flag, value) pair so that `None` can be
        # a legitimate default value.
        self.has_default = default[0]
        self.default = default[1]

    def __str__(self):
        """Return ``<name>: <description>``."""
        return f'{self.name}: {self.description}'

    def describe(self, wrap_text=True, indent_text=True):
        """
        Build a two-part help text: the variable name on the first line,
        followed by the description (with the default appended, if any).

        Keyword Args:
            wrap_text (bool): Wrap the description at 50 columns.
            indent_text (bool): Indent the name by 2 and the description by 4 spaces.

        Returns:
            str: The formatted help text.
        """
        desc = self.description
        if self.has_default:
            # A callable default is evaluated now, so the text shows its current value.
            default = self.default() if callable(self.default) else self.default
            desc += f' (Default: "{default}")'
        ret = f'{self.name}\n'
        if wrap_text:
            desc = fill(desc, width=50)
        if indent_text:
            ret = f'  {ret}'
            desc = indent(desc, '    ')
        return ret + desc
54
55
class _UnregisteredVariableError(AttributeError, ValueError):
    """
    Raised when a variable name is used that was never registered.

    It is a ``ValueError`` so callers catching that keep working, and an
    ``AttributeError`` so that ``hasattr()`` / ``getattr(obj, name, default)``
    treat an unregistered name as a missing attribute.
    """


class OcrdEnvConfig():
    """
    Registry of environment variables. Registered variables are read via
    attribute access (``config.NAME``), which looks the name up in the process
    environment, falls back to the declared default, validates and parses.
    """

    def __init__(self):
        # Maps variable name -> variable declaration object
        self._variables = {}

    def _lookup(self, name):
        """Return the declaration registered under `name` or raise if there is none."""
        if name not in self._variables:
            raise _UnregisteredVariableError(f"Unregistered env variable {name}")
        return self._variables[name]

    def add(self, name, *args, **kwargs):
        """
        Register variable `name`; remaining arguments are passed on to
        ``OcrdEnvVariable``. Re-registering a name replaces the old declaration.

        Returns:
            The newly created variable declaration.
        """
        self._variables[name] = OcrdEnvVariable(name, *args, **kwargs)
        return self._variables[name]

    def has_default(self, name):
        """
        Whether the registered variable `name` declares a default.

        Raises:
            ValueError: if `name` is not registered.
        """
        return self._lookup(name).has_default

    def describe(self, name, *args, **kwargs):
        """
        Return the help text of the registered variable `name`; extra
        arguments are passed on to the variable's own ``describe``.

        Raises:
            ValueError: if `name` is not registered.
        """
        return self._lookup(name).describe(*args, **kwargs)

    def __getattr__(self, name):
        """
        Resolve ``config.NAME`` to the validated and parsed value of the variable.

        The raw value comes from the environment; if the variable is unset, the
        declared default is used (calling it first when it is a callable).

        Raises:
            ValueError: if `name` is not registered (the exception is also an
                ``AttributeError``) or if the validator rejects the value.
            KeyError: if the variable is unset and has no default.
        """
        # __getattr__ only runs when normal lookup failed. If that happens for
        # `_variables` itself, the instance was created without __init__
        # (e.g. while being copied or unpickled); bail out instead of recursing.
        if name == '_variables':
            raise AttributeError(name)
        var_obj = self._lookup(name)
        try:
            raw_value = self.raw_value(name)
        except KeyError:
            if not var_obj.has_default:
                raise
            raw_value = var_obj.default() if callable(var_obj.default) else var_obj.default
        # The validator is applied to defaults as well as to environment values.
        if not var_obj.validator(raw_value):
            raise ValueError(f"'{name}' set to invalid value '{raw_value}'")
        return var_obj.parser(raw_value)

    def is_set(self, name):
        """
        Whether the registered variable `name` is present in the environment.

        Raises:
            ValueError: if `name` is not registered.
        """
        self._lookup(name)
        return name in environ

    def raw_value(self, name):
        """
        Return the unparsed environment value of the registered variable `name`.

        Raises:
            ValueError: if `name` is not registered.
            KeyError: if the variable is not set in the environment.
        """
        self._lookup(name)
        return environ[name]
98
99
# Module-level registry; the variables known to this module are registered below.
config = OcrdEnvConfig()

# Boolean switch: only the four listed spellings are valid, of which
# 'true' and '1' parse to True.
config.add('OCRD_METS_CACHING',
    description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.',
    validator=lambda val: val in ('true', 'false', '0', '1'),
    parser=lambda val: val in ('true', '1'))

config.add('OCRD_MAX_PROCESSOR_CACHE',
    description="Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) for processing workers or processor servers.",
    parser=int,
    default=(True, 128))

# Comma-separated list of tokens; the empty string (the default) is valid.
config.add("OCRD_PROFILE",
    description="""\
Whether to enable gathering runtime statistics
on the `ocrd.profile` logger (comma-separated):
- `CPU`: yields CPU and wall-time,
- `RSS`: also yields peak memory (resident set size)
- `PSS`: also yields peak memory (proportional set size)
""",
    validator=lambda val: all(t in ('', 'CPU', 'RSS', 'PSS') for t in val.split(',')),
    default=(True, ''))

config.add("OCRD_PROFILE_FILE",
    description="If set, then the CPU profile is written to this file for later peruse with a analysis tools like snakeviz")

config.add("OCRD_DOWNLOAD_RETRIES",
    description="Number of times to retry failed attempts for downloads of workspace files.",
    # The validator's result is truth-tested, so a bare `int` would reject
    # "0" (int("0") is falsy). Accept every string int() can convert; strings
    # it cannot convert still raise int()'s own ValueError.
    validator=lambda val: int(val) is not None,
    parser=int)
129
130
def _ocrd_download_timeout_parser(val):
131
    timeout = val.split(',')
132
    if len(timeout) > 1:
133
        timeout = tuple(float(x) for x in timeout)
134
    else:
135
        timeout = float(timeout[0])
136
    return timeout
137
138
# One number, or several comma-separated numbers (see the parser).
config.add(
    "OCRD_DOWNLOAD_TIMEOUT",
    description="Timeout in seconds for connecting or reading (comma-separated) when downloading.",
    parser=_ocrd_download_timeout_parser,
)

# Server addresses for the `ocrd network client` subcommands; all default to ''.
config.add(
    "OCRD_NETWORK_SERVER_ADDR_PROCESSING",
    description="Default address of Processing Server to connect to (for `ocrd network client processing`).",
    default=(True, ''),
)

config.add(
    "OCRD_NETWORK_SERVER_ADDR_WORKFLOW",
    description="Default address of Workflow Server to connect to (for `ocrd network client workflow`).",
    default=(True, ''),
)

config.add(
    "OCRD_NETWORK_SERVER_ADDR_WORKSPACE",
    description="Default address of Workspace Server to connect to (for `ocrd network client workspace`).",
    default=(True, ''),
)

# HOME must be an existing directory and is returned as a Path. The default
# is a callable, so the home directory is determined at access time.
# cf. https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
config.add(
    "HOME",
    description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
    validator=lambda val: Path(val).is_dir(),
    parser=Path,
    default=(True, Path.home),
)

# The XDG defaults are derived from `config.HOME` lazily, on each access.
# NOTE(review): the `ocrd resmgr` descriptions of XDG_DATA_HOME and
# XDG_CONFIG_HOME look swapped (a user database would normally live below the
# config directory, downloaded data below the data directory) - confirm
# against the resource manager before changing the texts.
config.add(
    "XDG_DATA_HOME",
    description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)",
    default=(True, lambda: Path(config.HOME, '.local/share')),
)

config.add(
    "XDG_CONFIG_HOME",
    description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)",
    default=(True, lambda: Path(config.HOME, '.config')),
)
168