Passed
Push — master ( 407c02...ff81c6 )
by Konstantin
02:41
created

ocrd_utils.config.OcrdEnvConfig.reset_defaults()   A

Complexity

Conditions 4

Size

Total Lines 10
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 10
rs 10
c 0
b 0
f 0
cc 4
nop 1
1
"""
2
Most behavior of OCR-D is controlled via command-line flags or keyword args.
3
Some behavior is global or too cumbersome to handle via explicit code and
4
better solved by using environment variables.
5
6
OcrdEnvConfig is a base class to make this more streamlined, to be subclassed
7
in the `ocrd` package for the actual values
8
"""
9
10
from os import environ
11
from pathlib import Path
12
from tempfile import gettempdir
13
from textwrap import fill, indent
14
15
16
class OcrdEnvVariable():
    """
    Declaration of a single environment variable for use in OCR-D.

    Holds the variable's name and description together with the callables
    used to validate and parse its raw value, and an optional default.
    """

    def __init__(self, name, description, parser=str, validator=lambda val: True, default=(False, None)):
        """
        An environment variable for use in OCR-D.

        Args:
            name (str): Name of the environment variable
            description (str): Description of what the variable is used for.

        Keyword Args:
            parser (callable): Function to transform the raw (string) value to whatever is needed.
            validator (callable): Function to validate that the raw (string) value is parseable.
            default (tuple(bool, any)): 2-tuple, first element is a bool whether there is a default
                value defined and second element contains that default value, which can be a callable
                for deferred evaluation
        """
        self.name = name
        self.description = description
        self.parser = parser
        self.validator = validator
        # The signature default is an immutable tuple (not a shared list);
        # indexing keeps lists and other sequences passed by callers working.
        self.has_default = default[0]
        self.default = default[1]

    def __str__(self):
        """Return ``"<name>: <description>"``."""
        return f'{self.name}: {self.description}'

    def describe(self, wrap_text=True, indent_text=True):
        """
        Build a help text: the name on the first line, the description below.

        If a default is defined, it is appended to the description (a
        callable default is called to obtain the value to show).

        Keyword Args:
            wrap_text (bool): Wrap the description at a width of 50 characters.
            indent_text (bool): Indent the name by 2 and the description by 4 spaces.

        Returns:
            str: the formatted text
        """
        desc = self.description
        if self.has_default:
            default = self.default() if callable(self.default) else self.default
            desc += f' (Default: "{default}")'
        ret = f'{self.name}\n'
        if wrap_text:
            desc = fill(desc, width=50)
        if indent_text:
            ret = f'  {ret}'
            desc = indent(desc, '    ')
        return ret + desc
56
57
class OcrdEnvConfig():
    """
    Registry of environment variables with attribute-style access to their values.

    Variables are declared with :py:meth:`add`. Reading ``config.NAME`` for a
    registered name that is not an ordinary attribute looks the name up in
    ``os.environ``, falls back to the declared default (if any), validates the
    raw value and returns the parsed result. A value assigned directly
    (``config.NAME = value``) is an ordinary instance attribute and therefore
    takes precedence until :py:meth:`reset_defaults` removes it.
    """

    def __init__(self):
        # maps variable name -> OcrdEnvVariable
        self._variables = {}

    def _registered(self, name):
        """Return the variable object registered as ``name`` or raise ValueError."""
        if name not in self._variables:
            raise ValueError(f"Unregistered env variable {name}")
        return self._variables[name]

    def add(self, name, *args, **kwargs):
        """
        Register (or replace) the variable ``name`` and return its
        :py:class:`OcrdEnvVariable`; remaining arguments are passed on to
        the :py:class:`OcrdEnvVariable` constructor.
        """
        self._variables[name] = OcrdEnvVariable(name, *args, **kwargs)
        return self._variables[name]

    def has_default(self, name):
        """
        Tell whether the registered variable ``name`` declares a default.

        Raises:
            ValueError: if ``name`` is not registered
        """
        return self._registered(name).has_default

    def reset_defaults(self):
        """
        Remove every value that was assigned directly as an instance attribute
        for a registered variable, so that subsequent reads are resolved from
        the environment or the declared default again.
        """
        # Look at the instance dict directly: hasattr() would delegate to
        # __getattr__ (which also answers for defaults that cannot be removed),
        # and testing the stored value for truthiness would skip overrides
        # such as False, 0 or ''.
        overrides = vars(self)
        for name in self._variables:
            if name in overrides:
                delattr(self, name)

    def describe(self, name, *args, **kwargs):
        """
        Return the help text of the registered variable ``name``; remaining
        arguments are passed on to :py:meth:`OcrdEnvVariable.describe`.

        Raises:
            ValueError: if ``name`` is not registered
        """
        return self._registered(name).describe(*args, **kwargs)

    def __getattr__(self, name):
        """
        Resolve the value of the registered variable ``name``.

        Raises:
            AttributeError: if ``name`` is not registered
            KeyError: if the variable is unset in the environment and has no default
            ValueError: if the validator rejects the raw (or default) value
        """
        # will be called if name is not accessible (has not been added directly yet)
        try:
            # Bypass __getattr__ for the registry itself: on an instance whose
            # __init__ never ran, `self._variables` would re-enter this method
            # without end.
            variables = object.__getattribute__(self, '_variables')
        except AttributeError:
            raise AttributeError(f"Unregistered env variable {name}") from None
        if name not in variables:
            raise AttributeError(f"Unregistered env variable {name}")
        var_obj = variables[name]
        try:
            raw_value = self.raw_value(name)
        except KeyError:
            if not var_obj.has_default:
                raise
            # a callable default is evaluated lazily, at lookup time
            raw_value = var_obj.default() if callable(var_obj.default) else var_obj.default
        # the validator result is interpreted by truthiness
        if not var_obj.validator(raw_value):
            raise ValueError(f"'{name}' set to invalid value '{raw_value}'")
        return var_obj.parser(raw_value)

    def is_set(self, name):
        """
        Tell whether the registered variable ``name`` is present in ``os.environ``.

        Raises:
            ValueError: if ``name`` is not registered
        """
        self._registered(name)
        return name in environ

    def raw_value(self, name):
        """
        Return the unparsed value of the registered variable ``name`` from ``os.environ``.

        Raises:
            ValueError: if ``name`` is not registered
            KeyError: if the variable is not set in the environment
        """
        self._registered(name)
        return environ[name]
112
113
# Module-level registry instance; the variables below are registered on it.
config = OcrdEnvConfig()

# Boolean switch: only the literal strings below are accepted,
# and only 'true' / '1' parse to True. No default is declared.
config.add(
    'OCRD_METS_CACHING',
    description='If set to `true`, access to the METS file is cached, speeding in-memory search and modification.',
    parser=lambda val: val in ('true', '1'),
    validator=lambda val: val in ('true', 'false', '0', '1'),
)

config.add(
    'OCRD_MAX_PROCESSOR_CACHE',
    description="Maximum number of processor instances (for each set of parameters) to be kept in memory (including loaded models) for processing workers or processor servers.",
    default=(True, 128),
    parser=int,
)

# Comma-separated list; every item must be one of '', 'CPU', 'RSS', 'PSS'.
config.add(
    "OCRD_PROFILE",
    description=(
        "Whether to enable gathering runtime statistics\n"
        "on the `ocrd.profile` logger (comma-separated):\n"
        "- `CPU`: yields CPU and wall-time,\n"
        "- `RSS`: also yields peak memory (resident set size)\n"
        "- `PSS`: also yields peak memory (proportional set size)\n"
    ),
    default=(True, ''),
    validator=lambda val: all(token in ('', 'CPU', 'RSS', 'PSS') for token in val.split(',')),
)

config.add(
    "OCRD_PROFILE_FILE",
    description="If set, then the CPU profile is written to this file for later peruse with a analysis tools like snakeviz",
)
138
139
# The validator's result is tested for truthiness when the variable is read,
# so using `int` itself as validator would reject the legitimate value "0"
# (int("0") == 0 is falsy). Comparing against None accepts every value that
# int() can convert, while malformed input still raises ValueError from int().
config.add("OCRD_DOWNLOAD_RETRIES",
    description="Number of times to retry failed attempts for downloads of workspace files.",
    validator=lambda val: int(val) is not None,
    parser=int)
143
144
def _ocrd_download_timeout_parser(val):
    """
    Parse a timeout specification.

    A single number yields a float; several comma-separated numbers yield
    a tuple of floats.
    """
    seconds = [float(part) for part in val.split(',')]
    return tuple(seconds) if len(seconds) > 1 else seconds[0]
151
152
# Download timeout: parsed into a float, or a tuple of floats when
# several comma-separated values are given. No default is declared.
config.add(
    "OCRD_DOWNLOAD_TIMEOUT",
    description="Timeout in seconds for connecting or reading (comma-separated) when downloading.",
    parser=_ocrd_download_timeout_parser,
)

# --- ocrd network client / server settings (all with defaults) ---

config.add(
    "OCRD_NETWORK_SERVER_ADDR_PROCESSING",
    description="Default address of Processing Server to connect to (for `ocrd network client processing`).",
    default=(True, ''),
)

config.add(
    "OCRD_NETWORK_CLIENT_POLLING_SLEEP",
    description="How many seconds to sleep before trying again.",
    default=(True, 30),
    parser=int,
)

config.add(
    "OCRD_NETWORK_CLIENT_POLLING_TIMEOUT",
    description="Timeout for a blocking ocrd network client (in seconds).",
    default=(True, 3600),
    parser=int,
)

config.add(
    "OCRD_NETWORK_SERVER_ADDR_WORKFLOW",
    description="Default address of Workflow Server to connect to (for `ocrd network client workflow`).",
    default=(True, ''),
)

config.add(
    "OCRD_NETWORK_SERVER_ADDR_WORKSPACE",
    description="Default address of Workspace Server to connect to (for `ocrd network client workspace`).",
    default=(True, ''),
)

config.add(
    "OCRD_NETWORK_RABBITMQ_CLIENT_CONNECT_ATTEMPTS",
    description="Number of attempts for a RabbitMQ client to connect before failing.",
    default=(True, 3),
    parser=int,
)
182
183
# Directory for socket files. The default path is computed once, when this
# statement runs, from the system temporary directory.
config.add(
    "OCRD_NETWORK_SOCKETS_ROOT_DIR",
    description="The root directory where all mets server related socket files are created",
    default=(True, Path(gettempdir(), "ocrd_network_sockets")),
    parser=Path,
)
# Side effect at import time: make sure the resolved directory exists.
config.OCRD_NETWORK_SOCKETS_ROOT_DIR.mkdir(parents=True, exist_ok=True)

# Directory for ocrd_network log files; same scheme as for the sockets above.
config.add(
    "OCRD_NETWORK_LOGS_ROOT_DIR",
    description="The root directory where all ocrd_network related file logs are stored",
    default=(True, Path(gettempdir(), "ocrd_network_logs")),
    parser=Path,
)
# Side effect at import time: make sure the resolved directory exists.
config.OCRD_NETWORK_LOGS_ROOT_DIR.mkdir(parents=True, exist_ok=True)
194
195
# HOME directory, cf. https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
# The default is a callable, so it is evaluated on lookup rather than at
# import; the value must name an existing directory.
config.add(
    "HOME",
    description="Directory to look for `ocrd_logging.conf`, fallback for unset XDG variables.",
    default=(True, lambda: Path.home()),
    parser=Path,
    validator=lambda val: Path(val).is_dir(),
)

# The XDG defaults are derived from config.HOME at lookup time.
config.add(
    "XDG_DATA_HOME",
    description="Directory to look for `./ocrd-resources/*` (i.e. `ocrd resmgr` data location)",
    default=(True, lambda: Path(config.HOME, '.local/share')),
    parser=Path,
)

config.add(
    "XDG_CONFIG_HOME",
    description="Directory to look for `./ocrd/resources.yml` (i.e. `ocrd resmgr` user database)",
    default=(True, lambda: Path(config.HOME, '.config')),
    parser=Path,
)

# The default is the bool False rather than a string, hence validator and
# parser have to cope with both bool and str input.
config.add(
    "OCRD_LOGGING_DEBUG",
    description="Print information about the logging setup to STDERR",
    parser=lambda val: val if isinstance(val, (int, bool)) else str.lower(val) in ('true', '1'),
    validator=lambda val: isinstance(val, bool) or str.lower(val) in ('true', 'false', '0', '1'),
    default=(True, False),
)
217