Passed
Push — main ( a79885...50e5ea )
by Douglas
02:25
created

pocketutils.tools.unit_tools   A

Complexity

Total Complexity 33

Size/Duplication

Total Lines 251
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 135
dl 0
loc 251
rs 9.76
c 0
b 0
f 0
wmc 33

11 Methods

Rating   Name   Duplication   Size   Complexity  
A UnitTools.friendly_size() 0 7 1
B UnitTools.ms_to_minsec() 0 39 5
A UnitTools.delta_time_to_str() 0 21 3
A UnitTools.extract_dose() 0 6 1
A UnitTools.extract_micromolar() 0 24 3
A UnitTools.split_drug_dose() 0 6 1
A UnitTools.round_to_sigfigs() 0 20 3
A UnitTools.split_species_micromolar() 0 27 3
A UnitTools.concentration_to_micromolar() 0 14 1
A UnitTools.dose_to_micromolar() 0 6 1
C UnitTools.format_micromolar() 0 53 11
1
import logging
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import math
3
import re
4
from typing import Optional, SupportsFloat, Tuple, Union
5
6
from pocketutils.core.exceptions import OutOfRangeError, StringPatternError
7
from pocketutils.core.internal import nicesize
8
from pocketutils.tools.base_tools import BaseTools
9
from pocketutils.tools.string_tools import StringTools
10
11
# Logger registered under the fixed package-level name "pocketutils" rather than ``__name__``.
logger = logging.getLogger("pocketutils")
12
13
14
class UnitTools(BaseTools):
    """
    Formatting and parsing helpers for durations, byte sizes, and molar concentrations.

    Every method is a classmethod, so no instance is needed to use them.
    """

    # Multiplier that converts a value expressed in the given unit into micromolar.
    _MICROMOLAR_PER_UNIT = {
        "M": 1e6,
        "mM": 1e3,
        "µM": 1,
        "uM": 1,
        "nM": 1e-3,
        "pM": 1e-6,
        "fM": 1e-9,
    }

    # Name, separator, then a trailing dose. Note the lazy ops in the first group and in the
    # non-(alphanumeric/dot/dash) separator between the drug and dose.
    # The decimal point is escaped so that only a literal "." can sit between the digits.
    _SPECIES_DOSE_PATTERN = re.compile(
        r"^\s*(.*?)(?:[^A-Za-z0-9.\-]+?[\s(\[{]*(\d+(?:\.\d*)?)\s*([mµunpf]M)\s*[)\]}]*)?\s*$"
    )

    # A dose anywhere in a string: digits, optional whitespace, then a case-sensitive unit.
    _DOSE_PATTERN = re.compile(r"(\d+(?:\.\d*)?)\s*([mµunpf]M)\s*[)\]}]*")

    @classmethod
    def delta_time_to_str(cls, delta_sec: float, space: str = "") -> str:
        """
        Returns a pretty string from a difference in time in seconds.
        Rounds hours and minutes to 2 decimal places, and seconds to 1.
        Uses hours above 3 hours, minutes above 180 seconds, and seconds otherwise.
        Ex: delta_time_to_str(313) == 5.22min

        Args:
            delta_sec: The time in seconds
            space: Space char between digits and units;
                good choices are empty, ASCII space, Chars.narrownbsp, Chars.thinspace,
                and Chars.nbsp.

        Returns:
            A string with units 'hr', 'min', or 's'
        """
        magnitude = abs(delta_sec)
        if magnitude > 60 * 60 * 3:
            value, unit = round(delta_sec / 60 / 60, 2), "hr"
        elif magnitude > 180:
            value, unit = round(delta_sec / 60, 2), "min"
        else:
            value, unit = round(delta_sec, 1), "s"
        return StringTools.strip_empty_decimal(str(value)) + space + unit

    @classmethod
    def ms_to_minsec(cls, ms: int, space: str = "") -> str:
        """
        Converts a number of milliseconds to one of the following formats:
            - 10ms         if < 1 sec
            - 10:15        if < 1 hour
            - 10:15:33     if < 1 day
            - 5d:10:15:33  if >= 1 day
        Prepends a minus sign (−) if negative.

        Args:
            ms: The milliseconds; truncated to an integer
            space: Space char between digits and 'ms' or 'd' for day (if used);
                   good choices are empty, ASCII space, Chars.narrownbsp, Chars.thinspace,
                   and Chars.nbsp.

        Returns:
            A string of one of the formats above
        """
        is_neg = ms < 0
        ms = abs(int(ms))
        if ms < 1000:
            text = f"{ms}{space}ms"
        else:
            # Integer arithmetic keeps every field exact, even for very large inputs.
            minutes_total, seconds = divmod(ms // 1000, 60)
            hours_total, minutes = divmod(minutes_total, 60)
            days, hours = divmod(hours_total, 24)
            # The largest nonzero field decides the format; testing "> 0" (not "> 1")
            # keeps the leading field when it is exactly 1 day or exactly 1 hour.
            if days > 0:
                text = f"{days}{space}d:{hours:02d}:{minutes:02d}:{seconds:02d}"
            elif hours > 0:
                text = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
            else:
                text = f"{minutes:02d}:{seconds:02d}"
        return "−" + text if is_neg else text

    @classmethod
    def friendly_size(cls, n_bytes: int) -> str:
        """
        Returns a text representation of a number of bytes.
        Uses base 2 with IEC 1998, rounded to 0 decimal places, and without a space.

        Args:
            n_bytes: The number of bytes

        Returns:
            The string produced by ``nicesize``
        """
        return nicesize(n_bytes)

    @classmethod
    def round_to_sigfigs(cls, num: SupportsFloat, sig_figs: int) -> float:
        """
        Round to specified number of sigfigs.

        Args:
            num: A Python or Numpy float or something that supports __float__
            sig_figs: The number of significant figures, non-negative

        Returns:
            A Python float

        Raises:
            OutOfRangeError: If ``sig_figs`` is negative
        """
        if sig_figs < 0:
            raise OutOfRangeError(f"sig_figs {sig_figs} is negative", minimum=0)
        num = float(num)
        if num == 0:
            return 0.0  # can't take the log of 0
        # Position of the leading digit relative to the decimal point.
        exponent = int(math.floor(math.log10(abs(num))))
        return round(num, (sig_figs - 1) - exponent)

    @classmethod
    def format_micromolar(
        cls,
        micromolar: float,
        n_sigfigs: Optional[int] = 5,
        adjust_units: bool = True,
        use_sigfigs: bool = True,
        space: str = "",
    ) -> str:
        """
        Returns a concentration with units, with the units scaled as needed.
        Can handle molar, millimolar, micromolar, nanomolar, picomolar, and femtomolar.
        A value of exactly zero is never rescaled and is reported in micromolar.

        Args:
            micromolar: Value
            n_sigfigs: For rounding; no rounding if None
            adjust_units: If False, will always use micromolar
            use_sigfigs: If True, rounds to a number of significant figures;
                         otherwise round to decimal places
            space: Space char between digits and units;
                   good choices are empty, ASCII space, Chars.narrownbsp, Chars.thinspace,
                   and Chars.nbsp.

        Returns:
            The concentration with a suffix of M, mM, µM, nM, pM, or fM
        """
        value = micromolar
        magnitude = abs(value)
        unit = "µM"
        # Zero would otherwise fall into the smallest bucket and be printed as "0fM".
        if adjust_units and magnitude != 0:
            if magnitude < 1e-6:
                value, unit = value * 1e9, "fM"
            elif magnitude < 1e-3:
                value, unit = value * 1e6, "pM"
            elif magnitude < 1:
                value, unit = value * 1e3, "nM"
            elif magnitude >= 1e6:
                value, unit = value / 1e6, "M"
            elif magnitude >= 1e3:
                value, unit = value / 1e3, "mM"
        if n_sigfigs is not None:
            if use_sigfigs:
                value = cls.round_to_sigfigs(value, n_sigfigs)
            else:
                value = round(value, n_sigfigs)
        text = str(value)
        # Show whole numbers without a trailing ".0".
        if round(value) == value and text.endswith(".0"):
            text = text[:-2]
        return text + space + unit

    @classmethod
    def split_drug_dose(cls, text: str) -> Tuple[str, Optional[float]]:
        """
        Deprecated; see ``split_species_micromolar``.
        """
        return cls.split_species_micromolar(text)

    @classmethod
    def split_species_micromolar(cls, text: str) -> Tuple[str, Optional[float]]:
        """
        Splits a name into a chemical/concentration pair, falling back with the full name.
        Ex: "abc 3.5uM" → (abc, 3.5)
        Ex: "abc 3.5 µM" → (abc, 3.5)
        Ex: "abc (3.5mM)" → (abc, 3500.0)
        Ex: "abc3.5mM" → (abc3.5mM, None)  # no separator between the drug and dose
        Ex: "3.5mM" → (3.5mM, None)  # an edge case: don't pass in only units
        Uses a moderately strict pattern for the drug and dose:
            - The dose must terminate the string, except for end parenthesis or whitespace.
            - The drug and dose must be separated by at least one non-alphanumeric, non-dot,
              non-hyphen character.
            - Units must follow the digits, separated by at most whitespace,
              and are case-sensitive.
            - Only a literal "." is accepted as the decimal separator.

        Args:
            text: The string to split

        Returns:
            A tuple of (name, concentration in micromolar);
            the concentration is None if no dose was recognized

        Raises:
            StringPatternError: If the text does not match the pattern at all
        """
        pat = cls._SPECIES_DOSE_PATTERN
        match = pat.fullmatch(text)
        if match is None:
            raise StringPatternError(f"Text {text} couldn't be parsed", value=text, pattern=pat)
        if match.group(2) is None:
            # The optional dose group did not take part: hand back the whole name.
            return text.strip(), None
        drug = match.group(1).strip("([{)]}")
        dose = cls.concentration_to_micromolar(match.group(2), match.group(3))
        return drug, dose

    @classmethod
    def extract_dose(cls, text: str) -> Optional[float]:
        """
        Deprecated; see ``extract_micromolar``.
        """
        return cls.extract_micromolar(text)

    @classmethod
    def extract_micromolar(cls, text: str) -> Optional[float]:
        """
        Returns what looks like a concentration with units.
        Accepts one of: mM, µM, uM, nM, pM, fM.
        Searches pretty flexibly.
        If no matches are found, returns None.
        If matches with more than one distinct value are found, warns and returns None.

        Args:
            text: The string to search

        Returns:
            The concentration in micromolar, or None
        """
        # NOTE(review): nothing here stops a unit from matching inside a longer word
        # (e.g. the "mM" in "mMol"); confirm whether that should be rejected.
        doses = {
            cls.concentration_to_micromolar(match.group(1), match.group(2))
            for match in cls._DOSE_PATTERN.finditer(text)
        }
        if len(doses) == 1:
            return next(iter(doses))
        if len(doses) > 1:
            logger.warning("Found %d potential doses: %s . Returning None.", len(doses), doses)
        return None

    @classmethod
    def dose_to_micromolar(cls, digits: Union[str, float], units: str) -> float:
        """
        Deprecated; see ``concentration_to_micromolar``.
        """
        return cls.concentration_to_micromolar(digits, units)

    @classmethod
    def concentration_to_micromolar(cls, digits: Union[str, float], units: str) -> float:
        """
        Converts a concentration in the given units to micromolar.
        Ex: concentration_to_micromolar(53, 'nM')  # returns approximately 0.053

        Args:
            digits: The numeric value, as a number or a string accepted by ``float``
            units: One of M, mM, µM, uM, nM, pM, or fM (case-sensitive)

        Returns:
            The value in micromolar

        Raises:
            KeyError: If ``units`` is not one of the recognized units
        """
        return float(digits) * cls._MICROMOLAR_PER_UNIT[units]
248
249
250
# Explicit public API: a star-import of this module exposes only ``UnitTools``.
__all__ = ["UnitTools"]
251