Passed
Push — main ( e0f181...a5d2a4 )
by Douglas
01:37
created

UnitTools.round_to_sigfigs()   A

Complexity

Conditions 3

Size

Total Lines 20
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
nop 3
dl 0
loc 20
rs 9.95
c 0
b 0
f 0
1
import logging
0 ignored issues
show
introduced by
Missing module docstring
Loading history...
2
import math
3
from typing import Optional, SupportsFloat, Tuple, Union
4
5
import regex
0 ignored issues
show
introduced by
Unable to import 'regex'
Loading history...
6
from pint import Quantity, UnitRegistry
0 ignored issues
show
introduced by
Unable to import 'pint'
Loading history...
7
from pint.errors import PintTypeError
0 ignored issues
show
introduced by
Unable to import 'pint.errors'
Loading history...
8
9
from pocketutils.core._internal import nicesize
10
from pocketutils.core.exceptions import OutOfRangeError, StringPatternError
11
from pocketutils.tools.base_tools import BaseTools
12
from pocketutils.tools.string_tools import StringTools
13
14
# Shared logger under the "pocketutils" name; UnitTools.extract_micromolar uses it to warn
# when a text contains more than one candidate dose.
logger = logging.getLogger("pocketutils")
15
# Module-level pint registry, built once at import time; UnitTools.canonicalize_quantity
# parses quantity strings through it.
_UNIT_REG = UnitRegistry()
16
17
18
class UnitTools(BaseTools):
0 ignored issues
show
introduced by
Missing class docstring
Loading history...
19
    @classmethod
20
    def delta_time_to_str(cls, delta_sec: float, space: str = "") -> str:
        """
        Formats a duration given in seconds as a short string with a unit suffix.

        The unit is picked from the magnitude of ``delta_sec``:
            - more than 3 hours: hours, rounded to 2 decimal places, suffix 'hr'
            - more than 180 seconds: minutes, rounded to 2 decimal places, suffix 'min'
            - otherwise: seconds, rounded to 1 decimal place, suffix 's'
        The rounded number is passed through ``StringTools.strip_empty_decimal``
        before the suffix is appended.
        Ex: delta_time_to_str(313) == 5.22min

        Args:
            delta_sec: The time in seconds; may be negative (thresholds use the absolute value)
            space: Space char between digits and units;
                   good choices are empty, ASCII space, Chars.narrownbsp,
                   Chars.thinspace, and Chars.nbsp.

        Returns:
            A string with units 'hr', 'min', or 's'
        """
        magnitude = abs(delta_sec)
        # Choose the scaled value and its suffix first, then format once at the end.
        if magnitude > 60 * 60 * 3:
            scaled, suffix = round(delta_sec / 60 / 60, 2), "hr"
        elif magnitude > 180:
            scaled, suffix = round(delta_sec / 60, 2), "min"
        else:
            scaled, suffix = round(delta_sec, 1), "s"
        return StringTools.strip_empty_decimal(str(scaled)) + space + suffix
40
41
    @classmethod
42
    def ms_to_minsec(cls, ms: int, space: str = "") -> str:
0 ignored issues
show
Coding Style Naming introduced by
Argument name "ms" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
43
        """
44
        Converts a number of milliseconds to one of the following formats:
45
            - 10ms         if < 1 sec
46
            - 10:15        if < 1 hour
47
            - 10:15:33     if < 1 day
48
            - 5d:10:15:33  if > 1 day
49
        Prepends a minus sign (−) if negative.
50
51
        Args:
52
            ms: The milliseconds
53
            space: Space char between digits and 'ms' (if used);
54
                   good choices are empty, ASCII space, Chars.narrownbsp,
55
                   Chars.thinspace, and Chars.nbsp.
56
57
        Returns:
58
            A string of one of the formats above
59
        """
60
        ms = abs(int(ms))
61
        seconds = int((ms / 1000) % 60)
62
        minutes = int((ms / (1000 * 60)) % 60)
63
        hours = int((ms / (1000 * 60 * 60)) % 24)
64
        days = int(ms / (1000 * 60 * 60 * 24))
65
        z_hr = str(hours).zfill(2)
66
        z_min = str(minutes).zfill(2)
67
        z_sec = str(seconds).zfill(2)
68
        sgn = "−" if ms < 0 else ""
69
        if ms < 1000:
0 ignored issues
show
unused-code introduced by
Unnecessary "elif" after "return"
Loading history...
70
            return f"{sgn}{ms}{space}ms"
71
        elif days > 1:
72
            return f"{days}d:{z_hr}:{z_min}:{z_sec}"
73
        elif hours > 1:
74
            return f"{sgn}{z_hr}:{z_min}:{z_sec}"
75
        else:
76
            return f"{sgn}{z_min}:{z_sec}"
77
78
    @classmethod
79
    def friendly_size(cls, n_bytes: int, *, space: str = " ") -> str:
        """
        Returns a text representation of a number of bytes.

        Thin wrapper: the formatting itself is done by ``nicesize`` from
        ``pocketutils.core._internal``.
        NOTE(review): an earlier docstring described the output as base 2 (IEC 1998),
        rounded to 0 decimal places -- confirm against ``nicesize``.

        Args:
            n_bytes: The number of bytes
            space: Forwarded to ``nicesize`` as its ``space`` keyword;
                   defaults to a single ASCII space

        Returns:
            Whatever ``nicesize`` returns for these arguments
        """
        formatted = nicesize(n_bytes, space=space)
        return formatted
85
86
    @classmethod
87
    def round_to_sigfigs(cls, num: SupportsFloat, sig_figs: int) -> float:
88
        """
89
        Round to specified number of sigfigs.
90
91
        Args:
92
            num: A Python or Numpy float or something that supports __float__
93
            sig_figs: The number of significant figures, non-negative
94
95
        Returns:
96
            A Python integer
97
        """
98
        if sig_figs < 0:
99
            raise OutOfRangeError(f"sig_figs {sig_figs} is negative", minimum=0)
100
        num = float(num)
101
        if num != 0:
0 ignored issues
show
unused-code introduced by
Unnecessary "else" after "return"
Loading history...
102
            digits = -int(math.floor(math.log10(abs(num))) - (sig_figs - 1))
103
            return round(num, digits)
104
        else:
105
            return 0  # can't take the log of 0
106
107
    @classmethod
108
    def format_micromolar(
109
        cls,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
110
        micromolar: float,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
111
        n_sigfigs: Optional[int] = 5,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
112
        *,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
113
        adjust_units: bool = True,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
114
        use_sigfigs: bool = True,
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
115
        space: str = "",
0 ignored issues
show
Coding Style introduced by
Wrong hanging indentation before block (add 4 spaces).
Loading history...
116
    ) -> str:
117
        """
118
        Returns a concentration with units, with the units scaled as needed.
119
        Can handle millimolar, micromolar, nanomolar, and picomolar.
120
121
        Args:
122
            micromolar: Value
123
            n_sigfigs: For rounding; no rounding if None
124
            adjust_units: If False, will always use micromolar
125
            use_sigfigs: If True, rounds to a number of significant figures; otherwise round to decimal places
0 ignored issues
show
Coding Style introduced by
This line is too long as per the coding-style (110/100).

This check looks for lines that are too long. You can specify the maximum line length.

Loading history...
126
            space: Space char between digits and units;
127
                   good choices are empty, ASCII space, Chars.narrownbsp, Chars.thinspace, and Chars.nbsp.
0 ignored issues
show
Coding Style introduced by
This line is too long as per the coding-style (106/100).

This check looks for lines that are too long. You can specify the maximum line length.

Loading history...
128
129
        Returns:
130
            The concentration with a suffix of µM, mM, nM, or mM
131
        """
132
        d = micromolar
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
133
        m = abs(d)
0 ignored issues
show
Coding Style Naming introduced by
Variable name "m" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
134
        unit = "µM"
135
        if adjust_units:
136
            if m < 1e-6:
137
                d *= 1e9
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
138
                unit = "fM"
139
            elif m < 1e-3:
140
                d *= 1e6
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
141
                unit = "pM"
142
            elif m < 1:
143
                d *= 1e3
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
144
                unit = "nM"
145
            elif m >= 1e6:
146
                d /= 1e6
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
147
                unit = "M"
148
            elif m >= 1e3:
149
                d /= 1e3
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
150
                unit = "mM"
151
        if n_sigfigs is None:
152
            pass
153
        elif use_sigfigs:
154
            d = cls.round_to_sigfigs(d, n_sigfigs)
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
155
        else:
156
            d = round(d, n_sigfigs)
0 ignored issues
show
Coding Style Naming introduced by
Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern)

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
157
        if round(d) == d and str(d).endswith(".0"):
0 ignored issues
show
unused-code introduced by
Unnecessary "else" after "return"
Loading history...
158
            return str(d)[:-2] + space + unit
159
        else:
160
            return str(d) + space + unit
161
162
    @classmethod
163
    def split_species_micromolar(cls, text: str) -> Tuple[str, Optional[float]]:
        """
        Splits a name into a chemical/concentration pair, falling back with the full name.
        Ex: "abc 3.5uM" → (abc, 3.5)
        Ex: "abc 3.5 µM" → (abc, 3.5)
        Ex: "abc (3.5mM)" → (abc, 3500.0)
        Ex: "abc3.5mM" → (abc3.5mM, None)  # no separator between the drug and dose
        Ex: "3.5mM" → (3.5mM, None)  # an edge case: don't pass in only units
        Uses a moderately strict pattern for the drug and dose:
            - The dose must terminate the string, except for end parenthesis or whitespace.
            - The drug and dose must be separated by at least one
              non-alphanumeric, non-dot, non-hyphen character.
            - Units must follow the digits, separated by at most whitespace,
              and are case-sensitive.
            - The number is digits, optionally followed by a literal '.' and more digits.

        Args:
            text: The full name, possibly ending in a dose

        Returns:
            ``(drug, dose in micromolar)``, or ``(text.strip(), None)`` if no dose was found

        Raises:
            StringPatternError: If the text does not match the pattern at all
        """
        # note the lazy ops in the first group and in the
        # non-(alphanumeric/dot/dash) separator between the drug and dose.
        # The decimal point is escaped: an unescaped '.' would accept any character
        # there (e.g. "3x5uM") and then crash in float().
        pat = regex.compile(
            r"^\s*(.*?)(?:[^A-Za-z0-9.\-]+?[\s(\[{]*(\d+(?:\.\d*)?)\s*([mµunpf]M)\s*[)\]}]*)?\s*$",
            flags=regex.V1,
        )
        match = pat.fullmatch(text)
        if match is None:
            raise StringPatternError(f"Text {text} couldn't be parsed", value=text, pattern=pat)
        if match.group(2) is None:
            # the optional dose group did not participate: fall back to the whole name
            return text.strip(), None
        drug = match.group(1).strip("([{)]}")
        dose = cls.concentration_to_micromolar(float(match.group(2)), match.group(3))
        return drug, dose
190
191
    @classmethod
192
    def extract_micromolar(cls, text: str) -> Optional[float]:
        """
        Returns what looks like a concentration with units.
        Accepts one of: mM, µM, uM, nM, pM, fM.
        Searches pretty flexibly: any run of digits (optionally with a literal '.'
        and more digits) followed by optional whitespace and a unit counts as a match.
        Matches are converted to micromolar and de-duplicated, so the same dose
        appearing several times still counts as one.
        If no matches are found, returns None.
        If multiple distinct matches are found, warns and returns None.

        Args:
            text: The text to search

        Returns:
            The single dose found, in micromolar, or None
        """
        # NOTE(review): the original intent was to make sure e.g. "mM" isn't part of
        # a larger name, but this pattern does not enforce that.
        # The decimal point is escaped: an unescaped '.' would let "5 3 uM" match as
        # the number "5 3" and then crash in float().
        pat = regex.compile(r"(\d+(?:\.\d*)?)\s*([mµunpf]M)\s*[)\]}]*", flags=regex.V1)
        doses = {
            cls.concentration_to_micromolar(float(match.group(1)), match.group(2))
            for match in pat.finditer(text)
        }
        if len(doses) == 1:
            return next(iter(doses))
        if len(doses) > 1:
            logger.warning("Found %s potential doses: %s . Returning None.", len(doses), doses)
        return None
215
216
    @classmethod
217
    def concentration_to_micromolar(cls, digits: Union[str, float], units: str) -> float:
218
        """
219
        Ex: concentration_to_micromolar(53, 'nM')  # returns 0.053
220
        """
221
        return float(digits) * {
222
            "M": 1e6,
223
            "mM": 1e3,
224
            "µM": 1,
225
            "uM": 1,
226
            "nM": 1e-3,
227
            "pM": 1e-6,
228
            "fM": 1e-9,
229
        }[units]
230
231
    @classmethod
232
    def canonicalize_quantity(cls, s: str, dimensionality: str) -> Quantity:
        """
        Returns a quantity in reduced units from a magnitude with units.

        The string is parsed with the module-level pint registry, converted with
        ``to_reduced_units()``, and then checked with ``is_compatible_with``.

        Args:
            s: The string to parse; e.g. ``"1 m/s^2"``.
               Unit names and symbols permitted, and spaces may be omitted.
            dimensionality: The resulting Quantity is checked against this;
                            e.g. ``"[length]/[time]^2"``

        Returns:
            a pint ``Quantity``

        Raises:
            PintTypeError: If the dimensionality is inconsistent
        """
        quantity = _UNIT_REG.Quantity(s).to_reduced_units()
        if quantity.is_compatible_with(dimensionality):
            return quantity
        raise PintTypeError(f"{s} not of dimensionality {dimensionality}")
252
253
254
# Names exported by ``from <this module> import *``: only the UnitTools class.
__all__ = ["UnitTools"]
255