pocketutils.tools.string_tools.StringUtils.roman_to_arabic() - Code Metrics - dmyersturnbull/pocketutils - Measure and Improve Code Quality continuously with Scrutinizer

StringUtils.roman_to_arabic() B
last analyzed 2024-01-16 02:11 UTC

↳ Parent: pocketutils.tools.string_tools

Complexity

Conditions

Size

Total Lines	40
Code Lines	26

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	26
dl	0
loc	40
rs	7.856
c	0
b	0
f	0
cc	7
nop	4

# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to pocketutils
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/pocketutils
# SPDX-License-Identifier: Apache-2.0
"""

"""

import re
from collections.abc import Callable, Iterable, Mapping, Sequence
from dataclasses import dataclass
from typing import Any, Self, TypeVar

import orjson
import regex

from pocketutils.core.exceptions import ValueIllegalError, ValueOutOfRangeError

# Public names of this module; `is_true_iterable` is deliberately not listed here.
__all__ = ["StringUtils", "StringTools"]

# Type variables used to annotate the mappings accepted by the `dict_to_*` methods.
K_contra = TypeVar("K_contra", contravariant=True)
V_co = TypeVar("V_co", covariant=True)
# Matches one character of Unicode general category C ("Other"), not only the "Cc" control characters.
_control_chars = regex.compile(r"\p{C}", flags=regex.VERSION1)


def is_true_iterable(s: Any) -> bool:
    """
    Reports whether `s` is an iterable other than a string or a binary buffer.

    Args:
        s: Any object, including `None`

    Returns:
        `True` only if `s` is an `Iterable` and is not a `str`, `bytes`, `bytearray`, or `memoryview`;
        `False` for everything else, including `None`
    """
    if s is None:
        return False
    # Text and binary types are iterable, but callers want them treated as single values.
    if isinstance(s, (str, bytes, bytearray, memoryview)):
        return False
    return isinstance(s, Iterable)


@dataclass(slots=True, frozen=True)
class StringUtils:
    def pretty_dict(self: Self, dct: Mapping[Any, Any]) -> str:
        """
        Serializes a mapping to indented JSON text.

        Args:
            dct: The mapping to serialize; it is handed to `orjson.dumps` as-is,
                 so anything orjson cannot serialize makes that call fail

        Returns:
            The JSON produced with `orjson.OPT_INDENT_2`, decoded from UTF-8 into a `str`
        """
        encoded = orjson.dumps(dct, option=orjson.OPT_INDENT_2)
        return encoded.decode(encoding="utf-8")

    def join_to_str(self: Self, *items: Any, last: str, sep: str = ", ") -> str:
        """
        Joins items to something like "cat, dog, and pigeon" or "cat, dog, or pigeon".

        Args:
            *items: Items to join; `str(item) for item in items` will be used
            last: Probably "and", "or", "and/or", or ""
                    Spaces are added/removed as needed if `suffix` is alphanumeric
                    or "and/or", after stripping whitespace off the ends.
            sep: Used to separate all words; include spaces as desired

        Examples:
            - `join_to_str(["cat", "dog", "elephant"], last="and")  # cat, dog, and elephant`
            - `join_to_str(["cat", "dog"], last="and")  # cat and dog`
            - `join_to_str(["cat", "dog", "elephant"], last="", sep="/")  # cat/dog/elephant`
        """

    def strip_control_chars(self: Self, s: str) -> str:
        """
        Removes every character matched by the module-level `_control_chars` pattern.

        That pattern covers the whole Unicode general category C ("Other"),
        which is broader than the 'Cc' control characters alone.

        Args:
            s: The text to clean

        Returns:
            `s` with all matching characters deleted
        """
        cleaned = _control_chars.sub("", s)
        return cleaned

    def roman_to_arabic(self: Self, roman: str, min_val: int | None = None, max_val: int | None = None) -> int:
        """
        Converts roman numerals to an integer.

        Args:
            roman: A string like "MCIV"
            min_val: Raise a ValueError if the parsed value is less than this
            max_val: Raise a ValueError if the parsed value is more than this

        Returns:
            The arabic numeral as a Python int
        """
        # this order is IMPORTANT!
        mp = {
            "IV": 4,
            "IX": 9,
            "XL": 40,
            "XC": 90,
            "CD": 400,
            "CM": 900,
            "I": 1,
            "V": 5,
            "X": 10,
            "L": 50,
            "C": 100,
            "D": 500,
            "M": 1000,
        }
        for k, v in mp.items():
            roman = roman.replace(k, str(v))
        # it'll just error if it's empty
        try:
            value = sum(int(num) for num in roman)
        except (ValueError, StopIteration):
            msg = f"Cannot parse roman numerals '{roman}'"
            raise ValueIllegalError(msg, value=roman)
        if min_val is not None and value < min_val or max_val is not None and value > max_val:
            msg = f"Value {roman} (int={value}) is out of range ({min_val}, {max_val})"
            raise ValueIllegalError(msg, value=roman)
        return value

    def tabs_to_list(self: Self, s: str) -> Sequence[str]:
        """
        Splits by tabs, but preserving quoted tabs, stripping quotes.
        In other words, will not split within a quoted substring.
        Double and single quotes are handled.
        """
        pat = re.compile(r"""((?:[^\t"']|"[^"]*"|'[^']*')+)""")

        # Don't strip double 2x quotes: ex ""55"" should be "55", not 55
        def strip(i: str) -> str:
            if i.endswith(('"', "'")):
                i = i[:-1]
            if i.startswith(('"', "'")):
                i = i[1:]
            return i.strip()

        return [strip(i) for i in pat.findall(s)]

    def truncate(self: Self, s: str | None, n: int = 40, *, null: str | None = None) -> str | None:
        """
        Truncates a string and adds ellipses, if needed.

        Returns a string if it has `n` or fewer characters;
        otherwise truncates to length `n-1` and appends `…` (UTF character).
        If `s` is None and `always_dots` is True, returns `n` copies of `.` (as a string).
        If `s` is None otherwise, returns None.

        Args:
            s: The string
            n: The maximum length, inclusive
            null: Replace `None` with this string

        Returns:
            A string or None
        """
        if s is None:
            return null
        if len(s) > n:
            nx = max(0, n - 1)
            return s[:nx] + "…"
        return s

    def strip_any_ends(self: Self, s: str, prefixes: str | Sequence[str], suffixes: str | Sequence[str]) -> str:
        """
        Flexible variant that strips any number of prefixes and any number of suffixes.
        Also less type-safe than more specific variants.
        Note that the order of the prefixes (or suffixes) DOES matter:
        each one is tried once, in order, against whatever is left of the string.

        Args:
            s: The string to strip; converted with `str` first
            prefixes: A single prefix, or an iterable of prefixes (each converted with `str`)
            suffixes: A single suffix, or an iterable of suffixes (each converted with `str`)

        Returns:
            The stripped string
        """
        prefix_list = [str(z) for z in prefixes] if is_true_iterable(prefixes) else [str(prefixes)]
        suffix_list = [str(z) for z in suffixes] if is_true_iterable(suffixes) else [str(suffixes)]
        s = str(s)
        for pre in prefix_list:
            s = s.removeprefix(pre)
        for suf in suffix_list:
            # removesuffix leaves s alone for an empty suffix, whereas s[: -len("")] is s[:0], i.e. ""
            s = s.removesuffix(suf)
        return s

    def strip_brackets(self: Self, text: str) -> str:
        """
        Strips any and all pairs of brackets from start and end of a string, but only if they're paired.

        See Also:
             strip_paired
        """
        pieces = [
            "()",
            "[]",
            "[]",
            "{}",
            "<>",
            "⦗⦘",
            "⟨⟩",
            "⸨⸩",
            "⟦〛",
            "《》",
            "〘〙",
        ]
        return StringTools.strip_paired(text, pieces)

    def strip_quotes(self: Self, text: str) -> str:
        """
        Strips any and all pairs of quotes from start and end of a string, but only if they're paired.

        See Also:
            strip_paired
        """
        pieces = [
            "`",
            "`",
            "”“",
            "''",
            '""',
        ]
        return StringTools.strip_paired(text, pieces)

    def strip_brackets_and_quotes(self: Self, text: str) -> str:
        """
        Strips any and all pairs of brackets and quotes from start and end of a string, but only if they're paired.

        See Also:
            strip_paired
        """
        pieces = [
            "()",
            "[]",
            "[]",
            "{}",
            "<>",
            "⦗⦘",
            "⟨⟩",
            "⸨⸩",
            "⟦〛",
            "《》",
            "〘〙",
            "`",
            "`",
            "”“",
            "''",
            '""',
        ]
        return StringTools.strip_paired(text, pieces)

    def strip_paired(self: Self, text: str, pieces: Iterable[tuple[str, str] | str]) -> str:
        """
        Strips pairs of (start, end) from the ends of strings.

        Example:

            StringTools.strip_paired("[(abc]", [("()"), ("[]"))  # returns "(abc"

        See Also:
            [`strip_brackets`](pocketutils.tools.string_tools.StringUtils.strip_brackets)
        """
        if any(a for a in pieces if len(a) != 2):
            msg = f"Each item must be a string of length 2: (stard, end); got {pieces}"
            raise ValueIllegalError(msg, value=str(pieces))
        text = str(text)
        while len(text) > 0:
            yes = False
            for a, b in pieces:
                while text.startswith(a) and text.endswith(b):
                    text = text[1:-1]
                    yes = True
            if not yes:
                break
        return text

    def replace_digits_with_superscript_chars(self: Self, s: str | float) -> str:
        """
        Replaces digits, +, =, (, and ) with equivalent Unicode superscript chars (ex ¹).
        """
        return "".join(dict(zip("0123456789-+=()", "⁰¹²³⁴⁵⁶⁷⁸⁹⁻⁺⁼⁽⁾")).get(c, c) for c in s)

    def replace_digits_with_subscript_chars(self: Self, s: str | float) -> str:
        """
        Replaces digits, +, =, (, and ) with equivalent Unicode subscript chars (ex ₁).
        """
        return "".join(dict(zip("0123456789+-=()", "₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎")).get(c, c) for c in s)

    def replace_superscript_chars_with_digits(self: Self, s: str | float) -> str:
        """
        Replaces Unicode superscript digits, +, =, (, and ) with normal chars.
        """
        return "".join(dict(zip("⁰¹²³⁴⁵⁶⁷⁸⁹⁻⁺⁼⁽⁾", "0123456789-+=()")).get(c, c) for c in s)

    def replace_subscript_chars_with_digits(self: Self, s: str | float) -> str:
        """
        Replaces Unicode superscript digits, +, =, (, and ) with normal chars.
        """
        return "".join(dict(zip("₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎", "0123456789+-=()")).get(c, c) for c in s)

    def pretty_float(self: Self, v: float | int, n_sigfigs: int | None = 5) -> str:
        """
        Represents a float as a string, with symbols for NaN and infinity.
        The returned string always has a minus or + prepended. Strip off the plus with `.lstrip('+')`.
        If v is an integer (by isinstance), makes sure to display without a decimal point.
        If `n_sigfigs < 2`, will never have a

        For example:
            - StringTools.pretty_float(.2222222)       # '+0.22222'
            - StringTools.pretty_float(-.2222222)      # '-0.22222' (Unicode minus)
            - StringTools.pretty_float(-float('inf'))  # '-∞'
            - StringTools.pretty_float(np.NaN)         # 'NaN'
        """
        # TODO this seems absurdly long for what it does
        if n_sigfigs is None or n_sigfigs < 1:
            msg = f"Sigfigs of {n_sigfigs} is nonpositive"
            raise ValueOutOfRangeError(
                msg,
                value=n_sigfigs,
                minimum=1,
            )
        # first, handle NaN and infinities
        if v == float("-Inf"):
            return "-∞"
        if v == float("Inf"):
            return "+∞"
        elif not isinstance(v, str) and str(v) in ["nan", "na", "NaN"]:
            return "NaN"
        elif not isinstance(v, str) and str(v) == "NaT":
            return "NaT"
        # sweet. it's a regular float or int.
        if n_sigfigs is None:
            s = str(v).removesuffix(".0")
        else:
            # yes, this is weird. we need to convert from str to float then back to str
            s = str(float(str(("%." + str(n_sigfigs) + "g") % v)))
        # remove the .0 if the precision doesn't support it
        # if v >= 1 and n_sigfigs<2, it couldn't have a decimal
        # and if n_sigfigs<1, it definitely can't
        # and ... %g does this.
        if isinstance(v, int) or n_sigfigs is not None and n_sigfigs < 2:
            s = s.removesuffix(".0")
        # prepend + or - (unless 0)
        if float(s) == 0.0:
            return s
        s = s.replace("-", "-")
        if not s.startswith("-"):
            s = "+" + s[1:]
        if len(s) > 1 and s[1] == ".":
            s = s[0] + "0." + s[2:]
        return s

    def pretty_function(self: Self, function: Callable, *, with_addr: bool = False) -> str:
        n_args = str(function.__code__.co_argcount) if hasattr(function, "__code__") else "?"
        pat = re.compile(r"^<bound method [^ .]+\.([^ ]+) of (.+)>$")
        boundmatch = pat.fullmatch(str(function))
        addr = " @ " + hex(id(function)) if with_addr else ""
        # if isinstance(function, FunctionType):
        #    # simplify lambda functions!
        #    return "⟨" + "λ(" + n_args + ")" + addr + "⟩"
        if boundmatch is not None:
            # it's a method (bound function)
            # don't show the address of the instance AND its method
            pat = re.compile(r"@ ?0x[0-9a-hA-H]+\)?$")
            s = pat.sub("", boundmatch.group(2)).strip()
            return "⟨" + "`" + s + "`." + boundmatch.group(1) + "(" + n_args + ")" + addr + "⟩"
        elif callable(function):
            # it's an actual function
            name = function.__name__
            if name is None:
                return "⟨<fn>" + addr + "⟩"
            if name == "<lambda>":
                return "⟨λ" + addr + "⟩"
            return "⟨" + function.__name__ + addr + "⟩"
        msg = f"Wrong type {type(function)} for '{function}"
        raise ValueIllegalError(msg, value=type(function).__name__)

    def pretty_object(self: Self, thing: Any, *, with_addr: bool = False) -> str:
        """
        Get a better and shorter name for a function than str(function).
        Ex: `pprint_function(lambda s: s)  == '<λ>'`

        - Instead of '<bound method ...', you'll get '<name(nargs)>'
        - Instead of 'lambda ...', you'll get '<λ(nargs)>'
        - etc.

        Note:
          - If function is None, returns '⌀'
          - If function does not have __name__, returns prefix + type(function) + <address> + suffix
          - If it's a primitive, returns str(function)

        Args:
            thing: Can be anything, but especially useful for functions
            with_addr: Include `@ hex-mem-addr` in the name
        """
        addr = " @ " + hex(id(thing)) if with_addr else ""
        pat = re.compile(r"<([A-Za-z0-9_.<>]+)[ ']*object")
        objmatch = pat.search(str(thing))  # instance of global or local class
        if thing is None:
            return "⌀"
        if isinstance(thing, type):
            # it's a class
            return "⟨" + "type:" + thing.__name__ + "⟩"
        elif callable(thing):
            return self.pretty_function(thing, with_addr=with_addr)
        elif hasattr(thing, "__dict__") and len(thing.__dict__) > 0:
            # it's a member with attributes
            # it's interesting enough that it may have a good __str__
            # strip prefix and suffix because we'll re-add it
            s = str(thing).removeprefix("⟨").removesuffix("⟩")
            return "⟨" + s + addr + "⟩"
        elif objmatch is not None:
            # it's an instance without attributes
            s = objmatch.group(1)
            if "." in s:
                s = s[s.rindex(".") + 1 :]
            return "⟨" + s + addr + "⟩"
        # it's a primitive, etc
        return str(thing)

    def greek_chars_to_letter_names(self: Self) -> Mapping[str, str]:
        """
        Returns a dict from Greek lowercase+uppercase Unicode chars to their full names.
        """
        return dict(StringTools._greek_alphabet)

    def greek_letter_names_to_chars(self: Self) -> Mapping[str, str]:
        """
        Returns a dict from Greek lowercase+uppercase letter names to their Unicode chars.
        """
        return {v: k for k, v in StringTools._greek_alphabet.items()}

    def replace_greek_letter_names_with_chars(self: Self, s: str, lowercase: bool = False) -> str:
        """
        Replaces Greek letter names with their Unicode equivalents.
        Does this correctly by replacing superstrings before substrings.
        Ex: '1-beta' is '1-β' rather than '1-bη'
        If lowercase is True: Replaces Beta, BeTa, and BETA with β
        Else: Replaces Beta with a capital Greek Beta and ignores BETA and BeTa.
        """
        # Clever if I may say so:
        # If we just sort from longest to shortest, we can't replace substrings by accident
        # For example we'll replace 'beta' before 'eta', so '1-beta' won't become '1-bη'
        greek = sorted(
            [(v, k) for k, v in StringTools._greek_alphabet.items()],
            key=lambda t: -len(t[1]),
        )
        for k, v in greek:
            if k[0].isupper() and lowercase:
                continue
            s = re.compile(k | regex.IGNORECASE).sub(v, s) if lowercase else s.replace(k, v)
        return s

    def dict_to_compact_str(self: Self, seq: Mapping[K_contra, V_co], *, eq: str = "=", sep: str = ", ") -> str:
        return self.dict_to_str(seq, sep=sep, eq=eq)

    def dict_to_quote_str(self: Self, seq: Mapping[K_contra, V_co], *, eq: str = ": ", sep: str = "; ") -> str:
        return self.dict_to_str(seq, sep=sep, eq=eq, prefix="'", suffix="'")

    def dict_to_str(
        self: Self,
        seq: Mapping[K_contra, V_co],
        *,
        sep: str = "\t",
        eq: str = "=",
        prefix: str = "",
        suffix: str = "",
    ) -> str:
        """
        Joins dict elements into a str like 'a=1, b=2, c=3`.
        Won't break with ValueError if the keys or values aren't strs.

        Args:
            seq: Dict-like, with `items()`
            sep: Delimiter
            eq: Separates a key with its value
            prefix: Prepend before every key
            suffix: Append after every value
        """
        return sep.join([prefix + str(k) + eq + str(v) + suffix for k, v in seq.items()])

    # Maps each Greek letter (as a Unicode char) to its English name:
    # first the 24 uppercase letters with capitalized names, then the 24 lowercase letters.
    # There are no entries for U+03A2 or for U+03C2 (the final-sigma form).
    # It has no annotation, so the dataclass decorator does not treat it as a field.
    _greek_alphabet = {
        "\u0391": "Alpha",
        "\u0392": "Beta",
        "\u0393": "Gamma",
        "\u0394": "Delta",
        "\u0395": "Epsilon",
        "\u0396": "Zeta",
        "\u0397": "Eta",
        "\u0398": "Theta",
        "\u0399": "Iota",
        "\u039A": "Kappa",
        "\u039B": "Lambda",
        "\u039C": "Mu",
        "\u039D": "Nu",
        "\u039E": "Xi",
        "\u039F": "Omicron",
        "\u03A0": "Pi",
        "\u03A1": "Rho",
        "\u03A3": "Sigma",
        "\u03A4": "Tau",
        "\u03A5": "Upsilon",
        "\u03A6": "Phi",
        "\u03A7": "Chi",
        "\u03A8": "Psi",
        "\u03A9": "Omega",
        "\u03B1": "alpha",
        "\u03B2": "beta",
        "\u03B3": "gamma",
        "\u03B4": "delta",
        "\u03B5": "epsilon",
        "\u03B6": "zeta",
        "\u03B7": "eta",
        "\u03B8": "theta",
        "\u03B9": "iota",
        "\u03BA": "kappa",
        "\u03BB": "lambda",
        "\u03BC": "mu",
        "\u03BD": "nu",
        "\u03BE": "xi",
        "\u03BF": "omicron",
        "\u03C0": "pi",
        "\u03C1": "rho",
        "\u03C3": "sigma",
        "\u03C4": "tau",
        "\u03C5": "upsilon",
        "\u03C6": "phi",
        "\u03C7": "chi",
        "\u03C8": "psi",
        "\u03C9": "omega",
    }


# Ready-made instance, so that callers can write `StringTools.truncate(...)` etc. without creating their own.
StringTools = StringUtils()


1			# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to pocketutils
2			# SPDX-PackageHomePage: https://github.com/dmyersturnbull/pocketutils
3			# SPDX-License-Identifier: Apache-2.0
4			"""
5
6			"""
7
8			import re
9			from collections.abc import Callable, Iterable, Mapping, Sequence
10			from dataclasses import dataclass
11			from typing import Any, Self, TypeVar
12
13			import orjson
14			import regex
15
16			from pocketutils.core.exceptions import ValueIllegalError, ValueOutOfRangeError
17
18			__all__ = ["StringUtils", "StringTools"]
19
20			K_contra = TypeVar("K_contra", contravariant=True)
21			V_co = TypeVar("V_co", covariant=True)
22			_control_chars = regex.compile(r"\p{C}", flags=regex.VERSION1)
23
24
25			def is_true_iterable(s: Any) -> bool:
26			return (
27			s is not None
28			and isinstance(s, Iterable)
29			and not isinstance(s, str)
30			and not isinstance(s, bytes \| bytearray \| memoryview)
31			)
32
33
34			@dataclass(slots=True, frozen=True)
35			class StringUtils:
36			def pretty_dict(self: Self, dct: Mapping[Any, Any]) -> str:
37			"""
38			Returns a pretty-printed dict, complete with indentation. Will fail on non-JSON-serializable datatypes.
39			"""
40			# return Pretty.condensed(dct)
41			return orjson.dumps(dct, option=orjson.OPT_INDENT_2).decode(encoding="utf-8")
42
43			def join_to_str(self: Self, *items: Any, last: str, sep: str = ", ") -> str:
44			"""
45			Joins items to something like "cat, dog, and pigeon" or "cat, dog, or pigeon".
46
47			Args:
48			*items: Items to join; `str(item) for item in items` will be used
49			last: Probably "and", "or", "and/or", or ""
50			Spaces are added/removed as needed if `suffix` is alphanumeric
51			or "and/or", after stripping whitespace off the ends.
52			sep: Used to separate all words; include spaces as desired
53
54			Examples:
55			- `join_to_str(["cat", "dog", "elephant"], last="and") # cat, dog, and elephant`
56			- `join_to_str(["cat", "dog"], last="and") # cat and dog`
57			- `join_to_str(["cat", "dog", "elephant"], last="", sep="/") # cat/dog/elephant`
58			"""
59
60			def strip_control_chars(self: Self, s: str) -> str:
61			"""
62			Strips all characters under the Unicode 'Cc' category.
63			"""
64			return _control_chars.sub("", s)
65
66			def roman_to_arabic(self: Self, roman: str, min_val: int \| None = None, max_val: int \| None = None) -> int:
67			"""
68			Converts roman numerals to an integer.
69
70			Args:
71			roman: A string like "MCIV"
72			min_val: Raise a ValueError if the parsed value is less than this
73			max_val: Raise a ValueError if the parsed value is more than this
74
75			Returns:
76			The arabic numeral as a Python int
77			"""
78			# this order is IMPORTANT!
79			mp = {
80			"IV": 4,
81			"IX": 9,
82			"XL": 40,
83			"XC": 90,
84			"CD": 400,
85			"CM": 900,
86			"I": 1,
87			"V": 5,
88			"X": 10,
89			"L": 50,
90			"C": 100,
91			"D": 500,
92			"M": 1000,
93			}
94			for k, v in mp.items():
95			roman = roman.replace(k, str(v))
96			# it'll just error if it's empty
97			try:
98			value = sum(int(num) for num in roman)
99			except (ValueError, StopIteration):
100			msg = f"Cannot parse roman numerals '{roman}'"
101			raise ValueIllegalError(msg, value=roman)
102			if min_val is not None and value < min_val or max_val is not None and value > max_val:
103			msg = f"Value {roman} (int={value}) is out of range ({min_val}, {max_val})"
104			raise ValueIllegalError(msg, value=roman)
105			return value
106
107			def tabs_to_list(self: Self, s: str) -> Sequence[str]:
108			"""
109			Splits by tabs, but preserving quoted tabs, stripping quotes.
110			In other words, will not split within a quoted substring.
111			Double and single quotes are handled.
112			"""
113			pat = re.compile(r"""((?:[^\t"']\|"[^"]"\|'[^']')+)""")
114
115			# Don't strip double 2x quotes: ex ""55"" should be "55", not 55
116			def strip(i: str) -> str:
117			if i.endswith(('"', "'")):
118			i = i[:-1]
119			if i.startswith(('"', "'")):
120			i = i[1:]
121			return i.strip()
122
123			return [strip(i) for i in pat.findall(s)]
124
125			def truncate(self: Self, s: str \| None, n: int = 40, *, null: str \| None = None) -> str \| None:
126			"""
127			Truncates a string and adds ellipses, if needed.
128
129			Returns a string if it has `n` or fewer characters;
130			otherwise truncates to length `n-1` and appends `…` (UTF character).
131			If `s` is None and `always_dots` is True, returns `n` copies of `.` (as a string).
132			If `s` is None otherwise, returns None.
133
134			Args:
135			s: The string
136			n: The maximum length, inclusive
137			null: Replace `None` with this string
138
139			Returns:
140			A string or None
141			"""
142			if s is None:
143			return null
144			if len(s) > n:
145			nx = max(0, n - 1)
146			return s[:nx] + "…"
147			return s
148
149			def strip_any_ends(self: Self, s: str, prefixes: str \| Sequence[str], suffixes: str \| Sequence[str]) -> str:
150			"""
151			Flexible variant that strips any number of prefixes and any number of suffixes.
152			Also less type-safe than more specific variants.
153			Note that the order of the prefixes (or suffixes) DOES matter.
154			"""
155			prefixes = [str(z) for z in prefixes] if is_true_iterable(prefixes) else [str(prefixes)]
156			suffixes = [str(z) for z in suffixes] if is_true_iterable(suffixes) else [str(suffixes)]
157			s = str(s)
158			for pre in prefixes:
159			if s.startswith(pre):
160			s = s[len(pre) :]
161			for suf in suffixes:
162			if s.endswith(suf):
163			s = s[: -len(suf)]
164			return s
165
166			def strip_brackets(self: Self, text: str) -> str:
167			"""
168			Strips any and all pairs of brackets from start and end of a string, but only if they're paired.
169
170			See Also:
171			strip_paired
172			"""
173			pieces = [
174			"()",
175			"[]",
176			"[]",
177			"{}",
178			"<>",
179			"⦗⦘",
180			"⟨⟩",
181			"⸨⸩",
182			"⟦〛",
183			"《》",
184			"〘〙",
185			]
186			return StringTools.strip_paired(text, pieces)
187
188			def strip_quotes(self: Self, text: str) -> str:
189			"""
190			Strips any and all pairs of quotes from start and end of a string, but only if they're paired.
191
192			See Also:
193			strip_paired
194			"""
195			pieces = [
196			"`",
197			"`",
198			"”“",
199			"''",
200			'""',
201			]
202			return StringTools.strip_paired(text, pieces)
203
204			def strip_brackets_and_quotes(self: Self, text: str) -> str:
205			"""
206			Strips any and all pairs of brackets and quotes from start and end of a string, but only if they're paired.
207
208			See Also:
209			strip_paired
210			"""
211			pieces = [
212			"()",
213			"[]",
214			"[]",
215			"{}",
216			"<>",
217			"⦗⦘",
218			"⟨⟩",
219			"⸨⸩",
220			"⟦〛",
221			"《》",
222			"〘〙",
223			"`",
224			"`",
225			"”“",
226			"''",
227			'""',
228			]
229			return StringTools.strip_paired(text, pieces)
230
231			def strip_paired(self: Self, text: str, pieces: Iterable[tuple[str, str] \| str]) -> str:
232			"""
233			Strips pairs of (start, end) from the ends of strings.
234
235			Example:
236
237			StringTools.strip_paired("[(abc]", [("()"), ("[]")) # returns "(abc"
238
239			See Also:
240			[`strip_brackets`](pocketutils.tools.string_tools.StringUtils.strip_brackets)
241			"""
242			if any(a for a in pieces if len(a) != 2):
243			msg = f"Each item must be a string of length 2: (stard, end); got {pieces}"
244			raise ValueIllegalError(msg, value=str(pieces))
245			text = str(text)
246			while len(text) > 0:
247			yes = False
248			for a, b in pieces:
249			while text.startswith(a) and text.endswith(b):
250			text = text[1:-1]
251			yes = True
252			if not yes:
253			break
254			return text
255
256			def replace_digits_with_superscript_chars(self: Self, s: str \| float) -> str:
257			"""
258			Replaces digits, +, =, (, and ) with equivalent Unicode superscript chars (ex ¹).
259			"""
260			return "".join(dict(zip("0123456789-+=()", "⁰¹²³⁴⁵⁶⁷⁸⁹⁻⁺⁼⁽⁾")).get(c, c) for c in s)
261
262			def replace_digits_with_subscript_chars(self: Self, s: str \| float) -> str:
263			"""
264			Replaces digits, +, =, (, and ) with equivalent Unicode subscript chars (ex ₁).
265			"""
266			return "".join(dict(zip("0123456789+-=()", "₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎")).get(c, c) for c in s)
267
268			def replace_superscript_chars_with_digits(self: Self, s: str \| float) -> str:
269			"""
270			Replaces Unicode superscript digits, +, =, (, and ) with normal chars.
271			"""
272			return "".join(dict(zip("⁰¹²³⁴⁵⁶⁷⁸⁹⁻⁺⁼⁽⁾", "0123456789-+=()")).get(c, c) for c in s)
273
274			def replace_subscript_chars_with_digits(self: Self, s: str \| float) -> str:
275			"""
276			Replaces Unicode superscript digits, +, =, (, and ) with normal chars.
277			"""
278			return "".join(dict(zip("₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎", "0123456789+-=()")).get(c, c) for c in s)
279
280			def pretty_float(self: Self, v: float \| int, n_sigfigs: int \| None = 5) -> str:
281			"""
282			Represents a float as a string, with symbols for NaN and infinity.
283			The returned string always has a minus or + prepended. Strip off the plus with `.lstrip('+')`.
284			If v is an integer (by isinstance), makes sure to display without a decimal point.
285			If `n_sigfigs < 2`, will never have a
286
287			For example:
288			- StringTools.pretty_float(.2222222) # '+0.22222'
289			- StringTools.pretty_float(-.2222222) # '-0.22222' (Unicode minus)
290			- StringTools.pretty_float(-float('inf')) # '-∞'
291			- StringTools.pretty_float(np.NaN) # 'NaN'
292			"""
293			# TODO this seems absurdly long for what it does
294			if n_sigfigs is None or n_sigfigs < 1:
295			msg = f"Sigfigs of {n_sigfigs} is nonpositive"
296			raise ValueOutOfRangeError(
297			msg,
298			value=n_sigfigs,
299			minimum=1,
300			)
301			# first, handle NaN and infinities
302			if v == float("-Inf"):
303			return "-∞"
304			if v == float("Inf"):
305			return "+∞"
306			elif not isinstance(v, str) and str(v) in ["nan", "na", "NaN"]:
307			return "NaN"
308			elif not isinstance(v, str) and str(v) == "NaT":
309			return "NaT"
310			# sweet. it's a regular float or int.
311			if n_sigfigs is None:
312			s = str(v).removesuffix(".0")
313			else:
314			# yes, this is weird. we need to convert from str to float then back to str
315			s = str(float(str(("%." + str(n_sigfigs) + "g") % v)))
316			# remove the .0 if the precision doesn't support it
317			# if v >= 1 and n_sigfigs<2, it couldn't have a decimal
318			# and if n_sigfigs<1, it definitely can't
319			# and ... %g does this.
320			if isinstance(v, int) or n_sigfigs is not None and n_sigfigs < 2:
321			s = s.removesuffix(".0")
322			# prepend + or - (unless 0)
323			if float(s) == 0.0:
324			return s
325			s = s.replace("-", "-")
326			if not s.startswith("-"):
327			s = "+" + s[1:]
328			if len(s) > 1 and s[1] == ".":
329			s = s[0] + "0." + s[2:]
330			return s
331
332			def pretty_function(self: Self, function: Callable, *, with_addr: bool = False) -> str:
333			n_args = str(function.__code__.co_argcount) if hasattr(function, "__code__") else "?"
334			pat = re.compile(r"^<bound method [^ .]+\.([^ ]+) of (.+)>$")
335			boundmatch = pat.fullmatch(str(function))
336			addr = " @ " + hex(id(function)) if with_addr else ""
337			# if isinstance(function, FunctionType):
338			# # simplify lambda functions!
339			# return "⟨" + "λ(" + n_args + ")" + addr + "⟩"
340			if boundmatch is not None:
341			# it's a method (bound function)
342			# don't show the address of the instance AND its method
343			pat = re.compile(r"@ ?0x[0-9a-hA-H]+\)?$")
344			s = pat.sub("", boundmatch.group(2)).strip()
345			return "⟨" + "`" + s + "`." + boundmatch.group(1) + "(" + n_args + ")" + addr + "⟩"
346			elif callable(function):
347			# it's an actual function
348			name = function.__name__
349			if name is None:
350			return "⟨<fn>" + addr + "⟩"
351			if name == "<lambda>":
352			return "⟨λ" + addr + "⟩"
353			return "⟨" + function.__name__ + addr + "⟩"
354			msg = f"Wrong type {type(function)} for '{function}"
355			raise ValueIllegalError(msg, value=type(function).__name__)
356
357			def pretty_object(self: Self, thing: Any, *, with_addr: bool = False) -> str:
358			"""
359			Get a better and shorter name for a function than str(function).
360			Ex: `pprint_function(lambda s: s) == '<λ>'`
361
362			- Instead of '<bound method ...', you'll get '<name(nargs)>'
363			- Instead of 'lambda ...', you'll get '<λ(nargs)>'
364			- etc.
365
366			Note:
367			- If function is None, returns '⌀'
368			- If function does not have __name__, returns prefix + type(function) + <address> + suffix
369			- If it's a primitive, returns str(function)
370
371			Args:
372			thing: Can be anything, but especially useful for functions
373			with_addr: Include `@ hex-mem-addr` in the name
374			"""
375			addr = " @ " + hex(id(thing)) if with_addr else ""
376			pat = re.compile(r"<([A-Za-z0-9_.<>]+)[ ']*object")
377			objmatch = pat.search(str(thing)) # instance of global or local class
378			if thing is None:
379			return "⌀"
380			if isinstance(thing, type):
381			# it's a class
382			return "⟨" + "type:" + thing.__name__ + "⟩"
383			elif callable(thing):
384			return self.pretty_function(thing, with_addr=with_addr)
385			elif hasattr(thing, "__dict__") and len(thing.__dict__) > 0:
386			# it's a member with attributes
387			# it's interesting enough that it may have a good __str__
388			# strip prefix and suffix because we'll re-add it
389			s = str(thing).removeprefix("⟨").removesuffix("⟩")
390			return "⟨" + s + addr + "⟩"
391			elif objmatch is not None:
392			# it's an instance without attributes
393			s = objmatch.group(1)
394			if "." in s:
395			s = s[s.rindex(".") + 1 :]
396			return "⟨" + s + addr + "⟩"
397			# it's a primitive, etc
398			return str(thing)
399
def greek_chars_to_letter_names(self: Self) -> Mapping[str, str]:
    """
    Maps each Greek Unicode character (uppercase and lowercase) to its full letter name.

    Returns:
        A new dict, so changing it does not affect the shared table
    """
    return {char: name for char, name in StringTools._greek_alphabet.items()}
405
def greek_letter_names_to_chars(self: Self) -> Mapping[str, str]:
    """
    Maps each Greek letter name (uppercase and lowercase spellings) to its Unicode character.

    Returns:
        A new dict that is the inverse of the character-to-name table
    """
    alphabet = StringTools._greek_alphabet
    return dict(zip(alphabet.values(), alphabet))
411
def replace_greek_letter_names_with_chars(self: Self, s: str, lowercase: bool = False) -> str:
    """
    Replaces Greek letter names in `s` with their Unicode equivalents.

    Longer names are substituted before shorter ones, so a name that contains
    another name is never partially replaced.
    Ex: '1-beta' is '1-β' rather than '1-bη', and 'theta' is 'θ' rather than 'thη'.

    Args:
        s: The text to process
        lowercase: If True, only the lowercase letters are used, and names are matched
            case-insensitively: Beta, BeTa, and BETA all become β.
            If False, names are matched case-sensitively: beta becomes β and
            Beta becomes a capital Greek Beta.

    Returns:
        A copy of `s` with the letter names replaced
    """
    # Sort on the *name* (not the 1-character letter), longest first.
    # `sorted` is stable, so names of equal length keep their table order.
    names_longest_first = sorted(
        ((name, char) for char, name in StringTools._greek_alphabet.items()),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )
    for name, char in names_longest_first:
        if not lowercase:
            s = s.replace(name, char)
        elif not name[0].isupper():
            # Capitalized names are skipped here; the case-insensitive match on
            # the lowercase name already covers every casing.
            s = re.sub(re.escape(name), char, s, flags=re.IGNORECASE)
    return s
432
def dict_to_compact_str(self: Self, seq: Mapping[K_contra, V_co], *, eq: str = "=", sep: str = ", ") -> str:
    """
    Joins dict items compactly; with the defaults, the result looks like 'a=1, b=2'.

    Delegates to `dict_to_str` without a prefix or suffix.

    Args:
        seq: Dict-like, with `items()`
        eq: Separates a key from its value
        sep: Delimiter between items
    """
    return self.dict_to_str(seq, eq=eq, sep=sep)
435
def dict_to_quote_str(self: Self, seq: Mapping[K_contra, V_co], *, eq: str = ": ", sep: str = "; ") -> str:
    """
    Joins dict items with each 'key: value' pair wrapped in single quotes.

    Delegates to `dict_to_str`, passing a single quote as both prefix and suffix.

    Args:
        seq: Dict-like, with `items()`
        eq: Separates a key from its value
        sep: Delimiter between items
    """
    quote = "'"
    return self.dict_to_str(seq, eq=eq, sep=sep, prefix=quote, suffix=quote)
438
439			def dict_to_str(
440			self: Self,
441			seq: Mapping[K_contra, V_co],
442			*,
443			sep: str = "\t",
444			eq: str = "=",
445			prefix: str = "",
446			suffix: str = "",
447			) -> str:
448			"""
449			Joins dict elements into a str like 'a=1, b=2, c=3`.
450			Won't break with ValueError if the keys or values aren't strs.
451
452			Args:
453			seq: Dict-like, with `items()`
454			sep: Delimiter
455			eq: Separates a key with its value
456			prefix: Prepend before every key
457			suffix: Append after every value
458			"""
459			return sep.join([prefix + str(k) + eq + str(v) + suffix for k, v in seq.items()])
460
# Lookup table from each Greek letter (one Unicode character, written as a \uXXXX
# escape) to its English letter name. The 24 capital letters come first, then the
# 24 lowercase letters; the casing of the name mirrors the letter ("Alpha" / "alpha").
# Every name occurs exactly once, so the table can be inverted without collisions.
# Entries are in alphabet order, NOT ordered by name length.
_greek_alphabet = {
    # Capital letters: U+0391 through U+03A9 (code point U+03A2 is not in the table)
    "\u0391": "Alpha",
    "\u0392": "Beta",
    "\u0393": "Gamma",
    "\u0394": "Delta",
    "\u0395": "Epsilon",
    "\u0396": "Zeta",
    "\u0397": "Eta",
    "\u0398": "Theta",
    "\u0399": "Iota",
    "\u039A": "Kappa",
    "\u039B": "Lambda",
    "\u039C": "Mu",
    "\u039D": "Nu",
    "\u039E": "Xi",
    "\u039F": "Omicron",
    "\u03A0": "Pi",
    "\u03A1": "Rho",
    "\u03A3": "Sigma",
    "\u03A4": "Tau",
    "\u03A5": "Upsilon",
    "\u03A6": "Phi",
    "\u03A7": "Chi",
    "\u03A8": "Psi",
    "\u03A9": "Omega",
    # Lowercase letters: U+03B1 through U+03C9
    # (code point U+03C2, the word-final sigma form, is not in the table,
    # so "sigma" maps only to U+03C3)
    "\u03B1": "alpha",
    "\u03B2": "beta",
    "\u03B3": "gamma",
    "\u03B4": "delta",
    "\u03B5": "epsilon",
    "\u03B6": "zeta",
    "\u03B7": "eta",
    "\u03B8": "theta",
    "\u03B9": "iota",
    "\u03BA": "kappa",
    "\u03BB": "lambda",
    "\u03BC": "mu",
    "\u03BD": "nu",
    "\u03BE": "xi",
    "\u03BF": "omicron",
    "\u03C0": "pi",
    "\u03C1": "rho",
    "\u03C3": "sigma",
    "\u03C4": "tau",
    "\u03C5": "upsilon",
    "\u03C6": "phi",
    "\u03C7": "chi",
    "\u03C8": "psi",
    "\u03C9": "omega",
}
511
512
# Shared module-level instance of StringUtils. The Greek-letter methods of the class
# look up the letter table through this name (`StringTools._greek_alphabet`).
StringTools = StringUtils()
514

dmyersturnbull / pocketutils

StringUtils.roman_to_arabic() B last analyzed 2024-01-16 02:11 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

StringUtils.roman_to_arabic() B
last analyzed 2024-01-16 02:11 UTC