apoor.fdir() - Code Metrics - Inspection of "Update __init__.py" - a-poor/apoor - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( ead7bc...45a1f9 )

by Austin

created 2020-09-24 13:17 UTC

apoor.fdir() A

↳ Parent: apoor

Complexity

Conditions

Size

Total Lines	6
Code Lines	4

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	4
dl	0
loc	6
rs	10
c	0
b	0
f	0
cc	2
nop	1

"""A small personal package created to store code and data I often reuse. 


I'll continue to update it with useful functions that I find myself reusing. The `apoor.data` module has some common datasets and functions for reading them in as pandas DataFrames.

"""

# Package version string, defined at module level as `__version__`.
__version__ = "1.1.3"



import itertools as it
import numpy as np
from . import data



def fdir(o=None):
    """Same as builtin dir() without private attributes.

    Args:
        o: Object to inspect. When omitted (or ``None``), the names in the
            *caller's* local scope are listed, mirroring a bare ``dir()``.

    Returns:
        A list of attribute/variable names that do not start with an
        underscore, in the sorted order ``dir()`` would give them.

        For example:

        >>> fdir(type("T", (), {"x": 1, "_y": 2}))
        ['x']
    """
    if o is None:
        # A bare ``dir()`` here would list *this* function's locals (always
        # just ``o``), so inspect the calling frame instead.
        import inspect

        frame = inspect.currentframe()
        try:
            caller = frame.f_back if frame is not None else None
            names = sorted(caller.f_locals) if caller is not None else []
        finally:
            # Holding our own frame in a local creates a reference cycle.
            del frame
    else:
        names = dir(o)
    # ``startswith`` (unlike ``name[0]``) tolerates an empty name coming
    # from a custom ``__dir__``.
    return [name for name in names if not name.startswith("_")]
    





def set_seed(n:int):
    """Seed numpy's global random number generator.

    Args:
        n (int): Seed value passed to ``np.random.seed``.
    """
    numpy_random = np.random
    numpy_random.seed(n)


def make_scale(dmin: float, dmax: float, rmin: float, rmax: float, clamp: bool = False):
    """Scale function factory.

    Creates a scale function that linearly maps a number from a domain
    ``[dmin, dmax]`` to a range ``[rmin, rmax]``. Either interval may be
    descending (start greater than end).

    Args:
        dmin (float): Domain's start value
        dmax (float): Domain's end value
        rmin (float): Range's start value
        rmax (float): Range's end value
        clamp (bool): If the result is outside the range, return the
            nearest range bound instead (default: False)

    Returns:
        A scale function taking one numeric argument and returning the
        value mapped from the domain to the range (and clamped if the
        `clamp` flag is set).

        For example:

        >>> s = make_scale(0, 1, 0, 10)
        >>> s(0.1)
        1.0

        >>> s = make_scale(0, 10, 10, 0)
        >>> s(1.0)
        9.0

        >>> s = make_scale(0, 1, 0.0, 1.0, clamp=True)
        >>> s(100)
        1.0

        >>> s = make_scale(0, 10, 10.0, 0.0, clamp=True)
        >>> s(-5)
        10.0

    Raises:
        ZeroDivisionError: If `dmin == dmax` for plain Python numbers
            (the domain has zero width, so no mapping exists).
    """
    scale_factor = (rmax - rmin) / (dmax - dmin)
    # The range may be descending (rmin > rmax). Order the bounds once so
    # clamping works either way; clamping against (rmin, rmax) directly
    # would collapse every value to rmax for a descending range.
    lower, upper = (rmin, rmax) if rmin <= rmax else (rmax, rmin)

    def scale(n):
        mapped = (n - dmin) * scale_factor + rmin
        if clamp:
            return min(max(mapped, lower), upper)
        return mapped

    return scale
    


def train_test_split(*arrays, test_pct: float = 0.15, val_set: bool = False, val_pct: float = 0.15):
    """Splits arrays into train & test sets.

    Splits arrays into train, test, and (optionally) validation sets
    using the supplied percentages. All arrays are shuffled with the same
    random permutation (drawn from numpy's global random state), so rows
    stay aligned across arrays.

    Args:
        *arrays:
            An arbitrary number of equal-length sequences to be split
            into train, test, and (optionally) validation sets.
            Must have at least one array.

        test_pct:
            Float in the range [0, 1].
            Fraction of the total n values to include in the test set
            (truncated to a whole number of items).

            The train set will have `1.0 - test_pct` of the values
            (or `1.0 - test_pct - val_pct` if `val_set == True`).

        val_set:
            Whether or not to return a validation set,
            in addition to a test set.

        val_pct:
            Float in the range [0, 1].
            Fraction of the total n values to include in the validation
            set (truncated to a whole number of items).

            Ignored if `val_set == False`.

    Returns:
        splits: tuple of numpy arrays
        Input arrays split into train, test, (val) sets, grouped per
        input array: `(a_train, a_test[, a_val], b_train, b_test[, b_val], ...)`.

        If `val_set == False`, `len(splits) == 2 * len(arrays)`,
        or if `val_set == True`, `len(splits) == 3 * len(arrays)`.

        For example (element order is random, sizes are not):

        >>> x = np.arange(10)
        >>> train, test = train_test_split(x)
        >>> len(train), len(test)
        (9, 1)

        >>> y = x[::-1]
        >>> x_train, x_test, y_train, y_test = train_test_split(x, y)
        >>> bool((x_train + y_train == 9).all())
        True

        >>> parts = train_test_split(x, test_pct=0.3, val_set=True, val_pct=0.2)
        >>> [len(p) for p in parts]
        [5, 3, 2]

    Raises:
        AssertionError: If no arrays are given, the arrays are empty or
            differ in length, or the percentages are out of range.
    """
    # Input checks. Raised explicitly (rather than via `assert`) so they
    # are not stripped when Python runs with -O; the exception type is
    # kept as AssertionError for backward compatibility.
    if not arrays:
        raise AssertionError("No arrays supplied")
    lens = [len(a) for a in arrays]
    if len(set(lens)) != 1:
        raise AssertionError("arrays have varying lengths")
    n = lens[0]
    if not n > 0:
        raise AssertionError("supplied arrays have `len == 0`")
    if not 0.0 <= test_pct <= 1.0:
        raise AssertionError("`test_pct` must be in the range `0.0 <= test_pct <= 1.0`")
    if val_set:
        if not 0.0 <= val_pct <= 1.0:
            raise AssertionError("`val_pct` must be in the range `0.0 <= val_pct <= 1.0`")
        if not test_pct + val_pct <= 1.0:
            raise AssertionError("Can't have `test_pct + val_pct > 1.0`")

    # Calculate split sizes (fractions are truncated toward zero).
    n_test = int(n * test_pct)
    n_val = int(n * val_pct) if val_set else 0
    n_train = n - n_test - n_val

    # Shuffle the indexes once so every array gets the same permutation.
    indexes = np.arange(n)
    np.random.shuffle(indexes)

    train_idx = indexes[:n_train]
    test_idx = indexes[n_train:n_train + n_test]
    # Slice from the front: `indexes[-n_val:]` would return *everything*
    # when n_val == 0 because `-0` is just `0`.
    val_idx = indexes[n_train + n_test:]

    # Split the data, keeping each array's pieces adjacent in the output.
    splits = []
    for a in map(np.asarray, arrays):
        splits.append(a[train_idx])
        splits.append(a[test_idx])
        if val_set:
            splits.append(a[val_idx])
    return tuple(splits)




1			"""A small personal package created to store code and data I often reuse.
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
2
3			I'll continue to update it with useful functions that I find myself reusing. The `apoor.data` module has some common datasets and functions for reading them in as pandas DataFrames.
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (181/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
4			"""
5
6			# Version string
7			__version__ = "1.1.3"
8
9
10
11			import itertools as it
12			import numpy as np
13			from . import data
14
15
16
17			def fdir(o=None):
			0 ignored issues – show Coding Style Naming introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report Argument name "o" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
18			"""Same as builtin dir() without private attributes.
			0 ignored issues – show Coding Style introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
19			"""
20			if o is None: d = dir()
			0 ignored issues – show Coding Style introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report More than one statement on a single line Loading history... Coding Style Naming introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
21			else: d = dir(o)
			0 ignored issues – show Coding Style Naming introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report Variable name "d" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
22			return [a for a in d if a[0] != "_"]
23
			0 ignored issues – show Coding Style introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
24
25
26
27
28			def set_seed(n:int):
			0 ignored issues – show Coding Style introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report Exactly one space required after : Loading history... Coding Style Naming introduced 2020-09-24 13:21 UTC by Report Bug Copy Issue Report Argument name "n" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
29			"""Sets numpy's random seed.
30
31			Args:
32			n (int): The value used to set numpy's random seed.
33			"""
34			np.random.seed(n)
35
36
37			def make_scale(dmin:float,dmax:float,rmin:float,rmax:float,clamp:bool=False):
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Exactly one space required after : Loading history... Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Exactly one space required after comma Loading history... Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Exactly one space required around keyword argument assignment Loading history...
38			"""Scale function factory.
39
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
40			Creates a scale function to map a number from a domain to a range.
41
42			Args:
43			dmin (float): Domain's start value
44			dmax (float): Domain's end value
45			rmin (float): Range's start value
46			rmax (float): Range's end value
47			clamp (bool): If the result is outside the range, return clamped value (default: False)
48			Returns:
49			A scale function taking one numeric argument and returns the value mapped from the domain to the range (and clamped if `clamp` flag is set).
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (148/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
50
51			For example:
52
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
53			>>> s = make_scale(0,1,0,10)
54			>>> s(0.1)
55			1.0
56
57			>>> s = make_scale(0,10,10,0)
58			>>> s(1.0)
59			9.0
60
61			>>> s = make_scale(0,1,0,1,clamp=True)
62			>>> s(100)
63			1.0
64			"""
65			drange = dmax - dmin
66			rrange = rmax - rmin
67			scale_factor = rrange / drange
68			def scale(n):
			0 ignored issues – show Coding Style Naming introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Argument name "n" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
69			n_ = (n - dmin) * scale_factor + rmin
			0 ignored issues – show Coding Style Naming introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Variable name "n_" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
70			if clamp: return min(max(n_,rmin),rmax)
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Exactly one space required after comma Loading history... unused-code introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Unnecessary "else" after "return" Loading history... Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report More than one statement on a single line Loading history...
71			else: return n_
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
72			return scale
73
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
74
75			def train_test_split(*arrays,test_pct:float=0.15,val_set:bool=False,val_pct:float=0.15):
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Exactly one space required after comma Loading history... Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Exactly one space required after : Loading history... Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Exactly one space required around keyword argument assignment Loading history...
76			"""Splits arrays into train & test sets.
77
78			Splits arrays into train, test, and (optionally) validation sets using the supplied percentages.
79
80			Args:
81			*arrays:
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
82			An arbitrary number of sequences to be split
83			into train, test, and (optionally) validation
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
84			sets. Must have at least one array.
85
86			test_pct:
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
87			Float of the range [0,1]
88			Percent of total n values to include in test set.
89
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
90			The train set will have `1.0 - test_pct` pct of
91			values (or `1.0 - test_pct - val_pct` pct of values
92			if `val_set == True`).
93
94			val_set:
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
95			Whether or not to return a validation set,
96			in addition to a test set.
97
98			val_pct:
			0 ignored issues – show Coding Style introduced 2020-08-20 19:50 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
99			float of the range [0,1]
100			Percent of total n values to include in test set.
101
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
102			Ignored if `val_set == False`.
103
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
104			The train set will have `1.0 - test_pct - val_pct`
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
105			pct of values.
106
107			Returns:
108			splits: tuple of numpy arrays
109			Input arrays split into train, test, val sets.
110
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
111			If `val_set == False`, `len(splits) == 2 * len(arrays)`,
112			or if `val_set == True`, `len(splits) == 3 * len(arrays)`.
113
114			For example:
115			>>> x = np.arange(10)
116			>>> train_test_split(x)
117			(array([3, 9, 4, 2, 1, 0, 7, 5, 8]), array([6]))
118
119			>>> x = np.arange(10)
120			>>> y = x[::-1]
121			>>> x_train, x_test, y_train, y_test = train_test_split(x,y)
122			>>> x_train, x_test, y_train, y_test
123			(array([1, 3, 5, 8, 4, 7, 6, 9]),
124			array([0, 2]),
125			array([8, 6, 4, 1, 5, 2, 3, 0]),
126			array([9, 7]))
127
128			>>> train_test_split(x,test_pct=0.3,val_set=True,val_pct=0.2)
129			(array([0, 9, 5, 7, 6, 2, 8]),
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
130			array([1, 3, 4]),
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
131			array([3, 4]))
132
133			"""
134			# Perform input checks
135			assert arrays, "No arrays supplied"
136			lens = [len(a) for a in arrays]
137			assert len(set(lens)) == 1, "arrays have varying lengths"
138			assert lens[0] > 0, "supplied arrays have `len == 0`"
139			if val_set:
140			assert 0.0 <= test_pct <= 1.0, "`test_pct` must be in the range `0.0 <= test_pct <= 1.0`"
141			assert 0.0 <= val_pct <= 1.0, "`val_pct` must be in the range `0.0 <= val_pct <= 1.0`"
142			assert test_pct + val_pct <= 1.0, "Can't have `test_pc + val_pct >= 1.0`"
143			else:
144			assert 0.0 <= test_pct <= 1.0, "`test_pct` must be in the range `0.0 <= test_pct <= 1.0`"
145			assert test_pct <= 1.0, "Can't have `test_pc >= 1.0`"
146			# Calculate lengths
147			n = lens[0]
			0 ignored issues – show Coding Style Naming introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Variable name "n" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]{2,}|_[^\\WA-Z]*|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
148			n_test = int(n * test_pct)
149			# Shuffle the indexes
150			indexes = np.arange(n)
151			np.random.shuffle(indexes)
152			# Split the data
153			if val_set:
154			n_val = int(n * val_pct)
155			n_train = n - n_test - n_val
156			splits = (
157			(
158			a[indexes[:n_train]],
			0 ignored issues – show Comprehensibility Best Practice introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report The variable `a` does not seem to be defined. Loading history... Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
159			a[indexes[n_train:n_train+n_test]],
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing whitespace Loading history...
160			a[indexes[-n_val:]]
161			)
162			for a in map(np.asarray,arrays)
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Exactly one space required after comma Loading history...
163			)
164			else:
165			n_train = n - n_test
166			splits = (
167			(a[indexes[:n_train]], a[indexes[n_train:]])
168			for a in map(np.asarray,arrays)
			0 ignored issues – show Coding Style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Exactly one space required after comma Loading history...
169			)
170			return tuple(it.chain(*splits))
171
			0 ignored issues – show coding-style introduced 2020-08-20 18:55 UTC by Report Bug Copy Issue Report Trailing newlines Loading history...
172

a-poor / apoor

Push — master ( ead7bc...45a1f9 )

apoor.fdir() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like