tabpy.models.scripts.PCA.PCA() - Code Metrics - tableau/TabPy - Measure and Improve Code Quality continuously with Scrutinizer

tabpy.models.scripts.PCA.PCA() B
last analyzed 2024-11-25 22:00 UTC

↳ Parent: tabpy.models.scripts.PCA

Complexity

Conditions

Size

Total Lines	47
Code Lines	32

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	32
dl	0
loc	47
rs	7.2453
c	0
b	0
f	0
cc	8
nop	4

import pandas as pd
from numpy import array
from sklearn.decomposition import PCA as sklearnPCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from tabpy.models.utils import setup_utils


def _one_hot_columns(col):
    """Return one 0/1 indicator row per distinct category found in ``col``."""
    try:
        # ``sparse`` was renamed to ``sparse_output`` in scikit-learn 1.2 and
        # the old keyword was removed in 1.4, so try the current name first.
        oneHotEncoder = OneHotEncoder(categories="auto", sparse_output=False)
    except TypeError:
        # Older scikit-learn releases only accept the original keyword.
        oneHotEncoder = OneHotEncoder(categories="auto", sparse=False)

    integerEncoded = LabelEncoder().fit_transform(array(col))
    integerEncoded = integerEncoded.reshape(len(col), 1)
    oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded)
    # Transpose so that each list element is a full indicator column.
    return list(oneHotEncoded.transpose())


def PCA(component, _arg1, _arg2, *_argN):
    """
    Principal Component Analysis is a technique that extracts the key
    distinct components from a high dimensional space while attempting
    to capture as much of the variance as possible. For more information
    on the function and how to use it please refer to tabpy-tools.md

    Each argument after ``component`` is one input column. The type of a
    column is decided from its first value: booleans are converted to 0/1,
    ints and floats are used as they are, and anything else is treated as
    categorical and one-hot encoded (one encoded column per distinct value).
    The encoded columns are standardized before the PCA is fitted.

    Parameters
    ----------
    component : int
        1-based index of the principal component to return.
    _arg1, _arg2, *_argN : sequence
        Input columns; at least two are required.
        NOTE(review): columns are assumed to have equal length - confirm
        against callers.

    Returns
    -------
    list of float
        The value of the requested principal component for every row.

    Raises
    ------
    ValueError
        If a column is empty, a non-numeric column has more than 25 unique
        values, or ``component`` is outside the range of available
        components.
    """
    cols = [_arg1, _arg2] + list(_argN)
    encodedCols = []

    for col in cols:
        if len(col) == 0:
            message = "ERROR: Arguments must not be empty"
            print(message)
            raise ValueError(message)

        first = col[0]
        # bool is a subclass of int, so it has to be tested before the
        # generic numeric check or this branch can never be reached.
        if isinstance(first, bool):
            encodedCols.append(array(col).astype(int))
        elif isinstance(first, (int, float)):
            encodedCols.append(col)
        else:
            if len(set(col)) > 25:
                message = (
                    "ERROR: Non-numeric arguments cannot have more than "
                    "25 unique values"
                )
                print(message)
                raise ValueError(message)
            encodedCols += _one_hot_columns(col)

    dataDict = {
        f"col{position}": list(values)
        for position, values in enumerate(encodedCols, start=1)
    }

    if component <= 0 or component > len(dataDict):
        # The index is 1-based and is bounded by the number of columns after
        # one-hot expansion, not by the number of arguments passed in.
        message = (
            "ERROR: Component specified must be >= 1 and "
            "<= number of encoded columns"
        )
        print(message)
        raise ValueError(message)

    df = pd.DataFrame(data=dataDict, dtype=float)
    scale = StandardScaler()
    scaledData = scale.fit_transform(df)

    pca = sklearnPCA()
    pcaComponents = pca.fit_transform(scaledData)

    # With fewer rows than encoded columns the fit yields fewer components
    # than columns, so re-check against what was actually produced.
    if component > pcaComponents.shape[1]:
        message = (
            "ERROR: Component specified exceeds the number of components "
            "that can be computed from the given number of rows"
        )
        print(message)
        raise ValueError(message)

    return pcaComponents[:, component - 1].tolist()


if __name__ == "__main__":
    # When executed as a script, hand the PCA function to the deployment
    # helper under the name "PCA" together with its description.
    setup_utils.deploy_model(
        "PCA",
        PCA,
        "Returns the specified principal component",
    )


1			import pandas as pd
2			from numpy import array
3			from sklearn.decomposition import PCA as sklearnPCA
4			from sklearn.preprocessing import StandardScaler
5			from sklearn.preprocessing import LabelEncoder
6			from sklearn.preprocessing import OneHotEncoder
7			from tabpy.models.utils import setup_utils
8
9
10			def PCA(component, _arg1, _arg2, *_argN):
11			"""
12			Principal Component Analysis is a technique that extracts the key
13			distinct components from a high dimensional space while attempting
14			to capture as much of the variance as possible. For more information
15			on the function and how to use it please refer to tabpy-tools.md
16			"""
17			cols = [_arg1, _arg2] + list(_argN)
18			encodedCols = []
19			labelEncoder = LabelEncoder()
20			oneHotEncoder = OneHotEncoder(categories="auto", sparse=False)
21
22			for col in cols:
23			if isinstance(col[0], (int, float)):
24			encodedCols.append(col)
25			elif type(col[0]) is bool:
26			intCol = array(col)
27			encodedCols.append(intCol.astype(int))
28			else:
29			if len(set(col)) > 25:
30			print(
31			"ERROR: Non-numeric arguments cannot have more than "
32			"25 unique values"
33			)
34			raise ValueError
35			integerEncoded = labelEncoder.fit_transform(array(col))
36			integerEncoded = integerEncoded.reshape(len(col), 1)
37			oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded)
38			transformedMatrix = oneHotEncoded.transpose()
39			encodedCols += list(transformedMatrix)
40
41			dataDict = {}
42			for i in range(len(encodedCols)):
43			dataDict[f"col{1 + i}"] = list(encodedCols[i])
44
45			if component <= 0 or component > len(dataDict):
46			print("ERROR: Component specified must be >= 0 and " "<= number of arguments")
47			raise ValueError
48
49			df = pd.DataFrame(data=dataDict, dtype=float)
50			scale = StandardScaler()
51			scaledData = scale.fit_transform(df)
52
53			pca = sklearnPCA()
54			pcaComponents = pca.fit_transform(scaledData)
55
56			return pcaComponents[:, component - 1].tolist()
57
58
59			if __name__ == "__main__":
60			setup_utils.deploy_model("PCA", PCA, "Returns the specified principal component")
61

tableau / TabPy

tabpy.models.scripts.PCA.PCA() B last analyzed 2024-11-25 22:00 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

tabpy.models.scripts.PCA.PCA() B
last analyzed 2024-11-25 22:00 UTC