tabpy.models.scripts.PCA - Code Metrics - tableau/TabPy - Measure and Improve Code Quality continuously with Scrutinizer

tabpy.models.scripts.PCA A
last analyzed 2024-11-25 22:00 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	61
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	8
eloc	42
dl	0
loc	61
rs	10
c	0
b	0
f	0

1 Function

Rating	Name	Duplication	Size	Complexity
B	PCA()	0	47	8

import pandas as pd
from numpy import array
from sklearn.decomposition import PCA as sklearnPCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from tabpy.models.utils import setup_utils


def _one_hot_rows(col):
    """
    One-hot encode a non-numeric column.

    Returns a list with one indicator row per distinct value in ``col``
    (each row has ``len(col)`` entries), ordered by the label encoder's
    class order.

    Raises ValueError if ``col`` holds more than 25 distinct values.
    """
    if len(set(col)) > 25:
        message = (
            "ERROR: Non-numeric arguments cannot have more than "
            "25 unique values"
        )
        print(message)
        raise ValueError(message)

    try:
        encoder = OneHotEncoder(categories="auto", sparse_output=False)
    except TypeError:
        # Older scikit-learn releases only know the ``sparse`` keyword;
        # newer ones renamed it to ``sparse_output`` and dropped ``sparse``.
        encoder = OneHotEncoder(categories="auto", sparse=False)

    codes = LabelEncoder().fit_transform(array(col)).reshape(len(col), 1)
    # Transpose so that each distinct value becomes one row, i.e. one
    # feature column of the final data frame.
    return list(encoder.fit_transform(codes).transpose())


def PCA(component, _arg1, _arg2, *_argN):
    """
    Principal Component Analysis is a technique that extracts the key
    distinct components from a high dimensional space while attempting
    to capture as much of the variance as possible. For more information
    on the function and how to use it please refer to tabpy-tools.md

    Arguments:
        component: 1-based index of the principal component to return.
        _arg1, _arg2, *_argN: the input columns. The type of a column's
            first element decides how it is treated: bool columns are
            converted to 0/1, int/float columns are used as they are,
            and anything else is one-hot encoded (at most 25 distinct
            values allowed).

    Returns:
        The selected principal component as a list with one value per
        input row.

    Raises:
        ValueError: if a non-numeric column has more than 25 distinct
            values, or if ``component`` is not between 1 and the number
            of columns left after encoding.
    """
    encoded_cols = []
    for col in [_arg1, _arg2, *_argN]:
        first = col[0]
        # bool is a subclass of int, so it has to be tested first;
        # otherwise the numeric branch would swallow boolean columns.
        if isinstance(first, bool):
            encoded_cols.append(array(col).astype(int))
        elif isinstance(first, (int, float)):
            encoded_cols.append(col)
        else:
            encoded_cols.extend(_one_hot_rows(col))

    data = {
        f"col{index}": list(values)
        for index, values in enumerate(encoded_cols, start=1)
    }

    # The upper bound is the number of encoded columns, which can exceed
    # the number of arguments when one-hot encoding expanded a column.
    if component <= 0 or component > len(data):
        message = (
            "ERROR: Component specified must be > 0 and "
            "<= number of arguments"
        )
        print(message)
        raise ValueError(message)

    df = pd.DataFrame(data=data, dtype=float)
    scaled = StandardScaler().fit_transform(df)
    components = sklearnPCA().fit_transform(scaled)

    return components[:, component - 1].tolist()


if __name__ == "__main__":
    # When run as a script, hand the PCA function to the deployment
    # helper under the name "PCA" together with its description.
    setup_utils.deploy_model(
        "PCA",
        PCA,
        "Returns the specified principal component",
    )


1			import pandas as pd
2			from numpy import array
3			from sklearn.decomposition import PCA as sklearnPCA
4			from sklearn.preprocessing import StandardScaler
5			from sklearn.preprocessing import LabelEncoder
6			from sklearn.preprocessing import OneHotEncoder
7			from tabpy.models.utils import setup_utils
8
9
10			def PCA(component, _arg1, _arg2, *_argN):
11			"""
12			Principal Component Analysis is a technique that extracts the key
13			distinct components from a high dimensional space whie attempting
14			to capture as much of the variance as possible. For more information
15			on the function and how to use it please refer to tabpy-tools.md
16			"""
17			cols = [_arg1, _arg2] + list(_argN)
18			encodedCols = []
19			labelEncoder = LabelEncoder()
20			oneHotEncoder = OneHotEncoder(categories="auto", sparse=False)
21
22			for col in cols:
23			if isinstance(col[0], (int, float)):
24			encodedCols.append(col)
25			elif type(col[0]) is bool:
26			intCol = array(col)
27			encodedCols.append(intCol.astype(int))
28			else:
29			if len(set(col)) > 25:
30			print(
31			"ERROR: Non-numeric arguments cannot have more than "
32			"25 unique values"
33			)
34			raise ValueError
35			integerEncoded = labelEncoder.fit_transform(array(col))
36			integerEncoded = integerEncoded.reshape(len(col), 1)
37			oneHotEncoded = oneHotEncoder.fit_transform(integerEncoded)
38			transformedMatrix = oneHotEncoded.transpose()
39			encodedCols += list(transformedMatrix)
40
41			dataDict = {}
42			for i in range(len(encodedCols)):
43			dataDict[f"col{1 + i}"] = list(encodedCols[i])
44
45			if component <= 0 or component > len(dataDict):
46			print("ERROR: Component specified must be >= 0 and " "<= number of arguments")
47			raise ValueError
48
49			df = pd.DataFrame(data=dataDict, dtype=float)
50			scale = StandardScaler()
51			scaledData = scale.fit_transform(df)
52
53			pca = sklearnPCA()
54			pcaComponents = pca.fit_transform(scaledData)
55
56			return pcaComponents[:, component - 1].tolist()
57
58
59			if __name__ == "__main__":
60			setup_utils.deploy_model("PCA", PCA, "Returns the specified principal component")
61

tableau / TabPy

tabpy.models.scripts.PCA A last analyzed 2024-11-25 22:00 UTC

Complexity

Size/Duplication

Importance

1 Function

Duplication Side-by-Side

Filter issues like

tabpy.models.scripts.PCA A
last analyzed 2024-11-25 22:00 UTC