Code Duplication    Length = 39-64 lines in 4 locations

libtlda/iw.py 1 location

@@ 25-88 (lines=64) @@
22
    functions.
23
    """
24
25
    def __init__(self, loss='logistic', l2=1.0, iwe='lr', smoothing=True,
                 clip=-1, kernel_type='rbf', bandwidth=1):
        """
        Select a particular type of importance-weighted classifier.

        Parameters
        ----------
        loss : str
            loss function for weighted classifier, options: 'logistic',
            'quadratic', 'hinge' (def: 'logistic')
        l2 : float
            l2-regularization parameter value (def: 1.0)
        iwe : str
            importance weight estimator, options: 'lr', 'nn', 'rg', 'kmm',
            'kde' (def: 'lr')
        smoothing : bool
            whether to apply Laplace smoothing to the nearest-neighbour
            importance-weight estimator (def: True)
        clip : float
            maximum allowable importance-weight value; if set to -1, then the
            weights are not clipped (def: -1)
        kernel_type : str
            what type of kernel to use for kernel density estimation or kernel
            mean matching, options: 'diste', 'rbf' (def: 'rbf')
        bandwidth : float
            kernel bandwidth parameter value for kernel-based weight
            estimators (def: 1)

        Returns
        -------
        None

        Examples
        --------
        >>> clf = ImportanceWeightedClassifier()

        """
        self.loss = loss
        self.l2 = l2
        self.iwe = iwe
        self.smoothing = smoothing
        self.clip = clip
        self.kernel_type = kernel_type
        self.bandwidth = bandwidth

        # Initialize untrained classifiers based on choice of loss function
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        # NOTE(review): empty string is used as an "unset" sentinel here and in
        # the sibling classifiers — presumably replaced by an int after fitting.
        self.train_data_dim = ''
89
90
    def iwe_ratio_gaussians(self, X, Z):
91
        """

libtlda/tca.py 1 location

@@ 24-74 (lines=51) @@
21
    Methods contain component analysis and general utilities.
22
    """
23
24
    def __init__(self, loss='logistic', l2=1.0, mu=1.0, num_components=1,
                 kernel_type='rbf', bandwidth=1.0, order=2.0):
        """
        Select a particular type of transfer component classifier.

        Parameters
        ----------
        loss : str
            loss function for weighted classifier, options: 'logistic',
            'quadratic', 'hinge' (def: 'logistic')
        l2 : float
            l2-regularization parameter value (def: 1.0)
        mu : float
            trade-off parameter (def: 1.0)
        num_components : int
            number of transfer components to maintain (def: 1)
        kernel_type : str
            type of kernel to use, options: 'rbf' (def: 'rbf')
        bandwidth : float
            kernel bandwidth for transfer component analysis (def: 1.0)
        order : float
            order of polynomial for kernel (def: 2.0)

        Returns
        -------
        None

        """
        self.loss = loss
        self.l2 = l2
        self.mu = mu
        self.num_components = num_components

        self.kernel_type = kernel_type
        self.bandwidth = bandwidth
        self.order = order

        # Initialize untrained classifiers
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Maintain source and transfer data for computing kernels
        self.XZ = ''

        # Maintain transfer components
        self.C = ''

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = ''
75
76
    def kernel(self, X, Z, type='rbf', order=2, bandwidth=1.0):
77
        """

libtlda/scl.py 1 location

@@ 25-63 (lines=39) @@
22
    functions.
23
    """
24
25
    def __init__(self, loss='logistic', l2=1.0, num_pivots=1,
                 num_components=1):
        """
        Select a particular type of structural-correspondence classifier.

        Parameters
        ----------
        loss : str
            loss function for weighted classifier, options: 'logistic',
            'quadratic', 'hinge' (def: 'logistic')
        l2 : float
            l2-regularization parameter value (def: 1.0)
        num_pivots : int
            number of pivot features to use (def: 1)
        num_components : int
            number of components to use after extracting pivot features
            (def: 1)

        Returns
        -------
        None

        """
        self.loss = loss
        self.l2 = l2
        self.num_pivots = num_pivots
        self.num_components = num_components

        # Initialize untrained classifiers based on choice of loss function
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Whether model has been trained
        self.is_trained = False

        # Maintain pivot component matrix
        self.C = 0

        # Dimensionality of training data
        self.train_data_dim = ''
64
65
    def augment_features(self, X, Z, l2=0.0):
66
        """

libtlda/suba.py 1 location

@@ 25-70 (lines=46) @@
22
    Methods contain the alignment itself, classifiers and general utilities.
23
    """
24
25
    def __init__(self, loss='logistic', l2=1.0, num_components=1):
        """
        Select a particular type of subspace aligned classifier.

        Arguments
        ---------
        loss : str
            loss function for weighted classifier, options: 'logistic',
            'quadratic', 'hinge' (def: 'logistic')
        l2 : float
            l2-regularization parameter value (def: 1.0)
        num_components : int
            number of transfer components to maintain (def: 1)

        Returns
        -------
        None

        Examples
        --------
        >>> clf = SubspaceAlignedClassifier(loss='hinge', l2=0.1)

        """
        self.loss = loss
        self.l2 = l2
        self.num_components = num_components

        # Initialize untrained classifiers
        if self.loss == 'logistic':
            # Logistic regression model
            self.clf = LogisticRegression()
        elif self.loss == 'quadratic':
            # Least-squares model
            self.clf = LinearRegression()
        elif self.loss == 'hinge':
            # Linear support vector machine
            self.clf = LinearSVC()
        else:
            # Other loss functions are not implemented
            raise NotImplementedError('Loss function not implemented.')

        # Whether model has been trained
        self.is_trained = False

        # Dimensionality of training data
        self.train_data_dim = ''
71
72
    def subspace_alignment(self, X, Z, num_components=1):
73
        """