MeanDiffTesting   A
last analyzed

Complexity

Total Complexity 7

Size/Duplication

Total Lines 37
Duplicated Lines 67.57 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
dl 25
loc 37
rs 10
c 1
b 0
f 0
wmc 7

2 Methods

Rating   Name   Duplication   Size   Complexity  
B __init__() 23 25 6
A will_reject() 0 3 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
import random
2
3
from pysie.stats.distributions import DistributionFamily
4
from scipy.stats import norm, t
5
import math
6
7
8
class MeanDiffTesting(object):
    """Significance test built from a sampling distribution of a mean difference.

    The test statistic is ``point_estimate / standard_error`` of the supplied
    sampling distribution. Its tail probability is taken from the standard
    normal distribution when ``distribution_family`` equals
    ``DistributionFamily.normal`` and from Student's t with
    ``sampling_distribution.df`` degrees of freedom otherwise.
    """

    # Sampling distribution object the test was built from.
    sampling_distribution = None
    # Tail probability beyond |test_statistic| on one side.
    p_value_one_tail = None
    # Twice the one-tail probability.
    p_value_two_tail = None
    # point_estimate / standard_error of the sampling distribution.
    test_statistic = None
    # Significance level passed to the constructor, if any.
    significance_level = None
    # (one-tail rejects, two-tail rejects); stays None without a level.
    reject_mean_same = None

    def __init__(self, sampling_distribution, significance_level=None):
        """Compute the test statistic and both p-values.

        :param sampling_distribution: object exposing ``point_estimate``,
            ``standard_error``, ``distribution_family`` and, for the
            non-normal case, ``df``.
        :param significance_level: optional threshold; when given,
            ``reject_mean_same`` is filled in with the result of
            ``will_reject(significance_level)``.
        """
        self.sampling_distribution = sampling_distribution
        if significance_level is not None:
            self.significance_level = significance_level

        statistic = sampling_distribution.point_estimate / sampling_distribution.standard_error
        self.test_statistic = statistic

        # sf(|x|) is the tail area beyond the statistic on its own side.
        # It is used instead of 1 - cdf(|x|) because the subtraction cancels
        # to exactly 0.0 once cdf rounds to 1.0 (large |x|).
        if sampling_distribution.distribution_family == DistributionFamily.normal:
            tail_area = norm.sf(abs(statistic))
        else:
            tail_area = t.sf(abs(statistic), sampling_distribution.df)

        self.p_value_one_tail = tail_area
        self.p_value_two_tail = tail_area * 2

        if significance_level is not None:
            self.reject_mean_same = self.will_reject(significance_level)

    def will_reject(self, significance_level):
        """Compare both p-values with ``significance_level``.

        :return: tuple ``(one_tail_below_level, two_tail_below_level)``.
        """
        return (self.p_value_one_tail < significance_level,
                self.p_value_two_tail < significance_level)
45
46
47
class ProportionDiffTesting(object):
    """Significance test for the difference between two group proportions.

    Under the null hypothesis both groups share one proportion ``p_null``,
    estimated by pooling the two group proportions weighted by sample size.
    When ``distribution_family`` equals ``DistributionFamily.normal`` the
    p-values come from a z statistic; otherwise they are estimated from
    differences simulated by ``simulate()``.
    """

    # Sampling distribution object the test was built from.
    sampling_distribution = None
    # Tail probability on the side of the observed difference.
    p_value_one_tail = None
    # Probability of a difference beyond the observed one in either direction.
    p_value_two_tail = None
    # Pooled proportion assumed for both groups under the null hypothesis.
    p_null = None
    # z statistic; only set on the normal path, None on the simulation path.
    test_statistic = None
    # Significance level passed to the constructor, if any.
    significance_level = None
    # (one-tail rejects, two-tail rejects); stays None without a level.
    reject_proportion_same = None

    def __init__(self, sampling_distribution, significance_level=None):
        """Compute the pooled null proportion and both p-values.

        :param sampling_distribution: object exposing ``point_estimate``,
            ``distribution_family``, ``grp1_point_estimate``,
            ``grp2_point_estimate``, ``grp1_sample_size`` and
            ``grp2_sample_size``.
        :param significance_level: optional threshold; when given,
            ``reject_proportion_same`` is filled in with the result of
            ``will_reject(significance_level)``.
        """
        self.sampling_distribution = sampling_distribution
        n1 = sampling_distribution.grp1_sample_size
        n2 = sampling_distribution.grp2_sample_size
        p1 = sampling_distribution.grp1_point_estimate
        p2 = sampling_distribution.grp2_point_estimate

        # Pool by sample size: a plain average of the two proportions is
        # only the pooled estimate when both groups have the same size.
        p_null = (p1 * n1 + p2 * n2) / float(n1 + n2)
        self.p_null = p_null
        if significance_level is not None:
            self.significance_level = significance_level

        if sampling_distribution.distribution_family == DistributionFamily.normal:
            null_variance = p_null * (1 - p_null)
            standard_error_null = math.sqrt(null_variance / n1 + null_variance / n2)
            z_score = sampling_distribution.point_estimate / standard_error_null
            self.test_statistic = z_score
            # sf(|z|) avoids the cancellation 1 - cdf(|z|) suffers for large |z|.
            tail_area = norm.sf(abs(z_score))
            self.p_value_one_tail = tail_area
            self.p_value_two_tail = tail_area * 2
        else:
            simulated = self.simulate()
            total = float(len(simulated))
            diff = p1 - p2
            extreme = abs(diff)
            # One tail: simulated differences beyond the observed one, on
            # the same side as the observed difference.
            if diff >= 0:
                one_tail_hits = sum(1.0 for x in simulated if x > diff)
            else:
                one_tail_hits = sum(1.0 for x in simulated if x < diff)
            # Two tails: beyond |diff| in either direction. abs() matters,
            # otherwise a negative diff makes the condition true for
            # practically every simulated value.
            two_tail_hits = sum(1.0 for x in simulated if x > extreme or x < -extreme)
            self.p_value_one_tail = one_tail_hits / total
            self.p_value_two_tail = two_tail_hits / total

        if significance_level is not None:
            self.reject_proportion_same = self.will_reject(significance_level)

    def simulate(self, simulation_count=1000):
        """Simulate differences in group proportions under the null.

        Each round draws ``grp1_sample_size`` and ``grp2_sample_size``
        Bernoulli trials with success probability ``self.p_null`` and records
        the difference between the two simulated proportions.

        :param simulation_count: number of simulated differences to produce.
        :return: list of simulated differences in ascending order.
        """
        n1 = self.sampling_distribution.grp1_sample_size
        n2 = self.sampling_distribution.grp2_sample_size
        p_null = self.p_null

        simulated = []
        for _round in range(simulation_count):
            # random.random() lies in [0, 1), so "< p" succeeds with
            # probability exactly p (and never when p is 0).
            successes1 = sum(1 for _trial in range(n1) if random.random() < p_null)
            successes2 = sum(1 for _trial in range(n2) if random.random() < p_null)
            simulated.append(float(successes1) / n1 - float(successes2) / n2)
        simulated.sort()
        return simulated

    def will_reject(self, significance_level):
        """Compare both p-values with ``significance_level``.

        :return: tuple ``(one_tail_below_level, two_tail_below_level)``.
        """
        return (self.p_value_one_tail < significance_level,
                self.p_value_two_tail < significance_level)