Completed
Push — master ( e2ceb6...712fe6 )
by Xianshun
01:17
created

confidence_interval()   A

Complexity

Conditions 2

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 10
rs 9.4285
1
import math
2
import random
3
4
from enum import Enum
5
6
from scipy.stats import norm, t
7
8
9
class DistributionFamily(Enum):
    """Family of distribution used to model a sampling distribution.

    The sampling-distribution classes in this file store one of these
    members in their ``distribution_family`` attribute and branch on it
    when computing a confidence interval.
    """

    # Used by the code in this file.
    normal = 1
    student_t = 2
    # Not referenced by the code visible in this file.
    fisher = 3
    chi_square = 4
    # Selected when the interval is derived from simulated draws.
    simulation = 5
15
16
17
class MeanSamplingDistribution(object):
    """Sampling distribution of a sample mean.

    Built either from summary statistics (``sample_mean``, ``sample_sd``,
    ``sample_size``) or from a ``sample_distribution`` object exposing
    ``mean``, ``sd`` and ``sample_size`` attributes.  When both are given,
    the values read from ``sample_distribution`` take precedence.

    After construction the instance exposes ``point_estimate``,
    ``sample_sd``, ``sample_size``, ``standard_error``, ``df`` and
    ``distribution_family``.
    """

    sample_distribution = None
    point_estimate = None
    distribution_family = None
    df = None
    # Defaults so that a missing argument is detected explicitly in
    # __init__ instead of surfacing as an AttributeError.
    sample_sd = None
    sample_size = None
    standard_error = None

    def __init__(self, sample_distribution=None, sample_mean=None, sample_sd=None, sample_size=None):
        """Initialise the distribution and pick its distribution family.

        :param sample_distribution: optional object with ``mean``, ``sd``
            and ``sample_size`` attributes; overrides the other arguments.
        :param sample_mean: sample mean, stored as ``point_estimate``.
        :param sample_sd: sample standard deviation.
        :param sample_size: number of observations in the sample.
        :raises ValueError: if the standard deviation or the sample size
            is not available from either source.
        """
        if sample_mean is not None:
            self.point_estimate = sample_mean

        if sample_sd is not None:
            self.sample_sd = sample_sd

        if sample_size is not None:
            self.sample_size = sample_size

        if sample_distribution is not None:
            self.sample_distribution = sample_distribution
            self.point_estimate = sample_distribution.mean
            self.sample_sd = sample_distribution.sd
            self.sample_size = sample_distribution.sample_size

        if self.sample_sd is None or self.sample_size is None:
            raise ValueError(
                'sample_sd and sample_size are required, either directly '
                'or through sample_distribution'
            )

        self.standard_error = MeanSamplingDistribution.calculate_standard_error(
            self.sample_sd, self.sample_size
        )

        self.df = self.sample_size - 1.0
        # Samples smaller than 30 use Student's t; larger ones use the normal.
        if self.sample_size < 30:
            self.distribution_family = DistributionFamily.student_t
        else:
            self.distribution_family = DistributionFamily.normal

    @staticmethod
    def calculate_standard_error(sample_sd, sample_size):
        """Return ``sample_sd / sqrt(sample_size)``."""
        return sample_sd / math.sqrt(sample_size)

    def confidence_interval(self, confidence_level):
        """Return the two-sided confidence interval for the mean.

        :param confidence_level: coverage of the interval, e.g. ``0.95``.
        :return: ``(lower, upper)`` tuple equal to
            ``point_estimate -/+ critical_value * standard_error``.
        """
        # Upper-tail quantile: the excluded probability is split evenly
        # between the two tails.
        q = 1 - (1 - confidence_level) / 2
        if self.distribution_family == DistributionFamily.normal:
            critical_value = norm.ppf(q)
        else:
            critical_value = t.ppf(q, self.df)
        # The margin is only the critical value times the standard error;
        # the point estimate is the centre of the interval, not part of
        # the margin.
        margin = critical_value * self.standard_error
        return self.point_estimate - margin, self.point_estimate + margin
61
62
63
class ProportionSamplingDistribution(object):
    """Sampling distribution of a sample proportion.

    Built either from summary values (``sample_proportion``,
    ``sample_size``, optionally ``categorical_value``) or from a
    ``sample_distribution`` object exposing ``proportion``,
    ``categorical_value`` and ``sample_size``.  When both are given, the
    values read from ``sample_distribution`` take precedence.

    If either ``sample_size * p`` or ``sample_size * (1 - p)`` is below 10
    the distribution is approximated by simulation; otherwise a normal
    approximation with an analytic standard error is used.
    """

    sample_distribution = None
    point_estimate = None
    distribution_family = None
    sample_size = None
    categorical_value = None
    standard_error = None
    simulated_proportions = None
    # Number of simulated samples drawn by simulate().
    simulation_count = 1000

    def __init__(self, sample_distribution=None, categorical_value=None, sample_proportion=None, sample_size=None):
        """Initialise the distribution and pick its distribution family.

        :param sample_distribution: optional object with ``proportion``,
            ``categorical_value`` and ``sample_size`` attributes;
            overrides the other arguments.
        :param categorical_value: the category the proportion refers to.
        :param sample_proportion: observed proportion, stored as
            ``point_estimate``.
        :param sample_size: number of observations in the sample.
        :raises ValueError: if the proportion or the sample size is not
            available from either source.
        """
        if sample_proportion is not None:
            self.point_estimate = sample_proportion

        if sample_size is not None:
            self.sample_size = sample_size

        if categorical_value is not None:
            self.categorical_value = categorical_value

        if sample_distribution is not None:
            self.build(sample_distribution)

        if self.point_estimate is None or self.sample_size is None:
            raise ValueError(
                'sample_proportion and sample_size are required, either '
                'directly or through sample_distribution'
            )

        expected_successes = self.sample_size * self.point_estimate
        expected_failures = self.sample_size * (1 - self.point_estimate)
        if expected_successes < 10 or expected_failures < 10:
            self.distribution_family = DistributionFamily.simulation
            self.simulate()
        else:
            self.distribution_family = DistributionFamily.normal
            self.standard_error = math.sqrt(
                self.point_estimate * (1 - self.point_estimate) / self.sample_size
            )

    def build(self, sample_distribution):
        """Copy the proportion, category and size from ``sample_distribution``."""
        self.sample_distribution = sample_distribution
        self.point_estimate = sample_distribution.proportion
        self.categorical_value = sample_distribution.categorical_value
        self.sample_size = sample_distribution.sample_size

    def simulate(self):
        """Fill ``simulated_proportions`` with sorted simulated proportions.

        Draws ``simulation_count`` samples of ``sample_size`` trials each,
        where a trial succeeds with probability ``point_estimate``, and
        stores the per-sample success proportions in ascending order.
        """
        proportions = []
        for _ in range(self.simulation_count):
            # random.random() lies in [0, 1), so a strict "<" gives exactly
            # probability p of success (never for p == 0, always for p == 1).
            successes = sum(
                1 for _ in range(self.sample_size)
                if random.random() < self.point_estimate
            )
            proportions.append(float(successes) / self.sample_size)
        proportions.sort()
        self.simulated_proportions = proportions

    def confidence_interval(self, confidence_level):
        """Return the two-sided confidence interval for the proportion.

        :param confidence_level: coverage of the interval, e.g. ``0.95``.
        :return: ``(lower, upper)`` tuple.  For the normal family this is
            ``point_estimate -/+ z * standard_error``; for the simulation
            family it is a pair of order statistics of the simulated
            proportions.
        """
        # Upper-tail quantile: the excluded probability is split evenly
        # between the two tails.
        q = 1 - (1 - confidence_level) / 2
        if self.distribution_family == DistributionFamily.normal:
            margin = norm.ppf(q) * self.standard_error
            return self.point_estimate - margin, self.point_estimate + margin

        draws = self.simulated_proportions
        count = len(draws)
        # Clamp both indices into the list: with confidence_level == 1.0
        # the upper index would otherwise equal ``count`` and raise
        # IndexError.
        lower_index = max(int(count * (1 - confidence_level) / 2), 0)
        upper_index = min(int(count * q), count - 1)
        return draws[lower_index], draws[upper_index]
118
119