|
1
|
|
|
import unittest |
|
2
|
|
|
|
|
3
|
|
|
import numpy |
|
4
|
|
|
from numpy.random.mtrand import normal |
|
5
|
|
|
|
|
6
|
|
|
from pysie.dsl.variable_independence_testing import Anova, ContingencyTable, ChiSquare |
|
7
|
|
|
from pysie.stats.samples import Sample |
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
class AnovaUnitTest(unittest.TestCase):
    """Exercise Anova on three numeric groups whose means are nearly equal."""

    def test_anova(self):
        """Build a three-group numeric sample and expect no rejection at 0.01.

        The draws come from the unseeded global NumPy generator, so the
        concrete p-value differs from run to run.
        """
        # (mean, standard deviation, group label) for each simulated group.
        group_specs = (
            (1.0, 1.0, 'group1'),
            (1.1, 1.0, 'group2'),
            (1.09, 1.0, 'group3'),
        )

        sample = Sample()
        for _ in range(100):
            # One draw per group per round, always in the same group order.
            for mean, std_dev, label in group_specs:
                sample.add_numeric(normal(mean, std_dev), label)

        testing = Anova(sample=sample)

        p_value_text = str(testing.p_value)
        print('p-value: ' + p_value_text)

        # 0.01 is presumably the significance level -- confirm against Anova.
        reject = testing.will_reject(0.01)
        reject_text = str(reject)
        print('will reject [same mean for all groups] ? ' + reject_text)

        self.assertFalse(reject)
|
34
|
|
|
|
|
35
|
|
|
|
|
36
|
|
|
class ContingencyTableUnitTest(unittest.TestCase):
    """Check the row, column and grand totals of a 2x2 ContingencyTable."""

    def test_table(self):
        """Fill four cells, then verify each marginal total and the grand total."""
        table = ContingencyTable()

        # Each entry is (first key, second key, count). The assertions below
        # query first keys via get_row_total and second keys via
        # get_column_total.
        cells = (
            ('eventA', 'eventB', 10),
            ('eventC', 'eventB', 20),
            ('eventA', 'eventD', 15),
            ('eventC', 'eventD', 10),
        )
        for first_key, second_key, count in cells:
            table.set_cell(first_key, second_key, count)

        # Column totals: eventB = 10 + 20, eventD = 15 + 10.
        for column, expected in (('eventB', 30), ('eventD', 25)):
            print(table.get_column_total(column))
            self.assertEqual(table.get_column_total(column), expected)

        # Row totals: eventA = 10 + 15, eventC = 20 + 10.
        for row, expected in (('eventA', 25), ('eventC', 30)):
            print(table.get_row_total(row))
            self.assertEqual(table.get_row_total(row), expected)

        # Grand total is the sum of all four cells.
        self.assertEqual(table.get_total(), 55)
|
53
|
|
|
|
|
54
|
|
|
|
|
55
|
|
|
class ChiSquareUnitTest(unittest.TestCase):
    """Exercise ChiSquare on three groups of randomly labelled items."""

    def test_anova(self):
        """Run ChiSquare on random categorical data and sanity-check its p-value.

        NOTE: this method exercises ChiSquare, not Anova; the name is kept
        unchanged so that existing test IDs continue to resolve.

        Each of the 1000 rounds adds one item per group. An item is labelled
        'itemA' when a standard-normal draw is positive and 'itemB' otherwise.
        The draws are unseeded, so the concrete p-value differs between runs.
        """
        sample = Sample()

        for _ in range(1000):
            # One independent draw per group, in a fixed group order.
            for group in ('group1', 'group2', 'group3'):
                item = 'itemA' if numpy.random.randn() > 0 else 'itemB'
                sample.add_category(item, group)

        testing = ChiSquare(sample=sample)

        print('p-value: ' + str(testing.p_value))
        # 0.01 is presumably the significance level -- confirm against ChiSquare.
        reject = testing.will_reject(0.01)
        print('will reject [two categorical variables are independent of each other] ? ' + str(reject))

        # Without an assertion this test could only fail by raising. The
        # rejection decision itself is random for unseeded data, so assert a
        # run-independent property: the p-value must be a valid probability
        # (this also catches NaN).
        self.assertGreaterEqual(testing.p_value, 0.0)
        self.assertLessEqual(testing.p_value, 1.0)
|
69
|
|
|
|
|
70
|
|
|
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
72
|
|
|
|