Total Complexity | 6 |
Total Lines | 45 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | from scipy import stats |
||
2 | import sys |
||
3 | from pathlib import Path |
||
4 | sys.path.append(str(Path(__file__).resolve().parent.parent.parent / 'models')) |
||
5 | from utils import setup_utils |
||
6 | |||
7 | |||
def ttest(_arg1, _arg2):
    '''
    Return the p-value of a t-test; the variant is chosen from ``_arg2``.

    T-Test is a statistical hypothesis test that is used to compare
    two sample means or a sample's mean against a known population mean.
    For more information on the function and how to use it please refer
    to tabpy-tools.md

    The cases are checked in this order:

    1. ``len(_arg2) == 1``: one-sample test of ``_arg1`` against the
       value held in ``_arg2`` (``scipy.stats.ttest_1samp``).
    2. ``_arg2`` contains exactly two distinct values: ``_arg2`` is
       treated as a binary factor that labels every element of ``_arg1``;
       the two resulting groups are compared with
       ``scipy.stats.ttest_ind``.
    3. Anything else: ``_arg1`` and ``_arg2`` are compared directly as
       two samples with ``scipy.stats.ttest_ind``.

    Both two-sample cases pass ``equal_var=False`` to scipy.

    Note that a genuine second sample which happens to contain exactly
    two distinct values falls into case 2, not case 3.

    Returns the p-value reported by scipy.

    Raises ValueError if ``_arg2`` is a binary factor and its length
    differs from the length of ``_arg1``.
    '''
    # one sample test with mean
    if len(_arg2) == 1:
        _, p_value = stats.ttest_1samp(_arg1, _arg2)
        return p_value

    # Build the set of distinct values once; it drives the branch choice
    # and supplies the class label used for grouping.
    classes = set(_arg2)

    # two sample t-test where _arg1 is numeric and _arg2 is a binary factor
    if len(classes) == 2:
        # each sample in _arg1 needs to have a corresponding classification
        # in _arg2
        if len(_arg1) != len(_arg2):
            raise ValueError(
                '_arg1 and _arg2 must have the same length when _arg2 is '
                'a binary factor (got {} and {})'.format(
                    len(_arg1), len(_arg2)))

        # Only one label is needed: everything that is not class1 belongs
        # to the other group.
        class1, _ = classes
        sample1 = []
        sample2 = []
        for value, label in zip(_arg1, _arg2):
            if label == class1:
                sample1.append(value)
            else:
                sample2.append(value)

        _, p_value = stats.ttest_ind(sample1, sample2, equal_var=False)
        return p_value

    # arg1 is a sample and arg2 is a sample
    _, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
    return p_value
||
39 | |||
40 | |||
if __name__ == '__main__':
    # When run as a script, hand the endpoint name, the function itself and
    # its description to setup_utils.main.
    # NOTE(review): "form" in the description looks like a typo for "from";
    # it is left unchanged here because the string is passed on at runtime.
    description = 'Returns the p-value form a t-test'
    setup_utils.main('ttest', ttest, description)
||
45 |