| Total Complexity | 6 |
| Total Lines | 40 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | from scipy import stats |
||
| 2 | from tabpy.models.utils import setup_utils |
||
| 3 | |||
| 4 | |||
def ttest(_arg1, _arg2):
    """
    Return the p-value of a t-test.

    T-Test is a statistical hypothesis test that is used to compare
    two sample means or a sample's mean against a known population mean.
    For more information on the function and how to use it please refer
    to tabpy-tools.md

    The variant that is run is selected from the contents of ``_arg2``:

    * exactly one element -- one sample test: ``_arg1`` is tested against
      ``_arg2`` as the population mean (``scipy.stats.ttest_1samp``);
    * exactly two distinct values -- ``_arg2`` is treated as a binary
      factor labelling each observation of ``_arg1``; the two groups are
      compared with ``scipy.stats.ttest_ind(..., equal_var=False)``;
    * anything else -- ``_arg1`` and ``_arg2`` are treated as two samples
      and compared with ``scipy.stats.ttest_ind(..., equal_var=False)``.

    :param _arg1: sequence of numeric observations.
    :param _arg2: one-element population mean, binary factor, or second
        sample (see above).
    :return: the p-value reported by the selected scipy test.
    :raises ValueError: if ``_arg2`` is a binary factor whose length
        differs from the length of ``_arg1``.
    """
    # one sample test with mean
    if len(_arg2) == 1:
        _, p_value = stats.ttest_1samp(_arg1, _arg2)
        return p_value

    # Build the set of distinct values once; it both selects the branch
    # and supplies the reference class label below.
    classes = set(_arg2)

    # two sample t-test where _arg1 is numeric and _arg2 is a binary factor
    if len(classes) == 2:
        # each sample in _arg1 needs to have a corresponding classification
        # in _arg2
        if len(_arg1) != len(_arg2):
            raise ValueError(
                "_arg1 and _arg2 must have the same length when _arg2 is a "
                "binary factor (got {} and {})".format(len(_arg1), len(_arg2))
            )
        # Which of the two labels is taken as the reference only decides
        # which list each observation lands in.
        reference_class = next(iter(classes))
        sample1 = []
        sample2 = []
        for value, label in zip(_arg1, _arg2):
            if label == reference_class:
                sample1.append(value)
            else:
                sample2.append(value)
        _, p_value = stats.ttest_ind(sample1, sample2, equal_var=False)
        return p_value

    # arg1 is a sample and arg2 is a sample
    _, p_value = stats.ttest_ind(_arg1, _arg2, equal_var=False)
    return p_value
||
| 36 | |||
| 37 | |||
if __name__ == "__main__":
    # When executed as a script, hand the ttest function to
    # setup_utils.deploy_model under the name "ttest" together with its
    # description text.
    model_name = "ttest"
    model_description = "Returns the p-value form a t-test"
    setup_utils.deploy_model(model_name, ttest, model_description)
||
| 40 |