import statsmodels.api as sm
#from statsmodels.stats.proportion import proportions_ztest
####################################
# params
# -----------------------------------
## count : the number of successes among the nobs trials (at most nobs)
## nobs : the number of trials
## value : the proportion under the null hypothesis (e.g. p=0.54)
## alternative: ['two-sided', 'smaller', 'larger']
# -----------------------------------
# return
# -----------------------------------
## zstat : test statistic for the z-test
## p-value
####################################
# One-sample proportion z-test of H0: p = 0.50 against the one-sided
# alternative p > 0.50 ('larger'), repeated for several success counts.
# nobs stays fixed at 200, so only the number of successes changes; as it
# rises, the p-value shrinks, i.e. the evidence against the null
# hypothesis gets stronger.
# NOTE(review): the original note attributed this to a growing "number of
# trials", but the loop varies `count`, not `nobs`.
for count in range(100, 131, 10):  # 100, 110, 120, 130
    zstat, p_value = sm.stats.proportions_ztest(
        count,
        200,
        value=0.50,
        alternative='larger',
    )
    # A single print call emits the same three lines, one per argument.
    print(
        f"count: {count}",
        f"zstat : {zstat:.5f}, p_value: {p_value:.5f}",
        "--" * 30,
        sep="\n",
    )
count: 100
zstat : 0.00000, p_value: 0.50000
------------------------------------------------------------
count: 110
zstat : 1.42134, p_value: 0.07761
------------------------------------------------------------
count: 120
zstat : 2.88675, p_value: 0.00195
------------------------------------------------------------
count: 130
zstat : 4.44750, p_value: 0.00000
------------------------------------------------------------
import numpy as np
import statsmodels.api as sm
# Seed the global NumPy RNG so both samples below are reproducible.
np.random.seed(0)
print("==" * 20)

# sample1 is the fixed baseline: 100 draws from N(mean=0, sd=1).
sample1 = np.random.normal(loc=0, scale=1, size=100)
# sample2 has its mean shifted to 0.5 (same sd, same size) so the effect of
# a mean difference on the p-value can be observed. Only this one shift is
# evaluated here.
sample2 = np.random.normal(loc=0.5, scale=1, size=100)

# Two-sided independent two-sample t-test; usevar='pooled' treats both
# samples as sharing the same variance.
t_p_d = sm.stats.ttest_ind(
    sample1,
    sample2,
    alternative='two-sided',
    usevar='pooled',
)
# The result unpacks into (test statistic, p-value, degrees of freedom).
tstat, p_value, degree_of_freedom = t_p_d

# A single print call emits the same four lines, one per argument.
print(
    f"tstat : {tstat:.6f}",
    f"p_value : {p_value:.6f}",
    f"degree_of_freedom: {degree_of_freedom}",
    "--" * 20,
    sep="\n",
)
댓글남기기