import numpy as np
import pandas as pd
sample_size = 100

# Draw `sample_size` random values from each of several distributions.
# The draws are kept in this order because they all consume NumPy's shared
# global random state.
test_dist_dict = {
    # Integers drawn uniformly from 1..99 (the upper bound is exclusive).
    "uniform": np.random.randint(low=1, high=100, size=sample_size),
    "norm_10_5": np.random.normal(loc=10, scale=5, size=sample_size),
    "norm_0_1": np.random.normal(loc=0, scale=1, size=sample_size),
    "exp": np.random.exponential(scale=1, size=sample_size),
    "poisson": np.random.poisson(lam=10, size=sample_size),
}
"""append outlier in each columns: max*2
"""
# Rebuild every column with one extra trailing value: twice that column's
# maximum.  NOTE: 2 * max only lands above the existing data when the
# maximum is positive -- assumed to hold for the sampled columns.
test_dist_dict = {
    name: np.append(values, values.max() * 2)
    for name, values in test_dist_dict.items()
}
# Each dict entry becomes one DataFrame column; the appended value ends up
# in the last row.
test_dist_df = pd.DataFrame(test_dist_dict)
# Preview the raw data before any scaling is applied.
print("not standardized yet")
print(test_dist_df.head())
print()

from sklearn import preprocessing

# Standardize each column on its own (axis=0) to zero mean and unit
# variance, i.e. treat every column as if it were normally distributed and
# convert its values to z-scores.
print("standardized")
new_X = pd.DataFrame(
    data=preprocessing.scale(test_dist_df, axis=0),
    columns=test_dist_df.columns,
    index=test_dist_df.index,
)
print(new_X.head())
print()
# A row counts as an outlier when any of its standardized values lies 2 or
# more standard deviations away from the column mean (|z| >= 2).  The mask
# is computed once and negated for the second report, so every row lands in
# exactly one group and a value of exactly +/-2 cannot be listed in both.
is_outlier_row = (new_X.abs() >= 2).any(axis=1)

print("out of 2 sigma")
print(new_X[is_outlier_row])
print()
print("inside of 2 sigma")
print(new_X[~is_outlier_row].head())
not standardized yet
exp norm_0_1 norm_10_5 poisson uniform
0 3.097040 0.925267 11.764456 7 88
1 1.516744 1.312897 17.350357 11 80
2 1.067566 -0.166201 14.717949 13 82
3 0.209375 1.448831 3.403329 12 82
4 0.522443 0.841368 16.816397 7 82
standardized
exp norm_0_1 norm_10_5 poisson uniform
0 1.735699 1.011912 0.189654 -0.811404 1.153538
1 0.411391 1.400518 1.214113 0.111997 0.885165
2 0.034973 -0.082302 0.731327 0.573697 0.952258
3 -0.684202 1.536793 -1.343784 0.342847 0.952258
4 -0.421846 0.927802 1.116184 -0.811404 0.952258
out of 2 sigma
exp norm_0_1 norm_10_5 poisson uniform
43 -0.172024 0.483546 -0.730286 -2.196504 -1.664376
49 0.932857 2.069857 0.512488 0.342847 0.482606
56 -0.623548 1.154390 -2.714261 -0.118853 -1.765016
65 -0.833061 -0.457480 -2.021482 -0.349704 0.918712
84 3.037033 -0.459161 -0.033519 -0.811404 1.086445
100 6.933725 4.055396 5.882849 6.344948 4.172731
inside of 2 sigma
exp norm_0_1 norm_10_5 poisson uniform
0 1.735699 1.011912 0.189654 -0.811404 1.153538
1 0.411391 1.400518 1.214113 0.111997 0.885165
2 0.034973 -0.082302 0.731327 0.573697 0.952258
3 -0.684202 1.536793 -1.343784 0.342847 0.952258
4 -0.421846 0.927802 1.116184 -0.811404 0.952258
댓글남기기