import pandas as pd
import numpy as np
# Number of rows in the demo frame.
N = 100

# Build the demo data:
#   col_a - N uniform floats in [0, 1)
#   col_b - randint is called WITHOUT a size, so a single integer in [4, 100)
#           is drawn and pandas broadcasts that one value to every row
#   col_c - N integers in [0, 10); used further down as sampling weights
df = pd.DataFrame(
    {
        'col_a': np.random.random(size=N),
        'col_b': np.random.randint(4, 100),
        'col_c': np.random.randint(0, 10, size=N),
    }
)
print(df.head(n=5))
print("==")

# Sample a fixed number of rows (n).
print(df.sample(n=5, random_state=1))

# Sample a fraction of the rows (frac); 5% of N rows here.
print("==")
print(df.sample(frac=0.05, random_state=1))
print("==")

# Weighted sampling: rows with a larger weight are more likely to be drawn.
# Passing a column name uses that column's values as the weights.
print(df.sample(frac=0.05, weights='col_c', random_state=1))
print("==")
col_a col_b col_c
0 0.277943 21 8
1 0.318188 21 6
2 0.383598 21 6
3 0.383731 21 6
4 0.620893 21 8
==
col_a col_b col_c
80 0.312053 21 3
84 0.605246 21 4
33 0.573956 21 3
81 0.971475 21 2
93 0.485356 21 3
==
col_a col_b col_c
80 0.312053 21 3
84 0.605246 21 4
33 0.573956 21 3
81 0.971475 21 2
93 0.485356 21 3
==
col_a col_b col_c
36 0.677911 21 5
65 0.285437 21 6
0 0.277943 21 8
25 0.280924 21 3
10 0.654636 21 2
==
댓글남기기