import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Load the two time-series CSVs. The last row of each file is dropped
# (presumably a non-data footer line in the downloaded files -- confirm),
# and the two columns are renamed to the short labels 'm' and 'q';
# 'q' is the column every later step reads.
df_airline = pd.read_csv('/Users/frhyme/Downloads/international-airline-passengers.csv')
df_airline = df_airline.iloc[:-1]
df_airline.columns = ['m', 'q']

df_female = pd.read_csv('/Users/frhyme/Downloads/daily-total-female-births-in-cal.csv')
df_female = df_female.iloc[:-1]
df_female.columns = ['m', 'q']
## 1) Raw line plots of both series, side by side.
f, axes = plt.subplots(1, 2, figsize=(14, 4))
for ax, (frame, title) in zip(
    axes,
    [(df_airline, "air line passenger"), (df_female, "femail birth")],
):
    # One panel per dataset: plot the 'q' column against its row index.
    ax.plot(frame['q'])
    ax.set_title(title)
# Save before show() so the written figure is the fully drawn one.
plt.savefig("../../assets/images/markdown_img/180612_1811_just_plotting.svg")
plt.show()
## 2) Histogram (with density estimate) of each series, to eyeball
##    whether the values look roughly Gaussian.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases
# (histplot/displot are the replacements); kept here to preserve the
# exact figure this script produces.
f, axes = plt.subplots(1, 2, figsize=(14, 4))
for ax, (frame, title) in zip(
    axes,
    [(df_airline, "air line passenger"), (df_female, "femail birth")],
):
    sns.distplot(frame['q'], ax=ax)
    ax.set_title(title)
plt.savefig("../../assets/images/markdown_img/180612_1815_hist_comp.svg")
plt.show()
## 3) Mean / standard-deviation comparison between the two halves of each
##    series. Each series is split at its midpoint (len // 2) and the
##    summary statistics of the left and right halves are printed.
for header, frame in (
    ("---airline passenger---", df_airline),
    ("---female birth---", df_female),
):
    midpoint = len(frame) // 2
    left_half = frame['q'][:midpoint]
    right_half = frame['q'][midpoint:]

    print(header)
    print("mean of left group, right group: {}, {}".format(
        left_half.mean(), right_half.mean()))
    print("std of left group, right group: {}, {}".format(
        left_half.std(), right_half.std()))
---airline passenger---
mean of left group, right group: 182.90277777777777, 377.69444444444446
std of left group, right group: 47.7042413215282, 86.4392058427729
---female birth---
mean of left group, right group: 39.76373626373626, 44.185792349726775
std of left group, right group: 7.034579412457393, 6.998305548491794
ADF Statistic: 0.815369
p-value: 0.991880
Critical Values:
1%: -3.482
5%: -2.884
10%: -2.579
--------
ADF Statistic: -4.808291
p-value: 0.000052
Critical Values:
1%: -3.449
5%: -2.870
10%: -2.571
--------
댓글남기기