import numpy as np
import statsmodels.api as sm
import scipy.stats as stats
import matplotlib.pyplot as plt

# set the random seed:
np.random.seed(123456)

# set sample size:
n_5 = 5
n_10 = 10
n_20 = 20
n_30 = 30

# initialize ybar to an array of length r=10000 to later store results:
r = 10000

ybar_5 = np.empty(r)
ybar_10 = np.empty(r)
ybar_20 = np.empty(r)
ybar_30 = np.empty(r)

# repeat r times:
for j in range(r):
    # draw a sample and store the sample mean in pos. j=0,1,... of ybar:
    sample_5 = stats.norm.rvs(10, 2, size=n_5)
    ybar_5[j] = np.mean(sample_5)

for j in range(r):
    # draw a sample and store the sample mean in pos. j=0,1,... of ybar:
    sample_10 = stats.norm.rvs(10, 2, size=n_10)
    ybar_10[j] = np.mean(sample_10)

for j in range(r):
    # draw a sample and store the sample mean in pos. j=0,1,... of ybar:
    sample_20 = stats.norm.rvs(10, 2, size=n_20)
    ybar_20[j] = np.mean(sample_20)

for j in range(r):
    # draw a sample and store the sample mean in pos. j=0,1,... of ybar:
    sample_30 = stats.norm.rvs(10, 2, size=n_30)
    ybar_30[j] = np.mean(sample_30)

mean_5 = np.mean(ybar_5)
variance_5 = np.var(ybar_5)
print("Mean of sampling distribution w/ n=5 is :", mean_5)
print("Variance of sampling distribution w/ n=5 is :", variance_5)

mean_10 = np.mean(ybar_10)
variance_10 = np.var(ybar_10)
print("Mean of sampling distribution w/ n=10 is :", mean_10)
print("Variance of sampling distribution w/ n=10 is :", variance_10)

mean_20 = np.mean(ybar_20)
variance_20 = np.var(ybar_20)
print("Mean of sampling distribution w/ n=20 is :", mean_20)
print("Variance of sampling distribution w/ n=20 is :", variance_20)

mean_30 = np.mean(ybar_30)
variance_30 = np.var(ybar_30)
print("Mean of sampling distribution w/ n=30 is :", mean_30)
print("Variance of sampling distribution w/ n=30 is :", variance_30)

# simulated density:
kde_5 = sm.nonparametric.KDEUnivariate(ybar_5)
kde_5.fit()

kde_10 = sm.nonparametric.KDEUnivariate(ybar_10)
kde_10.fit()

kde_20 = sm.nonparametric.KDEUnivariate(ybar_20)
kde_20.fit()

kde_30 = sm.nonparametric.KDEUnivariate(ybar_30)
kde_30.fit()

# normal density:
x_range_5 = np.linspace(9, 11)
y_5 = stats.norm.pdf(x_range_5, 10, np.sqrt(4/n_5))

x_range_10 = np.linspace(9, 11)
y_10 = stats.norm.pdf(x_range_10, 10, np.sqrt(4/n_10))

x_range_20 = np.linspace(9, 11)
y_20 = stats.norm.pdf(x_range_20, 10, np.sqrt(4/n_20))

x_range_30 = np.linspace(9, 11)
y_30 = stats.norm.pdf(x_range_30, 10, np.sqrt(4/n_30))

# create graph:
fig = plt.figure(figsize=(7,7)) 
    
plt.subplot(221)
_ = plt.plot(kde_5.support, kde_5.density, color='black', label='ybar_5')
_ = plt.plot(x_range_5, y_5, linestyle='--', color='black', label='normal distribution')
_ = plt.ylim(0, 1.2)
_ = plt.ylabel('density')
_ = plt.xlabel('ybar')
_ = plt.legend()

plt.subplot(222)
_ = plt.plot(kde_10.support, kde_10.density, color='black', label='ybar_10')
_ = plt.plot(x_range_10, y_10, linestyle='--', color='black', label='normal distribution')
_ = plt.ylim(0, 1.2)
_ = plt.ylabel('density')
_ = plt.xlabel('ybar')
_ = plt.legend()

plt.subplot(223)
_ = plt.plot(kde_20.support, kde_20.density, color='black', label='ybar_20')
_ = plt.plot(x_range_20, y_20, linestyle='--', color='black', label='normal distribution')
_ = plt.ylim(0, 1.2)
_ = plt.ylabel('density')
_ = plt.xlabel('ybar')
_ = plt.legend()

plt.subplot(224)
_ = plt.plot(kde_30.support, kde_30.density, color='black', label='ybar_30')
_ = plt.plot(x_range_30, y_30, linestyle='--', color='black', label='normal distribution')
_ = plt.ylim(0, 1.2)
_ = plt.ylabel('density')
_ = plt.xlabel('ybar')
_ = plt.legend()

fig.subplots_adjust(hspace=.3, wspace=0.4) # 하위그림의 위아래 간격을 조정

#plt.savefig('C:/BOOK/PyBasics/PyStat/code/b1-ch5-4.png')