import numpy as np
import scipy.stats as stats
import seaborn as sns
import matplotlib.pyplot as plt
#import statsmodels.api as sm


def _sample_variances(n, r, loc=10, scale=2):
    """Simulate the sampling distribution of the sample variance.

    Draws ``r`` independent samples of size ``n`` from a normal distribution
    with mean ``loc`` and standard deviation ``scale`` and returns the
    unbiased (``ddof=1``) variance of each sample.

    Args:
        n: Number of observations in each sample.
        r: Number of samples (replications) to draw.
        loc: Mean of the normal distribution sampled from.
        scale: Standard deviation of the normal distribution sampled from.

    Returns:
        A 1-D array of length ``r`` holding one sample variance per
        replication. Every position is filled.
    """
    # One (r, n) draw replaces the Python-level loop; each row is one sample.
    draws = stats.norm.rvs(loc, scale, size=(r, n))
    return np.var(draws, axis=1, ddof=1)


# set the random seed:
np.random.seed(123456)

# set sample sizes:
n1 = 10
n2 = 20
n3 = 30
n4 = 100

# number of replications per sample size:
r = 10000

# One array of r sample variances per sample size.  The previous loops ran
# over range(1, r) and therefore left position 0 of each np.empty() array
# uninitialised, which contaminated the summaries and histograms below.
yvar_10 = _sample_variances(n1, r)
yvar_20 = _sample_variances(n2, r)
yvar_30 = _sample_variances(n3, r)
yvar_100 = _sample_variances(n4, r)

# Mean and variance of each simulated sampling distribution.
mean_10 = np.mean(yvar_10)
variance_10 = np.var(yvar_10, ddof=1)
print("Mean of sample variance distribution w/ n=10 is :", mean_10)
print("Variance of sample variance distribution w/ n=10 is :", variance_10)

mean_20 = np.mean(yvar_20)
variance_20 = np.var(yvar_20, ddof=1)
print("Mean of sample variance distribution w/ n=20 is :", mean_20)
print("Variance of sample variance distribution w/ n=20 is :", variance_20)

mean_30 = np.mean(yvar_30)
variance_30 = np.var(yvar_30, ddof=1)
print("Mean of sample variance distribution w/ n=30 is :", mean_30)
print("Variance of sample variance distribution w/ n=30 is :", variance_30)

mean_100 = np.mean(yvar_100)
variance_100 = np.var(yvar_100, ddof=1)
print("Mean of sample variance distribution w/ n=100 is :", mean_100)
print("Variance of sample variance distribution w/ n=100 is :", variance_100)

# One histogram (with KDE overlay) per sample size, all on the same x-range
# so the narrowing of the distribution as n grows is directly comparable.
for n, yvar in ((n1, yvar_10), (n2, yvar_20), (n3, yvar_30), (n4, yvar_100)):
    fig, ax = plt.subplots()
    sns.histplot(data=yvar, x=None, kde=True, ax=ax).set(
        title=f'Histogram of Sampling_dist_{n}')
    ax.set_xlim(0, 15)
    plt.show()

# Draw two histograms side by side on one page: n=10 and n=20.
# Each figure keeps its own handle and is saved through that handle;
# plt.savefig() would save whichever figure happens to be current.
fig_10_20, axs = plt.subplots(ncols=2, figsize=(14, 7))
sns.histplot(data=yvar_10, x=None, ax=axs[0]).set(
    title='Histogram of Sample_dist_10 & 20')
sns.histplot(data=yvar_20, x=None, ax=axs[1])
fig_10_20.subplots_adjust(wspace=0.5)  # widen the horizontal gap between the panels
fig_10_20.savefig('C:/BOOK/PyBasics/PyStat/code/sample_dist_9-1.png')

# Same layout for n=30 and n=100.
fig_30_100, ays = plt.subplots(ncols=2, figsize=(14, 7))
sns.histplot(data=yvar_30, x=None, ax=ays[0]).set(
    title='Histogram of Sample_dist_30 & 100')
sns.histplot(data=yvar_100, x=None, ax=ays[1])
fig_30_100.subplots_adjust(wspace=0.5)  # widen the horizontal gap between the panels
fig_30_100.savefig('C:/BOOK/PyBasics/PyStat/code/sample_dist_9-2.png')