%matplotlib inline

# The Central Limit Theorem

from scipy.stats import uniform
from matplotlib.pyplot import subplots, rc
from numpy.random import default_rng

# Figure-wide styling: larger text on an opaque white canvas.
rc('font', size=14)
rc('figure', facecolor='white')

# Population to sample from: scipy's uniform(loc=0, scale=100).
population = uniform(0, 100)

# Seeded generator so the simulation is reproducible.
rng = default_rng(0)

# Simulate 10_000 samples, each of size 100 (one sample per row),
# then reduce every row to its mean.
samples = population.rvs(size=(10_000, 100), random_state=rng)
sample_means = samples.mean(axis=1)

fig, (ax0, ax1) = subplots(
    nrows=2, ncols=1, figsize=(12, 8), gridspec_kw=dict(hspace=.4),
)

# Top panel: every individual observation pooled together.
# Bottom panel: the 10_000 per-sample means.
for panel, data, heading in (
    (ax0, samples.flat, 'Histogram of all Samples Together'),
    (ax1, sample_means, 'Histogram of all Sample Means'),
):
    panel.hist(data, bins='auto', edgecolor='white')
    panel.set_title(heading)
../_images/06eeaf55c14922129ba99eba8b165af0f5656c69028ddd42a3e157165a68409d.png
from matplotlib.pyplot import subplots, rc, setp, rcdefaults
from matplotlib.lines import Line2D
from numpy import linspace
from numpy.random import default_rng
from scipy.stats import norm

# Reset rc settings changed earlier, then apply this figure's styling:
# white canvas and no top/right/left spines.
rcdefaults()
rc('figure', facecolor='white')
rc('axes.spines', top=False, right=False, left=False)

# Seeded generator and the normal population (mean 70, std. dev. 3)
# that every simulated sample is drawn from.
rng = default_rng(0)
population = norm(loc=70, scale=3)

# Eight stacked panels sharing one x-axis with no vertical gap; the first
# panel is 1.5x the height of the other seven.
fig, axes = subplots(
    nrows=8, ncols=1, figsize=(6, 6),
    sharex=True, sharey=False,
    gridspec_kw=dict(hspace=0, height_ratios=[1.5, *([1] * 7)]),
)

# First panel: population density between its 0.1% and 99.9% quantiles,
# with a dashed line at the population mean.
top_ax = axes.flat[0]
lower, upper = population.ppf([.001, .999])
xs = linspace(lower, upper)
top_ax.fill_between(xs, population.pdf(xs), alpha=.5)
top_ax.axvline(population.mean(), 0, .95, ls='dashed', color='k')

# One panel per simulated sample (every panel after the first): draw 10
# observations, then mark the sample mean, a +/- one standard deviation bar
# around it, the raw observations, and the population mean for reference.
for i, ax in enumerate(axes.flat[1:], start=1):
    sample = population.rvs(size=10, random_state=rng)
    # The bar is labelled S_x, i.e. the *sample* standard deviation, so apply
    # Bessel's correction (ddof=1). NumPy's default (ddof=0) is the
    # population formula, which understates the spread for n=10.
    smean, sdev = sample.mean(), sample.std(ddof=1)

    # Short vertical tick at the sample mean; y-extent is in axes fractions.
    avg = ax.axvline(smean, .3, .7, color='tab:orange', label=r'$\bar{x}$ (mean)')
    ax.hlines(
        .5, xmin=smean - sdev, xmax=smean + sdev,
        color='tab:orange', alpha=.4, label=r'$S_x$ (std. dev.)',
    )
    # x in data coordinates, y in axes fractions, so the points sit at
    # mid-height of the panel.
    ax.scatter(
        sample, y=[.5] * sample.size, s=8,
        transform=ax.get_xaxis_transform(), label=r'Observations',
    )

    # Full-height dashed line at the population mean.
    ax.axvline(population.mean(), 0, 1, ls='dashed', color='k')

    # Row label placed just left of the panel, in axes coordinates.
    ax.text(-.05, .5, f'Simulation {i}', transform=ax.transAxes, ha='right')
    ax.margins(y=0)

# Title goes on the first (population) panel.
axes.flat[0].set_title('Visualizing Sampling Variability')

# Hide the y-axis on every panel.
for ax in axes.flat:
    ax.yaxis.set_visible(False)

class VerticalLineHandler:
    """Legend handler that draws its entry as a vertical line.

    Intended for use in a legend's ``handler_map`` so that a vertical marker
    on the plot is represented by a vertical (rather than horizontal) line
    in the legend.
    """

    def legend_artist(self, legend, orig_handle, fontsize, handlebox):
        """Draw a vertical line centred in ``handlebox`` and return it.

        Parameters
        ----------
        legend
            The legend being built (unused).
        orig_handle
            The plotted artist this entry represents; only its
            ``get_color()`` is read.
        fontsize
            Legend font size (unused).
        handlebox
            Drawing area reserved for the legend key; the line spans its
            full height at its horizontal midpoint.

        Returns
        -------
        Line2D
            The line added to ``handlebox``. Returning it (instead of
            ``None``) lets the legend record a real artist as this entry's
            handle.
        """
        halfway = handlebox.xdescent + (handlebox.width / 2)
        line = Line2D(
            [halfway, halfway],
            [handlebox.ydescent, handlebox.ydescent + handlebox.height],
            color=orig_handle.get_color(),
        )

        handlebox.add_artist(line)
        return line


# Build the legend from the last panel's artists. The axes is named
# explicitly instead of relying on a loop variable left over from an
# earlier `for` statement.
legend_ax = axes.flat[-1]

# Anchor the legend's centre-left point at (0.95, 0.5) in figure
# coordinates, and draw the mean marker `avg` with a vertical-line key.
legend_ax.legend(
    handler_map={avg: VerticalLineHandler()},
    bbox_transform=fig.transFigure,
    loc='center left',
    bbox_to_anchor=(.95, .5),
    title='Sample',
    title_fontproperties={'size': 14},
    scatterpoints=4,
)
<matplotlib.legend.Legend at 0x7f799ef8cd90>
../_images/20c8f51182c200ba88a38da6f5a5c96a253548f4af999e6823a2d9c3c0491dfa.png