The Trouble with Normal¶

"To this may be added, that some of the Problems about Chance having a great appearance of Simplicity, the Mind is easily drawn into a belief, that their Solution may be attained by the meer Strength of natural good Sense; which generally proving otherwise, and the Mistakes occasioned thereby being not unfrequent, 'tis presumed that a Book of this Kind, which teaches to distinguish Truth from what seems so nearly to resemble it, will be looked upon as a help to good Reasoning."

— Abraham de Moivre, The Doctrine of Chances (1738)

Table of Contents¶

  1. It's Only Natural
  2. Central Limit Theorem
  3. Maximum Entropy
  4. A Closing Thought

It's Only Natural¶

It has seemingly become quite ordinary to criticize the normal distribution and its application to modelling heavy-tailed random variables. Perhaps the financial pundits have a point, but I nonetheless wanted to impart some amateur perspectives on what made the bell curve so prominently used in the first place. The obvious starting point is to ask whether its general form can be observed in the natural world.

I have plotted data showing the distribution of adult male heights, superimposed with the normal distribution whose first two moments match the dataset. The model fits the data well thanks to the distribution's underlying symmetry and predictable tails. I must confess, however, that this was the de facto example I knew would fit nicely: if I expand this exercise to other random variables, the presence of heavy tails and multi-modal distributions invalidates the normal approximation.

In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Plot histogram of data
df = pd.read_csv('SOCR-HeightWeight.csv')
df['Height(cm)'] = 2.54 * df['Height(Inches)']
plt.figure(figsize=(10, 6))
sns.histplot(df['Height(cm)'], kde=False, stat='density', bins=30, color='blue', alpha=0.6)

# Fit a normal distribution to the data
mean, std = np.mean(df['Height(cm)']), np.std(df['Height(cm)'])
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = stats.norm.pdf(x, mean, std)
plt.plot(x, p, 'k', linewidth=2)

# Add labels and title
plt.xlabel('Height (cm)')
plt.ylabel('Density')
plt.title('Histogram of Adult Heights with Normal Distribution Curve')

# Show plot
plt.show()
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Load datasets
penguins = sns.load_dataset('penguins')
iris = sns.load_dataset('iris')
diamonds = sns.load_dataset('diamonds')
exercise = sns.load_dataset('exercise')

# Create subplots
fig, axes = plt.subplots(2, 2, figsize=(10, 6))

# Plot 1: Penguin flipper length
flipper_length = penguins['flipper_length_mm'].dropna()
axes[0, 0].hist(flipper_length, bins=20, density=True, color='skyblue', alpha=0.6)
mean_flipper, std_flipper = np.mean(flipper_length), np.std(flipper_length)
x_flipper = np.linspace(flipper_length.min(), flipper_length.max(), 100)
p_flipper = stats.norm.pdf(x_flipper, mean_flipper, std_flipper)
axes[0, 0].plot(x_flipper, p_flipper, 'k', linewidth=2)
axes[0, 0].set_title('Penguin Flipper Length')
axes[0, 0].set_xlabel('Flipper Length (mm)')
axes[0, 0].set_ylabel('Density')

# Plot 2: Iris sepal length
sepal_length = iris['sepal_length']
axes[0, 1].hist(sepal_length, bins=20, density=True, color='lightgreen', alpha=0.6)
mean_sepal, std_sepal = np.mean(sepal_length), np.std(sepal_length)
x_sepal = np.linspace(sepal_length.min(), sepal_length.max(), 100)
p_sepal = stats.norm.pdf(x_sepal, mean_sepal, std_sepal)
axes[0, 1].plot(x_sepal, p_sepal, 'k', linewidth=2)
axes[0, 1].set_title('Iris Sepal Length')
axes[0, 1].set_xlabel('Sepal Length (cm)')
axes[0, 1].set_ylabel('Density')

# Plot 3: Diamonds carat
carat = diamonds['carat'].dropna()
axes[1, 0].hist(carat, bins=20, density=True, color='salmon', alpha=0.6)
mean_carat, std_carat = np.mean(carat), np.std(carat)
x_carat = np.linspace(carat.min(), carat.max(), 100)
p_carat = stats.norm.pdf(x_carat, mean_carat, std_carat)
axes[1, 0].plot(x_carat, p_carat, 'k', linewidth=2)
axes[1, 0].set_title('Diamonds Carat')
axes[1, 0].set_xlabel('Carat')
axes[1, 0].set_ylabel('Density')

# Plot 4: Exercise heart rate
heart_rate = exercise['pulse'].dropna()
axes[1, 1].hist(heart_rate, bins=20, density=True, color='orange', alpha=0.6)
mean_heart_rate, std_heart_rate = np.mean(heart_rate), np.std(heart_rate)
x_heart_rate = np.linspace(heart_rate.min(), heart_rate.max(), 100)
p_heart_rate = stats.norm.pdf(x_heart_rate, mean_heart_rate, std_heart_rate)
axes[1, 1].plot(x_heart_rate, p_heart_rate, 'k', linewidth=2)
axes[1, 1].set_title('Exercise Heart Rate')
axes[1, 1].set_xlabel('Heart Rate (bpm)')
axes[1, 1].set_ylabel('Density')

# Adjust layout
plt.tight_layout()
plt.show()
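The visual impression above can be quantified: skewness and excess kurtosis are both zero for a normal variable and clearly non-zero for a heavy-tailed one. As a minimal sketch using synthetic data (a lognormal sample stands in here for a skewed quantity such as diamond carats, and a normal sample for height-like data):

```python
import numpy as np
from scipy import stats

# Skewness and excess kurtosis are 0 for a normal distribution;
# heavy-tailed data depart from both noticeably.
rng = np.random.default_rng(0)
normal_like = rng.normal(170, 7, 100_000)      # height-like data
heavy_tailed = rng.lognormal(0, 0.6, 100_000)  # carat-like, right-skewed data

for name, data in [('normal-like', normal_like), ('heavy-tailed', heavy_tailed)]:
    print(f'{name:12s} skew = {stats.skew(data):+.2f}, '
          f'excess kurtosis = {stats.kurtosis(data):+.2f}')
```

The normal-like sample prints values near zero on both measures, while the lognormal sample is strongly positive on both, which is exactly why a fitted normal curve under-covers its right tail.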

Central Limit Theorem¶

De Moivre first discovered a method for approximating the sum of the terms of a binomial expansion, which gave form to the normal distribution curve. This is perhaps best recalled by visualising Pascal's triangle, whose rows provide the coefficients of a binomial expansion. As one rolls down the triangle, the rows converge in shape to the normal curve.
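This convergence can be checked numerically: a normalised row of Pascal's triangle is the Binomial(n, 1/2) probability mass function, and de Moivre's approximation replaces it with a normal density of matching mean and variance. A minimal sketch, assuming scipy is available:

```python
import numpy as np
from scipy import stats

# Compare a normalised row of Pascal's triangle, i.e. the Binomial(n, 1/2)
# PMF, against the normal density with the same mean (n/2) and variance (n/4).
for n in [10, 50, 200]:
    k = np.arange(n + 1)
    binom_pmf = stats.binom.pmf(k, n, 0.5)
    normal_pdf = stats.norm.pdf(k, loc=n / 2, scale=np.sqrt(n / 4))
    max_err = np.max(np.abs(binom_pmf - normal_pdf))
    print(f'n = {n:4d}: max |PMF - PDF| = {max_err:.2e}')
```

The maximum pointwise discrepancy shrinks as n grows, which is de Moivre's result in miniature.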

A similar conclusion is reached through the following exercise: assume a game where one must guess a randomly assigned real number between 0 and 1. Repeating the game a million times produces the probability distribution shown in the first histogram (n = 1): intuitively, no result is meaningfully more likely than any other, so any random guess will do. If we now iterate on the rules and ask to guess the sum of two randomly assigned numbers, the resulting triangular distribution shows the most likely outcome at the centre of all possibilities. With subsequent summations, the resulting distributions approach normality; in other words, the sum of many independent random variables converges to the bell curve regardless of the original distribution. This is a fabulous result in the context of measurement errors, for which we can now assume a functional form. Indeed, it sets the basis for the ordinary least squares approach when building confidence intervals around estimated coefficients.

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

rng = np.random.default_rng()
fig, axs = plt.subplots(2, 3, figsize=(10, 6))
axs = axs.flatten()
n_values = [1, 2, 3, 5, 10, 100]

for idx, n in enumerate(n_values):
    # One million sums of n independent Uniform(0, 1) variates
    sample = rng.uniform(0, 1, size=(1_000_000, n)).sum(axis=1)
    mean_value, std_value = sample.mean(), sample.std()
    axs[idx].hist(sample, bins=100, alpha=0.7, color='blue', density=True)
    # Overlay the normal density with matching mean and standard deviation
    x = np.linspace(sample.min(), sample.max(), 1000)
    axs[idx].plot(x, stats.norm.pdf(x, mean_value, std_value), color='red', linewidth=2)
    axs[idx].set_title(f'n = {n}')
    axs[idx].set_xlabel('Sum of Uniform(0,1) Samples')
    axs[idx].set_ylabel('Density')
plt.tight_layout()
plt.show()
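The remark about ordinary least squares can be sketched with a small simulation: with (approximately) normal errors, the estimated slope is itself approximately normal, and `scipy.stats.linregress` supplies the standard error from which a confidence interval follows. The data below are synthetic, with a true slope of 2.0:

```python
import numpy as np
from scipy import stats

# Synthetic regression: y = 1 + 2x + normal noise. The slope estimate is
# approximately normal, so a 95% interval is slope +/- 1.96 standard errors.
rng = np.random.default_rng(0)
x = rng.uniform(0, 10, 500)
y = 1.0 + 2.0 * x + rng.normal(0, 1, 500)

res = stats.linregress(x, y)
ci = (res.slope - 1.96 * res.stderr, res.slope + 1.96 * res.stderr)
print(f'slope = {res.slope:.3f}, 95% CI = ({ci[0]:.3f}, {ci[1]:.3f})')
```

With 500 points the interval is tight around the true slope; the normality of the estimator, not of y itself, is what licenses the 1.96 multiplier.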

Maximum Entropy¶

The defining equation for the normal distribution takes the form of the exponential of a negative parabola ($e^{-x^2}$). One can better understand the characteristics of this distribution by thinking in terms of its governing equation. For example, what would the resultant distribution be if we modified the applied exponent? In the below plots, I have shown three alternative equations which could in theory be contenders for pre-eminence. Simply taking the negative absolute value, $e^{-|x|}$, results in a rather aggressive form as both sides of the curve collide in a cusp at the centre. This lack of smoothness would make the modal point disproportionately more probable than virtually identical neighbouring variates. On the other hand, raising the exponent to higher powers would cause very soft-tailed distributions where extreme outcomes would be nearly impossible. From this, it is safe to assume that the quadratic form is not insensible, as it yields the least biased (maximum-entropy) distribution for a given mean and standard deviation.

In [9]:
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-3, 3, 400)
y_original = np.exp(-x**2)
y_abs = np.exp(-np.abs(x))
y_cubed = np.exp(-np.abs(x)**3)
y_fourth = np.exp(-x**4)

fig, axs = plt.subplots(2, 2, figsize=(10, 6))
axs[0, 0].plot(x, y_original, label='exp(-x^2)')
axs[0, 0].set_title('Original Normal Distribution')
axs[0, 0].legend()
axs[0, 1].plot(x, y_abs, label='exp(-|x|)', color='orange')
axs[0, 1].set_title('exp(-|x|)')
axs[0, 1].legend()
axs[1, 0].plot(x, y_cubed, label='exp(-|x|^3)', color='green')
axs[1, 0].set_title('exp(-|x|^3)')
axs[1, 0].legend()
axs[1, 1].plot(x, y_fourth, label='exp(-x^4)', color='red')
axs[1, 1].set_title('exp(-x^4)')
axs[1, 1].legend()
plt.tight_layout()
plt.show()
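The "least biased" claim can also be checked numerically: among distributions constrained to the same variance, the normal attains the largest differential entropy. A minimal sketch comparing three variance-1 candidates via scipy's frozen-distribution `entropy()` method:

```python
import numpy as np
from scipy import stats

# Three distributions, each parameterised to have variance exactly 1.
candidates = {
    'normal':  stats.norm(scale=1.0),
    'laplace': stats.laplace(scale=1 / np.sqrt(2)),               # var = 2b^2 = 1
    'uniform': stats.uniform(loc=-np.sqrt(3), scale=2 * np.sqrt(3)),  # var = w^2/12 = 1
}
for name, dist in candidates.items():
    print(f'{name:8s} variance = {dist.var():.3f}, entropy = {dist.entropy():.4f}')
```

The normal comes out on top (about 1.419 nats, versus roughly 1.347 for the Laplace and 1.243 for the uniform), which is the maximum-entropy property in action.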

A Closing Thought¶

I would surmise that when de Moivre introduced his method of approximations, he did not imagine how widespread the bell curve would become in the modern world. His intention was to create a tool to complement common sense, not replace it. It is incumbent on users to be well versed in such tools, just as a machinist must know the limits of each instrument at her disposal. To that end, the title of this write-up is misleading: there is nothing wrong with normal distributions, only false applications. We must not shift the onus away from personal accountability and sensibility, lest we should continue to fail to distinguish Truth from what seems so nearly to resemble it.
