Python練習(不偏分散の続き)
- Python練習(分散と不偏分散)の続き。
- `population` として与えられた数のリストから、大きさ `sample_size` の標本を `samples` 個だけ抽出し、その「分散の平均」を求める。
- 「母分散」と「分散の平均」を比べると、\(n-1\)で割ったほうが差が小さくなる(ことを期待)。
import random
import numpy as np
def print_vars(population, ddof=0, samples=100, sample_size=100):
    """Print the mean of sample variances next to the population's variance.

    Draws ``samples`` random samples of ``sample_size`` elements each from
    ``population`` (without replacement), computes ``np.var(sample, ddof=ddof)``
    for every sample, and prints:

    * (a) the mean of those sample variances,
    * (b) ``np.var(population, ddof=ddof)`` -- the variance of the whole
      population, computed with the same ``ddof``,
    * the difference (a) - (b).

    Args:
        population: Sequence of numbers to sample from.
        ddof: Delta degrees of freedom passed to ``np.var`` (0 divides by n,
            1 divides by n - 1).
        samples: Number of samples to draw.
        sample_size: Number of elements in each sample.

    Returns:
        None. The report is written to standard output.

    Raises:
        ValueError: Raised by ``random.Random.sample`` when ``sample_size`` is
            larger than ``len(population)``.
    """
    # Use a private generator with a fixed seed instead of reseeding the
    # module-level one: every call still draws the same sequence of samples
    # (so runs with different ddof are comparable), but the caller's global
    # random state is left untouched.
    rng = random.Random(314)

    print('ddof =', ddof)
    print('samples =', samples)
    print('sample_size =', sample_size)
    print('population_size = ', len(population))

    sample_variances = [
        np.var(rng.sample(population, sample_size), ddof=ddof)
        for _ in range(samples)
    ]
    mean_of_variances = np.mean(sample_variances)
    population_variance = np.var(population, ddof=ddof)

    print('(a) mean of var = ', mean_of_variances)
    print('(b) var = ', population_variance)
    print('(a) - (b) = ', mean_of_variances - population_variance)
    print()
# Build a reproducible population: 10000 values, each a np.random.rand draw
# scaled by 100.
np.random.seed(314159)
na = 100 * np.random.rand(10000)
print(na)
print()

# print_vars samples with the random module, so hand it a plain list.
s = list(na)

# Report once dividing by n (ddof=0) and once dividing by n - 1 (ddof=1).
for ddof in (0, 1):
    print_vars(s, ddof=ddof)
[ 81.7923308 55.10462969 41.97753548 ..., 97.3474609 80.14223298
50.86926748]
ddof = 0
samples = 100
sample_size = 100
population_size = 10000
(a) mean of var = 835.758169126
(b) var = 847.233737255
(a) - (b) = -11.4755681283
ddof = 1
samples = 100
sample_size = 100
population_size = 10000
(a) mean of var = 844.200170835
(b) var = 847.318469101
(a) - (b) = -3.11829826685
参照