# Source code for simoa.mser
# -*- coding: utf-8 -*-
from __future__ import division

import logging
import math
import sys
from collections import namedtuple

import numpy as np
import scipy.stats
logger = logging.getLogger(__name__)

# Require Python 3.  The original probe ``if not (3 / 2 == 1.5)`` could
# never fire: ``from __future__ import division`` (imported above) makes
# ``3 / 2 == 1.5`` true on Python 2 as well, so the interpreter version
# must be checked explicitly.
if sys.version_info[0] < 3:
    raise RuntimeError('Python 3 is required')

# Coverage probability of a +/- one-sigma interval of the standard normal
# distribution (~0.6827); used as the default confidence level below.
ONE_SIGMA = scipy.stats.norm().cdf(1) - scipy.stats.norm().cdf(-1)

# Keys of the intermediate quantities stored in the ``env`` dict returned
# by :func:`compute_mser5_interval`.
MSER5_TRUNCATED_MEAN_KEY = r'\bar{Z}(k,d^*)'
# NOTE(review): this key says ``d`` where the others say ``d^*`` -- kept
# byte-identical because consumers may rely on the exact string.
MSER5_TRUNCATED_VARIANCE_KEY = r'S^2_Z(k,d)'
MSER5_NEW_BATCH_MEANS_KEY = r'\bar{Z}_l(m^*,d^*)'
MSER5_NEW_BATCH_MEANS_VARIANCE_KEY = r'S^2_{\bar{Z}}(k^*, m^*, d^*)'
class MSERException(Exception):
    """Base class for all errors raised by the MSER routines.

    Derives from :class:`Exception` rather than :class:`BaseException`:
    ``BaseException`` is reserved for interpreter-level signals such as
    ``KeyboardInterrupt``/``SystemExit``, and deriving from it meant that
    generic ``except Exception`` handlers silently missed these errors.
    """
class MSERInsufficientDataError(MSERException, RuntimeError):
    """Raised when there is too little data for a reliable MSER-5 estimate,
    i.e. when the optimal truncation point falls in the second half of the
    sequence of batch means (see :func:`compute_mser5_interval`).
    """
    pass
# Result record of :func:`compute_mser5_interval`:
#   mean -- steady-state point estimate,
#   ci   -- lower/upper confidence-interval bounds,
#   env  -- dict of all intermediate quantities.
MSERReturnValue = namedtuple('MSERReturnValue', 'mean ci env')
def compute_mser5_interval(
    data, confidence_level=ONE_SIGMA, continue_insufficient_data=False
):
    """
    Estimate the steady-state mean and the interval by the MSER-5 method.

    Parameters
    ----------
    data : numpy.ndarray
        One-dimensional array of raw, time-ordered simulation output.
    confidence_level : float, optional
        Coverage probability ``1 - alpha`` of the confidence interval;
        defaults to the one-sigma coverage of the normal distribution.
    continue_insufficient_data : bool, optional
        If true, only log an error (instead of raising
        :class:`MSERInsufficientDataError`) when the truncation point
        indicates insufficient data.

    Returns
    -------
    MSERReturnValue
        Named tuple ``(mean, ci, env)``: the point estimate, the lower
        and upper interval bounds, and the dict of intermediate values.

    Raises
    ------
    MSERInsufficientDataError
        If fewer than six batches are available, or if the optimal
        truncation point lies in the second half of the batch means and
        ``continue_insufficient_data`` is false.

    Notes
    -----
    "MSER-5 (Franklin and White 2008) is a modification of the Marginal
    Confidence Rule (MCR) or the Marginal Standard Error Rule (MSER) proposed
    by White (1997)." [1]_

    "MSER-5 is a sequential procedure that uses the method of nonoverlapping
    batch means (NBM) to deliver not only a data-truncation point (i.e., end of
    the “warm-up period”) but also point and CI estimators [...] computed from
    the truncated (“warmed-up”) sequence of batch means." [1]_

    References
    ----------
    .. [1] Mokashi, A. C. et al. Performance comparison of MSER-5 and N-Skart
       on the simulation start-up problem. In *Proceedings of the Winter
       Simulation Conference*, 971-982 (2010).
       http://www.ise.ncsu.edu/jwilson/files/mokashi10wsc.pdf
    .. [2] Franklin, W. W. & White, K. P. Stationarity tests and MSER-5:
       Exploring the intuition behind mean-squared-error-reduction in
       detecting and correcting initialization bias. In Winter Simulation
       Conference, 541-546 (2008).
       http://www.informs-sim.org/wsc08papers/064.pdf
    .. [3] White, K. P. An effective truncation heuristic for bias reduction in
       simulation output. Simulation 69, 323-334 (1997).
       http://dx.doi.org/10.1177/003754979706900601
    """
    env = dict()
    env[r'1 - \alpha'] = confidence_level
    env['X_i'] = data
    # number of data points
    env['N'] = env['X_i'].size
    # batch size (the "5" of MSER-5)
    env['m'] = 5
    # number of non-overlapping batches
    env['k'] = int(math.floor(env['N'] / env['m']))
    if env['k'] <= 5:
        # The objective below is evaluated for k - 5 candidate truncation
        # points; with k <= 5 there is nothing to minimize and argmin would
        # die with an opaque numpy ValueError, so fail early with the
        # domain-specific exception.
        raise MSERInsufficientDataError(
            'Insufficient data: {} batches, need at least 6'.format(env['k'])
        )
    # compute batch means: fold the first m * k points into a (k, m)
    # matrix and average every row
    env['Z_j'] = (
        env['X_i']
        [:env['m'] * env['k']]
        .reshape(env['k'], env['m'])
        .mean(axis=1)
    )
    env['objective'] = np.empty(env['k'] - 5)
    for d in range(env['k'] - 5):
        # MSER objective: population variance of the batch means surviving
        # truncation at d, divided by the number of surviving batches
        env['objective'][d] = (
            env['Z_j'][d:env['k']].var(ddof=0) / (env['k'] - d)
        )
    # truncation point: the candidate d that minimizes the objective
    env['d^*'] = env['objective'].argmin()
    if env['d^*'] >= math.floor(env['k'] / 2):
        # insufficient data: a trustworthy truncation point should lie in
        # the first half of the batch means
        error_msg = (
            'Insufficient data: truncation point at {}, should be below {}'
        ).format(env['d^*'], math.floor(env['k'] / 2))
        if continue_insufficient_data:
            logger.error(error_msg)
        else:
            raise MSERInsufficientDataError(error_msg)
    # compute truncated mean
    env[MSER5_TRUNCATED_MEAN_KEY] = env['Z_j'][env['d^*']:].mean()
    # compute truncated sample variance
    # NOTE(review): ddof=0 (population variance) kept from the original; a
    # textbook CI would typically use ddof=1 -- confirm intent.
    env[MSER5_TRUNCATED_VARIANCE_KEY] = env['Z_j'][env['d^*']:].var(ddof=0)
    # --- disabled draft: CI from 20 "new" batch means (kept for reference) ---
    # env['k^*'] = 20
    # env['m^*'] = int(math.floor((env['k'] - env['d^*']) / env['k^*']))
    # # To compute a CI estimator, MSER-5 organizes the truncated sequence
    # # into 20 “new” batch means with batch size as suggested by White and
    # # Robinson (2009)
    # env[MSER5_NEW_BATCH_MEANS_KEY] = (
    #     env['Z_j'][
    #         env['d^*']:
    #         env['d^*'] + env['k^*'] * env['m^*']
    #     ]
    #     .reshape(env['k^*'], env['m^*'])
    #     .mean(axis=1)
    # )
    # # compute sample variance of the new batch means
    # env[MSER5_NEW_BATCH_MEANS_VARIANCE_KEY] = (
    #     env[MSER5_NEW_BATCH_MEANS_KEY].var(ddof=1)
    # )
    # -------------------------------------------------------------------------
    critical_values = np.asarray(
        scipy.stats.norm.interval(confidence_level)
    )
    # CI bounds: mean +/- z * standard error, with standard error
    # sqrt(S^2 / (k - d^*)).  The previous code multiplied the critical
    # values by the *variance* over sqrt(k - d^*), which has the wrong
    # units (variance instead of standard deviation).
    env['CI'] = (
        env[MSER5_TRUNCATED_MEAN_KEY] +
        critical_values *
        math.sqrt(
            env[MSER5_TRUNCATED_VARIANCE_KEY] / (env['k'] - env['d^*'])
        )
    )
    return MSERReturnValue(
        mean=env[MSER5_TRUNCATED_MEAN_KEY],
        ci=env['CI'],
        env=env,
    )