Source code for simoa.mser

# -*- coding: utf-8 -*-

from __future__ import division

import logging
import math
from collections import namedtuple

import numpy as np
import scipy.stats

logger = logging.getLogger(__name__)


# check python 3
if not (3 / 2 == 1.5):
    raise RuntimeError


ONE_SIGMA = scipy.stats.norm().cdf(1) - scipy.stats.norm().cdf(-1)


MSER5_TRUNCATED_MEAN_KEY = r'\bar{Z}(k,d^*)'
MSER5_TRUNCATED_VARIANCE_KEY = r'S^2_Z(k,d)'
MSER5_NEW_BATCH_MEANS_KEY = r'\bar{Z}_l(m^*,d^*)'
MSER5_NEW_BATCH_MEANS_VARIANCE_KEY = r'S^2_{\bar{Z}}(k^*, m^*, d^*)'


[docs]class MSERException(BaseException):
    pass


[docs]class MSERInsufficientDataError(MSERException, RuntimeError):
    pass


MSERReturnValue = namedtuple(
    'MSERReturnValue',
    ['mean', 'ci', 'env']
)


[docs]def compute_mser5_interval(
    data, confidence_level=ONE_SIGMA, continue_insufficient_data=False
):
    """
    Estimate the steady-state mean and the interval by the MSER-5 method

    Notes
    -----
    "MSER-5 (Franklin and White 2008) is a modification of the Marginal
    Confidence Rule (MCR) or the Marginal Standard Error Rule (MSER) proposed
    by White (1997)." [1]_
    "MSER-5 is a sequential procedure that uses the method of nonoverlapping
    batch means (NBM) to deliver not only a data-truncation point (i.e., end of
    the “warm-up period”) but also point and CI estimators [...] computed from
    the truncated (“warmed-up”) sequence of batch means." [1]_

    References
    ----------
    .. [1] Mokashi, A. C. et al. Performance comparison of MSER-5 and N-Skart
       on the simulation start-up problem. In *Proceedings of the Winter
       Simulation Conference*, 971-982 (2010).
       http://www.ise.ncsu.edu/jwilson/files/mokashi10wsc.pdf
    .. [2] Franklin, W. W. & White, K. P. Stationarity tests and MSER-5:
       Exploring the intuition behind mean-squared-error-reduction in
       detecting and correcting initialization bias. In Winter Simulation
       Conference, 541-546 (2008).
       http://www.informs-sim.org/wsc08papers/064.pdf
    .. [3] White, K. P. An effective truncation heuristic for bias reduction in
       simulation output. Simulation 69, 323-334 (1997).
       http://dx.doi.org/10.1177/003754979706900601
    """

    env = dict()

    env[r'1 - \alpha'] = confidence_level

    env['X_i'] = data

    # number of data points
    env['N'] = env['X_i'].size

    # batch size
    env['m'] = 5

    # number of batches
    env['k'] = int(math.floor(env['N'] / env['m']))

    # compute batch means
    env['Z_j'] = (
        env['X_i']
        [:env['m'] * env['k']]
        .reshape(env['k'], env['m'])
        .mean(axis=1)
    )

    env['objective'] = np.empty(env['k'] - 5)
    for d in range(env['k'] - 5):
        # compute objective function
        env['objective'][d] = (
            env['Z_j'][d:env['k']].var(ddof=0) / (env['k'] - d)
        )

    # compute truncation point
    env['d^*'] = env['objective'].argmin()

    if env['d^*'] >= math.floor(env['k'] / 2):
        # insufficient data
        error_msg = (
            'Insufficient data: truncation point at {}, should be below {}'
        ).format(env['d^*'], math.floor(env['k'] / 2))

        if continue_insufficient_data:
            logger.error(error_msg)
        else:
            raise MSERInsufficientDataError(error_msg)

    # compute truncated mean
    env[MSER5_TRUNCATED_MEAN_KEY] = env['Z_j'][env['d^*']:].mean()

    # compute truncated sample variance
    env[MSER5_TRUNCATED_VARIANCE_KEY] = env['Z_j'][env['d^*']:].var(ddof=0)

    """
    env['k^*'] = 20
    env['m^*'] = int(math.floor((env['k'] - env['d^*']) / env['k^*']))

    # To compute a CI estimator, MSER-5 organizes the truncated sequence
    # into 20 “new” batch means with batch size as suggested by White and
    # Robinson (2009)
    env[MSER5_NEW_BATCH_MEANS_KEY] = (
        env['Z_j'][
            env['d^*']:
            env['d^*'] + env['k^*'] * env['m^*']
        ]
        .reshape(env['k^*'], env['m^*'])
        .mean(axis=1)
    )

    # compute sample variance of the new batch means
    env[MSER5_NEW_BATCH_MEANS_VARIANCE_KEY] = (
        env[MSER5_NEW_BATCH_MEANS_KEY].var(ddof=1)
    )
    """

    critical_values = np.asarray(
        scipy.stats.norm.interval(confidence_level)
    )

    env['CI'] = (
        env[MSER5_TRUNCATED_MEAN_KEY] +
        critical_values *
        env[MSER5_TRUNCATED_VARIANCE_KEY] /
        math.sqrt(env['k'] - env['d^*'])
    )

    return MSERReturnValue(
        mean=env[MSER5_TRUNCATED_MEAN_KEY],
        ci=env['CI'],
        env=env,
    )
Source code for simoa.mser

pysimoa

Navigation