Source code for mbproj2d.mcmc

# Copyright (C) 2020 Jeremy Sanders <jeremy@jeremysanders.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

# Old MCMC routines, included for backward compatibility

try:
    import emcee
except ImportError:
    emcee = None

try:
    import zeus
except ImportError:
    zeus = None

import h5py
import numpy as N

from . import forkparallel
from .fit import Likelihood
from . import utils


[docs]
class MCMC:
    """Handles MCMC analysis of Fit. Please note this is deprecated for
    new code - please see MCMCSampler/MCMCSamplerEmcee and
    MCMCStore/MCMCStoreHDF5.

    :param Fit fit: Fit object to use for mcmc
    :param int walkers: number of walkers to use
    :param int processes: number of simultaneous processes to compute likelihoods
    :param float initspread: random Gaussian width added to create initial parameters
    :param moves: moves parameter to emcee.EnsembleSampler
    :param verbose: whether to write progress
    :param sampler: 'emcee' or 'zeus'
    """

    def __init__(
            self, fit,
            nwalkers=50, processes=1, initspread=0.01, moves=None, verbose=True,
            sampler='emcee',
    ):

        self.fit = fit
        self.nwalkers = nwalkers
        self.numpars = fit.pars.numFree()
        self.initspread = initspread
        self.verbose = verbose
        self.sampler_mode = sampler

        mcmcpars = fit.pars.copy()

        def likefunc(parvals):
            mcmcpars.setFree(parvals)
            like = Likelihood(fit.images, fit.model, mcmcpars)
            return like.total

        # create a pool if more than one process
        pool = None if processes <= 1 else forkparallel.ForkQueuePool(
            likefunc, processes)

        # for doing the mcmc sampling
        if sampler == 'emcee':
            if emcee is None:
                raise RuntimeError('emcee module not installed')
            self.sampler = emcee.EnsembleSampler(
                nwalkers,
                self.numpars,
                likefunc,
                pool=pool,
                moves=moves
            )
        elif sampler == 'zeus':
            if zeus is None:
                raise RuntimeError('zeus module not installed')
            self.sampler = zeus.EnsembleSampler(
                nwalkers,
                self.numpars,
                likefunc,
                pool=pool,
            )
        else:
            raise RuntimeError('Unknown sampler')

        # starting point
        self.pos0 = None

        # header items to write to output file
        self.header = {
            'burn': 0,
        }


[docs]
    def verbprint(self, *args, **argsv):
        """Output if verbose."""
        if self.verbose:
            utils.uprint(*args, **argsv)


    def _generateInitPars(self):
        """Generate initial set of parameters from fit."""

        newpars = self.fit.pars.copy()
        thawedpars = N.array(newpars.freeVals())
        assert N.all(N.isfinite(thawedpars))

        # create enough parameters with finite likelihoods
        p0 = []
        ct = 0
        while len(p0) < self.nwalkers:
            if ct / 1000 > self.nwalkers:
                # avoid running for ever in case of a likelihood issue
                raise RuntimeError("Could not create enough parameters with a finite likelihood")
            p = N.random.normal(0, self.initspread, size=self.numpars) + thawedpars
            newpars.setFree(p)
            like = Likelihood(self.fit.images, self.fit.model, newpars).total
            if N.isfinite(like):
                p0.append(p)
            ct += 1

        return p0

    def _innerburn(self, length, autorefit, minfrac, minimprove):
        """Returns False if new minimum found and autorefit is set"""

        bestfit = None
        like = Likelihood(
            self.fit.images, self.fit.model, self.fit.pars).total
        bestlike = initlike = like
        p0 = self._generateInitPars()

        # record period
        self.header['burn'] = length

        # iterate over burn-in period
        for i, result in enumerate(self.sampler.sample(
                p0, iterations=length, store=False)):

            if i % 10 == 0:
                self.verbprint(' Burn %i / %i (%.1f%%)' % (
                    i, length, i*100/length))

            self.pos0 = result.coords
            lnlike = result.log_prob

            # new better fit
            if lnlike.max()-bestlike > minimprove:
                bestlike = lnlike.max()
                maxidx = lnlike.argmax()
                bestfit = self.pos0[maxidx]

            # abort if new minimum found
            if ( autorefit and i>length*minfrac and
                 bestfit is not None ):

                self.verbprint(
                    '  Restarting burn as new best fit has been found '
                    ' (%g > %g)' % (bestlike, initlike) )
                self.fit.pars.setFree(bestfit)
                self.sampler.reset()
                return False

        self.sampler.reset()
        return True

    def _innerburnzeus(self, length):
        """Do burn-in for zeus sampler."""

        # record period
        self.header['burn'] = length

        p0 = self._generateInitPars()
        self.sampler.run(p0, length)
        self.pos0 = self.sampler.get_chain()[-1,:,:]
        self.sampler.reset()
        return True


[docs]
    def burnIn(self, length, autorefit=True, minfrac=0.2, minimprove=0.01):
        """Burn in, restarting fit and burn if necessary.

        :param bool autorefit: refit position if new minimum is found during burn in
        :param float minfrac: minimum fraction of burn in to do if new minimum found
        :param float minimprove: minimum improvement in fit statistic to do a new fit
        """

        self.verbprint('Burning in')

        if self.sampler_mode == 'emcee':
            while not self._innerburn(length, autorefit, minfrac, minimprove):
                self.verbprint('Restarting, as new minimum found')
                self.fit.run()
        elif self.sampler_mode == 'zeus':
            self._innerburnzeus(length)



[docs]
    def run(self, length, progress=True):
        """Run chain.

        :param int length: length of chain
        """

        self.verbprint('Sampling')
        self.header['length'] = length

        # initial parameters
        if self.pos0 is None:
            self.verbprint(' Generating initial parameters')
            p0 = self._generateInitPars()
        else:
            self.verbprint(' Starting from end of burn-in position')
            p0 = self.pos0

        self.verbprint(' doing sampling')
        self.sampler.run_mcmc(p0, nsteps=length, progress=progress)

        self.verbprint('Done')



[docs]
    def save(self, outfilename, thin=1, discard=0):
        """Save chain to HDF5 file.

        :param str outfilename: output hdf5 filename
        :param int thin: save every N samples from chain
        :param int discard: discard first N samples
        """

        self.header['thin'] = thin

        self.verbprint('Saving chain to', outfilename)
        with h5py.File(outfilename, 'w') as f:
            # write header entries
            for h in sorted(self.header):
                f.attrs[h] = self.header[h]

            # write list of parameters which are thawed
            f['thawed_params'] = N.array([
                x.encode('utf-8') for x in self.fit.pars.freeKeys()])

            # output chain
            data = self.sampler.get_chain(
                thin=thin, discard=discard).astype(N.float32)
            f.create_dataset(
                'chain',
                data=data,
                maxshape=(None,data.shape[1],data.shape[2]),
                compression='gzip',
                shuffle=True)
            # likelihoods for each walker, iteration
            data = self.sampler.get_log_prob(
                thin=thin, discard=discard).astype(N.float64)
            f.create_dataset(
                'likelihood',
                data=data,
                maxshape=(None,data.shape[1]),
                compression='gzip',
                shuffle=True
            )
            if self.sampler_mode == 'emcee':
                # acceptance fraction
                f['acceptfrac'] = self.sampler.acceptance_fraction.astype(N.float32)
            # last position in chain
            f['lastpos'] = self.sampler.get_chain()[-1,:,:]

        self.verbprint('Done')