# Copyright (C) 2020 Jeremy Sanders <jeremy@jeremysanders.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Old MCMC routines, included for backward compatibility
try:
import emcee
except ImportError:
emcee = None
try:
import zeus
except ImportError:
zeus = None
import h5py
import numpy as N
from . import forkparallel
from .fit import Likelihood
from . import utils
[docs]
class MCMC:
"""Handles MCMC analysis of Fit. Please note this is deprecated for
new code - please see MCMCSampler/MCMCSamplerEmcee and
MCMCStore/MCMCStoreHDF5.
:param Fit fit: Fit object to use for mcmc
:param int walkers: number of walkers to use
:param int processes: number of simultaneous processes to compute likelihoods
:param float initspread: random Gaussian width added to create initial parameters
:param moves: moves parameter to emcee.EnsembleSampler
:param verbose: whether to write progress
:param sampler: 'emcee' or 'zeus'
"""
def __init__(
self, fit,
nwalkers=50, processes=1, initspread=0.01, moves=None, verbose=True,
sampler='emcee',
):
self.fit = fit
self.nwalkers = nwalkers
self.numpars = fit.pars.numFree()
self.initspread = initspread
self.verbose = verbose
self.sampler_mode = sampler
mcmcpars = fit.pars.copy()
def likefunc(parvals):
mcmcpars.setFree(parvals)
like = Likelihood(fit.images, fit.model, mcmcpars)
return like.total
# create a pool if more than one process
pool = None if processes <= 1 else forkparallel.ForkQueuePool(
likefunc, processes)
# for doing the mcmc sampling
if sampler == 'emcee':
if emcee is None:
raise RuntimeError('emcee module not installed')
self.sampler = emcee.EnsembleSampler(
nwalkers,
self.numpars,
likefunc,
pool=pool,
moves=moves
)
elif sampler == 'zeus':
if zeus is None:
raise RuntimeError('zeus module not installed')
self.sampler = zeus.EnsembleSampler(
nwalkers,
self.numpars,
likefunc,
pool=pool,
)
else:
raise RuntimeError('Unknown sampler')
# starting point
self.pos0 = None
# header items to write to output file
self.header = {
'burn': 0,
}
[docs]
def verbprint(self, *args, **argsv):
"""Output if verbose."""
if self.verbose:
utils.uprint(*args, **argsv)
def _generateInitPars(self):
"""Generate initial set of parameters from fit."""
newpars = self.fit.pars.copy()
thawedpars = N.array(newpars.freeVals())
assert N.all(N.isfinite(thawedpars))
# create enough parameters with finite likelihoods
p0 = []
ct = 0
while len(p0) < self.nwalkers:
if ct / 1000 > self.nwalkers:
# avoid running for ever in case of a likelihood issue
raise RuntimeError("Could not create enough parameters with a finite likelihood")
p = N.random.normal(0, self.initspread, size=self.numpars) + thawedpars
newpars.setFree(p)
like = Likelihood(self.fit.images, self.fit.model, newpars).total
if N.isfinite(like):
p0.append(p)
ct += 1
return p0
def _innerburn(self, length, autorefit, minfrac, minimprove):
"""Returns False if new minimum found and autorefit is set"""
bestfit = None
like = Likelihood(
self.fit.images, self.fit.model, self.fit.pars).total
bestlike = initlike = like
p0 = self._generateInitPars()
# record period
self.header['burn'] = length
# iterate over burn-in period
for i, result in enumerate(self.sampler.sample(
p0, iterations=length, store=False)):
if i % 10 == 0:
self.verbprint(' Burn %i / %i (%.1f%%)' % (
i, length, i*100/length))
self.pos0 = result.coords
lnlike = result.log_prob
# new better fit
if lnlike.max()-bestlike > minimprove:
bestlike = lnlike.max()
maxidx = lnlike.argmax()
bestfit = self.pos0[maxidx]
# abort if new minimum found
if ( autorefit and i>length*minfrac and
bestfit is not None ):
self.verbprint(
' Restarting burn as new best fit has been found '
' (%g > %g)' % (bestlike, initlike) )
self.fit.pars.setFree(bestfit)
self.sampler.reset()
return False
self.sampler.reset()
return True
def _innerburnzeus(self, length):
"""Do burn-in for zeus sampler."""
# record period
self.header['burn'] = length
p0 = self._generateInitPars()
self.sampler.run(p0, length)
self.pos0 = self.sampler.get_chain()[-1,:,:]
self.sampler.reset()
return True
[docs]
def burnIn(self, length, autorefit=True, minfrac=0.2, minimprove=0.01):
"""Burn in, restarting fit and burn if necessary.
:param bool autorefit: refit position if new minimum is found during burn in
:param float minfrac: minimum fraction of burn in to do if new minimum found
:param float minimprove: minimum improvement in fit statistic to do a new fit
"""
self.verbprint('Burning in')
if self.sampler_mode == 'emcee':
while not self._innerburn(length, autorefit, minfrac, minimprove):
self.verbprint('Restarting, as new minimum found')
self.fit.run()
elif self.sampler_mode == 'zeus':
self._innerburnzeus(length)
[docs]
def run(self, length, progress=True):
"""Run chain.
:param int length: length of chain
"""
self.verbprint('Sampling')
self.header['length'] = length
# initial parameters
if self.pos0 is None:
self.verbprint(' Generating initial parameters')
p0 = self._generateInitPars()
else:
self.verbprint(' Starting from end of burn-in position')
p0 = self.pos0
self.verbprint(' doing sampling')
self.sampler.run_mcmc(p0, nsteps=length, progress=progress)
self.verbprint('Done')
[docs]
def save(self, outfilename, thin=1, discard=0):
"""Save chain to HDF5 file.
:param str outfilename: output hdf5 filename
:param int thin: save every N samples from chain
:param int discard: discard first N samples
"""
self.header['thin'] = thin
self.verbprint('Saving chain to', outfilename)
with h5py.File(outfilename, 'w') as f:
# write header entries
for h in sorted(self.header):
f.attrs[h] = self.header[h]
# write list of parameters which are thawed
f['thawed_params'] = N.array([
x.encode('utf-8') for x in self.fit.pars.freeKeys()])
# output chain
data = self.sampler.get_chain(
thin=thin, discard=discard).astype(N.float32)
f.create_dataset(
'chain',
data=data,
maxshape=(None,data.shape[1],data.shape[2]),
compression='gzip',
shuffle=True)
# likelihoods for each walker, iteration
data = self.sampler.get_log_prob(
thin=thin, discard=discard).astype(N.float64)
f.create_dataset(
'likelihood',
data=data,
maxshape=(None,data.shape[1]),
compression='gzip',
shuffle=True
)
if self.sampler_mode == 'emcee':
# acceptance fraction
f['acceptfrac'] = self.sampler.acceptance_fraction.astype(N.float32)
# last position in chain
f['lastpos'] = self.sampler.get_chain()[-1,:,:]
self.verbprint('Done')