# Source code for cpnest.cpnest

#! /usr/bin/env python
# coding: utf-8

import multiprocessing as mp
from ctypes import c_double, c_int
import numpy as np
import os
import sys
import signal
import logging

from multiprocessing.sharedctypes import Value, Array
from multiprocessing import Lock
from multiprocessing.managers import SyncManager

import cProfile

from .utils import LEVELS, LogFile

# module logger takes name according to its path
LOGGER = logging.getLogger('cpnest.cpnest')

class CheckPoint(Exception):
    """Raised by the signal handler to interrupt a run so it can checkpoint."""
    pass
def sighandler(signal, frame):
    """
    Module-level signal handler: log the signal number that was received,
    then raise :class:`CheckPoint` so the running sampler unwinds and
    checkpoints itself.

    Note: the first parameter shadows the imported ``signal`` module inside
    this function; the names are kept for handler-signature compatibility.
    """
    LOGGER.critical("Handling signal {}".format(signal))
    raise CheckPoint()
class CPNest(object):
    """
    Class to control the CPNest sampler.

    cp = CPNest(usermodel, nlive=100, output='./', verbose=0, seed=None,
                maxmcmc=100, nthreads=None)

    Parameters
    ----------
    usermodel : :obj:`cpnest.Model`
        a user-defined model to analyse
    nlive : `int`
        Number of live points (default: 100)
    poolsize : `int`
        Number of objects in the sampler pool (default: 100)
    output : `str`
        output directory (default: './')
    verbose : `int`
        Verbosity:
        0: no display of information on screen, save the NS chain and evidence
        1: 0 + display progress on screen
        2: 1 + display diagnostics (ACL), save the posterior samples and
           trace plots and posterior plots
        3: 2 + save chains from individual samplers
        (default: 0)
    seed : `int`
        random seed (default: 1234)
    maxmcmc : `int`
        maximum MCMC points for sampling chains (default: 100)
    nthreads : `int` or `None`
        number of parallel samplers. Default (None) uses mp.cpu_count()
        to autodetermine
    nhamiltonian : `int`
        number of sampler threads using a Hamiltonian sampler (default: 0)
    nslice : `int`
        number of sampler threads using an ensemble slice sampler (default: 0)
    resume : `boolean`
        determines whether cpnest will resume a run or run from scratch
        (default: False)
    proposals : `dict` or `list`
        dictionary of lists with custom jump proposals; key 'mhs' for the
        Metropolis-Hastings sampler, 'hmc' for the Hamiltonian Monte-Carlo
        sampler, 'sli' for the slice sampler. A 3-element list is interpreted
        as [mhs, hmc, sli]. (default: None, i.e. the built-in cycles)
    n_periodic_checkpoint : `int`
        **deprecated** checkpoint the sampler every n_periodic_checkpoint
        iterations (default: None, disabled)
    periodic_checkpoint_interval : `float`
        checkpoint the sampler every periodic_checkpoint_interval seconds
        (default: None, disabled)
    prior_sampling : `boolean`
        produce nlive samples from the prior (default: False)
    """
    def __init__(self,
                 usermodel,
                 nlive        = 100,
                 poolsize     = 100,
                 output       = './',
                 verbose      = 0,
                 seed         = None,
                 maxmcmc      = 100,
                 nthreads     = None,
                 nhamiltonian = 0,
                 nslice       = 0,
                 resume       = False,
                 proposals    = None,
                 n_periodic_checkpoint = None,
                 periodic_checkpoint_interval = None,
                 prior_sampling = False):
        # Autodetect the number of parallel samplers if not specified.
        if nthreads is None:
            self.nthreads = mp.cpu_count()
        else:
            self.nthreads = nthreads
        # Normalise the output path (ensure a trailing separator) and create it.
        output = os.path.join(output, '')
        os.makedirs(output, exist_ok=True)
        self.verbose = verbose
        self.output = output
        self.logger = logging.getLogger('cpnest.cpnest.CPNest')
        # The LogFile context manager ensures everything within is logged to
        # 'cpnest.log' but the file handler is safely closed once the run is
        # finished.
        self.log_file = LogFile(os.path.join(self.output, 'cpnest.log'),
                                verbose=self.verbose)
        with self.log_file:
            # Everything in this context is logged to `log_file`
            msg = 'Running with {0} parallel threads'.format(self.nthreads)
            self.logger.critical(msg)
            if n_periodic_checkpoint is not None:
                self.logger.critical(
                    "The n_periodic_checkpoint kwarg is deprecated, "
                    "use periodic_checkpoint_interval instead."
                )
            # np.inf effectively disables time-based checkpointing.
            if periodic_checkpoint_interval is None:
                periodic_checkpoint_interval = np.inf
            # Deferred imports: these pull in the heavy sampler machinery only
            # when a CPNest object is actually constructed.
            from .sampler import HamiltonianMonteCarloSampler, MetropolisHastingsSampler, SliceSampler
            from .NestedSampling import NestedSampler
            from .proposal import DefaultProposalCycle, HamiltonianProposalCycle, EnsembleSliceProposalCycle
            # Accept either no proposals (use defaults), or a 3-element list
            # interpreted positionally as [mhs, hmc, sli].
            if proposals is None:
                proposals = dict(mhs=DefaultProposalCycle,
                                 hmc=HamiltonianProposalCycle,
                                 sli=EnsembleSliceProposalCycle)
            elif type(proposals) == list:
                proposals = dict(mhs=proposals[0],
                                 hmc=proposals[1],
                                 sli=proposals[2])
            self.nlive = nlive
            self.poolsize = poolsize
            self.posterior_samples = None
            self.prior_sampling = prior_sampling
            # The RunManager owns the shared state (pipes, logL bounds,
            # checkpoint flag) used by the sampler processes; it must be
            # started before the samplers are constructed.
            self.manager = RunManager(
                nthreads=self.nthreads,
                periodic_checkpoint_interval=periodic_checkpoint_interval
            )
            self.manager.start()
            self.user = usermodel
            self.resume = resume
            if seed is None:
                self.seed = 1234
            else:
                self.seed = seed
            self.process_pool = []
            # instantiate the nested sampler class, resuming from its pickle
            # when one exists and resume was requested
            resume_file = os.path.join(output, "nested_sampler_resume.pkl")
            if not os.path.exists(resume_file) or resume == False:
                self.NS = NestedSampler(self.user,
                                        nlive          = nlive,
                                        output         = output,
                                        verbose        = verbose,
                                        seed           = self.seed,
                                        prior_sampling = self.prior_sampling,
                                        manager        = self.manager)
            else:
                self.NS = NestedSampler.resume(resume_file, self.manager, self.user)
            # Threads not assigned to HMC or slice sampling run the
            # Metropolis-Hastings sampler.
            nmhs = self.nthreads - nhamiltonian - nslice
            # instantiate the sampler class
            # NOTE(review): all three loops below derive resume_file from the
            # bare loop index i, so "sampler_0.pkl" etc. can be shared between
            # the MHS, HMC and slice families when more than one family is
            # enabled — confirm whether resume with mixed samplers is
            # intended to work.
            for i in range(nmhs):
                resume_file = os.path.join(output, "sampler_{0:d}.pkl".format(i))
                if not os.path.exists(resume_file) or resume == False:
                    sampler = MetropolisHastingsSampler(self.user,
                                                        maxmcmc,
                                                        verbose      = verbose,
                                                        output       = output,
                                                        poolsize     = poolsize,
                                                        seed         = self.seed + i,
                                                        proposal     = proposals['mhs'](),
                                                        resume_file  = resume_file,
                                                        sample_prior = prior_sampling,
                                                        manager      = self.manager)
                else:
                    sampler = MetropolisHastingsSampler.resume(resume_file, self.manager, self.user)
                p = mp.Process(target=sampler.produce_sample)
                self.process_pool.append(p)
            for i in range(nhamiltonian):
                resume_file = os.path.join(output, "sampler_{0:d}.pkl".format(i))
                if not os.path.exists(resume_file) or resume == False:
                    # seeds are offset by nmhs so no two samplers share a seed
                    sampler = HamiltonianMonteCarloSampler(self.user,
                                                           maxmcmc,
                                                           verbose      = verbose,
                                                           output       = output,
                                                           poolsize     = poolsize,
                                                           seed         = self.seed + nmhs + i,
                                                           proposal     = proposals['hmc'](model=self.user),
                                                           resume_file  = resume_file,
                                                           sample_prior = prior_sampling,
                                                           manager      = self.manager)
                else:
                    sampler = HamiltonianMonteCarloSampler.resume(resume_file, self.manager, self.user)
                p = mp.Process(target=sampler.produce_sample)
                self.process_pool.append(p)
            for i in range(nslice):
                resume_file = os.path.join(output, "sampler_{0:d}.pkl".format(i))
                if not os.path.exists(resume_file) or resume == False:
                    # NOTE(review): unlike the other two samplers, no
                    # sample_prior kwarg is passed here — confirm whether the
                    # slice sampler supports prior sampling.
                    sampler = SliceSampler(self.user,
                                           maxmcmc,
                                           verbose     = verbose,
                                           output      = output,
                                           poolsize    = poolsize,
                                           seed        = self.seed + nmhs + nhamiltonian + i,
                                           proposal    = proposals['sli'](),
                                           resume_file = resume_file,
                                           manager     = self.manager)
                else:
                    sampler = SliceSampler.resume(resume_file, self.manager, self.user)
                p = mp.Process(target=sampler.produce_sample)
                self.process_pool.append(p)
    def run(self):
        """
        Run the sampler: start every sampler process, drive the nested
        sampling loop to completion, then collect and (depending on
        ``self.verbose``) save nested/posterior/prior/MCMC samples and plots.

        Raises SystemExit(130) after checkpointing if a handled signal
        interrupts the run.
        """
        with self.log_file:
            # Everything in this context is logged to `log_file`
            # Only install checkpoint-on-signal handlers for resumable runs.
            if self.resume:
                signal.signal(signal.SIGTERM, sighandler)
                signal.signal(signal.SIGALRM, sighandler)
                signal.signal(signal.SIGQUIT, sighandler)
                signal.signal(signal.SIGINT, sighandler)
                signal.signal(signal.SIGUSR1, sighandler)
                signal.signal(signal.SIGUSR2, sighandler)
            #self.p_ns.start()
            for each in self.process_pool:
                each.start()
            try:
                # The nested sampling loop runs in this (main) process while
                # the pool processes produce samples for it.
                self.NS.nested_sampling_loop()
                for each in self.process_pool:
                    each.join()
            except CheckPoint:
                # A handled signal was converted to CheckPoint by sighandler:
                # persist state and exit with the conventional 128+SIGINT code.
                self.checkpoint()
                sys.exit(130)
            if self.verbose >= 2:
                self.logger.critical(
                    "Saving nested samples in {0}".format(self.output)
                )
                self.nested_samples = self.get_nested_samples()
                self.logger.critical(
                    "Saving posterior samples in {0}".format(self.output)
                )
                self.posterior_samples = self.get_posterior_samples()
            else:
                # filename=None: compute the arrays but do not write them out.
                self.nested_samples = self.get_nested_samples(filename=None)
                self.posterior_samples = self.get_posterior_samples(
                    filename=None
                )
            if self.verbose >= 3 or self.NS.prior_sampling:
                self.prior_samples = self.get_prior_samples(filename=None)
            if self.verbose >= 3 and not self.NS.prior_sampling:
                self.mcmc_samples = self.get_mcmc_samples(filename=None)
            if self.verbose >= 2:
                self.plot(corner = False)
        #TODO: Clean up the resume pickles
[docs] def get_nested_samples(self, filename='nested_samples.dat'): """ returns nested sampling chain Parameters ---------- filename : string If given, file to save nested samples to Returns ------- pos : :obj:`numpy.ndarray` """ import numpy.lib.recfunctions as rfn self.nested_samples = rfn.stack_arrays( [s.asnparray() for s in self.NS.nested_samples] ,usemask=False) if filename: np.savetxt(os.path.join( self.NS.output_folder, filename), self.nested_samples.ravel(), header=' '.join(self.nested_samples.dtype.names), newline='\n',delimiter=' ') return self.nested_samples
    def get_posterior_samples(self, filename='posterior.dat'):
        """
        Returns posterior samples drawn (by importance resampling) from the
        nested samples. At verbose >= 3 also gathers per-sampler prior and
        MCMC chain files into ``self.prior_samples`` / ``self.mcmc_samples``
        and writes 'prior.dat' / 'mcmc.dat'.

        Parameters
        ----------
        filename : string
            If given, file to save posterior samples to

        Returns
        -------
        pos : :obj:`numpy.ndarray`
        """
        import numpy as np
        import os
        from .nest2pos import draw_posterior_many
        nested_samples = self.get_nested_samples()
        posterior_samples = draw_posterior_many([nested_samples],
                                                [self.nlive],
                                                verbose=self.verbose)
        posterior_samples = np.array(posterior_samples)
        # Default placeholders so plot() can test these attributes even when
        # no prior/MCMC files are read below.
        self.prior_samples = {n: None for n in self.user.names}
        self.mcmc_samples = {n: None for n in self.user.names}
        # if we run with full verbose, read in and output
        # the mcmc thinned posterior samples
        if self.verbose >= 3:
            from .nest2pos import resample_mcmc_chain
            from numpy.lib.recfunctions import stack_arrays
            prior_samples = []
            mcmc_samples = []
            # Each sampler process leaves its own 'prior_samples*' and
            # 'mcmc_chain*' files in the output folder; read then delete them.
            # NOTE(review): deletion shells out via os.system('rm ...'), which
            # is non-portable and breaks on paths with spaces — consider
            # os.remove.
            for file in os.listdir(self.NS.output_folder):
                if 'prior_samples' in file:
                    prior_samples.append(np.genfromtxt(os.path.join(self.NS.output_folder, file), names = True))
                    os.system('rm {0}'.format(os.path.join(self.NS.output_folder, file)))
                elif 'mcmc_chain' in file:
                    mcmc_samples.append(resample_mcmc_chain(np.genfromtxt(os.path.join(self.NS.output_folder, file), names = True)))
                    os.system('rm {0}'.format(os.path.join(self.NS.output_folder, file)))
            # first deal with the prior samples
            if len(prior_samples) > 0:
                self.prior_samples = stack_arrays([p for p in prior_samples])
                if filename:
                    np.savetxt(os.path.join(self.NS.output_folder, 'prior.dat'),
                               self.prior_samples.ravel(),
                               header=' '.join(self.prior_samples.dtype.names),
                               newline='\n', delimiter=' ')
            # now stack all the mcmc chains
            if len(mcmc_samples) > 0:
                self.mcmc_samples = stack_arrays([p for p in mcmc_samples])
                if filename:
                    np.savetxt(os.path.join(self.NS.output_folder, 'mcmc.dat'),
                               self.mcmc_samples.ravel(),
                               header=' '.join(self.mcmc_samples.dtype.names),
                               newline='\n', delimiter=' ')
        # TODO: Replace with something to output samples in whatever format
        if filename:
            np.savetxt(os.path.join(self.NS.output_folder, filename),
                       posterior_samples.ravel(),
                       header=' '.join(posterior_samples.dtype.names),
                       newline='\n', delimiter=' ')
        return posterior_samples
[docs] def get_prior_samples(self, filename='prior.dat'): """ Returns prior samples Parameters ---------- filename : string If given, file to save posterior samples to Returns ------- pos : :obj:`numpy.ndarray` """ import numpy as np import os from numpy.lib.recfunctions import stack_arrays # read in the samples from the prior coming from each sampler prior_samples = [] for file in os.listdir(self.NS.output_folder): if 'prior_samples' in file: prior_samples.append(np.genfromtxt(os.path.join(self.NS.output_folder,file), names = True)) os.system('rm {0}'.format(os.path.join(self.NS.output_folder,file))) # if we sampled the prior, the nested samples are samples from the prior if self.NS.prior_sampling: prior_samples.append(self.get_nested_samples()) if not prior_samples: self.logger.critical('ERROR, no prior samples found!') return None prior_samples = stack_arrays([p for p in prior_samples]) if filename: np.savetxt(os.path.join( self.NS.output_folder, filename), self.prior_samples.ravel(), header=' '.join(self.prior_samples.dtype.names), newline='\n',delimiter=' ') return prior_samples
[docs] def get_mcmc_samples(self, filename='mcmc.dat'): """ Returns resampled mcmc samples Parameters ---------- filename : string If given, file to save posterior samples to Returns ------- pos : :obj:`numpy.ndarray` """ import numpy as np import os from .nest2pos import resample_mcmc_chain from numpy.lib.recfunctions import stack_arrays mcmc_samples = [] for file in os.listdir(self.NS.output_folder): if 'mcmc_chain' in file: mcmc_samples.append(resample_mcmc_chain(np.genfromtxt(os.path.join(self.NS.output_folder,file), names = True))) os.system('rm {0}'.format(os.path.join(self.NS.output_folder,file))) if not mcmc_samples: self.logger.critical('ERROR, no MCMC samples found!') return None # now stack all the mcmc chains mcmc_samples = stack_arrays([p for p in mcmc_samples]) if filename: np.savetxt(os.path.join( self.NS.output_folder, filename), self.mcmc_samples.ravel(), header=' '.join(self.mcmc_samples.dtype.names), newline='\n',delimiter=' ') return mcmc_samples
    def plot(self, corner = True):
        """
        Make diagnostic plots of the posterior and nested samples:
        per-parameter posterior histograms, nested-sampling trace plots,
        and optionally a corner plot.

        Parameters
        ----------
        corner : bool
            Whether to also produce the corner plot (default: True).
        """
        pos = self.posterior_samples
        # Select which overlays are available given the verbosity level and
        # whether the run sampled the prior.
        if self.verbose >= 3 and self.NS.prior_sampling is False:
            pri = self.prior_samples
            mc = self.mcmc_samples
        elif self.verbose >= 3 or self.NS.prior_sampling is True:
            pri = self.prior_samples
            mc = None
        else:
            pri = None
            mc = None
        from . import plot
        if self.NS.prior_sampling is False:
            # One histogram per parameter, overlaying prior/MCMC samples when
            # they were collected.
            for n in pos.dtype.names:
                plot.plot_hist(pos[n].ravel(), name = n,
                               prior_samples = self.prior_samples[n].ravel() if pri is not None else None,
                               mcmc_samples = self.mcmc_samples[n].ravel() if mc is not None else None,
                               filename = os.path.join(self.output, 'posterior_{0}.pdf'.format(n)))
        for n in self.nested_samples.dtype.names:
            plot.plot_chain(self.nested_samples[n], name=n,
                            filename=os.path.join(self.output, 'nschain_{0}.pdf'.format(n)))
        if self.NS.prior_sampling is False:
            import numpy as np
            # View the structured arrays as plain 2D float arrays for the
            # corner plot (one column per parameter).
            plotting_posteriors = np.squeeze(pos.view((pos.dtype[0], len(pos.dtype.names))))
            if pri is not None:
                plotting_priors = np.squeeze(pri.view((pri.dtype[0], len(pri.dtype.names))))
            else:
                plotting_priors = None
            if mc is not None:
                plotting_mcmc = np.squeeze(mc.view((mc.dtype[0], len(mc.dtype.names))))
            else:
                plotting_mcmc = None
            if corner:
                plot.plot_corner(plotting_posteriors,
                                 ps=plotting_priors,
                                 ms=plotting_mcmc,
                                 labels=pos.dtype.names,
                                 filename=os.path.join(self.output, 'corner.pdf'))
[docs] def worker_sampler(self, producer_pipe, logLmin): cProfile.runctx('self.sampler.produce_sample(producer_pipe, logLmin)', globals(), locals(), 'prof_sampler.prof')
[docs] def worker_ns(self): cProfile.runctx('self.NS.nested_sampling_loop(self.consumer_pipes)', globals(), locals(), 'prof_nested_sampling.prof')
    def profile(self):
        """
        Launch profiled sampler and nested-sampling worker processes.

        NOTE(review): this method references several attributes that are not
        defined anywhere in this module (``self.NUMBER_OF_PRODUCER_PROCESSES``,
        ``self.NUMBER_OF_CONSUMER_PROCESSES``, ``self.queues``, ``self.port``,
        ``self.authkey``) and worker_ns takes no extra arguments — it appears
        to be legacy code from an older queue-based architecture and would
        fail if called; confirm before use.
        """
        for i in range(0, self.NUMBER_OF_PRODUCER_PROCESSES):
            p = mp.Process(target=self.worker_sampler,
                           args=(self.queues[i % len(self.queues)], self.NS.logLmin))
            self.process_pool.append(p)
        for i in range(0, self.NUMBER_OF_CONSUMER_PROCESSES):
            p = mp.Process(target=self.worker_ns,
                           args=(self.queues, self.port, self.authkey))
            self.process_pool.append(p)
        for each in self.process_pool:
            each.start()
    def checkpoint(self):
        """
        Ask the run to checkpoint by setting the manager's checkpoint flag.

        NOTE(review): this rebinds ``checkpoint_flag`` to a plain int,
        whereas RunManager.start() creates it as an ``mp.Value`` — confirm
        that readers of the flag tolerate both representations.
        """
        self.manager.checkpoint_flag = 1
class RunManager(SyncManager):
    """
    SyncManager subclass holding the state shared between the nested sampler
    and its sampler processes: one duplex pipe per sampler thread, the
    current logL bounds, a connection counter and the checkpoint flag.
    """
    def __init__(self, nthreads=None, **kwargs):
        # Pop our own kwarg before forwarding the rest to SyncManager;
        # np.inf disables time-based checkpointing.
        self.periodic_checkpoint_interval = kwargs.pop(
            "periodic_checkpoint_interval", np.inf
        )
        super(RunManager, self).__init__(**kwargs)
        # number of producers that have connected so far (see connect_producer)
        self.nconnected = mp.Value(c_int, 0)
        # one duplex pipe per sampler thread: the sampler holds the producer
        # end, the nested sampler the consumer end
        self.producer_pipes = list()
        self.consumer_pipes = list()
        for i in range(nthreads):
            consumer, producer = mp.Pipe(duplex=True)
            self.producer_pipes.append(producer)
            self.consumer_pipes.append(consumer)
        # created in start(); kept None until the manager process is running
        self.logLmin = None
        self.logLmax = None
        self.nthreads = nthreads

    def start(self):
        """
        Start the manager process, then create the shared logL bounds and
        the checkpoint flag (shared values are created after start so they
        exist in the running manager's context).
        """
        super(RunManager, self).start()
        self.logLmin = mp.Value(c_double, -np.inf)
        self.logLmax = mp.Value(c_double, -np.inf)
        self.checkpoint_flag = mp.Value(c_int, 0)

    def connect_producer(self):
        """
        Returns the producer's end of the pipe

        Each caller is handed the next unused pipe; the counter update is
        done under the Value's lock so concurrent producers get distinct
        pipes and indices.
        """
        with self.nconnected.get_lock():
            n = self.nconnected.value
            pipe = self.producer_pipes[n]
            self.nconnected.value += 1
            return pipe, n