diff --git a/docs/likelihood.txt b/docs/likelihood.txt
index a504a62d8b49ad5fefe68fe2f6d3059ac2cd3cd4..291ec818b6694d522782d58263649a153896d3a0 100644
--- a/docs/likelihood.txt
+++ b/docs/likelihood.txt
@@ -219,6 +219,14 @@ We provide this general-purpose class as part of tupak:
 An example using this likelihood can be found `on this page <https://git.ligo.org/Monash/tupak/blob/master/examples/other_examples/linear_regression_unknown_noise.py>`_.
 
+Common likelihood functions
+---------------------------
+
+As well as the Gaussian likelihood defined above, tupak provides
+the following common likelihood functions:
+
+.. autoclass:: tupak.core.likelihood.PoissonLikelihood
+
 Likelihood for transient gravitational waves
 --------------------------------------------
 
diff --git a/examples/other_examples/radioactive_decay.py b/examples/other_examples/radioactive_decay.py
new file mode 100644
index 0000000000000000000000000000000000000000..156406da22ca603927d662dacc3b1317c0f12bbd
--- /dev/null
+++ b/examples/other_examples/radioactive_decay.py
@@ -0,0 +1,81 @@
+#!/bin/python
+"""
+An example of how to use tupak to perform parameter estimation for
+non-gravitational wave data. In this case, fitting the half-life and
+initial radionuclide number for Polonium 214.
+"""
+from __future__ import division
+import tupak
+import numpy as np
+import matplotlib.pyplot as plt
+
+from tupak.core.likelihood import PoissonLikelihood
+
+# A few simple setup steps
+label = 'radioactive_decay'
+outdir = 'outdir'
+tupak.utils.check_directory_exists_and_if_not_mkdir(outdir)
+
+# Generate a set of counts per minute for Polonium 214 with a half-life of 20 mins
+halflife = 20
+N0 = 1.e-19  # initial number of radionuclides in moles
+
+def decayrate(deltat, halflife, N0):
+    """
+    Get the decay rate of a radioactive substance in a range of time intervals
+    (in minutes). halflife is in mins. N0 is in moles.
+    """
+
+    times = np.cumsum(deltat)  # get cumulative times
+    times = np.insert(times, 0, 0.)
+
+    ln2 = np.log(2.)
+    NA = 6.02214078e23  # Avogadro's number
+
+    rates = (N0*NA*(np.exp(-ln2*(times[0:-1]/halflife))
+             - np.exp(-ln2*(times[1:]/halflife)))/deltat)
+
+    return rates
+
+
+# Now we define the injection parameters which we make simulated data with
+injection_parameters = dict(halflife=halflife, N0=N0)
+
+# These lines of code generate the fake data. Note the ** just unpacks the
+# contents of the injection_parameters when calling the model function.
+sampling_frequency = 1.
+time_duration = 30.
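+# Sample once per minute; deltat holds the width of each counting interval.
+# Because the intervals are one minute long, the rate returned by decayrate
+# (decays per minute) is also the expected number of counts per interval,
+# which is what PoissonLikelihood assumes.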
+time = np.arange(0, time_duration, 1./sampling_frequency)
+deltat = np.diff(time)
+
+rates = decayrate(deltat, **injection_parameters)
+# Get the Poisson-distributed radioactive counts in each interval
+counts = np.array([np.random.poisson(rate) for rate in rates])
+
+# We quickly plot the data to check it looks sensible
+fig, ax = plt.subplots()
+ax.plot(time[0:-1], counts, 'o', label='data')
+ax.plot(time[0:-1], rates, '--r', label='signal')
+ax.set_xlabel('time (min)')
+ax.set_ylabel('counts')
+ax.legend()
+fig.savefig('{}/{}_data.png'.format(outdir, label))
+
+# Now let's instantiate a version of the Poisson likelihood, giving it
+# the time intervals, counts and rate model
+likelihood = PoissonLikelihood(deltat, counts, decayrate)
+
+# Make the priors
+priors = {}
+priors['halflife'] = tupak.core.prior.LogUniform(1e-5, 1e5, 'halflife',
+                                                 latex_label='$t_{1/2}$ (min)')
+priors['N0'] = tupak.core.prior.LogUniform(1e-25, 1e-10, 'N0',
+                                           latex_label='$N_0$ (mole)')
+
+# And run the sampler
+result = tupak.run_sampler(
+    likelihood=likelihood, priors=priors, sampler='dynesty', nlive=1000,
+    walks=10, injection_parameters=injection_parameters, outdir=outdir,
+    label=label)
+result.plot_corner()
diff --git a/tupak/core/likelihood.py b/tupak/core/likelihood.py
index be17dc09d42916ff2cb7e8dcdba877ca3707449a..26e91546be975caa24c473fab64f888289ae0a73 100644
--- a/tupak/core/likelihood.py
+++ b/tupak/core/likelihood.py
@@ -2,7 +2,7 @@ from __future__ import division, print_function
 import inspect
 import numpy as np
-
+from scipy.special import gammaln
 
 
 class Likelihood(object):
 
@@ -82,7 +82,7 @@ class GaussianLikelihood(Likelihood):
     @staticmethod
     def _infer_parameters_from_function(func):
         """ Infers the arguments of function (except the first arg which is
-        assumed to be the dep. variable
+        assumed to be the dep. variable)
         """
         parameters = inspect.getargspec(func).args
         parameters.pop(0)
@@ -109,3 +109,102 @@ class GaussianLikelihood(Likelihood):
         # Return the summed log likelihood
         return -0.5 * (np.sum((res / sigma)**2)
                        + self.N * np.log(2 * np.pi * sigma**2))
+
+
+class PoissonLikelihood(Likelihood):
+    def __init__(self, x, counts, func):
+        """
+        A general Poisson likelihood for a set of event counts - the model
+        parameters are inferred from the arguments of the function, which
+        provides the expected rate of events in each interval.
+
+        Parameters
+        ----------
+
+        x: array_like
+            The dependent variable at which the Poisson rates will be
+            calculated
+        counts: array_like
+            The data to analyse - this must be a set of non-negative integers,
+            each being the number of events within some interval.
+        func:
+            The Python function providing the rate of events per interval to
+            fit to the data. The function must be defined with the first
+            argument being the dependent variable (although this does not have
+            to be used by the function if not required). The subsequent
+            arguments will require priors and will be sampled over (unless a
+            fixed value is given).
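+
+        The evaluated log likelihood is the standard Poisson form,
+        ln L = sum_i [counts_i * ln(rate_i) - rate_i - ln(counts_i!)],
+        where rate_i is the value returned by the rate function for the
+        i'th interval and counts_i is the corresponding observed count.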
+        """
+
+        parameters = self._infer_parameters_from_function(func)
+        Likelihood.__init__(self, dict.fromkeys(parameters))
+
+        self.x = x  # the dependent variable
+        self.counts = counts  # the counts
+
+        # convert the counts to a numpy array (also handles a single integer)
+        self.counts = np.atleast_1d(self.counts)
+
+        # check array is an integer array
+        if self.counts.dtype.kind not in 'ui':
+            raise ValueError("Data must be non-negative integers")
+
+        # check for non-negative integers
+        if np.any(self.counts < 0):
+            raise ValueError("Data must be non-negative integers")
+
+        # save the sum of the log factorial of the counts
+        self.sumlogfactorial = np.sum(gammaln(self.counts + 1))
+
+        self.function = func
+
+        # store the keys of the model parameters to be inferred
+        self.function_keys = list(self.parameters.keys())
+
+    @staticmethod
+    def _infer_parameters_from_function(func):
+        """ Infers the arguments of function (except the first arg which is
+        assumed to be the dep. variable)
+        """
+        parameters = inspect.getargspec(func).args
+        parameters.pop(0)
+        return parameters
+
+    @property
+    def N(self):
+        """ The number of data points """
+        return len(self.counts)
+
+    def log_likelihood(self):
+        # Extract the parameters to be passed to the rate function
+        model_parameters = {k: self.parameters[k] for k in self.function_keys}
+
+        # Calculate the rate
+        rate = self.function(self.x, **model_parameters)
+
+        # check if rate is a single value
+        if isinstance(rate, (float, int)):
+            # check rate is positive
+            if rate < 0.:
+                raise ValueError("Poisson rate function returns a negative "
+                                 "value!")
+
+            if rate == 0.:
+                return -np.inf
+            else:
+                # Return the summed log likelihood
+                return (-self.N * rate + np.sum(self.counts * np.log(rate))
+                        - self.sumlogfactorial)
+        elif isinstance(rate, np.ndarray):
+            # check rates are positive
+            if np.any(rate < 0.):
+                raise ValueError("Poisson rate function returns a negative "
+                                 "value!")
+
+            if np.any(rate == 0.):
+                return -np.inf
+            else:
+                return (np.sum(-rate + self.counts * np.log(rate))
+                        - self.sumlogfactorial)
+        else:
+            raise ValueError("Poisson rate function returns wrong value type!")
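
For reference, here is a minimal sketch (not part of the patch above) of how the new PoissonLikelihood might be driven with a different rate model; the constant_rate function, the prior bounds and the sampler settings are placeholders chosen purely for illustration::

    import numpy as np
    import tupak
    from tupak.core.likelihood import PoissonLikelihood

    # Illustrative constant-rate model (placeholder, not from the patch):
    # returns the expected number of events in each interval; the dependent
    # variable x is accepted but unused, as the likelihood allows.
    def constant_rate(x, mu):
        return mu * np.ones(len(x))

    x = np.arange(100)                       # dummy dependent variable
    counts = np.random.poisson(5.0, len(x))  # simulated data, true rate of 5
    likelihood = PoissonLikelihood(x, counts, constant_rate)
    priors = dict(mu=tupak.core.prior.Uniform(0., 50., 'mu'))
    result = tupak.run_sampler(likelihood=likelihood, priors=priors,
                               sampler='dynesty', npoints=500,
                               outdir='outdir', label='poisson_sketch')
    result.plot_corner()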