Source code for fitter.histfit

import scipy.optimize
import scipy.stats
import pylab
from pylab import mean, sqrt, std


__all__ = ["HistFit"]


class HistFit:
    """Plot the histogram of the data (bar plot) and the fitted histogram (gaussian case only).

    The input data can be a series. In this case, we compute the histogram.
    Then, we fit a curve on top of the histogram that best fits it.

    If you already have the histogram, you can provide the density function instead.
    In such a case, we assume the data to be evenly spaced from 1 to N.

    If you provide raw data, the histogram is computed, then we add some noise during
    the fitting process and repeat the process Nfit times (20 in the examples below).
    This gives us a better estimate of the underlying mu and sigma parameters of the
    distribution.

    .. plot::

        from fitter import HistFit
        import scipy.stats
        data = [scipy.stats.norm.rvs(2, 3.4) for x in range(10000)]
        hf = HistFit(data, bins=30)
        hf.fit(error_rate=0.03, Nfit=20)
        print(hf.mu, hf.sigma, hf.amplitude)

    You may already have your probability density function with the X and Y series.
    If so, just provide them. Note that the hist function returns an X with N+1 values
    (the bin edges) while Y has only N values. We take care of that.

    .. plot::

        from fitter import HistFit
        from pylab import hist
        import scipy.stats
        data = [scipy.stats.norm.rvs(2, 3.4) for x in range(10000)]
        Y, X, _ = hist(data, bins=30)
        hf = HistFit(X=X, Y=Y)
        hf.fit(error_rate=0.03, Nfit=20)
        print(hf.mu, hf.sigma, hf.amplitude)

    .. warning:: This is a draft class. It currently handles only the gaussian
        distribution. The API is likely to change in the near future.

    """

    def __init__(self, data=None, X=None, Y=None, bins=None):
        """.. rubric:: **Constructor**

        One should provide either the parameter **data** alone, or the X and Y
        parameters, which are the histogram of some data sample.

        :param data: random data
        :param X: evenly spaced X data
        :param Y: probability density of the data
        :param bins: if data is provided, the probability density is computed
            with the hist function; bins may be provided.

        """
        self.data = data

        if data:
            # compute the normalised histogram of the raw data
            Y, X, _ = pylab.hist(self.data, bins=bins, density=True)
            self.N = len(X) - 1
            # hist returns the N+1 bin edges; use the N bin centres as X
            self.X = [(X[i] + X[i + 1]) / 2 for i in range(self.N)]
            self.Y = Y
            self.A = 1
            self.guess_std = pylab.std(self.data)
            self.guess_mean = pylab.mean(self.data)
            self.guess_amp = 1
        else:
            self.X = X
            self.Y = Y
            # normalise the provided density
            self.Y = self.Y / sum(self.Y)
            if len(self.X) == len(self.Y) + 1:
                # X contains the N+1 bin edges; convert to the N bin centres
                self.X = [(X[i] + X[i + 1]) / 2 for i in range(len(X) - 1)]
            self.X = pylab.array(self.X)
            self.N = len(self.X)
            self.guess_mean = self.X[int(self.N / 2)]
            self.guess_std = sqrt(sum((self.X - mean(self.X)) ** 2) / self.N) / (sqrt(2 * 3.14))
            self.guess_amp = 1.0

        self.func = self._func_normal

    def fit(
        self,
        error_rate=0.05,
        semilogy=False,
        Nfit=100,
        error_kwargs={"lw": 1, "color": "black", "alpha": 0.2},
        fit_kwargs={"lw": 2, "color": "red"},
    ):
        self.mus = []
        self.sigmas = []
        self.amplitudes = []
        self.fits = []

        pylab.figure(1)
        pylab.clf()
        pylab.bar(self.X, self.Y, width=0.85, ec="k")

        for x in range(Nfit):
            # add gaussian noise (std = error_rate, i.e. 5% by default) to the data
            self.E = [scipy.stats.norm.rvs(0, error_rate) for y in self.Y]
            # [scipy.stats.norm.rvs(0, self.std_data * error_rate) for x in range(self.N)]

            # least-squares fit of (mu, sigma, amplitude) on the noisy histogram
            self.result = scipy.optimize.least_squares(self.func, (self.guess_mean, self.guess_std, self.guess_amp))

            mu, sigma, amplitude = self.result["x"]

            pylab.plot(self.X, amplitude * scipy.stats.norm.pdf(self.X, mu, sigma), **error_kwargs)

            self.sigmas.append(sigma)
            self.amplitudes.append(amplitude)
            self.mus.append(mu)
            self.fits.append(amplitude * scipy.stats.norm.pdf(self.X, mu, sigma))

        # average the Nfit estimates
        self.sigma = mean(self.sigmas)
        self.amplitude = mean(self.amplitudes)
        self.mu = mean(self.mus)

        pylab.plot(self.X, self.amplitude * scipy.stats.norm.pdf(self.X, self.mu, self.sigma), **fit_kwargs)

        if semilogy:
            pylab.semilogy()
        pylab.grid()

        pylab.figure(2)
        pylab.clf()
        # pylab.bar(self.X, self.Y, width=0.85, ec="k", alpha=0.5)
        M = mean(self.fits, axis=0)
        S = pylab.std(self.fits, axis=0)
        # shade the 1, 2 and 3 standard-deviation bands around the mean fit
        pylab.fill_between(self.X, M - 3 * S, M + 3 * S, color="gray", alpha=0.5)
        pylab.fill_between(self.X, M - 2 * S, M + 2 * S, color="gray", alpha=0.5)
        pylab.fill_between(self.X, M - S, M + S, color="gray", alpha=0.5)
        # pylab.plot(self.X, M-S, color="k")
        # pylab.plot(self.X, M+S, color="k")
        pylab.plot(self.X, self.amplitude * scipy.stats.norm.pdf(self.X, self.mu, self.sigma), **fit_kwargs)
        pylab.grid()

        return self.mu, self.sigma, self.amplitude

    def _func_normal(self, param):
        # sum of squared residuals between the scaled normal pdf and the noisy data;
        # amplitude is supposed to be 1./(np.sqrt(2*np.pi)*sigma) if normalised
        mu, sigma, A = param
        return sum((A * scipy.stats.norm.pdf(self.X, mu, sigma) - (self.Y + self.E)) ** 2)