I am trying to fit double-exponential data (i.e., a mixture of two exponentials, or bi-exponential) using MLE. While there is no direct example of this exact problem, I found some hints on maximum likelihood fitting of linear (Maximum Likelihood Estimate pseudocode), sigmoidal (https://stats.stackexchange.com/questions/66199/maximum-likelihood-curve-model-fitting-in-python), and normal (Scipy MLE fit of a normal distribution) models. Using those examples, I tested the following code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
import scipy.stats as stats
size = 300
def simu_dt():
    ## simulate Exp2 data
    np.random.seed(0)
    ## generate random values between 0 and 1
    x = np.random.rand(size)
    data = []
    for n in x:
        if n < 0.6:
            # generating 1st exp data
            data.append(np.random.exponential(scale=20))  # t1
        else:
            # generating 2nd exp data
            data.append(np.random.exponential(scale=500))  # t2
    return np.array(data)
ydata2 = simu_dt() # call to generate simulated data
## trimming the data at the beginning and the end a bit
ydata2 = ydata2[np.where(2 < ydata2)]
ydata2 = ydata2[np.where(ydata2 < 3000)]
## creating the normalized log binned histogram data
bins = 10 ** np.linspace(np.log10(np.min(ydata2)), np.log10(np.max(ydata2)), 10)
counts, bin_edges = np.histogram(ydata2, bins=bins)
bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_width = (bin_edges[1:] - bin_edges[:-1])
counts = counts / bin_width / np.sum(counts)
## generating arbitrary x value
x1 = np.linspace(bin_centres.min(), bin_centres.max(), len(ydata2))
def MLE(params):
""" find the max likelihood """
a1, k1, k2, sd = params
yPred = (1-a1)*k1*np.exp(-k1*x1) + a1*k2*np.exp(-k2*x1)
negLL = -np.sum(stats.norm.pdf(ydata2, loc=yPred, scale=sd))
return negLL
guess = np.array([0.4, 1/30, 1/320, 0.2])
bnds = ((0, 0.9), (1/200, 1/2), (1/1000, 1/100), (0, 1))
## best function used for global fitting
results = optimize.minimize(MLE, guess, method='SLSQP', bounds=bnds)
print(results)
A1, K1, K2, _ = results.x
y_fitted = (1-A1)*K1*np.exp(-K1*x1) + A1*K2*np.exp(-K2*x1)
## plot actual data
plt.plot(bin_centres, counts, 'ko', label=" actual data")
plt.xlabel("Dwell Times (s)")
plt.ylabel("Probability")
## plot fitted data on original data
plt.plot(x1, y_fitted, c='r', linestyle='dashed', label="fit")
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.show()
The fit summary shows:
Out:
     fun: -1.7494005752178573e-16
     jac: array([-3.24161825e-18,  0.00000000e+00,  4.07105635e-16, -6.38053319e-14])
 message: 'Optimization terminated successfully.'
    nfev: 6
     nit: 1
    njev: 1
  status: 0
 success: True
       x: array([0.4       , 0.03333333, 0.003125  , 0.2       ])
This is the plot showing the fit. Although the fit appears to work, the results simply return the guesses I provided! Moreover, if I change the guess, the fit changes as well, which suggests it is probably not converging at all. I am not sure what I am doing wrong. I should say that I am not an expert in Python or in the math. Any help is therefore greatly appreciated. Thanks in advance.
Posted on 2019-08-30 22:45:04
There are a few places where I would say you made mistakes. For example, you pass x1 (equally spaced x values) directly instead of ydata2. Then, your negative log-likelihood is inappropriate: you should take the negative log of the probabilities of your data under the assumed parameters. As a consequence, your fourth parameter is unnecessary. Your function should be:
def MLE(params):
""" find the max likelihood """
a1, k1, k2 = params
yPred = (1-a1)*k1*np.exp(-k1*ydata2) + a1*k2*np.exp(-k2*ydata2)
negLL = -np.sum(np.log(yPred))
return negLL
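In other words (this is just a restatement of the function above, not something extra), the quantity being minimized is the negative log-likelihood of the two-exponential mixture density evaluated at the observed dwell times t_i (your ydata2):

negLL(a1, k1, k2) = -sum_i log[ (1 - a1)*k1*exp(-k1*t_i) + a1*k2*exp(-k2*t_i) ]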
The code will still fail to converge for numerical reasons (the problem is badly scaled), so some of the usual suggestions about rescaling/linearization may help. You can simply change your optimization method to L-BFGS-B, which should converge correctly.
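As a minimal sketch, the call would then look like the line below; note that the full code further down simply omits the method argument, because scipy.optimize.minimize defaults to L-BFGS-B anyway when bounds are given and no method is specified:

results = optimize.minimize(MLE, guess, method='L-BFGS-B', bounds=bnds)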
Full code:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
import scipy.stats as stats
size = 300000
nbins = 30
def simu_dt():
    ## simulate Exp2 data
    np.random.seed(20)
    ## generate random values between 0 and 1
    x = np.random.rand(size)
    data = []
    for n in x:
        if n < 0.6:
            # generating 1st exp data
            data.append(np.random.exponential(scale=20))  # t1
        else:
            # generating 2nd exp data
            data.append(np.random.exponential(scale=500))  # t2
    return np.array(data)
ydata2 = simu_dt() # call to generate simulated data
## trimming the data at the beginning and the end a bit
ydata2 = ydata2[np.where(2 < ydata2)]
ydata2 = ydata2[np.where(ydata2 < 3000)]
## creating the normalized log binned histogram data
bins = 10 ** np.linspace(np.log10(np.min(ydata2)), np.log10(np.max(ydata2)), nbins)
counts, bin_edges = np.histogram(ydata2, bins=bins)
bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2
bin_width = (bin_edges[1:] - bin_edges[:-1])
counts = counts / bin_width / np.sum(counts)
## generating arbitrary x value
x1 = np.linspace(bin_centres.min(), bin_centres.max(), len(ydata2))
def MLE(params):
""" find the max likelihood """
k1, k2 = params
yPred = 0.6*k1*np.exp(-k1*ydata2) + 0.4*k2*np.exp(-k2*ydata2)
negLL = -np.sum(np.log(yPred))
return negLL
guess = np.array([1/30, 1/200])
bnds = ((1/100, 1/2), (1/1000, 1/100))
## best function used for global fitting
results = optimize.minimize(MLE, guess, bounds=bnds)
print(results)
K1, K2 = results.x
y_fitted = 0.6*K1*np.exp(-K1*x1) + 0.4*K2*np.exp(-K2*x1)
## plot actual data
plt.plot(bin_centres, counts, 'ko', label=" actual data")
plt.xlabel("Dwell Times (s)")
plt.ylabel("Probability")
## plot fitted data on original data
plt.plot(x1, y_fitted, c='r', linestyle='dashed', label="fit")
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.show()
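As a quick sanity check (not part of the answer above), you can invert the fitted rates back to time constants and compare them with the scales used in the simulation, 20 s and 500 s:

# K1 and K2 were unpacked from results.x above
print("fitted t1 = %.1f s (simulated: 20 s)" % (1 / K1))
print("fitted t2 = %.1f s (simulated: 500 s)" % (1 / K2))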