问如何在python中校正过拟合的样条曲线
EN

Stack Overflow用户

提问于 2018-09-27 02:08:49

回答 2查看 313关注 0票数 0

我有一个数据集，需要在它的条形图上叠加一条平滑的曲线。然而，我画出来的曲线看起来严重过拟合。他们要求我画出一条能够把若干相邻的极小值和极大值平滑连接起来的曲线，而我不知道该怎么做。任何帮助都将不胜感激。

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import spline



# Create a single-axes figure and resize it to 13 x 7 inches.
fig, ax = plt.subplots()
fig.set_size_inches(13,7, forward=True)

# Passed to plt.bar below as the bar width.
width=1.0



# Sample data: integer x positions from -29 to 30 with one y value per position.
data=pd.DataFrame({'x':[-29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
                   'y':[0.002383790226460071, 0.002383790226460071, 0.005164878823996822, 0.004370282081843465, 0.005164878823996822, 0.002383790226460071, 0.003178386968613429, 0.005959475566150178, 0.005959475566150178, 0.006754072308303536, 0.007945967421533572, 0.003575685339690107, 0.001986491855383393, 0.003575685339690107, 0.003972983710766786, 0.00278108859753675, 0.005959475566150178, 0.007151370679380214, 0.004767580452920143, 0.005164878823996822, 0.003575685339690107, 0.004370282081843465, 0.007945967421533572, 0.01311084624553039, 0.01072705601907032, 0.01430274135876043, 0.01231624950337704, 0.01589193484306714, 0.02264600715137068, 0.09654350417163289, 0.05164878823996821, 0.0166865315852205, 0.01549463647199046, 0.01350814461660707, 0.01191895113230036, 0.01191895113230036, 0.00874056416368693, 0.01152165276122368, 0.007151370679380214, 0.009137862534763607, 0.006356773937226857, 0.007151370679380214, 0.00834326579261025, 0.006356773937226857, 0.005562177195073501, 0.006754072308303536, 0.005164878823996822, 0.005164878823996822, 0.005959475566150178, 0.004767580452920143, 0.00278108859753675, 0.007945967421533572, 0.001589193484306714, 0.00278108859753675, 0.003178386968613429, 0.003575685339690107, 0.003178386968613429, 0.004370282081843465, 0.005562177195073501, 0.004370282081843465]})

# Draw the raw data as red bars.
plt.bar(data['x'],data['y'],width, color='r',  alpha=0.95)
x=data['x']
y=data['y']

# Dense grid of 300 evenly spaced points spanning the x range of the data.
x_new = np.linspace(x.min(),x.max(),300)

# NOTE(review): scipy.interpolate.spline appears to be deprecated and removed
# in recent SciPy releases -- confirm against the installed version.
# Presumably it interpolates through every sample, which would explain the
# wiggly ("overfit") curve this question is about.
y_smooth =spline(x,y,x_new, order=3,kind='smoothest')

# Overlay the curve in blue on top of the bars.
plt.plot(x_new,y_smooth, color='b')

python

curve-fitting

回答 2

Stack Overflow用户

回答已采纳

发布于 2018-10-03 00:59:11

这是我最终得到的答案：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline

# Plot the data as a bar chart and overlay a smoothing spline on top of it.
fig, ax = plt.subplots()
fig.set_size_inches(13, 7, forward=True)

# Passed to plt.bar below as the bar width.
width = 1.0

# The data could alternatively be loaded from a spreadsheet:
#data=pd.read_excel("h:/projects/psc/output/data.xlsx")

data = pd.DataFrame({'x': [-29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
                     'y': [0.002383790226460071, 0.002383790226460071, 0.005164878823996822, 0.004370282081843465, 0.005164878823996822, 0.002383790226460071, 0.003178386968613429, 0.005959475566150178, 0.005959475566150178, 0.006754072308303536, 0.007945967421533572, 0.003575685339690107, 0.001986491855383393, 0.003575685339690107, 0.003972983710766786, 0.00278108859753675, 0.005959475566150178, 0.007151370679380214, 0.004767580452920143, 0.005164878823996822, 0.003575685339690107, 0.004370282081843465, 0.007945967421533572, 0.01311084624553039, 0.01072705601907032, 0.01430274135876043, 0.01231624950337704, 0.01589193484306714, 0.02264600715137068, 0.09654350417163289, 0.05164878823996821, 0.0166865315852205, 0.01549463647199046, 0.01350814461660707, 0.01191895113230036, 0.01191895113230036, 0.00874056416368693, 0.01152165276122368, 0.007151370679380214, 0.009137862534763607, 0.006356773937226857, 0.007151370679380214, 0.00834326579261025, 0.006356773937226857, 0.005562177195073501, 0.006754072308303536, 0.005164878823996822, 0.005164878823996822, 0.005959475566150178, 0.004767580452920143, 0.00278108859753675, 0.007945967421533572, 0.001589193484306714, 0.00278108859753675, 0.003178386968613429, 0.003575685339690107, 0.003178386968613429, 0.004370282081843465, 0.005562177195073501, 0.004370282081843465]})

# Draw the raw data as red bars.
plt.bar(data['x'], data['y'], width, color='r', alpha=0.95)

x = data['x']
y = data['y']

# Dense grid of 300 evenly spaced points spanning the x range of the data.
x_new = np.linspace(x.min(), x.max(), 300)

# Fit a smoothing spline instead of an interpolating one, so the curve is not
# forced through every sample. The smoothing factor is the tuning knob for
# how closely the curve follows the data.
spl = UnivariateSpline(x, y)
spl.set_smoothing_factor(0.001)
y_smooth = spl(x_new)

# Overlay the smoothed curve in blue on top of the bars.
plt.plot(x_new, y_smooth, color='b', alpha=0.95)

票数 0

Stack Overflow用户

发布于 2018-09-27 07:04:26

下面是拟合和绘图代码，所用的方程是我在 80 多个峰值方程中搜索后选出的洛伦兹（Lorentzian）峰值方程的一个变体。这段代码使用 scipy 的 differential_evolution 遗传算法，为 curve_fit() 中的非线性求解器估计初始参数。scipy 的遗传算法模块使用拉丁超立方（Latin Hypercube）采样来确保对参数空间进行充分搜索，并且需要预先给定搜索边界；这里的边界取自数据的最大值和最小值。

import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

# Sample data to fit: x positions from -29.0 to 30.0 in unit steps, and the
# y value observed at each position (same values as the question's data set).
xData = numpy.array([-29.0, -28.0, -27.0, -26.0, -25.0, -24.0, -23.0, -22.0, -21.0, -20.0, -19.0, -18.0, -17.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0])
yData = numpy.array([0.002383790226460071, 0.002383790226460071, 0.005164878823996822, 0.004370282081843465, 0.005164878823996822, 0.002383790226460071, 0.003178386968613429, 0.005959475566150178, 0.005959475566150178, 0.006754072308303536, 0.007945967421533572, 0.003575685339690107, 0.001986491855383393, 0.003575685339690107, 0.003972983710766786, 0.00278108859753675, 0.005959475566150178, 0.007151370679380214, 0.004767580452920143, 0.005164878823996822, 0.003575685339690107, 0.004370282081843465, 0.007945967421533572, 0.01311084624553039, 0.01072705601907032, 0.01430274135876043, 0.01231624950337704, 0.01589193484306714, 0.02264600715137068, 0.09654350417163289, 0.05164878823996821, 0.0166865315852205, 0.01549463647199046, 0.01350814461660707, 0.01191895113230036, 0.01191895113230036, 0.00874056416368693, 0.01152165276122368, 0.007151370679380214, 0.009137862534763607, 0.006356773937226857, 0.007151370679380214, 0.00834326579261025, 0.006356773937226857, 0.005562177195073501, 0.006754072308303536, 0.005164878823996822, 0.005164878823996822, 0.005959475566150178, 0.004767580452920143, 0.00278108859753675, 0.007945967421533572, 0.001589193484306714, 0.00278108859753675, 0.003178386968613429, 0.003575685339690107, 0.003178386968613429, 0.004370282081843465, 0.005562177195073501, 0.004370282081843465])


def LorentzianPeakG_Offset(x_in, a, b, c, Offset):
    """Evaluate a Lorentzian-style peak with a constant offset.

    Computes ``a / (1 + ((x_in - b) / c) ** 2) + Offset``. The equation form
    comes from the zunzun.com peak equation search.

    Args:
        x_in: Point(s) at which to evaluate the model.
        a: Numerator of the peak term; the peak term equals ``a`` at
            ``x_in == b``.
        b: Value of ``x_in`` at which the peak term is centred.
        c: Divisor applied to the distance from ``b`` before squaring.
        Offset: Constant added to the peak term.

    Returns:
        The model value(s) at ``x_in``.
    """
    scaled_distance = (x_in - b) / c
    return a / (1.0 + numpy.power(scaled_distance, 2.0)) + Offset

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared residuals of the model against the data.

    This is the objective that the genetic algorithm minimises.

    Args:
        parameterTuple: Model parameters, unpacked positionally into
            ``LorentzianPeakG_Offset`` after ``xData``.

    Returns:
        Sum over all samples of ``(yData - model) ** 2``.
    """
    # Keep warnings raised while the genetic algorithm probes the parameter
    # space from being printed. This installs a process-wide "ignore" filter.
    warnings.filterwarnings("ignore")
    residuals = yData - LorentzianPeakG_Offset(xData, *parameterTuple)
    return numpy.sum(residuals ** 2.0)


def generate_Initial_Parameters():
    """Estimate starting parameters for the curve fit via differential evolution.

    The search bounds are taken from the data: ``a``, ``b`` and ``c`` are
    searched over the x range of ``xData``, and ``Offset`` over
    ``[0, max(yData)]``.

    Returns:
        The best parameter vector found (``result.x``), in the order
        ``(a, b, c, Offset)`` expected by ``LorentzianPeakG_Offset``.
    """
    # min and max of the data are used as search bounds
    minX = min(xData)
    maxX = max(xData)
    maxY = max(yData)

    # NOTE(review): a is bounded by the x range even though it scales the
    # y values -- confirm this is intentional.
    parameterBounds = [
        [minX, maxX],  # search bounds for a
        [minX, maxX],  # search bounds for b
        [minX, maxX],  # search bounds for c
        [0.0, maxY],  # search bounds for Offset
    ]

    # Pass a fixed seed to the optimizer so repeated runs give the same result.
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

# Get a starting point for the solver from the genetic search
geneticParameters = generate_Initial_Parameters()

# Refine that starting point with the non-linear least-squares solver
fittedParameters, pcov = curve_fit(
    LorentzianPeakG_Offset, xData, yData, p0=geneticParameters
)

print('Parameters', fittedParameters)

# Evaluate the fitted model at the original x positions
modelPredictions = LorentzianPeakG_Offset(xData, *fittedParameters)

# Residuals (prediction minus observation) and the fit statistics built on them
absError = modelPredictions - yData

SE = numpy.square(absError)  # squared errors
MSE = SE.mean()  # mean squared error
RMSE = numpy.sqrt(MSE)  # root mean squared error
Rsquared = 1.0 - (absError.var() / yData.var())
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

print()


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Show the raw data together with the fitted model curve.

    Args:
        graphWidth: Figure width in pixels (converted to inches at 100 dpi).
        graphHeight: Figure height in pixels (converted to inches at 100 dpi).
    """
    figure_size = (graphWidth/100.0, graphHeight/100.0)
    figure = plt.figure(figsize=figure_size, dpi=100)
    axes = figure.add_subplot(111)

    # raw data first, drawn as markers
    axes.plot(xData, yData,  'D')

    # then the fitted equation, sampled at 250 points across the x range
    xModel = numpy.linspace(min(xData), max(xData), 250)
    axes.plot(xModel, LorentzianPeakG_Offset(xModel, *fittedParameters))

    # axis labels
    axes.set_xlabel('X Data')
    axes.set_ylabel('Y Data')

    plt.show()
    plt.close('all') # release all pyplot figures once the window is closed

# Show the fit in an 800 x 600 pixel figure
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)

这就是结果：

票数 0

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/52523786

复制

相似问题

问如何在python中校正过拟合的样条曲线
EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问如何在python中校正过拟合的样条曲线EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

资源

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问如何在python中校正过拟合的样条曲线
EN