首页
学习
活动
专区
工具
TVP
发布
社区首页 >问答首页 >如何在python中校正过拟合的样条曲线

如何在python中校正过拟合的样条曲线
EN

Stack Overflow用户
提问于 2018-09-27 02:08:49
回答 2查看 313关注 0票数 0

我有一个数据集,需要在它的条形图上叠加一条平滑曲线。然而,我画出来的曲线看起来严重过拟合。对方希望这条曲线只是平滑地连接若干相邻的极小值和极大值,但我不知道该怎么做。任何帮助都将不胜感激。

代码语言:python
复制
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import spline



# Create a figure with a single axes, then set the figure size to 13 x 7 inches.
fig, ax = plt.subplots()
fig.set_size_inches(13,7, forward=True)

# Bar width passed to plt.bar below.
width=1.0



# Sample data: integer x positions from -29 to 30 with one y value per position.
data=pd.DataFrame({'x':[-29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
                   'y':[0.002383790226460071, 0.002383790226460071, 0.005164878823996822, 0.004370282081843465, 0.005164878823996822, 0.002383790226460071, 0.003178386968613429, 0.005959475566150178, 0.005959475566150178, 0.006754072308303536, 0.007945967421533572, 0.003575685339690107, 0.001986491855383393, 0.003575685339690107, 0.003972983710766786, 0.00278108859753675, 0.005959475566150178, 0.007151370679380214, 0.004767580452920143, 0.005164878823996822, 0.003575685339690107, 0.004370282081843465, 0.007945967421533572, 0.01311084624553039, 0.01072705601907032, 0.01430274135876043, 0.01231624950337704, 0.01589193484306714, 0.02264600715137068, 0.09654350417163289, 0.05164878823996821, 0.0166865315852205, 0.01549463647199046, 0.01350814461660707, 0.01191895113230036, 0.01191895113230036, 0.00874056416368693, 0.01152165276122368, 0.007151370679380214, 0.009137862534763607, 0.006356773937226857, 0.007151370679380214, 0.00834326579261025, 0.006356773937226857, 0.005562177195073501, 0.006754072308303536, 0.005164878823996822, 0.005164878823996822, 0.005959475566150178, 0.004767580452920143, 0.00278108859753675, 0.007945967421533572, 0.001589193484306714, 0.00278108859753675, 0.003178386968613429, 0.003575685339690107, 0.003178386968613429, 0.004370282081843465, 0.005562177195073501, 0.004370282081843465]})

# Draw the raw data as red bars.
plt.bar(data['x'],data['y'],width, color='r',  alpha=0.95)
x=data['x']
y=data['y']

# 300 evenly spaced x values spanning the smallest to the largest x.
x_new = np.linspace(x.min(),x.max(),300)

# Evaluate an order-3 spline of (x, y) at x_new; this is the curve the
# question describes as over-fitted.
# NOTE(review): scipy.interpolate.spline is believed to have been deprecated
# in SciPy 0.19 and removed in SciPy 1.3 - confirm against the installed
# version before running this snippet.
y_smooth =spline(x,y,x_new, order=3,kind='smoothest')

# Overlay the spline curve in blue on top of the bars.
plt.plot(x_new,y_smooth, color='b')

EN

回答 2

Stack Overflow用户

回答已采纳

发布于 2018-10-03 00:59:11

这是我最终得到的答案:

代码语言:python
复制
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline

# Create a figure with a single axes, then set the figure size to 13 x 7 inches.
fig, ax = plt.subplots()
fig.set_size_inches(13, 7, forward=True)

# Bar width passed to plt.bar below.
width = 1.0

#data=pd.read_excel("h:/projects/psc/output/data.xlsx")

# Sample data: integer x positions from -29 to 30 with one y value per position.
data = pd.DataFrame({'x': [-29, -28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
                     'y': [0.002383790226460071, 0.002383790226460071, 0.005164878823996822, 0.004370282081843465, 0.005164878823996822, 0.002383790226460071, 0.003178386968613429, 0.005959475566150178, 0.005959475566150178, 0.006754072308303536, 0.007945967421533572, 0.003575685339690107, 0.001986491855383393, 0.003575685339690107, 0.003972983710766786, 0.00278108859753675, 0.005959475566150178, 0.007151370679380214, 0.004767580452920143, 0.005164878823996822, 0.003575685339690107, 0.004370282081843465, 0.007945967421533572, 0.01311084624553039, 0.01072705601907032, 0.01430274135876043, 0.01231624950337704, 0.01589193484306714, 0.02264600715137068, 0.09654350417163289, 0.05164878823996821, 0.0166865315852205, 0.01549463647199046, 0.01350814461660707, 0.01191895113230036, 0.01191895113230036, 0.00874056416368693, 0.01152165276122368, 0.007151370679380214, 0.009137862534763607, 0.006356773937226857, 0.007151370679380214, 0.00834326579261025, 0.006356773937226857, 0.005562177195073501, 0.006754072308303536, 0.005164878823996822, 0.005164878823996822, 0.005959475566150178, 0.004767580452920143, 0.00278108859753675, 0.007945967421533572, 0.001589193484306714, 0.00278108859753675, 0.003178386968613429, 0.003575685339690107, 0.003178386968613429, 0.004370282081843465, 0.005562177195073501, 0.004370282081843465]})

# Draw the raw data as red bars.
plt.bar(data['x'], data['y'], width, color='r', alpha=0.95)

x = data['x']
y = data['y']

# 300 evenly spaced x values spanning the smallest to the largest x.
x_new = np.linspace(x.min(), x.max(), 300)

# Fit a smoothing spline instead of an interpolating one, so the curve does
# not have to pass through every bar. The spline is first built with the
# default smoothing and then recomputed with an explicit smoothing factor.
spl = UnivariateSpline(x, y)
spl.set_smoothing_factor(0.001)
y_smooth = spl(x_new)

# Overlay the smoothed curve in blue on top of the bars.
plt.plot(x_new, y_smooth, color='b', alpha=0.95)

票数 0
EN

Stack Overflow用户

发布于 2018-09-27 07:04:26

下面是拟合并绘图的代码,所用模型是洛伦兹峰方程的一个变体,它是我在 80 多个峰方程中搜索得到的。代码先用 scipy 的 differential_evolution 遗传算法估计初始参数,再把这些参数交给 curve_fit() 的非线性求解器。scipy 的遗传算法模块使用拉丁超立方采样来保证对参数空间的充分搜索,因此需要给定搜索边界;这里的边界取自数据的最大值和最小值。

代码语言:python
复制
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import differential_evolution
import warnings

# Sample data as module-level arrays: xData holds the x positions
# (-29.0 to 30.0) and yData holds one observed value per position.
# Both are read as globals by the fitting and plotting code below.
xData = numpy.array([-29.0, -28.0, -27.0, -26.0, -25.0, -24.0, -23.0, -22.0, -21.0, -20.0, -19.0, -18.0, -17.0, -16.0, -15.0, -14.0, -13.0, -12.0, -11.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0, -4.0, -3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0])
yData = numpy.array([0.002383790226460071, 0.002383790226460071, 0.005164878823996822, 0.004370282081843465, 0.005164878823996822, 0.002383790226460071, 0.003178386968613429, 0.005959475566150178, 0.005959475566150178, 0.006754072308303536, 0.007945967421533572, 0.003575685339690107, 0.001986491855383393, 0.003575685339690107, 0.003972983710766786, 0.00278108859753675, 0.005959475566150178, 0.007151370679380214, 0.004767580452920143, 0.005164878823996822, 0.003575685339690107, 0.004370282081843465, 0.007945967421533572, 0.01311084624553039, 0.01072705601907032, 0.01430274135876043, 0.01231624950337704, 0.01589193484306714, 0.02264600715137068, 0.09654350417163289, 0.05164878823996821, 0.0166865315852205, 0.01549463647199046, 0.01350814461660707, 0.01191895113230036, 0.01191895113230036, 0.00874056416368693, 0.01152165276122368, 0.007151370679380214, 0.009137862534763607, 0.006356773937226857, 0.007151370679380214, 0.00834326579261025, 0.006356773937226857, 0.005562177195073501, 0.006754072308303536, 0.005164878823996822, 0.005164878823996822, 0.005959475566150178, 0.004767580452920143, 0.00278108859753675, 0.007945967421533572, 0.001589193484306714, 0.00278108859753675, 0.003178386968613429, 0.003575685339690107, 0.003178386968613429, 0.004370282081843465, 0.005562177195073501, 0.004370282081843465])


def LorentzianPeakG_Offset(x_in, a, b, c, Offset):
    """Evaluate a Lorentzian-type peak with a constant offset.

    Computes ``a / (1 + ((x_in - b) / c) ** 2) + Offset``.  The equation
    comes from the zunzun.com peak equation search.

    Args:
        x_in: Point(s) at which to evaluate the model.
        a: Numerator of the peak term; the peak height above ``Offset``
            at ``x_in == b``.
        b: Value of ``x_in`` at which the peak term is largest.
        c: Divisor applied to ``x_in - b`` before squaring.
        Offset: Constant added to the peak term.

    Returns:
        The model value(s) at ``x_in``.
    """
    scaled_distance = (x_in - b) / c
    peak = a / (1.0 + numpy.power(scaled_distance, 2.0))
    return peak + Offset

# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
    """Return the sum of squared errors of the model against the data.

    This is the objective function minimised by the genetic algorithm.

    Args:
        parameterTuple: Model parameters, unpacked positionally into
            ``LorentzianPeakG_Offset`` after the x data.

    Returns:
        The sum over all points of ``(yData - model) ** 2``.
    """
    # Silence warnings raised while the genetic algorithm tries parameters.
    warnings.filterwarnings("ignore")
    predicted = LorentzianPeakG_Offset(xData, *parameterTuple)
    residuals = yData - predicted
    return numpy.sum(residuals ** 2.0)


def generate_Initial_Parameters():
    """Estimate starting parameters for the fit with differential evolution.

    The search bounds are taken from the extremes of the module-level
    ``xData`` and ``yData`` arrays.

    Returns:
        The ``x`` attribute of the ``differential_evolution`` result: the
        parameter vector it found, in the order (a, b, c, Offset).
    """
    x_low, x_high = min(xData), max(xData)
    y_high = max(yData)

    # One [low, high] pair per model parameter, in signature order.
    parameterBounds = [
        [x_low, x_high],  # search bounds for a
        [x_low, x_high],  # search bounds for b
        [x_low, x_high],  # search bounds for c
        [0.0, y_high],    # search bounds for Offset
    ]

    # A fixed seed makes the stochastic search repeatable.
    result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
    return result.x

# Generate initial parameter values with the genetic algorithm.
geneticParameters = generate_Initial_Parameters()

# Curve fit the test data, starting the solver from the genetic estimate.
fittedParameters, pcov = curve_fit(LorentzianPeakG_Offset, xData, yData, geneticParameters)

print('Parameters', fittedParameters)

# Evaluate the fitted model at the original x positions.
modelPredictions = LorentzianPeakG_Offset(xData, *fittedParameters) 

# Signed residuals: model minus data.
absError = modelPredictions - yData

SE = numpy.square(absError) # squared errors
MSE = numpy.mean(SE) # mean squared errors
RMSE = numpy.sqrt(MSE) # Root Mean Squared Error, RMSE
# R-squared computed as one minus the ratio of residual variance to data variance.
Rsquared = 1.0 - (numpy.var(absError) / numpy.var(yData))
print('RMSE:', RMSE)
print('R-squared:', Rsquared)

# Blank line after the statistics output.
print()


##########################################################
# graphics output section
def ModelAndScatterPlot(graphWidth, graphHeight):
    """Plot the raw data as markers with the fitted model drawn over it.

    Reads the module-level ``xData``, ``yData`` and ``fittedParameters``.

    Args:
        graphWidth: Figure width; divided by 100 to get inches at dpi=100.
        graphHeight: Figure height; divided by 100 to get inches at dpi=100.
    """
    size_inches = (graphWidth/100.0, graphHeight/100.0)
    fig = plt.figure(figsize=size_inches, dpi=100)
    ax = fig.add_subplot(111)

    # Raw data first, drawn with diamond markers.
    ax.plot(xData, yData, 'D')

    # Evaluate the fitted equation at 250 points across the x range and
    # draw it as a line.
    curve_x = numpy.linspace(min(xData), max(xData), 250)
    curve_y = LorentzianPeakG_Offset(curve_x, *fittedParameters)
    ax.plot(curve_x, curve_y)

    # Axis labels.
    ax.set_xlabel('X Data')
    ax.set_ylabel('Y Data')

    plt.show()
    # Clean up after using pyplot.
    plt.close('all')

# Requested plot size, then draw the data with the fitted model.
graphWidth, graphHeight = 800, 600
ModelAndScatterPlot(graphWidth, graphHeight)

这就是结果:

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/52523786

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档