Variables¶
scalar¶
- position $x$
vector¶
- location $\vec{v} = (x,y,z)$
matrix¶
- locations $\mathbf{M} = \left[\begin{matrix}x_0 & x_1 & \ldots\\ y_0 & y_1 & \ldots\\ z_0 & z_1 & \ldots\end{matrix}\right]$
- transpose
- $\left[\begin{matrix}1&2\\3&4\\5&6\end{matrix}\right]^T$ = $\left[\begin{matrix}1&3&5\\2&4&6\end{matrix}\right]$
- multiplication
- $\left[\begin{matrix}a&b\\c&d\end{matrix}\right] \left[\begin{matrix}e&f\\g&h\end{matrix}\right] = \left[\begin{matrix}ae+bg&af+bh\\ce+dg&cf+dh\end{matrix}\right]$
- identity
- $I = \left[\begin{matrix}1&0&\ldots\\0&1&\ldots\\\vdots&\vdots&\ddots\end{matrix}\right]$
- inverse
- $M^{-1}M = I$
- pseudoinverse
- used for matrices that can't be exactly inverted
- important for fitting, which can be over- or under-determined
- determinant
- $|M|$
- measures how much a matrix scales space
- found with a recursive cofactor expansion
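A minimal numpy sketch of the matrix operations above (the matrix values here are arbitrary illustrations, not from the notes):
import numpy as np
A = np.array([[1,2],[3,4]])
B = np.array([[5,6],[7,8]])
print(A.T)                                      # transpose
print(A@B)                                      # matrix multiplication
print(np.eye(2))                                # identity
print(np.linalg.inv(A)@A)                       # inverse times A recovers the identity
print(np.linalg.pinv(np.array([[1,2],[2,4]])))  # pseudoinverse of a singular matrix
print(np.linalg.det(A))                         # determinant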
Functions¶
linear¶
- $y = ax$
affine¶
- $y = ax+b$
polynomial¶
- 1D: $f(x) = c_0+c_1x+c_2x^2+\ldots$
- 2D: $f(x,y) = c_{00}+c_{10}x+c_{01}y+c_{11}xy+c_{20}x^2+c_{02}y^2+\ldots$
- $\ldots$
nonlinear¶
- trigonometric
- $\sum_i a_i \sin(b_i x + c_i)$
- radial basis functions
- $\sum_i a_i f(r_i) = \sum_i a_i f(|\vec{x}-\vec{x_i}|)$
- Gaussian mixture models
- $\sum_i a_i\frac{1}{\sqrt{2\pi\sigma_i^2}}e^{-(x-x_i)^2/2\sigma_i^2}$
- deep neural networks
- $f\left(\sum_j w_{ij}~g\left(\sum_k w_{jk}x_k+b_j\right)+b_i\right) \ldots$
- ...
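As an illustration of the last form, a minimal sketch of a one-hidden-layer network forward pass (the weights, biases, and the choices of $g=\tanh$ and $f=$ identity are arbitrary assumptions):
import numpy as np
np.random.seed(0)
x = np.random.rand(3)            # input vector x_k
w_jk = np.random.randn(4,3)      # input-to-hidden weights
b_j = np.random.randn(4)         # hidden biases
w_ij = np.random.randn(2,4)      # hidden-to-output weights
b_i = np.random.randn(2)         # output biases
g = np.tanh                      # hidden nonlinearity g
f = lambda u: u                  # output function f (identity here)
y = f(w_ij@g(w_jk@x+b_j)+b_i)    # f(sum_j w_ij g(sum_k w_jk x_k + b_j) + b_i)
print(y)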
sum¶
- $\sum_{i=1}^N f(x_i)$
- add up the values of the function $f$ evaluated at the points $x_1, x_2, \ldots, x_i, \ldots, x_N$
integral¶
- $\int_{x_\min}^{x_\max} f(x)~dx$
- limit of sum for infinitesimal slices $dx$
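A minimal sketch of the integral as the limit of a sum over small slices (the function and interval here are arbitrary assumptions):
import numpy as np
dx = 0.001
x = np.arange(0,np.pi,dx)     # slice the interval [0, pi] into widths dx
print(np.sum(np.sin(x))*dx)   # Riemann sum, approaches the integral (= 2) as dx -> 0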
derivative¶
- first derivative
- $\cfrac{df}{dx}$
- limit of the ratio $\Delta f/\Delta x$ as $\Delta x \rightarrow 0$
- measures slope
- second derivative
- $\cfrac{d^2f}{dx^2}$
- measures curvature
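A minimal finite-difference sketch of these limits (the function, evaluation point, and step size are arbitrary assumptions):
import numpy as np
dx = 1e-5
f = np.sin
x = 1.0
print((f(x+dx)-f(x-dx))/(2*dx))        # first derivative, approaches cos(1)
print((f(x+dx)-2*f(x)+f(x-dx))/dx**2)  # second derivative, approaches -sin(1)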
distance¶
- Euclidean $|\vec{x}-\vec{y}| = \sqrt{\sum_{d=1}^D (x_d-y_d)^2}$
- can use other metrics
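A quick numpy check of the Euclidean distance (the vectors here are arbitrary):
import numpy as np
x = np.array([1.0,2.0,3.0])
y = np.array([4.0,6.0,3.0])
print(np.sqrt(np.sum((x-y)**2)))  # explicit formula
print(np.linalg.norm(x-y))        # equivalent built-in Euclidean norm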
Vandermonde matrix¶
- data $x_i$, functions $f_n(x_i)$
- $\mathbf{M} = \left[\begin{matrix} f_1(x_1) & f_2(x_1) & \ldots \\ f_1(x_2) & f_2(x_2) & \ldots \\ \vdots & \vdots & \ddots \end{matrix}\right] $
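A minimal sketch of building this matrix for polynomial basis functions $f_n(x)=x^{n-1}$ (the data points here are arbitrary assumptions):
import numpy as np
x = np.array([0.0,1.0,2.0,3.0])
M = np.column_stack([x**n for n in range(3)])  # columns are 1, x, x^2 evaluated at the data
print(M)
print(np.vander(x,3,increasing=True))          # numpy's built-in equivalent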
Fitting¶
- residual: difference $\epsilon_i$ between data value $y_i$ and fit $f(x_i)$
- $\epsilon_i = y_i-f(x_i)$
- loss: a single number that combines the residuals, minimized by the fit
- least squares ($L^2$ norm): sum of squares
- $\sum_i \epsilon_i^2$
- most common
- $L^1$ norm: sum of absolute values
- $\sum_i |\epsilon_i|$
- less sensitive to outliers
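A minimal sketch comparing the two losses (the residual values, including one outlier, are arbitrary assumptions):
import numpy as np
eps = np.array([0.1,-0.2,0.05,3.0])  # residuals, with one outlier
print(np.sum(eps**2))                # L2 loss: dominated by the outlier
print(np.sum(np.abs(eps)))           # L1 loss: less sensitive to it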
linear least squares¶
- used for models where the coefficients appear linearly
- algorithm: Singular Value Decomposition (SVD)
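A minimal sketch of solving a linear least-squares problem through the SVD pseudoinverse (the data and affine model here are arbitrary assumptions; np.linalg.lstsq, used in the cells below, solves the same problem):
import numpy as np
np.random.seed(0)
x = np.random.rand(50)
y = 2*x+1+np.random.normal(0,0.1,50)      # noisy line
M = np.c_[np.ones(50),x]                  # Vandermonde matrix for an affine model
U,S,Vt = np.linalg.svd(M,full_matrices=False)
c = Vt.T@np.diag(1/S)@U.T@y               # pseudoinverse solution c = V S^-1 U^T y
print(c)
print(np.linalg.lstsq(M,y,rcond=None)[0]) # same answer from the library routine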
polynomial¶
1D¶
- routine: polyfit
- find the least-squares fit for a 1D polynomial
- function: $y=c_0+c_1x+c_2x^2$
In [1]:
import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 2
noise = 0.05
npts = 100
np.set_printoptions(precision=3)
np.random.seed(10)
c = [-.3,1,0.5]
print(f"data generation coefficients: {c}")
x = xmin+(xmax-xmin)*np.random.rand(npts) # generate random x
y = c[2]+c[1]*x+c[0]*x*x+np.random.normal(0,noise,npts) # evaluate polynomial at x and add noise
coeff1 = np.polyfit(x,y,1) # fit first-order polynomial
coeff2 = np.polyfit(x,y,2) # fit second-order polynomial
xfit = np.linspace(xmin,xmax,npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit) # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")
plt.figure()
plt.plot(x,y,'o')
plt.plot(xfit,yfit1,'g-',label='linear')
plt.plot(xfit,yfit2,'r-',label='quadratic')
plt.legend()
plt.show()
data generation coefficients: [-0.3, 1, 0.5]
first-order fit coefficients: [0.402 0.711]
second-order fit coefficients: [-0.308 1.006 0.507]
In [2]:
%matplotlib ipympl
import numpy as np
import matplotlib.pyplot as plt
xmin = ymin = -2
xmax = ymax = 2
noise = 0.05
npts = 100
np.set_printoptions(precision=3)
np.random.seed(10)
c = [1,-1,0.5,-1.5,2,5]
print(f"data generation coefficients: {c}")
x = xmin+(xmax-xmin)*np.random.rand(npts) # generate random x
y = ymin+(ymax-ymin)*np.random.rand(npts) # generate random y
z = c[0]+c[1]*x+c[2]*y+c[3]*x*y+c[4]*x*x+c[5]*y*y+np.random.normal(0,noise,npts) # evaluate polynomial and add noise
M = np.c_[np.ones(npts),x,y,x*y,x*x,y*y] # construct Vandermonde matrix
cfit,residuals,rank,values = np.linalg.lstsq(M,z) # do least-squares fit
print(f"fit coefficients: {cfit}")
fig = plt.figure()
fig.canvas.header_visible = False
ax = fig.add_subplot(projection='3d') # add 3D axes
ax.scatter(x,y,z)
xfit = np.linspace(xmin,xmax,npts)
yfit = np.linspace(ymin,ymax,npts)
Xfit,Yfit = np.meshgrid(xfit,yfit)
Zfit = cfit[0]+cfit[1]*Xfit+cfit[2]*Yfit+cfit[3]*Xfit*Yfit+cfit[4]*Xfit*Xfit+cfit[5]*Yfit*Yfit # evaluate fit surface
ax.plot_surface(Xfit,Yfit,Zfit,cmap='gray',alpha=0.5) # plot fit surface
plt.show()
data generation coefficients: [1, -1, 0.5, -1.5, 2, 5]
fit coefficients: [ 1.004 -0.999 0.504 -1.498 1.998 4.998]
the problems with polynomials¶
- can't fit sharp features
- divergences
In [3]:
import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 10
npts = 100
x = np.linspace(xmin,xmax,npts)
y = np.heaviside(-1+2*(x-xmin)/(xmax-xmin),0.5) # generate step to fit
xplot = np.linspace(xmin-0.2,xmax+0.2,npts)
coeff1 = np.polyfit(x,y,1) # fit and evaluate order 1 polynomial
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xplot)
coeff4 = np.polyfit(x,y,4) # fit and evaluate order 4 polynomial
pfit4 = np.poly1d(coeff4)
yfit4 = pfit4(xplot)
coeff15 = np.polyfit(x,y,15) # fit and evaluate order 15 polynomial
pfit15 = np.poly1d(coeff15)
yfit15 = pfit15(xplot)
fig = plt.figure()
fig.canvas.header_visible = False
plt.plot(x,y,'bo',label='data')
plt.plot(xplot,yfit1,'g-',label='order 1')
plt.plot(xplot,yfit4,'c-',label='order 4')
plt.plot(xplot,yfit15,'r-',label='order 15')
plt.legend()
plt.show()
radial basis function (RBF)¶
- expand with a sum over functions that depend on distance from anchor points
- $f(\vec{x}) = \sum_i c_i f(r_i) = \sum_i c_i f(|\vec{x}-\vec{x_i}|)$
- adds flexibility by increasing the number of terms rather than their order, so the fit doesn't diverge
- many possible basis functions, e.g. $r^3$
- function: $y = \sum_i c_i |\vec{x}-\vec{x}_i|^3$
In [4]:
import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 10
npts = 100
np.random.seed(10)
x = np.linspace(xmin,xmax,npts)
y = np.heaviside(-1+2*(x-xmin)/(xmax-xmin),0.5) # generate step to fit
xplot = np.linspace(xmin-0.2,xmax+0.2,npts)
ncenters = 5
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose 5 random RBF centers
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
-np.outer(np.ones(npts),centers))**3
cfit5,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
yfit5 = (np.abs(np.outer(xplot,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@cfit5 # evaluate fit
ncenters = 50
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose 50 random RBF centers
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
-np.outer(np.ones(npts),centers))**3
cfit50,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
yfit50 = (np.abs(np.outer(xplot,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@cfit50 # evaluate fit
fig = plt.figure()
fig.canvas.header_visible = False
plt.plot(x,y,'bo',label='data')
plt.plot(xplot,yfit5,'g-',label='5 anchors')
plt.plot(xplot,yfit50,'r-',label='50 anchors')
plt.legend()
plt.show()
In [5]:
import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 2
noise = 0.05
npts = 100
ncenters = 20
c = [-.3,1,0.5]
np.random.seed(10)
x = xmin+(xmax-xmin)*np.random.rand(npts) # generate random x
y = c[2]+c[1]*x+c[0]*x*x+np.random.normal(0,noise,npts) # evaluate polynomial at x and add noise
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose random RBF centers from data
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
-np.outer(np.ones(npts),centers))**3
coeff,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
xfit = np.linspace(xmin,xmax,npts)
yfit = (np.abs(np.outer(xfit,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@coeff # evaluate fit
fig = plt.figure()
fig.canvas.header_visible = False
plt.plot(xfit,np.abs(xfit-centers[0])**3,color=(0.85,0.85,0.85),label='$r^3$ basis functions')
for i in range(ncenters):
plt.plot(xfit,np.abs(xfit-centers[i])**3,color=(0.85,0.85,0.85))
plt.plot(x,y,'o')
plt.plot(xfit,yfit,'g-',label='RBF fit')
plt.ylim(-0.25,1.5)
plt.legend()
plt.show()
nonlinear least squares¶
- used for models where the coefficients appear nonlinearly
- requires iterative solution
- routine: least_squares
- algorithm: Levenberg-Marquardt, trust region
- function: $y = c_0\,(c_1+\tanh(c_2(x-c_3)))$
In [6]:
import numpy as np
from scipy.optimize import least_squares
import matplotlib.pyplot as plt
#
# generate step function data to fit
#
xmin = 0
xmax = 10
npts = 100
x = np.linspace(xmin,xmax,npts)
y = np.heaviside(-1+2*(x-xmin)/(xmax-xmin),0.5)
coeff = np.array([1,0.5,5,2])
#
# define tanh function and residuals
#
def f(coeff,x):
return (coeff[0]*(coeff[1]+np.tanh(coeff[2]*(x-coeff[3]))))
def residuals(coeff,x,y):
return f(coeff,x)-y
#
# fit
#
result2 = least_squares(residuals,coeff,args=(x,y),max_nfev=2)
result10 = least_squares(residuals,coeff,args=(x,y),max_nfev=10)
resultend = least_squares(residuals,coeff,args=(x,y))
#
# plot
#
fig = plt.figure()
fig.canvas.header_visible = False
plt.plot(x,y,'bo',label='data')
plt.plot(x,f(coeff,x),'b-',label='start')
plt.plot(x,f(result2.x,x),'c-',label='2 evaluations')
plt.plot(x,f(result10.x,x),'g-',label='10 evaluations')
plt.plot(x,f(resultend.x,x),'r-',label='end')
plt.legend()
plt.show()
In [7]:
import numpy as np
import matplotlib.pyplot as plt
xmin = -1
xmax = 2
noise = 1
npts = 100
c = [-3,2,1]
np.random.seed(10)
#
# generate training data
#
x = xmin+(xmax-xmin)*np.random.rand(npts)
y = c[2]+c[1]*x+c[0]*x*x+np.random.normal(0,noise,npts)
#
# generate testing data
#
xtest = xmin+(xmax-xmin)*np.random.rand(npts)
ytest = c[2]+c[1]*xtest+c[0]*xtest*xtest
#
# loop over number of centers, fit, and save
#
errors = []
coeffs = []
centers = []
ncenters = np.arange(1,101,1)
for ncenter in ncenters:
indices = np.random.uniform(low=0,high=len(x),size=ncenter).astype(int)
center = x[indices]
M = np.abs(np.outer(x,np.ones(ncenter))
-np.outer(np.ones(npts),center))**3
coeff,residuals,rank,values = np.linalg.lstsq(M,y)
yfit = (np.abs(np.outer(xtest,np.ones(ncenter))-np.outer(np.ones(npts),center))**3)@coeff
errors.append(np.mean(np.abs(yfit-ytest)))
coeffs.append(coeff)
centers.append(center)
#
# plot data, fits, and errors
#
fig,axs = plt.subplots(2,1)
fig.canvas.header_visible = False
axs[0].plot(x,y,'o',alpha=0.5)
xplot = np.linspace(xmin,xmax,npts)
n = 1
yplot = (np.abs(np.outer(xplot,np.ones(n))-
np.outer(np.ones(npts),centers[n-1]))**3)@coeffs[n-1]
axs[0].plot(xplot,yplot,'g-',label='1 anchor')
n = 10
yplot = (np.abs(np.outer(xplot,np.ones(n))-
np.outer(np.ones(npts),centers[n-1]))**3)@coeffs[n-1]
axs[0].plot(xplot,yplot,'k-',label='10 anchors')
n = 100
yplot = (np.abs(np.outer(xplot,np.ones(n))-
np.outer(np.ones(npts),centers[n-1]))**3)@coeffs[n-1]
axs[0].plot(xplot,yplot,'r-',label='100 anchors')
axs[0].legend()
axs[1].plot(ncenters,errors)
axs[1].set_ylabel('error')
axs[1].set_xlabel('number of centers')
plt.show()
regularization¶
- constrain fits with prior beliefs
- example: minimize error plus a penalty term for the magnitude of the coefficients (called ridge regression, Tikhonov regularization, weight decay)
- for linear model coefficients $\vec{c}$, Vandermonde matrix $M$, observations $\vec{y}$, regularization strength $\lambda$
- need to minimize $|M\vec{c}-\vec{y}|^2+\lambda|\vec{c}|^2$
- this is solved by $(M^TM+\lambda I)\vec{c} = M^T\vec{y}$
- therefore $\vec{c} = (M^TM+\lambda I)^{-1}M^T\vec{y}$
In [8]:
import numpy as np
import matplotlib.pyplot as plt
xmin = -1
xmax = 2
noise = 1
npts = 100
c = [-3,2,1]
np.random.seed(10)
#
# generate data
#
x = xmin+(xmax-xmin)*np.random.rand(npts)
y = c[2]+c[1]*x+c[0]*x*x+np.random.normal(0,noise,npts)
#
# generate Vandermonde matrix
#
ncenters = 100
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int)
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters))
-np.outer(np.ones(npts),centers))**3
#
# invert matrices to find weight-regularized coefficients
#
Lambda = 1
left = (M.T)@M+Lambda*np.eye(ncenters)
right = (M.T)@y
coeff1 = np.linalg.pinv(left)@right
Lambda = 0
left = (M.T)@M+Lambda*np.eye(ncenters)
right = (M.T)@y
coeff0 = np.linalg.pinv(left)@right
#
# plot fits
#
xfit = np.linspace(xmin,xmax,npts)
yfit0 = (np.abs(np.outer(xfit,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@coeff0
yfit1 = (np.abs(np.outer(xfit,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@coeff1
plt.figure()
plt.plot(x,y,'o',alpha=0.5)
plt.plot(xfit,yfit0,'y-',label=r'$\lambda=0$')
plt.plot(xfit,yfit1,'r-',label=r'$\lambda=1$')
plt.legend()
plt.show()
bootstrap¶
- drawing points randomly from a small data set to generate larger synthetic data sets
- seems like cheating, but has a solid statistical basis
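A minimal sketch of the idea, resampling a small data set with replacement to estimate the spread of its mean (the data values and number of resamples here are arbitrary assumptions):
import numpy as np
np.random.seed(0)
data = np.random.normal(0,1,20)                  # small original data set
means = [np.mean(np.random.choice(data,size=len(data),replace=True))
         for i in range(1000)]                   # means of bootstrap resamples
print(np.mean(data),np.std(means))               # sample mean and its bootstrap standard error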
(c) Neil Gershenfeld for Fab Futures, 12/14/25