import numpy as np
import warnings
x = np.array([0.0, 1.0, 2.0, 3.0,  4.0,  5.0])
y = np.array([0.0, 0.8, 0.9, 0.1, -0.8, -1.0])

import matplotlib.pyplot as plt
plt.plot(x, y, 'o')
plt.xlabel("x")
plt.ylabel("y")
plt.show()

z = np.polyfit(x, y, 3)
z

array([ 0.08703704, -0.81349206,  1.69312169, -0.03968254])

p = np.poly1d(z)
p(0.5)

np.float64(0.6143849206349195)

p30 = np.poly1d(np.polyfit(x, y, 30))

/tmp/ipykernel_20695/2216620616.py:1: RankWarning: Polyfit may be poorly conditioned
  p30 = np.poly1d(np.polyfit(x, y, 30))

with warnings.catch_warnings():
    warnings.simplefilter('ignore', np.exceptions.RankWarning)
    p30 = np.poly1d(np.polyfit(x, y, 30))

xp = np.linspace(-2, 6, 100)
_ = plt.plot(x, y, '.', xp, p(xp), '-', xp, p30(xp), '--')
plt.ylim(-2,2)
(-2, 2)
plt.show()

import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 2
noise = 0.05
npts = 100
a = 0.5
b = 1
c = -.3
np.random.seed(0)
x = xmin+(xmax-xmin)*np.random.rand(npts) # generate random x
y = a+b*x+c*x*x+np.random.normal(0,noise,npts) # evaluate polynomial at x and add noise
coeff1 = np.polyfit(x,y,1) # fit first-order polynomial
coeff2 = np.polyfit(x,y,2) # fit second-order polynomial
xfit = np.arange(xmin,xmax,(xmax-xmin)/npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit) # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")
plt.plot(x,y,'o')
plt.plot(xfit,yfit1,'g-',label='linear')
plt.plot(xfit,yfit2,'r-',label='quadratic')
plt.legend()
plt.show()

first-order fit coefficients: [0.41918275 0.69084816]
second-order fit coefficients: [-0.3225953   1.04205042  0.49756991]

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv("datasets/FA_students_graduates_2012_2025.csv")
df

plt.figure(figsize=(10, 5))
plt.ylim(0, 400)        
plt.xlim(2010, 2026)
plt.plot(df["year"],df["students"],'o')
plt.xlabel("Year")
plt.ylabel("Number of Students")
plt.title("FabAcademy Students per Year")
plt.show()

x=df["year"].to_numpy()
y=df["students"].to_numpy()

coeff1 = np.polyfit(x,y,1) # fit 1st-order polynomial
coeff2 = np.polyfit(x,y,2) # fit 2nd-order polynomial
coeff10 = np.polyfit(x,y,10) # fit 10th-order polynomial
coeff30 = np.polyfit(x,y,30) # fit 30th-order polynomial

npts=200
xfit = np.arange(x.min(),x.max(),(x.max()-x.min())/npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit) # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")
pfit10 = np.poly1d(coeff10)
yfit10 = pfit10(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff10}")
pfit30 = np.poly1d(coeff30)
yfit30 = pfit30(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff30}")

plt.figure(figsize=(8,5))
plt.ylim(0, 400)
plt.xlim(x.min()-1, x.max()+1)

# 元データ
plt.plot(x, y, 'o', label="data")

# フィット
plt.plot(xfit, pfit1(xfit), 'g-', label="linear fit")
plt.plot(xfit, pfit2(xfit), 'r-', label="quadratic fit")
plt.plot(xfit, pfit10(xfit), 'b-', label="10th-order fit")
plt.plot(xfit, pfit30(xfit), 'c-', label="30th-order fit")

plt.xlabel("Year")
plt.ylabel("Number of Students")
plt.title("FabAcademy Students per Year")
plt.legend()
plt.grid(True)
plt.show()

first-order fit coefficients: [ 7.02417582e+00 -1.39784418e+04]
second-order fit coefficients: [-3.05700549e+00  1.23481554e+04 -1.24692154e+07]
second-order fit coefficients: [-5.26922897e-20  2.12858430e-16  7.10797571e-14 -5.77744809e-10
 -1.16622166e-06  6.26676245e-07  4.75802118e+00  9.59286628e+03
 -4.87164709e+06 -5.86173729e+10  5.91965040e+13]
second-order fit coefficients: [ 1.78795778e-86 -1.44007127e-83 -6.10715195e-80 -1.06389317e-76
 -1.03140260e-73  4.55546552e-71  5.10287993e-67  1.52400870e-63
  3.30071158e-60  5.63778848e-57  7.20783548e-54  3.59048655e-51
 -1.54016639e-47 -6.99942890e-44 -1.93584155e-40 -4.26760130e-37
 -7.86801453e-34 -1.14818422e-30 -1.00438783e-27  1.10146780e-24
  8.28209481e-21  2.61061981e-17  6.20320329e-14  1.16785477e-10
  1.57779006e-07  5.25007079e-05 -5.45463312e-01 -2.25886841e+03
 -5.30683563e+06 -5.11022138e+09  2.59758947e+13]

/tmp/ipykernel_396/2768021445.py:6: RankWarning: Polyfit may be poorly conditioned
  coeff10 = np.polyfit(x,y,10) # fit 10th-order polynomial
/tmp/ipykernel_396/2768021445.py:7: RankWarning: Polyfit may be poorly conditioned
  coeff30 = np.polyfit(x,y,30) # fit 30th-order polynomial

import numpy as np
x = np.array([0, 1, 2, 3])
y = np.array([-1, 0.2, 0.9, 2.1])

A = np.vstack([x, np.ones(len(x))]).T
A

array([[0., 1.],
       [1., 1.],
       [2., 1.],
       [3., 1.]])

m, c = np.linalg.lstsq(A, y)[0]
m, c

(np.float64(1.0), np.float64(-0.9499999999999996))

import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 2
noise = 0.05
npts = 100
ncenters = 5
a = 0.5
b = 1
c = -.3
np.random.seed(0)
x = xmin+(xmax-xmin)*np.random.rand(npts) # generate random x
y = a+b*x+c*x*x+np.random.normal(0,noise,npts) # evaluate polynomial at x and add noise

indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose random RBF centers from data
print(indices)

centers = np.sort(x[indices])
M = (np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
   -np.outer(np.ones(npts),centers))**3
#print(M)
b,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
xfit = np.linspace(xmin,xmax,npts)
yfit = ((np.outer(xfit,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@b # evaluate fit
plt.plot(x,y,'o')
plt.plot(xfit,yfit,'g-',label='RBF fit')
plt.plot(xfit,(xfit-centers[0])**3,color=(0.75,0.75,0.75),label='$r^3$ basis functions')
for i in range(1,ncenters):
    plt.plot(xfit,(xfit-centers[i])**3,color=(0.85,0.85,0.85))
plt.ylim(-0.25,1.5)
plt.legend()
plt.show()

[20 42 37 46 27]

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
ncenters = 5
df = pd.read_csv("datasets/FA_students_graduates_2012_2025.csv")

x=df["year"].to_numpy()
y=df["students"].to_numpy()

indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose random RBF centers from data
centers = np.sort(x[indices])
print(f"indices={indices}, centers={centers}")
npts = len(x)
print(npts)
M = (np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
   -np.outer(np.ones(npts),centers))**3
print(M)

xmin = x.min()
xmax = x.max()

b,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
xfit = np.linspace(xmin,xmax,npts)
yfit = ((np.outer(xfit,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@b # evaluate fit

plt.figure(figsize=(8,5))
plt.ylim(0, 400)
plt.xlim(x.min()-1, x.max()+1)

plt.plot(x,y,'o')
plt.plot(xfit,yfit,'g-',label='RBF fit')
plt.plot(xfit,(xfit-centers[0])**3,color=(0.75,0.75,0.75),label='$r^3$ basis functions')
for i in range(1,ncenters):
    plt.plot(xfit,(xfit-centers[i])**3,color=(0.75,0.75,0.75))

plt.legend()
plt.show()

indices=[10  4 11  7  5], centers=[2016 2017 2019 2022 2023]
14
[[-6.400e+01 -1.250e+02 -3.430e+02 -1.000e+03 -1.331e+03]
 [-2.700e+01 -6.400e+01 -2.160e+02 -7.290e+02 -1.000e+03]
 [-8.000e+00 -2.700e+01 -1.250e+02 -5.120e+02 -7.290e+02]
 [-1.000e+00 -8.000e+00 -6.400e+01 -3.430e+02 -5.120e+02]
 [ 0.000e+00 -1.000e+00 -2.700e+01 -2.160e+02 -3.430e+02]
 [ 1.000e+00  0.000e+00 -8.000e+00 -1.250e+02 -2.160e+02]
 [ 8.000e+00  1.000e+00 -1.000e+00 -6.400e+01 -1.250e+02]
 [ 2.700e+01  8.000e+00  0.000e+00 -2.700e+01 -6.400e+01]
 [ 6.400e+01  2.700e+01  1.000e+00 -8.000e+00 -2.700e+01]
 [ 1.250e+02  6.400e+01  8.000e+00 -1.000e+00 -8.000e+00]
 [ 2.160e+02  1.250e+02  2.700e+01  0.000e+00 -1.000e+00]
 [ 3.430e+02  2.160e+02  6.400e+01  1.000e+00  0.000e+00]
 [ 5.120e+02  3.430e+02  1.250e+02  8.000e+00  1.000e+00]
 [ 7.290e+02  5.120e+02  2.160e+02  2.700e+01  8.000e+00]]

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
ncenters = 5
df = pd.read_csv("datasets/FA_students_graduates_2012_2025.csv")

x=df["year"].to_numpy()
y=df["students"].to_numpy()

#indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose random RBF centers from data
indices = np.random.randint(low=0,high=len(x),size=ncenters).astype(int) # choose random RBF centers from data
centers = np.sort(x[indices])
print(f"indices={indices}, centers={centers}")
npts = len(x)
print(npts)
M = (np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
   -np.outer(np.ones(npts),centers))**3
#print(M)

xmin = x.min()
xmax = x.max()

b,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
xfit = np.linspace(xmin,xmax,npts)
yfit = ((np.outer(xfit,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@b # evaluate fit

plt.figure(figsize=(8,5))
plt.ylim(0, 400)
plt.xlim(x.min()-1, x.max()+1)

plt.plot(x,y,'o')
plt.plot(xfit,yfit,'g-',label='RBF fit')
plt.plot(xfit,(xfit-centers[0])**3,color=(0.75,0.75,0.75),label='$r^3$ basis functions')
for i in range(1,ncenters):
    plt.plot(xfit,(xfit-centers[i])**3,color=(0.75,0.75,0.75))

plt.legend()
plt.show()

indices=[4 5 7 1 8], centers=[2013 2016 2017 2019 2020]
14

plt.figure(figsize=(8,5))

plt.gca().set_facecolor("black")
plt.gcf().patch.set_facecolor("black")

plt.ylim(0, 400)
plt.xlim(x.min()-1, x.max()+1)
plt.xlabel("Year", color="white")
plt.ylabel("Student Count", color="white")
plt.tick_params(colors="white")

plt.plot(x, y, 'o', color="#7FFFD4", markersize=8, label="Data")

plt.plot(xfit, yfit, color="#A0C8FF", linewidth=2.2, label="RBF fit")

for i in range(ncenters):
    plt.plot(
        xfit, (xfit - centers[i])**3,
        color="#555555", linewidth=1, alpha=0.6
    )

legend = plt.legend()
plt.setp(legend.get_texts(), color="white")

plt.tight_layout()
plt.show()

import numpy as np
from scipy.optimize import least_squares
import matplotlib.pyplot as plt
#
# generate step function data to fit
#
xmin = 0
xmax = 10
npts = 100
x = np.linspace(xmin,xmax,npts)
y = np.heaviside(-1+2*(x-xmin)/(xmax-xmin),0.5)
coeff = np.array([1,0.5,5,2])

#
# define tanh function and residuals
#
def f(coeff,x):
    return (coeff[0]*(coeff[1]+np.tanh(coeff[2]*(x-coeff[3]))))
def residuals(coeff,x,y):
    return f(coeff,x)-y
#
# fit
#
result2 = least_squares(residuals,coeff,args=(x,y),max_nfev=2)
result10 = least_squares(residuals,coeff,args=(x,y),max_nfev=10)
resultend = least_squares(residuals,coeff,args=(x,y))
#
# plot
#
plt.plot(x,y,'bo',label='data')
plt.plot(x,f(coeff,x),'b-',label='start')
plt.plot(x,f(result2.x,x),'c-',label='2 evaluations')
plt.plot(x,f(result10.x,x),'g-',label='10 evaluation')
plt.plot(x,f(resultend.x,x),'r-',label='end')
plt.legend()
plt.show()

import numpy as np
from scipy.optimize import least_squares
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("datasets/FA_students_graduates_2012_2025.csv")
x = df["year"].to_numpy()
y = df["students"].to_numpy()

coeff1 = np.array([200, 0.5, 1, 2016])  
coeff2 = np.array([100, 0, 0.5, 2016])  


def f(coeff, x):
    return coeff[0] * (coeff[1] + np.tanh(coeff[2] * (x - coeff[3])))

def residuals(coeff, x, y):
    return f(coeff, x) - y

# solve
result1_2 = least_squares(residuals,coeff1,args=(x,y),max_nfev=2)
result1_5 = least_squares(residuals,coeff1,args=(x,y),max_nfev=5)
result1_10 = least_squares(residuals,coeff1,args=(x,y),max_nfev=10)
result1_end = least_squares(residuals,coeff1,args=(x,y))

result2_2 = least_squares(residuals,coeff2,args=(x,y),max_nfev=2)
result2_5 = least_squares(residuals,coeff2,args=(x,y),max_nfev=5)
result2_10 = least_squares(residuals,coeff2,args=(x,y),max_nfev=10)
result2_end = least_squares(residuals,coeff2,args=(x,y))

# plot
plt.figure(figsize=(20,5))
plt.subplot(1,3,1)
plt.plot(x,y,'bo',label='data')
plt.plot(x,f(coeff1,x),'b-',label='start')
plt.plot(x,f(result1_2.x,x),'c-',label='2 evaluations')
plt.plot(x,f(result1_5.x,x),'m-',label='5 evaluations')
plt.plot(x,f(result1_10.x,x),'g-',label='10 evaluation')
plt.plot(x,f(result1_end.x,x),'r-',label='end')
#plt.legend(loc='center right',bbox_to_anchor=(1.4, 0.5))

plt.subplot(1,3,2)
plt.plot(x,y,'bo',label='data')
plt.plot(x,f(coeff2,x),'b-',label='start')
plt.plot(x,f(result2_2.x,x),'c-',label='2 evaluations')
plt.plot(x,f(result2_5.x,x),'m-',label='5 evaluations')
plt.plot(x,f(result2_10.x,x),'g-',label='10 evaluation')
plt.plot(x,f(result2_end.x,x),'r-',label='end')
plt.legend(loc='center right',bbox_to_anchor=(1.4, 0.5))


plt.tight_layout()
plt.show()

import numpy as np
from scipy.optimize import least_squares
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("datasets/FA_students_graduates_2012_2025.csv")
x = df["year"].to_numpy()
y = df["students"].to_numpy()
#normalization
y = (y - y.min()) / (y.max() - y.min())
print(y)
#coeff = np.array([1, 0.5, 0.1, 2016])  
coeff1 = np.array([1, 0.5, 1, 2016])  
coeff2 = np.array([1, 0, 0.5, 2016])  
coeff3 = np.array([1, 0, 0.5, 2017])  


def f(coeff, x):
    return coeff[0] * (coeff[1] + np.tanh(coeff[2] * (x - coeff[3])))

def residuals(coeff, x, y):
    return f(coeff, x) - y

# solve
result1_2 = least_squares(residuals,coeff1,args=(x,y),max_nfev=2)
result1_5 = least_squares(residuals,coeff1,args=(x,y),max_nfev=5)
result1_10 = least_squares(residuals,coeff1,args=(x,y),max_nfev=10)
result1_end = least_squares(residuals,coeff1,args=(x,y))

result2_2 = least_squares(residuals,coeff2,args=(x,y),max_nfev=2)
result2_5 = least_squares(residuals,coeff2,args=(x,y),max_nfev=5)
result2_10 = least_squares(residuals,coeff2,args=(x,y),max_nfev=10)
result2_end = least_squares(residuals,coeff2,args=(x,y))

result3_2 = least_squares(residuals,coeff3,args=(x,y),max_nfev=2)
result3_5 = least_squares(residuals,coeff3,args=(x,y),max_nfev=5)
result3_10 = least_squares(residuals,coeff3,args=(x,y),max_nfev=10)
result3_end = least_squares(residuals,coeff3,args=(x,y))


# plot
plt.figure(figsize=(20,5))
plt.subplot(1,3,1)
plt.plot(x,y,'bo',label='data')
plt.plot(x,f(coeff1,x),'b-',label='start')
plt.plot(x,f(result1_2.x,x),'c-',label='2 evaluations')
plt.plot(x,f(result1_5.x,x),'m-',label='5 evaluations')
plt.plot(x,f(result1_10.x,x),'g-',label='10 evaluation')
plt.plot(x,f(result1_end.x,x),'r-',label='end')
#plt.legend(loc='center right',bbox_to_anchor=(1.4, 0.5))

plt.subplot(1,3,2)
plt.plot(x,y,'bo',label='data')
plt.plot(x,f(coeff2,x),'b-',label='start')
plt.plot(x,f(result2_2.x,x),'c-',label='2 evaluations')
plt.plot(x,f(result2_5.x,x),'m-',label='5 evaluations')
plt.plot(x,f(result2_10.x,x),'g-',label='10 evaluation')
plt.plot(x,f(result2_end.x,x),'r-',label='end')
#plt.legend(loc='center right',bbox_to_anchor=(1.4, 0.5))

plt.subplot(1,3,3)
plt.plot(x,y,'bo',label='data')
plt.plot(x,f(coeff3,x),'b-',label='start')
plt.plot(x,f(result3_2.x,x),'c-',label='2 evaluations')
plt.plot(x,f(result3_5.x,x),'m-',label='5 evaluations')
plt.plot(x,f(result3_10.x,x),'g-',label='10 evaluation')
plt.plot(x,f(result3_end.x,x),'r-',label='end')
plt.legend(loc='center right',bbox_to_anchor=(1.4, 0.5))
plt.tight_layout()
plt.show()

[0.         0.20361991 0.04072398 0.67420814 0.86425339 1.
 0.73303167 0.71945701 0.78280543 0.60633484 0.71040724 0.50226244
 0.66968326 0.52941176]

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

df = pd.read_csv("datasets/FA_students_graduates_2012_2025.csv")
x = df["year"].to_numpy()
y = df["students"].to_numpy()

def gaussian(x, A, mu, sigma):
    #A:Amplitude (Peak Height), μ (mu) — Mean (Center / Peak Position), σ (sigma) — Standard Deviation (Spread / Width)
    return A * np.exp(-(x - mu)**2 / (2 * sigma**2))

p0 = [300, 2017, 4]   # A=300, μ =2017, σ =4
popt, pcov = curve_fit(gaussian, x, y, p0=p0)
print(popt) # A, mu, sigma

y_fit = gaussian(x, *popt)

plt.figure(figsize=(10,5))
plt.scatter(x, y, label="data", color="blue")
plt.plot(x, y_fit, "r-", label="Gaussian fit", linewidth=2)

plt.xlabel("Year")
plt.ylabel("Students")
plt.title("Gaussian Fit to FabAcademy Students per Year")
plt.legend()
plt.grid(True)
plt.show()

[ 257.39647568 2019.59821292    5.53562808]

Week 2_1: Fitting¶

Class¶

- Fitting ¶

Assignment　　¶

Approach¶

1. Understand examples¶

linear least squares¶

polynomial fit¶

Polynomial fit @ class¶

2. Polynominal Fit @ Fab Academy data¶

3. Radial Basis Function Fit¶

RBF Fit @ Class¶

RBF Fit @ Fab Academy data¶

3. Nonlinear Least Squares¶

example @ class¶

nonlinear least squares @ Fab Academy data　　　¶

4. Gaussian fitting¶

	year	students	graduates
0	2012	73	31
1	2013	118	40
2	2014	82	68
3	2015	222	147
4	2016	264	163
5	2017	294	185
6	2018	235	164
7	2019	232	159
8	2020	246	126
9	2021	207	121
10	2022	230	117
11	2023	184	107
12	2024	221	168
13	2025	190	111

Week 2_1: Fitting¶

Class¶

- Fitting¶

Assignment ¶

Approach¶

1. Understand examples¶

linear least squares¶

polynomial fit¶

Polynomial fit @ class¶

2. Polynominal Fit @ Fab Academy data¶

3. Radial Basis Function Fit¶

RBF Fit @ Class¶

RBF Fit @ Fab Academy data¶

3. Nonlinear Least Squares¶

example @ class¶

nonlinear least squares @ Fab Academy data ¶

4. Gaussian fitting¶

- Fitting ¶

Assignment　　¶

nonlinear least squares @ Fab Academy data　　　¶