import numpy

# Build the polynomial 1x^2 + 2x + 3 from its coefficients (highest power
# first) and print its text representation.
print(numpy.poly1d((1, 2, 3)))

   2
1 x + 2 x + 3

# Create the polynomial 1x^2 + 2x + 3 and use it as the function p(x).

import numpy as np

# poly1d takes the coefficients from the highest power down, so [1, 2, 3]
# represents 1x^2 + 2x + 3. The resulting object can be called like p(x).
p = np.poly1d([1, 2, 3])
print(p)

# Evaluate the polynomial by substituting x = 1: 1 + 2 + 3 = 6.
print(p(1))

   2
1 x + 2 x + 3
6

# Create a polynomial from a NumPy array

import numpy as np

# Quadratic function: x^2 + 2x + 3
# The array lists the coefficients from the highest power to the constant.
z = np.array((1, 2, 3))
p = np.poly1d(z)
print(p)

   2
1 x + 2 x + 3

# Create a polynomial from a NumPy array

import numpy as np

# Cubic function: x^3 + 2x^2 + 3x + 4
# The array lists the coefficients from the highest power to the constant.
z = np.array((1, 2, 3, 4))
p = np.poly1d(z)
print(p)

   3     2
1 x + 2 x + 3 x + 4

#### Explanation of linear least squares - polynomial

# 1. import libraries
# 1. ライブラリのインポート
import numpy as np
import matplotlib.pyplot as plt

# 2. Set parameters
xmin = 0      # lower bound of the sampled x range
xmax = 2      # upper bound of the sampled x range
noise = 0.05  # standard deviation of the Gaussian noise added to y
npts = 100    # number of random samples (also the size of the plotting grid)
a = 0.5       # constant term of the generating polynomial
b = 1         # linear coefficient of the generating polynomial
c = -.3       # quadratic coefficient of the generating polynomial

# 3. Create random data
np.random.seed(0)  # fixed seed so the generated samples are reproducible
x = xmin + (xmax - xmin) * np.random.rand(npts)  # random x between xmin and xmax
y = a + b * x + c * x * x + np.random.normal(0, noise, npts)  # polynomial at x plus noise

# 4. Polynomial fitting using polyfit
# polyfit returns least-squares coefficients ordered from the highest power down.
coeff1 = np.polyfit(x, y, 1)  # fit first-order polynomial
coeff2 = np.polyfit(x, y, 2)  # fit second-order polynomial

# 5. Prepare the fit curves and print the coefficients
# linspace yields exactly npts evenly spaced points and includes xmax, so the
# fitted curves span the whole sampled range.
xfit = np.linspace(xmin, xmax, npts)  # x values used for drawing the curves
pfit1 = np.poly1d(coeff1)  # wrap the first-order coefficients as a callable
yfit1 = pfit1(xfit)  # evaluate the first-order fit on xfit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)  # wrap the second-order coefficients as a callable
yfit2 = pfit2(xfit)  # evaluate the second-order fit on xfit
print(f"second-order fit coefficients: {coeff2}")

# 6. Plot the samples together with both fitted curves
plt.plot(x, y, 'o')  # noisy samples drawn as markers
fit_curves = ((yfit1, 'g-', 'linear'), (yfit2, 'r-', 'quadratic'))
for curve, fmt, name in fit_curves:
    plt.plot(xfit, curve, fmt, label=name)
plt.legend()

plt.show()

first-order fit coefficients: [0.41918275 0.69084816]
second-order fit coefficients: [-0.3225953   1.04205042  0.49756991]

# 1. Make a CSV file

from pathlib import Path

import numpy as np
import pandas as pd

# Parameters
xmin = 0      # lower bound of the sampled x range
xmax = 2      # upper bound of the sampled x range
noise = 0.05  # standard deviation of the Gaussian noise added to y
npts = 100    # number of samples written to the CSV
a = 0.5       # constant term of the generating polynomial
b = 1         # linear coefficient of the generating polynomial
c = -0.3      # quadratic coefficient of the generating polynomial

# Create random data
np.random.seed(0)  # fixed seed so the file content is reproducible
x = xmin + (xmax - xmin) * np.random.rand(npts)
y = a + b * x + c * x * x + np.random.normal(0, noise, npts)

df = pd.DataFrame({'x': x, 'y': y})
print(df)

# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
csv_path = Path('data/day3_random_data.csv')
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)

           x         y
0   1.097627  1.177934
1   1.430379  1.361625
2   1.205527  1.292821
3   1.089766  1.156677
4   0.847310  1.206342
..       ...       ...
95  0.366383  0.860192
96  1.173026  1.220058
97  0.040215  0.505252
98  1.657880  1.310534
99  0.009391  0.510238

[100 rows x 2 columns]

# 2. Read the CSV file

import pandas as pd

# Load the generated samples back from disk.
df = pd.read_csv('data/day3_random_data.csv')
# Preview the first rows; 5 is also the default, and another count can be
# passed to show more or fewer rows.
df.head(n=5)

# 3. Plot the value of CSV in a graph(1)

'''
Prompt(ChatGPt)
How can I use the x and y values imported from a CSV with
df = pd.read_csv('data/day3_random_data.csv')
as inputs for
coeff1 = np.polyfit(x, y, 1)  # fit first-order polynomial
and
coeff2 = np.polyfit(x, y, 2)  # fit second-order polynomial?

The answer is below
'''

import pandas as pd
import numpy as np

df = pd.read_csv('data/day3_random_data.csv')

# Selecting a column returns a pandas Series (values with an index label),
# so take the underlying NumPy arrays before fitting.
x = df['x'].values
y = df['y'].values

# Polynomial fitting using polyfit
coeff1 = np.polyfit(x, y, 1)  # fit first-order polynomial
coeff2 = np.polyfit(x, y, 2)  # fit second-order polynomial

# Build the evaluation grid from the loaded data itself so this cell runs
# without any previously defined range variables; linspace includes both
# end points of the data range.
xfit = np.linspace(x.min(), x.max(), 100)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit)  # evaluate first-order fit
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit)  # evaluate second-order fit

print("First-order coefficients:", coeff1)
print("Second-order coefficients:", coeff2)

First-order coefficients: [0.41918275 0.69084816]
Second-order coefficients: [-0.3225953   1.04205042  0.49756991]

# 3. Plot the value of CSV in a graph(2)

# Draw the samples as markers, then overlay each fitted curve.
plt.plot(x, y, 'o')
fit_curves = ((yfit1, 'g-', 'linear'), (yfit2, 'r-', 'quadratic'))
for curve, fmt, name in fit_curves:
    plt.plot(xfit, curve, fmt, label=name)
plt.legend()

plt.show()

import pandas as pd

# Load the yearly birth totals.
df = pd.read_csv('datasets/year_totalbirth.csv')
# Preview the first rows; 5 is also the default, and another count can be
# passed to show more or fewer rows.
df.head(n=5)

# 4. Use another CSV file
# Linear fit and Quadratic

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt # add

df = pd.read_csv('datasets/year_totalbirth.csv')

# Selecting a column returns a pandas Series (values with an index label),
# so take the underlying NumPy arrays before fitting.
x = df['year'].values
y = df['total_birth'].values

# Polynomial fitting using polyfit
coeff1 = np.polyfit(x, y, 1)  # fit first-order polynomial
coeff2 = np.polyfit(x, y, 2)  # fit second-order polynomial

# The x-range for plotting (from the minimum to the maximum year).
# linspace includes both end points, so the curves reach the last year.
xmin = x.min()
xmax = x.max()
npts = 200
xfit = np.linspace(xmin, xmax, npts)

pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit)  # evaluate first-order fit
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit)  # evaluate second-order fit

print("First-order coefficients:", coeff1)
print("Second-order coefficients:", coeff2)

# Plot the observations as markers with both fitted curves on top.
plt.figure(figsize=(8, 5))
plt.plot(x, y, 'o', label='Actual birth')
plt.plot(xfit, yfit1, 'g-', label='Linear fit')
plt.plot(xfit, yfit2, 'r-', label='Quadratic fit')

plt.xlabel("Year")
plt.ylabel("Number of birth")
plt.title("Japan yearly birth(e-Stat data)")
plt.grid(True)
plt.legend()
plt.show()

First-order coefficients: [-1.91746836e+04  3.95657683e+07]
Second-order coefficients: [ 7.62939602e+01 -3.22484096e+05  3.40973469e+08]

# 4. Use another CSV file
# Linear fit, Quadratic, cubic fit, quartic fit

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt # add

df = pd.read_csv('datasets/year_totalbirth.csv')

# Selecting a column returns a pandas Series (values with an index label),
# so take the underlying NumPy arrays before fitting.
x = df['year'].values
y = df['total_birth'].values

# Polynomial fitting using polyfit
# NOTE(review): x presumably holds raw calendar years, which can make the
# higher-degree fits numerically ill-conditioned; consider fitting against
# x - x.mean() — confirm before relying on the degree 3/4 coefficients.
coeff1 = np.polyfit(x, y, 1)  # fit first-order polynomial
coeff2 = np.polyfit(x, y, 2)  # fit second-order polynomial
coeff3 = np.polyfit(x, y, 3)  # fit third-order polynomial
coeff4 = np.polyfit(x, y, 4)  # fit fourth-order polynomial

# The x-range for plotting (from the minimum to the maximum year).
# linspace includes both end points, so the curves reach the last year.
xmin = x.min()
xmax = x.max()
npts = 200
xfit = np.linspace(xmin, xmax, npts)

pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit)  # evaluate first-order fit
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit)  # evaluate second-order fit

pfit3 = np.poly1d(coeff3)
yfit3 = pfit3(xfit)  # evaluate third-order fit
pfit4 = np.poly1d(coeff4)
yfit4 = pfit4(xfit)  # evaluate fourth-order fit

print("First-order coefficients:", coeff1)
print("Second-order coefficients:", coeff2)
print("Third-order coefficients:", coeff3)
print("Forth-order coefficients:", coeff4)

# Plot the observations as markers with all four fitted curves on top.
plt.figure(figsize=(8, 5))
plt.plot(x, y, 'o', label='Actual birth')
plt.plot(xfit, yfit1, 'g-', label='Linear fit')
plt.plot(xfit, yfit2, 'r-', label='Quadratic fit')
plt.plot(xfit, yfit3, 'y-', label='cubic fit')
plt.plot(xfit, yfit4, 'm-', label='quartic fit')


plt.xlabel("Year")
plt.ylabel("Number of birth")
plt.title("Japan yearly birth(e-Stat data)")
plt.grid(True)
plt.legend()
plt.show()

First-order coefficients: [-1.91746836e+04  3.95657683e+07]
Second-order coefficients: [ 7.62939602e+01 -3.22484096e+05  3.40973469e+08]
Third-order coefficients: [-5.21225388e+00  3.11601420e+04 -6.21078987e+07  4.12744720e+10]
Forth-order coefficients: [ 1.85014314e-01 -1.47568524e+03  4.41357386e+06 -5.86654970e+09
  2.92405810e+12]

# 4. Use another CSV file

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt # add

df = pd.read_csv('datasets/year_totalbirth.csv')

# Selecting a column returns a pandas Series (values with an index label),
# so take the underlying NumPy arrays before fitting.
x = df['year'].values
y = df['total_birth'].values

# Polynomial fitting using polyfit
coeff1 = np.polyfit(x, y, 1)  # fit first-order polynomial
coeff2 = np.polyfit(x, y, 2)  # fit second-order polynomial

# The x-range for plotting (from the minimum to the maximum year).
# linspace includes both end points, so the curves reach the last year.
xmin = x.min()
xmax = x.max()
npts = 200
xfit = np.linspace(xmin, xmax, npts)

pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit)  # evaluate first-order fit
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit)  # evaluate second-order fit

print("First-order coefficients:", coeff1)
print("Second-order coefficients:", coeff2)

# Plot the observations as markers with both fitted curves on top.
plt.figure(figsize=(8, 5))
plt.plot(x, y, 'o', label='Actual birth')
plt.plot(xfit, yfit1, 'g-', label='Linear fit')
plt.plot(xfit, yfit2, 'r-', label='Quadratic fit')

plt.xlabel("Year")
plt.ylabel("Number of birth")
plt.title("Japan yearly birth(e-Stat data)")
plt.grid(True)
plt.legend()
plt.show()

First-order coefficients: [-1.91746836e+04  3.95657683e+07]
Second-order coefficients: [ 7.62939602e+01 -3.22484096e+05  3.40973469e+08]

'''
Prompt(ChatGPT)  
The CSV file contains data for each year with the columns: Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec, for example:
1947 → 295465, 226018, 235891, 209159, 195574, 194633, 226560, 236831, 231874, 229058, 210764, 186961.  
I want to plot this data with the Y-axis showing the number of births and the X-axis representing the months.  
  
Answer
The answer is below
'''

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the CSV
df = pd.read_csv("datasets/year_birthmonth.csv")

# Select the row for the year to plot
year_to_plot = 2024
row = df[df["Year"] == year_to_plot]
# Exactly one row is required: an empty selection would pass an empty y to
# polyfit, and several rows would flatten to more than 12 values.
if len(row) != 1:
    raise ValueError(
        f"expected exactly one row for Year == {year_to_plot}, found {len(row)}"
    )

# Month labels and numbers
months = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
x = np.arange(1, 13)  # 1=Jan, 12=Dec
y = row[months].values.flatten()  # the twelve monthly values as a 1-D array

# --- polyfit ---
coeff1 = np.polyfit(x, y, 1)  # linear
coeff2 = np.polyfit(x, y, 2)  # quadratic

pfit1 = np.poly1d(coeff1)
pfit2 = np.poly1d(coeff2)

# Use a dense x grid so the fitted curves are drawn smoothly
xfit = np.linspace(1, 12, 100)
yfit1 = pfit1(xfit)
yfit2 = pfit2(xfit)

# --- Plot ---
plt.figure(figsize=(10,5))
plt.plot(x, y, 'o', label='Actual data')
plt.plot(xfit, yfit1, 'g-', label='Linear fit')
plt.plot(xfit, yfit2, 'r-', label='Quadratic fit')

plt.xticks(x, months)  # label the ticks 1..12 with month names
plt.xlabel("Month")
plt.ylabel("Number of Births")
plt.title(f"Monthly Births in {year_to_plot} with Polynomial Fit")
plt.grid(True, linestyle="--", alpha=0.5)
plt.legend()
plt.show()

# Print fit
print("Linear fit coefficients:", coeff1)
print("Quadratic fit coefficients:", coeff2)

Linear fit coefficients: [  327.01048951 55055.51515152]
Quadratic fit coefficients: [ -115.29595405  1825.85789211 51558.20454545]

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read the CSV
df = pd.read_csv("datasets/year_birthmonth.csv")

# Select the row for the year to plot
year_to_plot = 2024
row = df[df["Year"] == year_to_plot]
# Exactly one row is required: an empty selection would pass an empty y to
# polyfit, and several rows would flatten to more than 12 values.
if len(row) != 1:
    raise ValueError(
        f"expected exactly one row for Year == {year_to_plot}, found {len(row)}"
    )

# Month labels and numbers
months = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
x = np.arange(1, 13)  # 1=Jan, 12=Dec
y = row[months].values.flatten()  # the twelve monthly values as a 1-D array

# --- polyfit ---
coeff1 = np.polyfit(x, y, 1)  # linear
coeff2 = np.polyfit(x, y, 2)  # quadratic
coeff3 = np.polyfit(x, y, 3)  # cubic
coeff4 = np.polyfit(x, y, 4)  # quartic
coeff5 = np.polyfit(x, y, 5)  # quintic

pfit1 = np.poly1d(coeff1)
pfit2 = np.poly1d(coeff2)
pfit3 = np.poly1d(coeff3)
pfit4 = np.poly1d(coeff4)
pfit5 = np.poly1d(coeff5)

# Use a dense x grid so the fitted curves are drawn smoothly
xfit = np.linspace(1, 12, 100)
yfit1 = pfit1(xfit)
yfit2 = pfit2(xfit)
yfit3 = pfit3(xfit)
yfit4 = pfit4(xfit)
yfit5 = pfit5(xfit)

# --- Plot ---
plt.figure(figsize=(10,5))
plt.plot(x, y, 'o', label='Actual data')
plt.plot(xfit, yfit1, 'g-', label='Linear fit')
plt.plot(xfit, yfit2, 'r-', label='Quadratic fit')
plt.plot(xfit, yfit3, 'y-', label='Cubic fit')
plt.plot(xfit, yfit4, 'm-', label='Quartic fit')
plt.plot(xfit, yfit5, 'b-', label='Quintic fit')


plt.xticks(x, months)  # label the ticks 1..12 with month names
plt.xlabel("Month")
plt.ylabel("Number of Births")
plt.title(f"Monthly Births in {year_to_plot} with Polynomial Fit")
plt.grid(True, linestyle="--", alpha=0.5)
plt.legend()
plt.show()

# Print fit (only the linear and quadratic coefficients are reported)
print("Linear fit coefficients:", coeff1)
print("Quadratic fit coefficients:", coeff2)

Linear fit coefficients: [  327.01048951 55055.51515152]
Quadratic fit coefficients: [ -115.29595405  1825.85789211 51558.20454545]

	year	total_birth	Unnamed: 2	Unnamed: 3	Unnamed: 4	Unnamed: 5	Unnamed: 6
0	1947	2678792	NaN	NaN	NaN	NaN	NaN
1	1950	2337507	NaN	NaN	NaN	NaN	NaN
2	1955	1730692	NaN	NaN	NaN	NaN	NaN
3	1960	1606041	NaN	NaN	NaN	NaN	NaN
4	1965	1823697	NaN	NaN	NaN	NaN	NaN

3.Fitting¶

Assignment¶

Words¶

Variables 変数¶

Functions 関数¶

Errors: 誤差¶

Fitting: フィッティング（適合）¶

Overfitting: 過学習¶

Polynomial¶

polyfit ¶

poly1d ¶

What I do here is:¶

1. Make a CSV file¶

2. Read the CSV file¶

3. Plot the value of CSV in a graph(1)¶

3. Plot the value of CSV in a graph(2)¶

4. Use another CSV file¶

Fit of Data by Year-month of birth¶

	x	y
0	1.097627	1.177934
1	1.430379	1.361625
2	1.205527	1.292821
3	1.089766	1.156677
4	0.847310	1.206342

3.Fitting¶

Assignment¶

Words¶

Variables 変数¶

Functions 関数¶

Errors: 誤差¶

Fitting: フィッティング（適合）¶

Overfitting: 過学習¶

Polynomial¶

polyfit¶

poly1d¶

What I do here is:¶

1. Make a CSV file¶

2. Read the CSV file¶

3. Plot the value of CSV in a graph(1)¶

3. Plot the value of CSV in a graph(2)¶

4. Use another CSV file¶

Fit of Data by Year-month of birth¶

polyfit ¶

poly1d ¶