< Home
Session3: Fitting 25/11/2025¶
Liner and Polynomial¶
Import libraries¶
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
np.set_printoptions(precision=3) # Make numpy print cleaner
df = pd.read_csv('datasets/youtube_video.csv')# Load the CSV file using pandas
print("CSV Columns:", df.columns.tolist())
df.head()
CSV Columns: ['video_id', 'title', 'channel_name', 'channel_id', 'view_count', 'like_count', 'comment_count', 'published_date', 'thumbnail']
Out[1]:
| video_id | title | channel_name | channel_id | view_count | like_count | comment_count | published_date | thumbnail | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | QwtOeDR-N3g | Why do Human Feet Wash up on This Beach? | Fas... | Fascinating Horror | UCFXad0mx4WxY1fXdbvtg0CQ | 33955 | 1817 | 119 | 2025-10-03T09:15:02Z | https://i.ytimg.com/vi/QwtOeDR-N3g/hqdefault.jpg |
| 1 | K0fw1uiSGE0 | Range Rover SV (2025) - The King of the City, ... | CAR TV | UCs_gcVRMHEx5mVXfQ7icQTg | 59659 | 1130 | 22 | 2025-10-08T09:10:46Z | https://i.ytimg.com/vi/K0fw1uiSGE0/hqdefault.jpg |
| 2 | StuKQpzfyjI | Bruce Wayne Becomes Batman - NEW Gotham City U... | Just4fun290 | UCdPG86Ahrf9A8YzBnFDAaEg | 8658 | 89 | 0 | 2025-10-04T15:00:06Z | https://i.ytimg.com/vi/StuKQpzfyjI/hqdefault.jpg |
| 3 | C91854vbs8I | How the music of Mario Kart has evolved | Thomas Game Docs | UCyy7dZhgfeMMctSoo3wDXlQ | 161045 | 5391 | 239 | 2025-05-08T15:32:15Z | https://i.ytimg.com/vi/C91854vbs8I/hqdefault.jpg |
| 4 | 1DaHVSmx160 | My bulldog loves mess... you can see that )))) | Enzo English Bulldog | UCkk_2IfQ52OJcCUXOrJBtnw | 7620 | 94 | 24 | 2021-08-12T15:33:31Z | https://i.ytimg.com/vi/1DaHVSmx160/hqdefault.jpg |
Select two numeric columns (x, y)¶
In [2]:
numeric_df = df.select_dtypes(include='number') # Select numeric columns
print("Numeric columns:", numeric_df.columns.tolist())
# Choose two numeric columns for fitting
# (edit these names based on your file)
x = numeric_df.iloc[:,0].values # First numeric column
y = numeric_df.iloc[:,1].values # Second numeric column
Numeric columns: ['view_count', 'like_count', 'comment_count']
Polynomial fitting¶
In [3]:
coeff1 = np.polyfit(x, y, 1) # Fit 1st-order and 2nd-order polynomials
coeff2 = np.polyfit(x, y, 2)
print("Linear fit coefficients:", coeff1)
print("Quadratic fit coefficients:", coeff2)
p1 = np.poly1d(coeff1) # Create polynomial functions
p2 = np.poly1d(coeff2)
Linear fit coefficients: [4.24140234e-03 1.44382848e+04] Quadratic fit coefficients: [-9.32512108e-13 7.04372798e-03 1.12906793e+04]
Create smooth curve for plotting¶
In [4]:
xfit = np.linspace(min(x), max(x), 200) # Generate smooth curve for display
yfit1 = p1(xfit)
yfit2 = p2(xfit)
Plot the data and fits¶
In [5]:
plt.figure(figsize=(10,6)) # 6. Plot
plt.plot(x, y, 'o', label='Data points')
plt.plot(xfit, yfit1, '-', label='Linear Fit')
plt.plot(xfit, yfit2, '-', label='Quadratic Fit')
plt.xlabel("X values")
plt.ylabel("Y values")
plt.title("Curve Fitting on youtube_video.csv")
plt.legend()
plt.grid(True)
plt.show()
Radical Basic Function¶
Just trying to copy and paste Prof. Neil's code and learn through the code by changing some code.
In [6]:
import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 1
npts = 100
np.random.seed(10)
x = np.linspace(xmin,xmax,npts)
y = np.heaviside(-1+2*(x-xmin)/(xmax-xmin),0.5) # generate step to fit
xplot = np.linspace(xmin-0.2,xmax+0.2,npts)
ncenters = 5
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose 5 random RBF centers
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
-np.outer(np.ones(npts),centers))**3
cfit5,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
yfit5 = (np.abs(np.outer(xplot,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@cfit5 # evaluate fit
ncenters = 50
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose 50 random RBF centers
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
-np.outer(np.ones(npts),centers))**3
cfit50,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
yfit50 = (np.abs(np.outer(xplot,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@cfit50 # evaluate fit
fig = plt.figure()
fig.canvas.header_visible = False
plt.plot(x,y,'bo',label='data')
plt.plot(xplot,yfit5,'g-',label='5 anchors')
plt.plot(xplot,yfit50,'r-',label='50 anchors')
plt.legend()
plt.show()
Now, I am trying to play with my data and do radical basic function
In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [8]:
# -----------------------------
# 1. LOAD YOUR CSV FILE
# -----------------------------
# Make sure youtube_video.csv is uploaded in the same folder as your notebook
data = np.loadtxt('youtube_video.csv', delimiter=',', skiprows=1)
# Extract x and y columns
x = data[:, 0]
y = data[:, 1]
# Number of points
npts = len(x)
# Generate X for smooth plotting
xplot = np.linspace(min(x), max(x), 400)
# Set random seed for repeatability
np.random.seed(10)
# -----------------------------
# FUNCTION: BUILD RBF MATRIX
# -----------------------------
def build_rbf_matrix(xvals, centers):
"""
Computes |x - center|^3 as the cubic radial basis.
"""
return np.abs(
np.outer(xvals, np.ones(len(centers))) -
np.outer(np.ones(len(xvals)), centers)
) ** 3
# -----------------------------
# 2. RBF FIT WITH 5 CENTERS
# -----------------------------
ncenters_5 = 5
indices = np.random.choice(npts, ncenters_5, replace=False)
centers_5 = x[indices]
# Build RBF matrix
M5 = build_rbf_matrix(x, centers_5)
# Solve least squares
cfit5, _, _, _ = np.linalg.lstsq(M5, y, rcond=None)
# Evaluate the fit
yfit5 = build_rbf_matrix(xplot, centers_5) @ cfit5
# -----------------------------
# 3. RBF FIT WITH 50 CENTERS
# -----------------------------
ncenters_50 = 50
indices = np.random.choice(npts, ncenters_50, replace=False)
centers_50 = x[indices]
# Build RBF matrix
M50 = build_rbf_matrix(x, centers_50)
# Solve least squares
cfit50, _, _, _ = np.linalg.lstsq(M50, y, rcond=None)
# Evaluate the fit
yfit50 = build_rbf_matrix(xplot, centers_50) @ cfit50
# -----------------------------
# 4. PLOT RESULTS
# -----------------------------
plt.figure(figsize=(10,5))
plt.plot(x, y, 'bo', label='Original Data')
plt.plot(xplot, yfit5, 'g-', label='RBF Fit (5 centers)')
plt.plot(xplot, yfit50, 'r-', label='RBF Fit (50 centers)')
plt.xlabel("x")
plt.ylabel("y")
plt.title("Radial Basis Function Fit (Cubic RBF)")
plt.legend()
plt.grid(True)
plt.show()
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) Cell In[8], line 5 1 # ----------------------------- 2 # 1. LOAD YOUR CSV FILE 3 # ----------------------------- 4 # Make sure youtube_video.csv is uploaded in the same folder as your notebook ----> 5 data = np.loadtxt('youtube_video.csv', delimiter=',', skiprows=1) 7 # Extract x and y columns 8 x = data[:, 0] File /opt/conda/lib/python3.13/site-packages/numpy/lib/_npyio_impl.py:1397, in loadtxt(fname, dtype, comments, delimiter, converters, skiprows, usecols, unpack, ndmin, encoding, max_rows, quotechar, like) 1394 if isinstance(delimiter, bytes): 1395 delimiter = delimiter.decode('latin1') -> 1397 arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter, 1398 converters=converters, skiplines=skiprows, usecols=usecols, 1399 unpack=unpack, ndmin=ndmin, encoding=encoding, 1400 max_rows=max_rows, quote=quotechar) 1402 return arr File /opt/conda/lib/python3.13/site-packages/numpy/lib/_npyio_impl.py:1024, in _read(fname, delimiter, comment, quote, imaginary_unit, usecols, skiplines, max_rows, converters, ndmin, unpack, dtype, encoding) 1022 fname = os.fspath(fname) 1023 if isinstance(fname, str): -> 1024 fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) 1025 if encoding is None: 1026 encoding = getattr(fh, 'encoding', 'latin1') File /opt/conda/lib/python3.13/site-packages/numpy/lib/_datasource.py:192, in open(path, mode, destpath, encoding, newline) 155 """ 156 Open `path` with `mode` and return the file object. 157 (...) 188 189 """ 191 ds = DataSource(destpath) --> 192 return ds.open(path, mode, encoding=encoding, newline=newline) File /opt/conda/lib/python3.13/site-packages/numpy/lib/_datasource.py:529, in DataSource.open(self, path, mode, encoding, newline) 526 return _file_openers[ext](found, mode=mode, 527 encoding=encoding, newline=newline) 528 else: --> 529 raise FileNotFoundError(f"{path} not found.") FileNotFoundError: youtube_video.csv not found.
This code have some issue. definitely I will be trying to solve the issue.
I am Loading the CSV using pandas, then select only the numeric columns.
In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load CSV using pandas (handles text columns)
df = pd.read_csv("datasets/youtube_video.csv")
print(df.head()) # show first rows to inspect column names
video_id title \
0 QwtOeDR-N3g Why do Human Feet Wash up on This Beach? | Fas...
1 K0fw1uiSGE0 Range Rover SV (2025) - The King of the City, ...
2 StuKQpzfyjI Bruce Wayne Becomes Batman - NEW Gotham City U...
3 C91854vbs8I How the music of Mario Kart has evolved
4 1DaHVSmx160 My bulldog loves mess... you can see that ))))
channel_name channel_id view_count like_count \
0 Fascinating Horror UCFXad0mx4WxY1fXdbvtg0CQ 33955 1817
1 CAR TV UCs_gcVRMHEx5mVXfQ7icQTg 59659 1130
2 Just4fun290 UCdPG86Ahrf9A8YzBnFDAaEg 8658 89
3 Thomas Game Docs UCyy7dZhgfeMMctSoo3wDXlQ 161045 5391
4 Enzo English Bulldog UCkk_2IfQ52OJcCUXOrJBtnw 7620 94
comment_count published_date \
0 119 2025-10-03T09:15:02Z
1 22 2025-10-08T09:10:46Z
2 0 2025-10-04T15:00:06Z
3 239 2025-05-08T15:32:15Z
4 24 2021-08-12T15:33:31Z
thumbnail
0 https://i.ytimg.com/vi/QwtOeDR-N3g/hqdefault.jpg
1 https://i.ytimg.com/vi/K0fw1uiSGE0/hqdefault.jpg
2 https://i.ytimg.com/vi/StuKQpzfyjI/hqdefault.jpg
3 https://i.ytimg.com/vi/C91854vbs8I/hqdefault.jpg
4 https://i.ytimg.com/vi/1DaHVSmx160/hqdefault.jpg
The following code wasn't working before however I tried changing the name of the column such as view_count and like_count which is the numeric columns for RBF fitting.
In [11]:
# Select the numeric columns for RBF fitting
x = df["view_count"].to_numpy()
y = df["like_count"].to_numpy()
# OPTIONAL: sort the data so x increases
idx = np.argsort(x)
x = x[idx]
y = y[idx]
Then use RBF code normally¶
In [30]:
def build_rbf_matrix(xvals, centers):
return np.abs(
np.outer(xvals, np.ones(len(centers))) -
np.outer(np.ones(len(xvals)), centers)
) ** 5
xplot = np.linspace(x.min(), x.max(), 400)
# ----- 5 Centers -----
ncenters = 5
indices = np.random.choice(len(x), ncenters, replace=False)
centers = x[indices]
M = build_rbf_matrix(x, centers)
cfit5, _, _, _ = np.linalg.lstsq(M, y, rcond=None)
yfit5 = build_rbf_matrix(xplot, centers) @ cfit5
# ----- 50 Centers -----
ncenters = 50
indices = np.random.choice(len(x), ncenters, replace=False)
centers = x[indices]
M = build_rbf_matrix(x, centers)
cfit50, _, _, _ = np.linalg.lstsq(M, y, rcond=None)
yfit50 = build_rbf_matrix(xplot, centers) @ cfit50
# ----- Plot -----
plt.figure(figsize=(10,5))
plt.plot(x, y, 'bo', label='Data')
plt.plot(xplot, yfit5, 'g-', label='5 centers')
plt.plot(xplot, yfit50, 'r-', label='50 centers')
plt.legend()
plt.show()
In [ ]: