[Tenzin Namgyal] - Fab Futures - Data Science
Home About

< Home

Session3: Fitting 25/11/2025¶

Liner and Polynomial¶

Import libraries¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


np.set_printoptions(precision=3) # Make numpy print cleaner

df = pd.read_csv('datasets/youtube_video.csv')# Load the CSV file using pandas
print("CSV Columns:", df.columns.tolist())
df.head()
CSV Columns: ['video_id', 'title', 'channel_name', 'channel_id', 'view_count', 'like_count', 'comment_count', 'published_date', 'thumbnail']
Out[1]:
video_id title channel_name channel_id view_count like_count comment_count published_date thumbnail
0 QwtOeDR-N3g Why do Human Feet Wash up on This Beach? | Fas... Fascinating Horror UCFXad0mx4WxY1fXdbvtg0CQ 33955 1817 119 2025-10-03T09:15:02Z https://i.ytimg.com/vi/QwtOeDR-N3g/hqdefault.jpg
1 K0fw1uiSGE0 Range Rover SV (2025) - The King of the City, ... CAR TV UCs_gcVRMHEx5mVXfQ7icQTg 59659 1130 22 2025-10-08T09:10:46Z https://i.ytimg.com/vi/K0fw1uiSGE0/hqdefault.jpg
2 StuKQpzfyjI Bruce Wayne Becomes Batman - NEW Gotham City U... Just4fun290 UCdPG86Ahrf9A8YzBnFDAaEg 8658 89 0 2025-10-04T15:00:06Z https://i.ytimg.com/vi/StuKQpzfyjI/hqdefault.jpg
3 C91854vbs8I How the music of Mario Kart has evolved Thomas Game Docs UCyy7dZhgfeMMctSoo3wDXlQ 161045 5391 239 2025-05-08T15:32:15Z https://i.ytimg.com/vi/C91854vbs8I/hqdefault.jpg
4 1DaHVSmx160 My bulldog loves mess... you can see that )))) Enzo English Bulldog UCkk_2IfQ52OJcCUXOrJBtnw 7620 94 24 2021-08-12T15:33:31Z https://i.ytimg.com/vi/1DaHVSmx160/hqdefault.jpg

Select two numeric columns (x, y)¶

In [2]:
numeric_df = df.select_dtypes(include='number') # Select numeric columns

print("Numeric columns:", numeric_df.columns.tolist())

# Choose two numeric columns for fitting
# (edit these names based on your file)
x = numeric_df.iloc[:,0].values   # First numeric column
y = numeric_df.iloc[:,1].values   # Second numeric column
Numeric columns: ['view_count', 'like_count', 'comment_count']

Polynomial fitting¶

In [3]:
coeff1 = np.polyfit(x, y, 1) # Fit 1st-order and 2nd-order polynomials
coeff2 = np.polyfit(x, y, 2)

print("Linear fit coefficients:", coeff1)
print("Quadratic fit coefficients:", coeff2)

p1 = np.poly1d(coeff1) # Create polynomial functions
p2 = np.poly1d(coeff2)
Linear fit coefficients: [4.24140234e-03 1.44382848e+04]
Quadratic fit coefficients: [-9.32512108e-13  7.04372798e-03  1.12906793e+04]

Create smooth curve for plotting¶

In [4]:
xfit = np.linspace(min(x), max(x), 200) # Generate smooth curve for display

yfit1 = p1(xfit)
yfit2 = p2(xfit)

Plot the data and fits¶

In [5]:
plt.figure(figsize=(10,6)) # 6. Plot

plt.plot(x, y, 'o', label='Data points')
plt.plot(xfit, yfit1, '-', label='Linear Fit')
plt.plot(xfit, yfit2, '-', label='Quadratic Fit')

plt.xlabel("X values")
plt.ylabel("Y values")
plt.title("Curve Fitting on youtube_video.csv")
plt.legend()
plt.grid(True)
plt.show()
No description has been provided for this image

Radical Basic Function¶

Just trying to copy and paste Prof. Neil's code and learn through the code by changing some code.

In [6]:
import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 1
npts = 100
np.random.seed(10)
x = np.linspace(xmin,xmax,npts)
y = np.heaviside(-1+2*(x-xmin)/(xmax-xmin),0.5) # generate step to fit
xplot = np.linspace(xmin-0.2,xmax+0.2,npts)
ncenters = 5
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose 5 random RBF centers
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
   -np.outer(np.ones(npts),centers))**3
cfit5,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
yfit5 = (np.abs(np.outer(xplot,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@cfit5 # evaluate fit
ncenters = 50
indices = np.random.uniform(low=0,high=len(x),size=ncenters).astype(int) # choose 50 random RBF centers
centers = x[indices]
M = np.abs(np.outer(x,np.ones(ncenters)) # construct matrix of basis terms
   -np.outer(np.ones(npts),centers))**3
cfit50,residuals,rank,values = np.linalg.lstsq(M,y) # do SVD fit
yfit50 = (np.abs(np.outer(xplot,np.ones(ncenters))-np.outer(np.ones(npts),centers))**3)@cfit50 # evaluate fit
fig = plt.figure()
fig.canvas.header_visible = False
plt.plot(x,y,'bo',label='data')
plt.plot(xplot,yfit5,'g-',label='5 anchors')
plt.plot(xplot,yfit50,'r-',label='50 anchors')
plt.legend()
plt.show()
No description has been provided for this image

Now, I am trying to play with my data and do radical basic function

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [8]:
# -----------------------------
# 1. LOAD YOUR CSV FILE
# -----------------------------
# Make sure youtube_video.csv is uploaded in the same folder as your notebook
data = np.loadtxt('youtube_video.csv', delimiter=',', skiprows=1)

# Extract x and y columns
x = data[:, 0]
y = data[:, 1]

# Number of points
npts = len(x)

# Generate X for smooth plotting
xplot = np.linspace(min(x), max(x), 400)

# Set random seed for repeatability
np.random.seed(10)


# -----------------------------
# FUNCTION: BUILD RBF MATRIX
# -----------------------------
def build_rbf_matrix(xvals, centers):
    """
    Computes |x - center|^3 as the cubic radial basis.
    """
    return np.abs(
        np.outer(xvals, np.ones(len(centers))) -
        np.outer(np.ones(len(xvals)), centers)
    ) ** 3


# -----------------------------
# 2. RBF FIT WITH 5 CENTERS
# -----------------------------
ncenters_5 = 5
indices = np.random.choice(npts, ncenters_5, replace=False)
centers_5 = x[indices]

# Build RBF matrix
M5 = build_rbf_matrix(x, centers_5)

# Solve least squares
cfit5, _, _, _ = np.linalg.lstsq(M5, y, rcond=None)

# Evaluate the fit
yfit5 = build_rbf_matrix(xplot, centers_5) @ cfit5


# -----------------------------
# 3. RBF FIT WITH 50 CENTERS
# -----------------------------
ncenters_50 = 50
indices = np.random.choice(npts, ncenters_50, replace=False)
centers_50 = x[indices]

# Build RBF matrix
M50 = build_rbf_matrix(x, centers_50)

# Solve least squares
cfit50, _, _, _ = np.linalg.lstsq(M50, y, rcond=None)

# Evaluate the fit
yfit50 = build_rbf_matrix(xplot, centers_50) @ cfit50


# -----------------------------
# 4. PLOT RESULTS
# -----------------------------
plt.figure(figsize=(10,5))
plt.plot(x, y, 'bo', label='Original Data')
plt.plot(xplot, yfit5, 'g-', label='RBF Fit (5 centers)')
plt.plot(xplot, yfit50, 'r-', label='RBF Fit (50 centers)')
plt.xlabel("x")
plt.ylabel("y")
plt.title("Radial Basis Function Fit (Cubic RBF)")
plt.legend()
plt.grid(True)
plt.show()
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[8], line 5
      1 # -----------------------------
      2 # 1. LOAD YOUR CSV FILE
      3 # -----------------------------
      4 # Make sure youtube_video.csv is uploaded in the same folder as your notebook
----> 5 data = np.loadtxt('youtube_video.csv', delimiter=',', skiprows=1)
      7 # Extract x and y columns
      8 x = data[:, 0]

File /opt/conda/lib/python3.13/site-packages/numpy/lib/_npyio_impl.py:1397, in loadtxt(fname, dtype, comments, delimiter, converters, skiprows, usecols, unpack, ndmin, encoding, max_rows, quotechar, like)
   1394 if isinstance(delimiter, bytes):
   1395     delimiter = delimiter.decode('latin1')
-> 1397 arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter,
   1398             converters=converters, skiplines=skiprows, usecols=usecols,
   1399             unpack=unpack, ndmin=ndmin, encoding=encoding,
   1400             max_rows=max_rows, quote=quotechar)
   1402 return arr

File /opt/conda/lib/python3.13/site-packages/numpy/lib/_npyio_impl.py:1024, in _read(fname, delimiter, comment, quote, imaginary_unit, usecols, skiplines, max_rows, converters, ndmin, unpack, dtype, encoding)
   1022     fname = os.fspath(fname)
   1023 if isinstance(fname, str):
-> 1024     fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
   1025     if encoding is None:
   1026         encoding = getattr(fh, 'encoding', 'latin1')

File /opt/conda/lib/python3.13/site-packages/numpy/lib/_datasource.py:192, in open(path, mode, destpath, encoding, newline)
    155 """
    156 Open `path` with `mode` and return the file object.
    157 
   (...)    188 
    189 """
    191 ds = DataSource(destpath)
--> 192 return ds.open(path, mode, encoding=encoding, newline=newline)

File /opt/conda/lib/python3.13/site-packages/numpy/lib/_datasource.py:529, in DataSource.open(self, path, mode, encoding, newline)
    526     return _file_openers[ext](found, mode=mode,
    527                               encoding=encoding, newline=newline)
    528 else:
--> 529     raise FileNotFoundError(f"{path} not found.")

FileNotFoundError: youtube_video.csv not found.

This code have some issue. definitely I will be trying to solve the issue.

I am Loading the CSV using pandas, then select only the numeric columns.

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load CSV using pandas (handles text columns)
df = pd.read_csv("datasets/youtube_video.csv")

print(df.head())   # show first rows to inspect column names
      video_id                                              title  \
0  QwtOeDR-N3g  Why do Human Feet Wash up on This Beach? | Fas...   
1  K0fw1uiSGE0  Range Rover SV (2025) - The King of the City, ...   
2  StuKQpzfyjI  Bruce Wayne Becomes Batman - NEW Gotham City U...   
3  C91854vbs8I            How the music of Mario Kart has evolved   
4  1DaHVSmx160     My bulldog loves mess... you can see that ))))   

           channel_name                channel_id  view_count  like_count  \
0    Fascinating Horror  UCFXad0mx4WxY1fXdbvtg0CQ       33955        1817   
1                CAR TV  UCs_gcVRMHEx5mVXfQ7icQTg       59659        1130   
2           Just4fun290  UCdPG86Ahrf9A8YzBnFDAaEg        8658          89   
3      Thomas Game Docs  UCyy7dZhgfeMMctSoo3wDXlQ      161045        5391   
4  Enzo English Bulldog  UCkk_2IfQ52OJcCUXOrJBtnw        7620          94   

   comment_count        published_date  \
0            119  2025-10-03T09:15:02Z   
1             22  2025-10-08T09:10:46Z   
2              0  2025-10-04T15:00:06Z   
3            239  2025-05-08T15:32:15Z   
4             24  2021-08-12T15:33:31Z   

                                          thumbnail  
0  https://i.ytimg.com/vi/QwtOeDR-N3g/hqdefault.jpg  
1  https://i.ytimg.com/vi/K0fw1uiSGE0/hqdefault.jpg  
2  https://i.ytimg.com/vi/StuKQpzfyjI/hqdefault.jpg  
3  https://i.ytimg.com/vi/C91854vbs8I/hqdefault.jpg  
4  https://i.ytimg.com/vi/1DaHVSmx160/hqdefault.jpg  

The following code wasn't working before however I tried changing the name of the column such as view_count and like_count which is the numeric columns for RBF fitting.

In [11]:
# Select the numeric columns for RBF fitting
x = df["view_count"].to_numpy()
y = df["like_count"].to_numpy()

# OPTIONAL: sort the data so x increases
idx = np.argsort(x)
x = x[idx]
y = y[idx]

Then use RBF code normally¶

In [30]:
def build_rbf_matrix(xvals, centers):
    return np.abs(
        np.outer(xvals, np.ones(len(centers))) -
        np.outer(np.ones(len(xvals)), centers)
    ) ** 5

xplot = np.linspace(x.min(), x.max(), 400)

# ----- 5 Centers -----
ncenters = 5
indices = np.random.choice(len(x), ncenters, replace=False)
centers = x[indices]

M = build_rbf_matrix(x, centers)
cfit5, _, _, _ = np.linalg.lstsq(M, y, rcond=None)
yfit5 = build_rbf_matrix(xplot, centers) @ cfit5

# ----- 50 Centers -----
ncenters = 50
indices = np.random.choice(len(x), ncenters, replace=False)
centers = x[indices]

M = build_rbf_matrix(x, centers)
cfit50, _, _, _ = np.linalg.lstsq(M, y, rcond=None)
yfit50 = build_rbf_matrix(xplot, centers) @ cfit50

# ----- Plot -----
plt.figure(figsize=(10,5))
plt.plot(x, y, 'bo', label='Data')
plt.plot(xplot, yfit5, 'g-', label='5 centers')
plt.plot(xplot, yfit50, 'r-', label='50 centers')
plt.legend()
plt.show()
No description has been provided for this image
In [ ]: