Fitting the Data¶
In [1]:
import numpy as np
import matplotlib.pyplot as plt
xmin = 0
xmax = 2
noise = 0.05
npts = 100
a = 0.5
b = 1
c = -.3
np.random.seed(0)
x = xmin+(xmax-xmin)*np.random.rand(npts)
print(x)
[1.09762701 1.43037873 1.20552675 1.08976637 0.8473096 1.29178823 0.87517442 1.783546 1.92732552 0.76688304 1.58345008 1.05778984 1.13608912 1.85119328 0.14207212 0.1742586 0.04043679 1.66523969 1.5563135 1.7400243 1.95723668 1.59831713 0.92295872 1.56105835 0.23654885 1.27984204 0.28670657 1.88933783 1.04369664 0.82932388 0.52911122 1.54846738 0.91230066 1.1368679 0.0375796 1.23527099 1.22419145 1.23386799 1.88749616 1.3636406 0.7190158 0.87406391 1.39526239 0.12045094 1.33353343 1.34127574 0.42076512 0.2578526 0.6308567 0.72742154 1.14039354 0.87720303 1.97674768 0.20408962 0.41775351 0.32261904 1.30621665 0.50658321 0.93262155 0.48885118 0.31793917 0.22075028 1.31265918 0.2763659 0.39316472 0.73745034 1.64198646 0.19420255 1.67588981 0.19219682 1.95291893 0.9373024 1.95352218 1.20969104 1.47852716 0.07837558 0.56561393 0.24039312 0.5922804 0.23745544 0.63596636 0.82852599 0.12829499 1.38494424 1.13320291 0.53077898 1.04649611 0.18788102 1.15189299 1.8585924 0.6371379 1.33482076 0.26359572 1.43265441 0.57881219 0.36638272 1.17302587 0.04021509 1.65788006 0.00939095]
In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from statsmodels.nonparametric.smoothers_lowess import lowess
# Load the CSV file (adjust path if required)
df = pd.read_csv("datasets/NIFTY_50.csv")
# Convert date column to datetime
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
# Filter data from 2007 to 2012
df_period = df[(df['Date'].dt.year >= 2007) & (df['Date'].dt.year <= 2012)]
# Sort by date
df_period = df_period.sort_values('Date')
# Select last trading day of each month
monthly_last = df_period.groupby([df_period['Date'].dt.year,
df_period['Date'].dt.month]).tail(1)
# Keep date and closing value
monthly_last = monthly_last[['Date', 'Close']].reset_index(drop=True)
# Fit a straight line
coeff = np.polyfit(x, y, 1)
fit_line = np.poly1d(coeff)
# Plot the results
plt.figure(figsize=(12, 8))
# Scatter plot
plt.scatter(monthly_last['Date'], y, label='Monthly Close Points')
# Line of best fit
plt.plot(monthly_last['Date'], fit_line(x), label='Line of Best Fit')
plt.xlabel('Date')
plt.ylabel('Nifty Close')
plt.title('Last Trading Day of Each Month (2007–2012)\nScatter Plot + Line of Best Fit')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()
/tmp/ipykernel_52147/3036111219.py:11: UserWarning: Parsing dates in %Y-%m-%d format when dayfirst=True was specified. Pass `dayfirst=False` or specify a format to silence this warning. df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
This Line Fit doesn't give the best fit as it is far away from most of the data points, lot of residual i think. I will try to draw a curve fit on the same dataset.
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from statsmodels.nonparametric.smoothers_lowess import lowess
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
df_period = df[(df['Date'].dt.year >= 2007) & (df['Date'].dt.year <= 2012)]
df_period = df_period.sort_values('Date')
monthly_last = df_period.groupby([df_period['Date'].dt.year,
df_period['Date'].dt.month]).tail(1)
monthly_last = monthly_last[['Date', 'Close']].reset_index(drop=True)
print (monthly_last)
# Prepare X and Y for LOESS (x must be numeric)
x = mdates.date2num(monthly_last['Date']) # Convert dates → numeric
y = monthly_last['Close'].values
# Apply LOESS smoothing
# frac = smoothing parameter (0.1 = smooth, 0.5 = more flexible)
loess_result = lowess(y, x, frac=0.3)
x_smooth = loess_result[:, 0]
y_smooth = loess_result[:, 1]
# Plot the results
plt.figure(figsize=(12, 8))
# Scatter plot
plt.scatter(monthly_last["Date"], y, label='Monthly Close Points')
# LOESS smoothed curve
plt.plot(mdates.num2date(x_smooth), y_smooth,
color='red', linewidth=2, label='LOESS Curve')
plt.xlabel('Date')
plt.ylabel('Nifty Close')
plt.title('Last Trading Day of Each Month (2007–2012)\nScatter Plot + LOESS Curve Fit')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()
Date Close 0 2007-09-28 5021.350098 1 2007-10-31 5900.649902 2 2007-11-30 5762.750000 3 2007-12-31 6138.600098 4 2008-01-31 5137.450195 .. ... ... 59 2012-08-31 5258.500000 60 2012-09-28 5703.299805 61 2012-10-31 5619.700195 62 2012-11-30 5879.850098 63 2012-12-31 5905.100098 [64 rows x 2 columns]
I used LOESS Curve fitting as it gives me a better fit for the stock market trend.
LOESS (Locally Estimated Scatterplot Smoothing) is a powerful statistical technique that fits a smooth curve to data by performing low-degree polynomial regressions on localized subsets of the data, giving more weight to closer points, making it great for complex trends, noisy data, and outlier detection without assuming a global function.
In [ ]: