import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm

# Plot the empirical distribution of the 'Daily_Return_%' column (histogram
# plus KDE) and overlay a normal curve fitted to the same data for comparison.

# Load the data
df = pd.read_csv("datasets/NIFTY_50.csv")

# 1. Prepare the Daily Returns data
# Convert 'Daily_Return_%' to numeric; entries that cannot be parsed become
# NaN instead of raising.
df['Daily_Return_%'] = pd.to_numeric(df['Daily_Return_%'], errors='coerce')

# Drop rows with NaN (the first row is typically NaN after return calculation)
returns = df['Daily_Return_%'].dropna()

# dropna() does not treat +/-inf as missing, but norm.fit raises ValueError on
# non-finite data, so infinite values are removed explicitly as well.
returns = returns[np.isfinite(returns)]

# Fail loudly rather than fitting and plotting an empty sample, which would
# only produce NaN parameters and an empty figure.
if returns.empty:
    raise ValueError(
        "No finite values found in column 'Daily_Return_%'; "
        "nothing to fit or plot."
    )

# 2. Calculate parameters and fit the Normal Distribution
# norm.fit returns the fitted location and scale, used here as the mean (mu)
# and standard deviation (std) of the normal curve.
mu, std = norm.fit(returns)

# 3. Create the plot
plt.figure(figsize=(12, 8))

# Create a histogram with a Kernel Density Estimate (KDE) curve
# The KDE curve represents the empirical probability distribution.
# stat="density" normalizes the bars so they share a y-scale with the PDF.
sns.histplot(returns, bins=50, kde=True, stat="density", label='Daily Returns (KDE)')

# Plot the theoretical Normal Distribution curve for comparison.
# The x-limits are read after the histogram is drawn so the curve spans the
# same horizontal range as the plotted data.
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
# Calculate the probability density function (PDF) for the fitted normal distribution
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'r', linewidth=2, label=f'Fitted Normal Dist. (μ={mu:.4f}, σ={std:.4f})')

plt.title('Probability Distribution of NIFTY 50 Daily Returns')
plt.xlabel('Daily Return (%)')
plt.ylabel('Density')
plt.legend()
plt.grid(True, alpha=0.5)
plt.show()