import pandas as pd
import matplotlib.pyplot as plt
# Load the state-wise installed-capacity sample into the working DataFrame.
df = pd.read_csv(
    filepath_or_buffer="datasets/Installed Capacity Statewise_Sample_Data.csv",
)
# Notebook-style preview of the first five rows (five is head()'s default).
df.head(n=5)

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Parse the date column so it sorts and plots chronologically.
df["Date (date)"] = pd.to_datetime(df["Date (date)"])

# Chronologically ordered copy of the raw rows.
df_sorted = df.sort_values("Date (date)")

# The table carries state and sector columns next to the date, so several rows
# can share one date. Plotting the raw rows would draw many y-values per
# x-position and make the line zig-zag; collapse to one total per date first.
# NOTE(review): on a sampled file each total only covers the sampled rows.
renewable_col = "Renewable Energy Mode Installed Capacity (res_cap)"
renewable_by_date = df_sorted.groupby("Date (date)")[renewable_col].sum()

plt.figure(figsize=(10, 5))

# Plot total renewable capacity per date.
plt.plot(renewable_by_date.index, renewable_by_date.values)

plt.xlabel("Date")
plt.ylabel("Renewable Capacity (MW)")
plt.title("Renewable Energy Installed Capacity Over Time")

# One major tick per calendar year, labelled with the four-digit year.
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Every generation mode in the table, so the stacked bars show an actual mix
# rather than the renewable column alone.
energy_cols = [
    "Coal Mode Installed Capacity (coal_cap)",
    "Gas Mode Installed Capacity (gas_cap)",
    "Nuclear Mode Installed Capacity (nuclear_cap)",
    "Hydro Mode Installed Capacity (hydro_cap)",
    "Renewable Energy Mode Installed Capacity (res_cap)",
]

# Installed capacity is a point-in-time figure, so keep only the most recent
# row for each state/sector pair instead of drawing one bar per raw row.
# The sort key parses dates on the fly, so this works whether or not the
# date column has already been converted to datetime.
latest_rows = df.sort_values("Date (date)", key=pd.to_datetime).drop_duplicates(
    subset=["State Code (state_code)", "Sector/Ownership (sector)"],
    keep="last",
)

# One row per state: add up the sectors' latest figures.
df_plot = latest_rows.groupby("State Code (state_code)", as_index=False)[energy_cols].sum()

df_plot.set_index("State Code (state_code)").plot(
    kind="bar",
    stacked=True,
    figsize=(14, 7),
)

plt.ylabel("Installed Capacity (MW)")
plt.title("Energy Mix by State")
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

# The column of interest and how many states to show.
energy_cols = ["Renewable Energy Mode Installed Capacity (res_cap)"]
top_n = 10

state_keys = ["State Code (state_code)", "State Name (state_name)"]

# 1. Installed capacity is a point-in-time figure: adding up every dated row
#    would count the same capacity once per snapshot and favour states with
#    more rows. Keep only the most recent row per state/sector pair.
#    The sort key parses dates on the fly, so an unconverted date column works.
latest_rows = df.sort_values("Date (date)", key=pd.to_datetime).drop_duplicates(
    subset=state_keys + ["Sector/Ownership (sector)"],
    keep="last",
)

# 2. Sum the sectors' latest figures to get a single row per state.
df_grouped = latest_rows.groupby(state_keys)[energy_cols].sum().reset_index()

# 3. Rank by renewable capacity and keep the top `top_n` states.
df_plot = df_grouped.sort_values(by=energy_cols[0], ascending=False).head(top_n)

# 4. Use the state name as the index so it becomes the bar label.
df_plot = df_plot.set_index("State Name (state_name)")

# Create the figure once and draw onto its axes; with a single column there
# is nothing to stack and no need for a legend.
fig, ax = plt.subplots(figsize=(14, 7))
df_plot[energy_cols].plot(kind="bar", ax=ax, legend=False)

plt.ylabel("Installed Capacity (MW)")
plt.title(f"Renewable Energy Installed Capacity by Top {top_n} States")
plt.xlabel("State Name")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

import pandas as pd
import matplotlib.pyplot as plt
# Rebind `df` to the NIFTY 50 price/indicator table; the capacity data that
# `df` held before is no longer reachable through this name.
df = pd.read_csv(
    filepath_or_buffer="datasets/NIFTY_50.csv",
)
# Notebook-style preview of the first five rows (five is head()'s default).
df.head(n=5)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# Parse dates and derive the calendar year used for grouping.
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year

# Mean daily return for each year (NaN returns are skipped by mean()).
yearly_avg = df.groupby('Year')['Daily_Return_%'].mean()

# imshow needs a 2-D array: a single row with one cell per year.
heatmap_data = np.array([yearly_avg.values])

# Red -> white -> green colour scale.
colors = ['red', 'white', 'green']
cmap = LinearSegmentedColormap.from_list("red_white_green", colors)

# By default imshow stretches the colormap between the data's min and max,
# which puts white at the midpoint of the data range rather than at 0 and can
# paint a small positive year red. Use limits symmetric around zero so that
# green always means positive and red always means negative.
finite_abs = np.abs(heatmap_data[np.isfinite(heatmap_data)])
color_limit = finite_abs.max() if finite_abs.size and finite_abs.max() > 0 else 1.0

# Plot heatmap
plt.figure(figsize=(14, 4))
plt.imshow(heatmap_data, cmap=cmap, aspect='auto',
           vmin=-color_limit, vmax=color_limit)

plt.colorbar(label="Average Return (%)")
plt.yticks([])  # the single row carries no meaning on the y-axis
plt.xticks(
    ticks=range(len(yearly_avg.index)),
    labels=yearly_avg.index,
    rotation=90
)
plt.title("Heatmap of Average Yearly Returns (Green = Positive, Red = Negative)")
plt.tight_layout()
plt.show()

# NIFTY VALUE ON LAST TRADING DAY OF MARCH (EVERY YEAR)

# Parse the dates here instead of relying on an earlier cell having done so.
trade_dates = pd.to_datetime(df['Date'])

# Keep March rows and order them by date, so that "last row per year" really
# is the latest March date even if the CSV is not stored chronologically.
march_rows = df.assign(Date=trade_dates).loc[trade_dates.dt.month == 3].sort_values('Date')

# Last available March row for each calendar year (result stays date-sorted).
# NOTE(review): if the most recent year's data stops before the end of March,
# its point is the last available day rather than the true month end.
march_last = march_rows.groupby(march_rows['Date'].dt.year).tail(1)

# Derive the year from the date so a pre-existing 'Year' column is not needed.
march_years = march_last['Date'].dt.year

plt.figure(figsize=(10, 5))
plt.plot(march_years, march_last['Close'], marker='o')

plt.xlabel("Year")
plt.ylabel("Nifty Close Value")
plt.title("Nifty Value on Last Trading Day of March for Each Year")
# Pin ticks to the plotted years; the automatic locator may pick fractional
# positions such as 2012.5 on a numeric axis.
plt.xticks(march_years, rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

	Date (date)	Region (region)	State Name (state_name)	State Code (state_code)	Sector/Ownership (sector)	Coal Mode Installed Capacity (coal_cap)	Gas Mode Installed Capacity (gas_cap)	Nuclear Mode Installed Capacity (nuclear_cap)	Hydro Mode Installed Capacity (hydro_cap)	Renewable Energy Mode Installed Capacity (res_cap)
0	2019-02-01	Northern	Chandigarh	4	State	0.00	0.0	0.00	0.0	0.00
1	2023-02-01	Eastern	West Bengal	19	State	4810.00	80.0	0.00	986.0	121.95
2	2024-01-01	Western	Maharashtra	27	Private	10826.01	568.0	0.00	481.0	13376.87
3	2022-10-01	Northern	Uttarakhand	5	Private	0.00	450.0	0.00	829.0	861.34
4	2019-08-01	Eastern	Andaman And Nicobar Islands	35	Private	0.00	0.0	6.63	0.0	0.00

	Date	Adj Close	Close	High	Low	Open	SMA_20	SMA_50	EMA_12	EMA_26	MACD	Signal_Line	RSI_14	BB_Mid	BB_Upper	BB_Lower	Daily_Return_%
0	2007-09-17	4494.649902	4494.649902	4549.049805	4482.850098	4518.450195	NaN	NaN	4494.649902	4494.649902	0.000000	0.000000	NaN	NaN	NaN	NaN	NaN
1	2007-09-18	4546.200195	4546.200195	4551.799805	4481.549805	4494.100098	NaN	NaN	4502.580717	4498.468443	4.112274	0.822455	NaN	NaN	NaN	NaN	1.146926
2	2007-09-19	4732.350098	4732.350098	4739.000000	4550.250000	4550.250000	NaN	NaN	4537.929852	4515.793010	22.136843	5.085332	NaN	NaN	NaN	NaN	4.094626
3	2007-09-20	4747.549805	4747.549805	4760.850098	4721.149902	4734.850098	NaN	NaN	4570.179076	4532.960180	37.218896	11.512045	NaN	NaN	NaN	NaN	0.321187
4	2007-09-21	4837.549805	4837.549805	4855.700195	4733.700195	4752.950195	NaN	NaN	4611.313034	4555.522374	55.790660	20.367768	NaN	NaN	NaN	NaN	1.895715

Week 1: Introduction

1st Session Tasks

2nd Session Tasks - Visualizing the Data Sets

First Dataset: Installed Power Capacity of Indian States

Second Dataset: Indian Stock Market Return Since 2007 - NIFTY50