import pandas as pd
import numpy as np
import plotly.graph_objects as go #according to Gemini, this library is good for complex, "layered" charts so I can build a chart layer-by-layer

!pip install pandas numpy plotly ipywidgets

Requirement already satisfied: pandas in /opt/conda/lib/python3.13/site-packages (2.3.3)
Requirement already satisfied: numpy in /opt/conda/lib/python3.13/site-packages (2.3.3)
Requirement already satisfied: plotly in /opt/conda/lib/python3.13/site-packages (6.5.0)
Requirement already satisfied: ipywidgets in /opt/conda/lib/python3.13/site-packages (8.1.7)
Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.13/site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.13/site-packages (from pandas) (2025.2)
Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.13/site-packages (from pandas) (2025.2)
Requirement already satisfied: narwhals>=1.15.1 in /opt/conda/lib/python3.13/site-packages (from plotly) (2.9.0)
Requirement already satisfied: packaging in /opt/conda/lib/python3.13/site-packages (from plotly) (25.0)
Requirement already satisfied: comm>=0.1.3 in /opt/conda/lib/python3.13/site-packages (from ipywidgets) (0.2.3)
Requirement already satisfied: ipython>=6.1.0 in /opt/conda/lib/python3.13/site-packages (from ipywidgets) (9.6.0)
Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.13/site-packages (from ipywidgets) (5.14.3)
Requirement already satisfied: widgetsnbextension~=4.0.14 in /opt/conda/lib/python3.13/site-packages (from ipywidgets) (4.0.14)
Requirement already satisfied: jupyterlab_widgets~=3.0.15 in /opt/conda/lib/python3.13/site-packages (from ipywidgets) (3.0.15)
Requirement already satisfied: decorator in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (5.2.1)
Requirement already satisfied: ipython-pygments-lexers in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (1.1.1)
Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (0.19.2)
Requirement already satisfied: matplotlib-inline in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (0.1.7)
Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (4.9.0)
Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.52)
Requirement already satisfied: pygments>=2.4.0 in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (2.19.2)
Requirement already satisfied: stack_data in /opt/conda/lib/python3.13/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)
Requirement already satisfied: wcwidth in /opt/conda/lib/python3.13/site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.14)
Requirement already satisfied: parso<0.9.0,>=0.8.4 in /opt/conda/lib/python3.13/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.5)
Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.13/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)
Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)
Requirement already satisfied: executing>=1.2.0 in /opt/conda/lib/python3.13/site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (2.2.1)
Requirement already satisfied: asttokens>=2.1.0 in /opt/conda/lib/python3.13/site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (3.0.0)
Requirement already satisfied: pure_eval in /opt/conda/lib/python3.13/site-packages (from stack_data->ipython>=6.1.0->ipywidgets) (0.2.3)

# Read the mortality dataset and preview its first rows
df = pd.read_csv("datasets/Mortality cases3.csv")
df.head(5)

# Names of the year columns that are summed for every disease category
years = [str(year) for year in range(2018, 2023)]

# Tidy the category labels: remove surrounding whitespace first, then rename
# the 'Infectious' label to the clearer 'Infectious Diseases'
stripped_labels = df['Type of disease'].str.strip()
df['Type of disease'] = stripped_labels
df['Type of disease'] = df['Type of disease'].replace('Infectious', 'Infectious Diseases')

# One row per disease category, one column per year, each cell holding the
# total of that year's values for the category
rows_by_category = df.groupby('Type of disease')
df_grouped = rows_by_category[years].sum()

print(
    "Categories for analysis (types of diseases):",
    df_grouped.index.tolist(),
    sep="\n",
)

# Number of rows (disease categories) in the grouped table
count = df_grouped.shape[0]
print(f"There are {count} different types of diseases.")

Categories for analysis (types of diseases):
['Certain Conditions Originating in the Perinatal Period', 'Congenital Malformations, Deformations & Chromosomal Abnormalities', 'Diseases of the Blood & Blood-forming Organs', 'Diseases of the Circulatory System', 'Diseases of the Digestive System', 'Diseases of the Eye & Ear', 'Diseases of the Genito-Urinary System', 'Diseases of the Musculo-skeletal System & Congenital Deformities', 'Diseases of the Nervous System', 'Diseases of the Respiratory System', 'Diseases of the Skin &Subcutaneous Tissue', 'Endocrine, Nutritional & Metabolic Diseases', 'Infectious Diseases', 'Injury, Poisoning & certain other consequences of External Causes', 'Mental & Behavioural Disorders', 'Neoplasms', 'Other Conditions', 'Pregnancy, Childbirth & the Puerperium', 'Viral, Protozoal & Helminthic Diseases']
There are 19 different types of diseases.

def trend_analysis(typeofdisease):
    """Plot the yearly totals of one disease category with fitted trend lines.

    Looks up the row for ``typeofdisease`` in the module-level ``df_grouped``
    table, fits polynomials of degree 1, 2 and 4 to the yearly totals and
    shows a Plotly figure containing the raw points, the three fitted curves
    and an annotation with the degree-2 prediction for the year after the
    last year in the table.

    Args:
        typeofdisease: Label of a row in ``df_grouped`` (a disease category).

    Returns:
        None. When the category is not in ``df_grouped`` an error message is
        printed and nothing is drawn; otherwise the figure is displayed with
        ``fig.show()``.
    """
    # 1. Check for a bad category first so the rest can assume the row exists
    if typeofdisease not in df_grouped.index:
        print(f"Error: '{typeofdisease}' not found.")
        return

    # 2. x and y values. The years come from the table's own column labels so
    # they always line up with the y values; NumPy arrays are used because
    # the fitting routines do arithmetic on all points at once.
    x_values = df_grouped.columns.astype(int).to_numpy()
    y_values = df_grouped.loc[typeofdisease].to_numpy(dtype=float)  # .loc picks the row

    first_year = int(x_values.min())
    last_year = int(x_values.max())
    next_year = last_year + 1  # the year the prediction is made for

    # 3. Fit the trend functions. Polynomial.fit rescales the years to
    # [-1, 1] internally before fitting, which avoids the poor conditioning
    # of fitting directly on values around 2000 (notably for degree 4).
    # Each result is callable on ordinary year values.
    fit = np.polynomial.Polynomial.fit
    poly_linear = fit(x_values, y_values, 1)  # degree 1 = straight line
    poly_curve = fit(x_values, y_values, 2)   # degree 2 = parabola
    # TESTING: with five data points a degree-4 polynomial passes through
    # every point, so this curve shows interpolation rather than a trend
    poly_curve2 = fit(x_values, y_values, 4)

    # 100 small steps instead of one point per year, so the curves look smooth
    x_smooth = np.linspace(first_year, next_year, 100)   # extends to the prediction year
    x_stopped = np.linspace(first_year, last_year, 100)  # stops at the last observed year

    # 4. Draw the plot, one layer (trace) at a time on a blank figure
    fig = go.Figure()

    # mode='markers' draws unconnected dots; here they are large and black
    fig.add_trace(go.Scatter(x=x_values, y=y_values, mode='markers', name='Actual Data',
                             marker=dict(size=12, color='black')))

    fig.add_trace(go.Scatter(x=x_smooth, y=poly_linear(x_smooth), mode='lines', name='Linear Fit',
                             line=dict(color='blue', dash='dash')))

    fig.add_trace(go.Scatter(x=x_smooth, y=poly_curve(x_smooth), mode='lines', name='Curved Fit',
                             line=dict(color='orange')))

    # TESTING
    fig.add_trace(go.Scatter(x=x_stopped, y=poly_curve2(x_stopped), mode='lines', name='Polynomial Curved? Fit',
                             line=dict(color='green')))

    # 5. Prediction from the degree-2 fit, labelled with a text box and arrow.
    # The label is rounded to the nearest whole number; int() alone would
    # always cut the decimals off (45.9 would be shown as 45).
    prediction = float(poly_curve(next_year))
    fig.add_annotation(x=next_year, y=prediction, text=f"{next_year} Pred: {round(prediction)}",
                       showarrow=True, arrowhead=1)

    fig.update_layout(title=f"Trend Analysis for {typeofdisease}", template="plotly_white")
    fig.show()

import ipywidgets as widgets # the recommended library for widgets
from IPython.display import display

# 1. Dropdown menu listing every disease category.
# The index of the grouped table is turned into a plain Python list first,
# which is then handed to the widget as its options.
disease_options = df_grouped.index.tolist()
dropdown = widgets.Dropdown(options=disease_options, description='Disease Type:')

# 2. Output area that the chart is drawn into, so each redraw can clear the
# previous chart and put the new one in the same place
chart_output = widgets.Output()

# 3. Update handler for the dropdown
def on_change(change):
    """Redraw the chart when the dropdown's selected value changes.

    ``change`` is the event mapping the widget passes to its observers. Only
    events whose 'type' is 'change' and whose 'name' is 'value' trigger a
    redraw; every other event is ignored.
    """
    # Guard clause: leave immediately for anything that is not a new selection
    if change['type'] != 'change' or change['name'] != 'value':
        return

    # Everything below is rendered inside the output area
    with chart_output:
        # wait=True holds the old chart until the new one is ready to replace it
        chart_output.clear_output(wait=True)
        trend_analysis(change['new'])

# 4. Connecting the dropdown menu to the function.
# names='value' subscribes the handler to changes of the selected value only,
# instead of calling it for every trait change of the widget.
dropdown.observe(on_change, names='value')

# 5. Displaying the menu AND the output area together
display(dropdown, chart_output)

# 6. Drawing the chart for whatever the dropdown currently has selected, so
# the output area is not blank before the first change and always matches
# the menu
with chart_output:
    trend_analysis(dropdown.value)

Dropdown(description='Disease Type:', options=('Certain Conditions Originating in the Perinatal Period', 'Cong…

Output()

# Draw the trend chart for every disease category, one after another
for category_name in df_grouped.index:
    trend_analysis(category_name)

	ICD10 CODE	Name of the Disease	Type of disease	2018	2019	2020	2021	2022
0	A02ᴳ	Diarrhoea	Infectious	6.0	6.0	2.0	NaN	2.0
1	A03ᴳ	Dysentery	Infectious	NaN	NaN	NaN	NaN	NaN
2	A15ᴳ	Tuberculosis	Infectious	22.0	20.0	20.0	31.0	17.0
3	A41ᴳ	Other Sepsis, including Septicaemia	Infectious	62.0	46.0	52.0	32.0	45.0
4	A50	Congenital Syphilis	Infectious	NaN	NaN	NaN	NaN	NaN

Fitting a Function

Importing Libraries

Fitting the Function

Cleaning the data

Fitting a curve

Note for self: suggested HTML for creating a band, for future reference