Week 3: Function Fit (25 November 2025)¶
In our session, the tutor introduced several of the tools and concepts we will be using in class, covering both open-source and closed-source technologies. Open-source tools are freely available, cross-platform, extensible, and backed by large communities, although relying on community support can sometimes be challenging; commercial tools, by contrast, offer dedicated support and do not depend on volunteer developers. The tutor also demonstrated programming performance tools such as Numba, showing how just-in-time compilation and parallel processing can significantly speed up Python code. Finally, we explored different kinds of data tools: flat files, which are practical only up to the limits of a computer's memory; Pandas, which provides powerful routines for working with various data formats; and MySQL, a popular database system for storing large, structured datasets. Overall, the introduction gave a clear picture of these tools and how they will support our learning in this course.
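To illustrate what just-in-time compilation with Numba looks like in practice, here is a minimal sketch (assuming the numba package is installed; the function names are just illustrative) that runs the same loop with and without the @njit decorator:

import numpy as np
from numba import njit  # assumes numba is installed

# Plain Python loop: interpreted, so it runs slowly on large arrays
def sum_of_squares_py(arr):
    total = 0.0
    for x in arr:
        total += x * x
    return total

# Same loop, JIT-compiled to machine code by Numba on its first call
@njit
def sum_of_squares_jit(arr):
    total = 0.0
    for x in arr:
        total += x * x
    return total

data = np.random.rand(10_000_000)
print(sum_of_squares_py(data))   # slow, pure-Python loop
print(sum_of_squares_jit(data))  # much faster once the first (compiling) call is done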
Assignment: We are asked to choose one dataset and fit a function to it¶
Compiled Dataset: Alcohol-Related Deaths / Burden in Bhutan¶
Introduction to the Dataset¶
This dataset presents a compiled summary of alcohol-related deaths and alcohol-attributable health indicators in Bhutan, drawn from publicly available national and international sources. The data combines information from the Ministry of Health’s Annual Health Bulletins, the National Statistics Bureau’s Vital Statistics Reports, WHO country profiles, and published research such as the Bhutan Health Journal. It includes annual figures on alcohol-related liver disease (ALD) deaths, the proportion of deaths attributed to alcohol in health facilities, trends across multiple years, and population-level alcohol-consumption indicators. The dataset is designed to provide a clear picture of how alcohol contributes to mortality and public health challenges in Bhutan, enabling further analysis, comparison, and interpretation for academic or policy-related purposes.
Table summarizing key data points from public sources (Annual Health Bulletin, WHO, national reports):¶
| Year | Metric | Value | Source / Notes |
|---|---|---|---|
| 2016 | Alcohol-related (ALD) deaths | 190 | From the Bhutan Health Journal study. |
| 2012 → 2016 | Trend in ALD deaths | ~140 (2012) → 190 (2016) | Annual Health Bulletin 2017. |
| 2020 | Number of deaths for ALD (in health facilities) | 166 | Vital Statistics Report, Bhutan's 2021 VSR. |
| 2021 | Number of deaths for ALD | 141 | Reported by Ministry of Health. |
| 2022 | ALD share of facility-reported mortality | 12.22% | From Annual Health Bulletin 2023, health facility deaths. |
| 2022 | Change in ALD incidence (from 2021) | −0.26% decline | Reported in AHB 2023. |
| 2023 | ALD deaths | 129 | Reported in media citing AHB. |
import pandas as pd
# Read the CSV file
ald_data = pd.read_csv("datasets/ALD_Data.csv")
# Display the data
print(ald_data)
Year Metric \
0 2016 Alcohol-related (ALD) deaths
1 2012 → 2016 Trend in ALD deaths
2 2020 Number of deaths for ALD (in health facilities)
3 2021 Number of deaths for ALD
4 2022 ALD share of facility-reported mortality
5 2022 Change in ALD incidence (from 2021)
6 2023 ALD deaths
Value Source/Notes
0 190 From the Bhutan Health Journal study.
1 ~140 (2012) → 190 (2016) Annual Health Bulletin 2017.
2 166 Vital Statistics Report, Bhutan’s 2021 VSR.
3 141 Reported by Ministry of Health.
4 12.22% Annual Health Bulletin 2023.
5 −0.26% decline Reported in AHB 2023.
6 129 Reported in media citing AHB.
Plotting Data into Graphical Representations¶
import pandas as pd
import matplotlib.pyplot as plt
# Read the CSV file
ald_data = pd.read_csv("datasets/ALD_Data.csv")
# Keep only rows where Value is a plain whole number
# (drops rows like "~140 (2012) → 190 (2016)" or "−0.26% decline")
ald_data_numeric = ald_data[ald_data['Value'].str.replace('%', '', regex=False).str.replace('−', '-', regex=False).str.isnumeric()].copy()
# Convert the Value column to numeric (the .copy() above avoids SettingWithCopyWarning)
ald_data_numeric['Value'] = pd.to_numeric(ald_data_numeric['Value'])
# Plotting
plt.figure(figsize=(10,6))
plt.plot(ald_data_numeric['Year'], ald_data_numeric['Value'], marker='o', linestyle='-', color='teal')
plt.title("Alcohol-related (ALD) Deaths Over Years")
plt.xlabel("Year")
plt.ylabel("Number of Deaths")
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Data Cloning¶
My original dataset was very small, with too few data points to fit a model or perform meaningful analysis. To overcome this, I used a bootstrap-style approach to clone and expand the data, creating a larger dataset that could be used for function fitting and further experimentation. The process replicates the original rows and adds slight random variation to each numeric value, so the augmented data stays close to the real figures and the fit can follow the underlying trend without latching onto noise. This allowed me to work with "big data" derived from a small initial dataset.
import pandas as pd
import numpy as np
# Load the dataset
ald_data = pd.read_csv("datasets/ALD_Data.csv")
# Function to add slight numeric variation safely
def add_variation(val):
    try:
        # Strip non-numeric symbols like '~', '%', '→', '−'
        numeric_val = float(str(val).replace('~','').replace('%','').replace('→','').replace('−','-'))
        # Add random variation between -5 and +5, clipped at zero to avoid negative values
        return max(numeric_val + np.random.randint(-5, 6), 0)
    except (ValueError, TypeError):
        # Return the original value unchanged if it is not a simple number
        return val
# Clone dataset 5 times
big_data = pd.concat([ald_data]*5, ignore_index=True)
# Apply variation to 'Value' column
big_data['Value'] = big_data['Value'].apply(add_variation)
# Mark the first copy as 'Original' and the remaining copies as 'Cloned'
# (note: add_variation was applied to every row, so even 'Original' rows carry slight variation)
big_data['Source'] = 'Original'
big_data.loc[len(ald_data):, 'Source'] = 'Cloned'
# Shuffle dataset for randomness
big_data = big_data.sample(frac=1, random_state=42).reset_index(drop=True)
# View the bigger dataset
print(big_data)
# Optional: save to CSV
big_data.to_csv("datasets/ALD_Data_Big.csv", index=False)
Year Metric \
0 2022 Change in ALD incidence (from 2021)
1 2023 ALD deaths
2 2021 Number of deaths for ALD
3 2016 Alcohol-related (ALD) deaths
4 2012 → 2016 Trend in ALD deaths
5 2012 → 2016 Trend in ALD deaths
6 2022 Change in ALD incidence (from 2021)
7 2022 Change in ALD incidence (from 2021)
8 2012 → 2016 Trend in ALD deaths
9 2020 Number of deaths for ALD (in health facilities)
10 2020 Number of deaths for ALD (in health facilities)
11 2022 Change in ALD incidence (from 2021)
12 2016 Alcohol-related (ALD) deaths
13 2022 ALD share of facility-reported mortality
14 2021 Number of deaths for ALD
15 2023 ALD deaths
16 2022 Change in ALD incidence (from 2021)
17 2022 ALD share of facility-reported mortality
18 2012 → 2016 Trend in ALD deaths
19 2020 Number of deaths for ALD (in health facilities)
20 2022 ALD share of facility-reported mortality
21 2021 Number of deaths for ALD
22 2020 Number of deaths for ALD (in health facilities)
23 2020 Number of deaths for ALD (in health facilities)
24 2021 Number of deaths for ALD
25 2021 Number of deaths for ALD
26 2012 → 2016 Trend in ALD deaths
27 2022 ALD share of facility-reported mortality
28 2022 ALD share of facility-reported mortality
29 2023 ALD deaths
30 2023 ALD deaths
31 2023 ALD deaths
32 2016 Alcohol-related (ALD) deaths
33 2016 Alcohol-related (ALD) deaths
34 2016 Alcohol-related (ALD) deaths
Value Source/Notes \
0 −0.26% decline Reported in AHB 2023.
1 125.0 Reported in media citing AHB.
2 142.0 Reported by Ministry of Health.
3 186.0 From the Bhutan Health Journal study.
4 ~140 (2012) → 190 (2016) Annual Health Bulletin 2017.
5 ~140 (2012) → 190 (2016) Annual Health Bulletin 2017.
6 −0.26% decline Reported in AHB 2023.
7 −0.26% decline Reported in AHB 2023.
8 ~140 (2012) → 190 (2016) Annual Health Bulletin 2017.
9 171.0 Vital Statistics Report, Bhutan’s 2021 VSR.
10 162.0 Vital Statistics Report, Bhutan’s 2021 VSR.
11 −0.26% decline Reported in AHB 2023.
12 191.0 From the Bhutan Health Journal study.
13 15.22 Annual Health Bulletin 2023.
14 137.0 Reported by Ministry of Health.
15 134.0 Reported in media citing AHB.
16 −0.26% decline Reported in AHB 2023.
17 8.22 Annual Health Bulletin 2023.
18 ~140 (2012) → 190 (2016) Annual Health Bulletin 2017.
19 169.0 Vital Statistics Report, Bhutan’s 2021 VSR.
20 10.22 Annual Health Bulletin 2023.
21 143.0 Reported by Ministry of Health.
22 169.0 Vital Statistics Report, Bhutan’s 2021 VSR.
23 171.0 Vital Statistics Report, Bhutan’s 2021 VSR.
24 136.0 Reported by Ministry of Health.
25 136.0 Reported by Ministry of Health.
26 ~140 (2012) → 190 (2016) Annual Health Bulletin 2017.
27 15.22 Annual Health Bulletin 2023.
28 8.22 Annual Health Bulletin 2023.
29 131.0 Reported in media citing AHB.
30 133.0 Reported in media citing AHB.
31 124.0 Reported in media citing AHB.
32 188.0 From the Bhutan Health Journal study.
33 191.0 From the Bhutan Health Journal study.
34 190.0 From the Bhutan Health Journal study.
Source
0 Cloned
1 Cloned
2 Cloned
3 Cloned
4 Cloned
5 Cloned
6 Cloned
7 Cloned
8 Cloned
9 Cloned
10 Cloned
11 Cloned
12 Original
13 Original
14 Cloned
15 Cloned
16 Original
17 Cloned
18 Original
19 Original
20 Cloned
21 Original
22 Cloned
23 Cloned
24 Cloned
25 Cloned
26 Cloned
27 Cloned
28 Cloned
29 Original
30 Cloned
31 Cloned
32 Cloned
33 Cloned
34 Cloned
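To check that the cloned values stay realistic, a quick sanity check can group the numeric rows by Metric and look at their spread. This is a small sketch that assumes the ALD_Data_Big.csv file written above and its 'Metric' and 'Value' column names:

import pandas as pd
# Load the expanded dataset saved earlier
big_data = pd.read_csv("datasets/ALD_Data_Big.csv")
# Keep only rows whose Value parses as a plain number
numeric_rows = big_data[pd.to_numeric(big_data['Value'], errors='coerce').notna()].copy()
numeric_rows['Value'] = pd.to_numeric(numeric_rows['Value'])
# Mean and standard deviation per metric: the std should stay small, roughly the ±5 variation added
print(numeric_rows.groupby('Metric')['Value'].agg(['count', 'mean', 'std']))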
Polynomial Fit on Numeric Year vs Value¶
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the CSV file
data = pd.read_csv("datasets/ALD_Data_Big.csv")
# Function to safely convert 'Year' and 'Value' entries to numeric
def clean_numeric(val):
    try:
        # Strip symbols, take the first token (e.g. "2012 → 2016" becomes 2012), and convert to float
        val = str(val).replace('~','').replace('%','').replace('−','-').split()[0]
        return float(val)
    except (ValueError, TypeError, IndexError):
        return np.nan  # Return NaN if conversion fails
# Clean 'Year' and 'Value' columns
data['Year_num'] = data['Year'].apply(clean_numeric)
data['Value_num'] = data['Value'].apply(clean_numeric)
# Drop rows where conversion failed
clean_data = data.dropna(subset=['Year_num','Value_num'])
# Extract x and y for fitting
x = clean_data['Year_num'].values
y = clean_data['Value_num'].values
# Fit polynomial (2nd order example)
coeff2 = np.polyfit(x, y, 2)
pfit2 = np.poly1d(coeff2)
# Fit higher order polynomial (5th order example)
coeff5 = np.polyfit(x, y, 5)
pfit5 = np.poly1d(coeff5)
# Generate points for smooth plotting
xfit = np.linspace(min(x), max(x), 100)
yfit2 = pfit2(xfit)
yfit5 = pfit5(xfit)
# Plot data and polynomial fits
plt.scatter(x, y, color='blue', label='Data Points')
plt.plot(xfit, yfit2, 'g-', label='2nd Order Fit')
plt.plot(xfit, yfit5, 'r-', label='5th Order Fit')
plt.xlabel('Year')
plt.ylabel('Value')
plt.title('Polynomial Fit on ALD Data')
plt.legend()
plt.show()
/tmp/ipykernel_2876/1861055605.py:33: RankWarning: Polyfit may be poorly conditioned coeff5 = np.polyfit(x, y, 5)
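As a rough comparison of the two fits, the R² score can be computed for each polynomial on the same cleaned data. This is a sketch that reuses the x, y, pfit2, and pfit5 objects from the cell above; the r_squared helper is my own illustrative function, and because the cleaned data mixes death counts with percentage metrics and covers only a few distinct years, the higher R² of the 5th-order fit mostly reflects overfitting, which the RankWarning above also hints at:

import numpy as np
# R^2 = 1 - (residual sum of squares) / (total sum of squares)
def r_squared(y_true, y_pred):
    ss_res = np.sum((y_true - y_pred) ** 2)           # residual sum of squares
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)  # total sum of squares
    return 1 - ss_res / ss_tot

print("2nd order R^2:", r_squared(y, pfit2(x)))
print("5th order R^2:", r_squared(y, pfit5(x)))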