In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Read the CSV and normalise its header in one step: leading/trailing
# whitespace around a column name would otherwise break the label-based
# lookups ('percentage', 'Dzongkhag') used below.
df = pd.read_csv('datasets/data.csv').rename(columns=str.strip)
# Order rows by ascending percentage so the plotted values rise left to right
df_sorted = df.sort_values(by='percentage')
# Create the line graph on an explicit Figure/Axes pair instead of pyplot's
# implicit "current figure", so every call names the Axes it draws on.
fig, ax = plt.subplots(figsize=(14, 8))

# Give every row its own integer x slot. Plotting the name strings directly
# makes matplotlib place points per *unique* category, so a repeated name
# would put two rows on the same x position while index-based annotations
# kept counting up and drifted away from their points.
x_positions = np.arange(len(df_sorted))
dzongkhag_labels = df_sorted['Dzongkhag'].astype(str).tolist()
percentages = df_sorted['percentage'].to_numpy()

# Plot the line
ax.plot(x_positions, percentages,
        marker='o', linewidth=2.5, markersize=8,
        color='steelblue', markerfacecolor='darkorange')

# Customize the plot
ax.set_title('Alcohol Consumption Percentage by Dzongkhag',
             fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Dzongkhag', fontsize=12, fontweight='bold')
ax.set_ylabel('Percentage (%)', fontsize=12, fontweight='bold')

# Label each slot with its Dzongkhag name; rotate for better readability
ax.set_xticks(x_positions)
ax.set_xticklabels(dzongkhag_labels, rotation=45, ha='right', fontsize=10)
ax.tick_params(axis='y', labelsize=10)

# Add grid for better readability
ax.grid(True, alpha=0.3, linestyle='--')

# Add value labels on each point. The same x_positions used for the line are
# reused here, so a label can never be offset from its marker. Values are
# shown with one decimal, matching the legend and the printed statistics.
for x_pos, perc in zip(x_positions, percentages):
    ax.annotate(f'{perc:.1f}%',
                xy=(x_pos, perc),
                xytext=(0, 10),  # 10 points straight above the marker
                textcoords='offset points',
                ha='center',
                fontsize=9,
                color='darkred',
                fontweight='bold')

# Add horizontal line at the average
average_consumption = df_sorted['percentage'].mean()
ax.axhline(y=average_consumption, color='red', linestyle='--', alpha=0.7,
           label=f'Average: {average_consumption:.1f}%')

# Add a legend (picks up the label given to the average line)
ax.legend()

# Adjust layout to prevent label cutoff
fig.tight_layout()

# Show the plot
plt.show()
# Print summary statistics for the percentage column
print("Alcohol Consumption Statistics:")
print(f"Average consumption: {average_consumption:.1f}%")

percentage_col = df_sorted['percentage']

# idxmax()/idxmin() return index labels; .loc maps each label back to the
# Dzongkhag name stored on that row.
highest = percentage_col.max()
highest_place = df_sorted.loc[percentage_col.idxmax(), 'Dzongkhag']
print(f"Maximum consumption: {highest:.1f}% (in {highest_place})")

lowest = percentage_col.min()
lowest_place = df_sorted.loc[percentage_col.idxmin(), 'Dzongkhag']
print(f"Minimum consumption: {lowest:.1f}% (in {lowest_place})")

# Spread between the largest and smallest values
print(f"Range: {highest - lowest:.1f}%")
Alcohol Consumption Statistics: Average consumption: 33.6% Maximum consumption: 50.6% (in lhuentse) Minimum consumption: 21.8% (in bumthang) Range: 28.8%
In [ ]: