[Your-Name-Here] - Fab Futures - Data Science
Home About
In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read the raw survey table from disk
raw = pd.read_csv('datasets/data.csv')

# Derive a frame whose header names have leading/trailing whitespace stripped
df = raw.set_axis(raw.columns.str.strip(), axis=1)

# Order rows from lowest to highest percentage so the plotted line rises left to right
df_sorted = df.sort_values(by='percentage', ascending=True)

# Draw the sorted percentages as a line chart using the explicit Axes API
# (rather than the implicit pyplot state machine) so every call targets this figure.
fig, ax = plt.subplots(figsize=(14, 8))

# Plot against integer positions instead of the name strings: matplotlib's
# categorical axis maps repeated strings to a single x position, which would
# misalign the per-point annotations below if a Dzongkhag name ever repeats.
names = df_sorted['Dzongkhag'].tolist()
percentages = df_sorted['percentage'].tolist()
positions = list(range(len(names)))

ax.plot(positions, percentages,
        marker='o', linewidth=2.5, markersize=8,
        color='steelblue', markerfacecolor='darkorange')

# Title and axis labels
ax.set_title('Alcohol Consumption Percentage by Dzongkhag', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Dzongkhag', fontsize=12, fontweight='bold')
ax.set_ylabel('Percentage (%)', fontsize=12, fontweight='bold')

# One tick per row, labelled with the Dzongkhag name and rotated for readability
ax.set_xticks(positions)
ax.set_xticklabels(names, rotation=45, ha='right', fontsize=10)
ax.tick_params(axis='y', labelsize=10)

# Light dashed grid
ax.grid(True, alpha=0.3, linestyle='--')

# Value label 10 points above each marker, formatted to one decimal so the
# labels match the precision used for the average and the printed statistics
for pos, perc in zip(positions, percentages):
    ax.annotate(f'{perc:.1f}%',
                xy=(pos, perc),
                xytext=(0, 10),
                textcoords='offset points',
                ha='center',
                fontsize=9,
                color='darkred',
                fontweight='bold')

# Horizontal reference line at the mean percentage
average_consumption = df_sorted['percentage'].mean()
ax.axhline(y=average_consumption, color='red', linestyle='--', alpha=0.7,
           label=f'Average: {average_consumption:.1f}%')

# Legend (shows the average line's label)
ax.legend()

# Adjust layout to prevent the rotated tick labels from being cut off
fig.tight_layout()

# Render the figure
plt.show()

# Summarise the distribution: mean, extremes (with their Dzongkhag) and spread
pct = df_sorted['percentage']
highest, lowest = pct.max(), pct.min()
top_name = df_sorted.loc[pct.idxmax(), 'Dzongkhag']
bottom_name = df_sorted.loc[pct.idxmin(), 'Dzongkhag']

summary_lines = [
    "Alcohol Consumption Statistics:",
    f"Average consumption: {average_consumption:.1f}%",
    f"Maximum consumption: {highest:.1f}% (in {top_name})",
    f"Minimum consumption: {lowest:.1f}% (in {bottom_name})",
    f"Range: {highest - lowest:.1f}%",
]
for line in summary_lines:
    print(line)
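Figure: line chart of alcohol consumption percentage by Dzongkhag, sorted from lowest (bumthang, 21.8%) to highest (lhuentse, 50.6%), with a dashed red horizontal line marking the average (33.6%).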
Alcohol Consumption Statistics:
Average consumption: 33.6%
Maximum consumption: 50.6% (in lhuentse)
Minimum consumption: 21.8% (in bumthang)
Range: 28.8%
In [ ]: