import pandas as pd
import matplotlib.pyplot as plt

# Lollipop chart of the 15 rows with the largest emission factor.
factor_col = "Supply Chain Emission Factors with Margins"
title_col = "2017 NAICS Title"

df = pd.read_csv("datasets/GHGEmissionFactors_1.csv")
df.head()  # preview call; its result is not used by the rest of this cell

# Rank every row from highest to lowest factor, then keep the first 15.
ranked = df.sort_values(factor_col, ascending=False)
top = ranked.head(15)

# Stems: one horizontal line per title, running from 0 out to its factor.
plt.hlines(y=top[title_col], xmin=0, xmax=top[factor_col])

# Heads: a round marker at the end of each stem.
plt.plot(top[factor_col], top[title_col], "o", markersize=8)

[<matplotlib.lines.Line2D at 0xfb80b725c050>]

import pandas as pd
import plotly.express as px

# Trial Plotly chart: emission factor plotted against NAICS title as a line.
df = pd.read_csv("datasets/GHGEmissionFactors_1.csv")

line_options = dict(
    x="2017 NAICS Title",
    y="Supply Chain Emission Factors with Margins",
    title="CO2 Factors",
)
fig = px.line(df, **line_options)
fig.show()

import pandas as pd
import plotly.express as px

# Interactive sunburst of emission factors, organised as
# Sector -> Subsector -> Commodity using prefixes of the NAICS code.

# Load your dataset
df = pd.read_csv("datasets/GHGEmissionFactors_1.csv")

# ------------------------------------------------------------
# 1. NAICS SECTOR NAMES (keyed by the first 2 digits of the code)
# ------------------------------------------------------------
sector_names = {
    "11": "Agriculture, Forestry, Fishing & Hunting",
    "21": "Mining, Quarrying, Oil & Gas Extraction",
    "22": "Utilities",
    "23": "Construction",
    "31": "Manufacturing",
    "32": "Manufacturing",
    "33": "Manufacturing",
    "42": "Wholesale Trade",
    "44": "Retail Trade",
    "45": "Retail Trade",
    "48": "Transportation & Warehousing",
    "49": "Transportation & Warehousing",
    "51": "Information",
    "52": "Finance & Insurance",
    "53": "Real Estate & Rental & Leasing",
    "54": "Professional, Scientific & Technical Services",
    "55": "Management of Companies",
    "56": "Administrative & Support",
    "61": "Educational Services",
    "62": "Health Care & Social Assistance",
    "71": "Arts, Entertainment & Recreation",
    "72": "Accommodation & Food Services",
    "81": "Other Services",
    "92": "Public Administration",
}

# ------------------------------------------------------------
# 2. SUBSECTOR NAMES (keyed by the first 4 digits of the code)
# Only a few examples included – add more if needed.
# Codes missing from this table are labelled "Other Subsector" below.
# ------------------------------------------------------------
subsector_names = {
    "1111": "Oilseed & Grain Farming",
    "1112": "Vegetable & Melon Farming",
    "1113": "Fruit & Tree Nut Farming",
    "1114": "Greenhouse, Nursery & Floriculture",
    "1121": "Cattle Ranching & Farming",
    "1122": "Hog & Pig Farming",
    "2111": "Oil & Gas Extraction",
    "2121": "Coal Mining",
    "2122": "Metal Ore Mining",
    "2211": "Electric Power Generation",
    "3111": "Animal Food Manufacturing",
    "3112": "Grain & Oilseed Milling",
    "3113": "Sugar & Confectionery Manufacturing",
    # Add more as needed
}

# ------------------------------------------------------------
# 3. BUILD HIERARCHY COLUMNS
# ------------------------------------------------------------
# The code is converted to text so that it can be sliced by position.
naics_code = df["2017 NAICS Code"].astype(str)
df["SectorCode"] = naics_code.str[:2]
df["SubsectorCode"] = naics_code.str[:4]

# Prefixes that are not in the lookup tables fall back to a generic label.
df["Sector"] = df["SectorCode"].map(sector_names).fillna("Other Sector")
df["Subsector"] = df["SubsectorCode"].map(subsector_names).fillna("Other Subsector")
df["Commodity"] = df["2017 NAICS Title"]

# Emission level column (adjust name if needed).
# This single name drives both the slice sizes and the slice colours below,
# so switching to another column only requires editing this line.
emission_col = "Supply Chain Emission Factors with Margins"

# ------------------------------------------------------------
# 4. INTERACTIVE SUNBURST (HIERARCHICAL PIE)
# ------------------------------------------------------------
fig = px.sunburst(
    df,
    path=["Sector", "Subsector", "Commodity"],
    values=emission_col,      # sizes slices by emission levels
    color=emission_col,       # colors show emission intensity
    color_continuous_scale="RdBu_r",
    maxdepth=-1,
    title="Interactive Sector → Subsector → Commodity Pie Chart (NAICS + Emission Levels)",
)

# Add visual spacing / margins
fig.update_layout(
    width=1000,   # make graph wider
    height=700,   # make graph taller
    margin=dict(l=40, r=40, t=80, b=40),
)

fig.update_traces(textinfo="label+percent entry")

fig.show()

import pandas as pd
import matplotlib.pyplot as plt

# Read the BTC-USD history; the Date column is parsed into datetimes.
df = pd.read_csv('datasets/BTC_USD_full_data.csv', parse_dates=['Date'])

# Draw closing price against date on a single 12x6 axes.
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(df['Date'], df['Close'], color='blue', label='Closing Price')

# Title and axis labels.
ax.set_title('BTC-USD Closing Price Over Time', fontsize=16)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Closing Price (USD)', fontsize=14)

# Legend, grid and layout, then render.
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

import pandas as pd
import plotly.express as px


# Read the BTC-USD history; the Date column is parsed into datetimes.
btc_csv_path = 'datasets/BTC_USD_full_data.csv'
df = pd.read_csv(btc_csv_path, parse_dates=['Date'])

# Interactive line chart of the Close column against Date.
fig = px.line(data_frame=df, x='Date', y="Close")
fig.show()

import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

# Bitcoin "heat ribbon": the closing-price line is drawn as one short
# segment per pair of consecutive rows, each coloured by the daily return.

# Load BTC data and order it by date so consecutive rows are consecutive
# observations.
df = pd.read_csv("datasets/BTC_USD_full_data.csv", parse_dates=['Date'])
df = df.sort_values('Date')

# Calculate daily returns (fractional change in Close from the previous row).
# fill_method=None is passed explicitly because relying on the default
# fill_method='pad' is deprecated and emits a FutureWarning. With None,
# missing closes are not forward-filled; they yield NaN returns, which are
# removed by the dropna below together with the first row.
df['Return'] = df['Close'].pct_change(fill_method=None)
df = df.dropna(subset=['Return'])

# Normalize return to [0, 1] for color sampling.
# RdBu runs from red (low end) to blue (high end): Red = loss, Blue = gain.
# NOTE: this is min-max scaling, so the scale's neutral midpoint corresponds
# to the middle of the observed return range, not necessarily to a 0% return.
colorscale = px.colors.diverging.RdBu
return_min = df['Return'].min()
return_span = df['Return'].max() - return_min
if return_span > 0:
    df['Return_norm'] = (df['Return'] - return_min) / return_span
else:
    # All returns identical (or no rows left): avoid dividing by zero and
    # use the midpoint of the scale for every row.
    df['Return_norm'] = 0.5

# Pull the columns out once instead of indexing the DataFrame in the loop.
dates = df['Date'].tolist()
closes = df['Close'].tolist()
returns = df['Return'].tolist()
hover_text = [
    f"Return: {ret:.2%}<br>Price: ${close:,.2f}"
    for ret, close in zip(returns, closes)
]

# Sample every segment color in a single call (one color per row).
point_colors = []
if len(dates) > 1:
    point_colors = px.colors.sample_colorscale(
        colorscale, df['Return_norm'].tolist()
    )

# Create Heat Ribbon
fig = go.Figure()

for start in range(len(dates) - 1):
    end = start + 1
    fig.add_trace(go.Scatter(
        x=[dates[start], dates[end]],
        y=[closes[start], closes[end]],
        # The move from `start` to `end` is the return stored on the `end`
        # row, so that row's color is the one matching this segment's slope.
        line=dict(color=point_colors[end], width=6),
        hoverinfo='x+y+text',
        # One label per endpoint so each point reports its own return/price.
        text=[hover_text[start], hover_text[end]],
        mode='lines'
    ))

# Update layout
fig.update_layout(
    title="Interactive Bitcoin Heat Ribbon",
    xaxis_title="Date",
    yaxis_title="Price (USD)",
    hovermode="x unified",
    template="plotly_dark",
    height=600,
    showlegend=False,
)

fig.show()

/tmp/ipykernel_17930/2843191559.py:11: FutureWarning:

The default fill_method='pad' in Series.pct_change is deprecated and will be removed in a future version. Either fill in any non-leading NA values prior to calling pct_change or specify 'fill_method=None' to not fill NA values.

Data Visualization¶

To visualize the data I chose, Greenhouse Gas emissions from different sectors of industry, I first tried a simple plot in Matplotlib. I realized that the graph looked more like data representation than visualization, and was far too boring. The graph below is what I plotted:

To make my visualization more interesting, I searched the web for a visualization of similar data. What captivated me was a sunburst pie chart on the World Resources website.

After referring to the above data visualization, I decided to use Plotly for my visualization and installed it in my notebook.

Once the installation was complete, I tested it by plotting my data with Plotly, as shown below:

As the simple line graph in Plotly was successful, I proceeded to work on my final data visualization, which was intended to look more like the interactive pie chart that had captivated me.

Bitcoin Visualization¶

References¶