import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the wine dataset and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='./datasets/Wine_dataset.csv')
df.head(n=5)

## import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the wine dataset.
df = pd.read_csv('./datasets/Wine_dataset.csv')

# Column 0 is skipped; column 1 supplies the shared x values and columns 2-10
# are each drawn against it as a separate series.
x_column, *y_columns = df.columns[1:11]
first_data = df[x_column]

# Exactly one plot call per y column, so no series is drawn twice and each
# one gets its own entry of the colour cycle. The label feeds the legend.
for y_column in y_columns:
    plt.plot(first_data, df[y_column], linestyle='solid', marker='o', label=y_column)

plt.xlabel(x_column)
plt.legend()
plt.show()

#from chatgpt

import pandas as pd
import matplotlib.pyplot as plt

# Load the wine dataset.
df = pd.read_csv('./datasets/Wine_dataset.csv')

# Line chart of the Alcohol column against the row index, drawn through the
# Axes object on a 10x4 figure.
alcohol_fig, alcohol_ax = plt.subplots(figsize=(10, 4))
alcohol_ax.plot(df["Alcohol"])
alcohol_ax.set_title("Alcohol Line Chart")
alcohol_ax.set_xlabel("Index")
alcohol_ax.set_ylabel("Alcohol")

plt.show()

# NOTE(review): an unfinished statement ("for i") stood here. It is not valid
# Python, so it is kept only as this comment until the intended loop is written.

# Written with help from ChatGPT and YouTube (https://www.youtube.com/watch?v=ahC7Mpiy9R8)

import pandas as pd
import matplotlib.pyplot as plt
import tkinter as tk

# Load the wine dataset.
df = pd.read_csv('./datasets/Wine_dataset.csv')

# Draw one Alcohol line per wine class. Each class is re-indexed from 0 so
# every line starts at x = 0; plotting against the original index would place
# each class at its row positions in the full table instead.
class_fig, class_ax = plt.subplots()
for wine_class, class_rows in df.groupby("class"):
    alcohol = class_rows["Alcohol"].reset_index(drop=True)
    class_ax.plot(alcohol.index, alcohol, label=f"class {wine_class}")

class_ax.set_title("Alcohol Line Chart by Class")
class_ax.set_xlabel("Index")
class_ax.set_ylabel("Alcohol")
class_ax.legend()
plt.show()

# NOTE(review): an incomplete import ("from ipywidgets import") stood here. It
# names nothing to import and is a SyntaxError, so it is kept only as a comment.

#check example code from https://gist.github.com/takeshiD/6a2957fbe709633d7f6e 

from ipywidgets import widgets
from IPython.display import display
# Dropdown offering "1", "2" and "3", with "2" selected initially.
w = widgets.Dropdown(
    description="Number:",      # label
    options=["1", "2", "3"],    # list of choices
    value="2",                  # initial value
)
display(w)  # show the widget

Dropdown(description='Number:', index=1, options=('1', '2', '3'), value='2')

from ipywidgets import widgets
from IPython.display import display
# Dropdown for choosing a wine class; the initial value 0 is the one paired
# with the 'all' entry.
w = widgets.Dropdown(
    description="class:",                                   # label
    options=[('all', 0), ('1', 1), ('2', 2), ('3', 3)],     # list of choices
    value=0,                                                # initial value
)
display(w)

Dropdown(description='class:', options=(('all', 0), ('1', 1), ('2', 2), ('3', 3)), value=0)

# Asked ChatGPT how to build a dropdown menu from the column names

import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import widgets

# Load the wine dataset from CSV
df = pd.read_csv('./datasets/Wine_dataset.csv')

# 1. Fill the dropdown with the column names
dropdown = widgets.Dropdown(description='Column:', options=df.columns)

# 2. Drawing function
def plot_column(change):
    """Draw a line chart of the column named by ``change['new']``.

    Args:
        change: Mapping whose ``'new'`` entry is the column name to plot.
            It is registered below as the dropdown's value observer.
    """
    selected = change['new']  # the newly selected value (a column name)

    _, axes = plt.subplots(figsize=(10, 4))
    axes.plot(df[selected])
    axes.set_title(f"{selected} Line Chart")
    axes.set_xlabel("Index")
    axes.set_ylabel(selected)
    plt.show()

# 3. Redraw the chart whenever the dropdown's value changes
dropdown.observe(handler=plot_column, names='value')

# 4. Show the dropdown
display(dropdown)

Dropdown(description='Column:', options=('class', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnes…

import pandas as pd
import plotly.graph_objects as go

# Load the data
df = pd.read_csv('./datasets/Wine_dataset.csv')

# ---- 1. Convert numeric columns into categories ----
# Split Alcohol into three quantile bins: Low / Medium / High
df["Alcohol_group"] = pd.qcut(df["Alcohol"], 3, labels=["Low", "Medium", "High"])

# Split Color intensity into three quantile bins as well
df["Color_group"] = pd.qcut(df["Color intensity"], 3, labels=["Light", "Middle", "Strong"])

# ---- 2. Define the nodes (boxes) ----
labels = [
    "Class 1", "Class 2", "Class 3",
    "Alcohol Low", "Alcohol Medium", "Alcohol High",
    "Color Light", "Color Middle", "Color Strong"
]

# Lookup from a node label to its position in `labels`
label_index = {label: i for i, label in enumerate(labels)}


# ---- 3. Count the flows (class -> Alcohol_group) ----
# observed=False is passed explicitly: the group columns are categorical, and
# relying on the default emits a pandas FutureWarning. Passing the current
# default keeps the result unchanged.
links1 = (
    df.groupby(["class", "Alcohol_group"], observed=False)
    .size()
    .reset_index(name="count")
)

# ---- 4. Count the flows (Alcohol_group -> Color_group) ----
links2 = (
    df.groupby(["Alcohol_group", "Color_group"], observed=False)
    .size()
    .reset_index(name="count")
)


# ---- 5. Build the link data for the Sankey diagram ----
sources = []
targets = []
values = []

# class -> alcohol
for wine_class, alcohol_group, count in zip(
    links1["class"], links1["Alcohol_group"], links1["count"]
):
    sources.append(label_index[f"Class {wine_class}"])
    targets.append(label_index[f"Alcohol {alcohol_group}"])
    values.append(count)

# alcohol -> color
for alcohol_group, color_group, count in zip(
    links2["Alcohol_group"], links2["Color_group"], links2["count"]
):
    sources.append(label_index[f"Alcohol {alcohol_group}"])
    targets.append(label_index[f"Color {color_group}"])
    values.append(count)

# ---- 6. Draw the Sankey diagram ----
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels
    ),
    link=dict(
        source=sources,
        target=targets,
        value=values
    )
)])

fig.update_layout(title_text="Wine Dataset Sankey Diagram", font_size=12)
fig.show()

/tmp/ipykernel_4569/4158301510.py:26: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  links1 = df.groupby(["class", "Alcohol_group"]).size().reset_index(name="count")
/tmp/ipykernel_4569/4158301510.py:29: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
  links2 = df.groupby(["Alcohol_group", "Color_group"]).size().reset_index(name="count")

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

# Load the data
df = pd.read_csv('./datasets/Wine_dataset.csv')

# Pairwise correlation between the columns
correlation_matrix = df.corr()

# Render the matrix as an annotated heatmap on a 12x8 figure
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap="coolwarm",
    linewidths=0.5,
    ax=heatmap_ax,
)
heatmap_ax.set_title("Correlation Matrix Heatmap")
plt.show()

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the wine dataset.
df = pd.read_csv('./datasets/Wine_dataset.csv')

# Histogram of the Alcohol column using 20 bins.
hist_fig, hist_ax = plt.subplots()
hist_ax.hist(df["Alcohol"], bins=20)
hist_ax.set_xlabel("Alcohol")
hist_ax.set_ylabel("Count")
hist_ax.set_title("Distribution of Alcohol")
plt.show()

# Line chart of the Alcohol column against the row index. The axes are
# labelled with what is actually plotted, matching the earlier Alcohol chart.
plt.plot(df["Alcohol"])
plt.xlabel("Index")
plt.ylabel("Alcohol")
plt.show()

	class	Alcohol	Malic acid	Ash	Alcalinity of ash	Magnesium	Total phenols	Flavanoids	Nonflavanoid phenols	Proanthocyanins	Color intensity	Hue	OD280/OD315 of diluted wines	Proline
0	1	14.23	1.71	2.43	15.6	127	2.80	3.06	0.28	2.29	5.64	1.04	3.92	1065
1	1	13.20	1.78	2.14	11.2	100	2.65	2.76	0.26	1.28	4.38	1.05	3.40	1050
2	1	13.16	2.36	2.67	18.6	101	2.80	3.24	0.30	2.81	5.68	1.03	3.17	1185
3	1	14.37	1.95	2.50	16.8	113	3.85	3.49	0.24	2.18	7.80	0.86	3.45	1480
4	1	13.24	2.59	2.87	21.0	118	2.80	2.69	0.39	1.82	4.32	1.04	2.93	735

2. Tools

Goals

Assignment

What I've learnt

Read the dataset file

Making graphs

Visualizing challenge

Sankey Diagram