In [4]:
import pandas as pd
import seaborn as sns
sns.set(color_codes=True)
In [5]:
wine = pd.read_csv('wq.csv')
In [3]:
wine.head()
Out[3]:
| fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
| 2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
| 3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
| 4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
In [4]:
wine.tail()
Out[4]:
| fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1594 | 6.2 | 0.600 | 0.08 | 2.0 | 0.090 | 32.0 | 44.0 | 0.99490 | 3.45 | 0.58 | 10.5 | 5 |
| 1595 | 5.9 | 0.550 | 0.10 | 2.2 | 0.062 | 39.0 | 51.0 | 0.99512 | 3.52 | 0.76 | 11.2 | 6 |
| 1596 | 6.3 | 0.510 | 0.13 | 2.3 | 0.076 | 29.0 | 40.0 | 0.99574 | 3.42 | 0.75 | 11.0 | 6 |
| 1597 | 5.9 | 0.645 | 0.12 | 2.0 | 0.075 | 32.0 | 44.0 | 0.99547 | 3.57 | 0.71 | 10.2 | 5 |
| 1598 | 6.0 | 0.310 | 0.47 | 3.6 | 0.067 | 18.0 | 42.0 | 0.99549 | 3.39 | 0.66 | 11.0 | 6 |
In [6]:
wine.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1599 entries, 0 to 1598 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 fixed acidity 1599 non-null float64 1 volatile acidity 1599 non-null float64 2 citric acid 1599 non-null float64 3 residual sugar 1599 non-null float64 4 chlorides 1599 non-null float64 5 free sulfur dioxide 1599 non-null float64 6 total sulfur dioxide 1599 non-null float64 7 density 1599 non-null float64 8 pH 1599 non-null float64 9 sulphates 1599 non-null float64 10 alcohol 1599 non-null float64 11 quality 1599 non-null int64 dtypes: float64(11), int64(1) memory usage: 150.0 KB
In [17]:
sns.barplot(x=wine['citric acid'], y=wine['pH'])
Out[17]:
<Axes: xlabel='citric acid', ylabel='pH'>
In [12]:
sns.displot(wine['pH'])
Out[12]:
<seaborn.axisgrid.FacetGrid at 0xf2e458673230>
In [18]:
sns.jointplot(x=wine['citric acid'], y=wine['pH'])
Out[18]:
<seaborn.axisgrid.JointGrid at 0xf2e45847fe00>
In [20]:
sns.jointplot(x=wine['citric acid'], y=wine['pH'], kind="kde")
Out[20]:
<seaborn.axisgrid.JointGrid at 0xf2e44eb67610>
In [22]:
sns.pairplot(wine[['citric acid', 'pH', 'density']])
Out[22]:
<seaborn.axisgrid.PairGrid at 0xf2e454598ec0>
In [24]:
sns.stripplot(x=wine['density'], y=wine['pH'])
Out[24]:
<Axes: xlabel='density', ylabel='pH'>
In [25]:
sns.stripplot(x=wine['density'], y=wine['pH'], jitter = True)
Out[25]:
<Axes: xlabel='density', ylabel='pH'>
In [28]:
sns.boxplot(x='density', y='alcohol', hue='pH', data=wine)
Out[28]:
<Axes: xlabel='density', ylabel='alcohol'>
/opt/conda/lib/python3.13/site-packages/IPython/core/events.py:82: UserWarning: Creating legend with loc="best" can be slow with large amounts of data. func(*args, **kwargs) /opt/conda/lib/python3.13/site-packages/IPython/core/pylabtools.py:170: UserWarning: Creating legend with loc="best" can be slow with large amounts of data. fig.canvas.print_figure(bytes_io, **kw)
In [29]:
sns.countplot(wine['pH'])
Out[29]:
<Axes: ylabel='count'>
In [30]:
sns.lmplot(x='alcohol', y= 'pH', data = wine)
Out[30]:
<seaborn.axisgrid.FacetGrid at 0xf2e445465810>
In [ ]: