[Rinchen Khandu] - Fab Futures - Data Science
Home About
In [ ]:
# Tools
In [ ]:
## student_depression_dataset
In [2]:
import pandas as pd 
from matplotlib import pyplot as plt 
In [12]:
# Load the student depression survey CSV into a DataFrame.
csv_path = '~/work/rinchen-khandu/datasets/student_depression_dataset.csv'
data = pd.read_csv(csv_path)
# Leaving the frame as the last expression renders pandas' truncated preview.
data
Out[12]:
id Gender Age City Profession Academic Pressure Work Pressure CGPA Study Satisfaction Job Satisfaction Sleep Duration Dietary Habits Degree Have you ever had suicidal thoughts ? Work/Study Hours Financial Stress Family History of Mental Illness Depression
0 2 Male 33.0 Visakhapatnam Student 5.0 0.0 8.97 2.0 0.0 '5-6 hours' Healthy B.Pharm Yes 3.0 1.0 No 1
1 8 Female 24.0 Bangalore Student 2.0 0.0 5.90 5.0 0.0 '5-6 hours' Moderate BSc No 3.0 2.0 Yes 0
2 26 Male 31.0 Srinagar Student 3.0 0.0 7.03 5.0 0.0 'Less than 5 hours' Healthy BA No 9.0 1.0 Yes 0
3 30 Female 28.0 Varanasi Student 3.0 0.0 5.59 2.0 0.0 '7-8 hours' Moderate BCA Yes 4.0 5.0 Yes 1
4 32 Female 25.0 Jaipur Student 4.0 0.0 8.13 3.0 0.0 '5-6 hours' Moderate M.Tech Yes 1.0 1.0 No 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
27896 140685 Female 27.0 Surat Student 5.0 0.0 5.75 5.0 0.0 '5-6 hours' Unhealthy 'Class 12' Yes 7.0 1.0 Yes 0
27897 140686 Male 27.0 Ludhiana Student 2.0 0.0 9.40 3.0 0.0 'Less than 5 hours' Healthy MSc No 0.0 3.0 Yes 0
27898 140689 Male 31.0 Faridabad Student 3.0 0.0 6.61 4.0 0.0 '5-6 hours' Unhealthy MD No 12.0 2.0 No 0
27899 140690 Female 18.0 Ludhiana Student 5.0 0.0 6.88 2.0 0.0 'Less than 5 hours' Healthy 'Class 12' Yes 10.0 5.0 No 1
27900 140699 Male 27.0 Patna Student 4.0 0.0 9.24 1.0 0.0 'Less than 5 hours' Healthy BCA Yes 2.0 3.0 Yes 1

27901 rows × 18 columns

In [13]:
# Data cleaning: inspect the frame, check for missing values, and filter Age outliers
In [14]:
# Preview the first five rows to sanity-check the load.
data.head(n=5)
Out[14]:
id Gender Age City Profession Academic Pressure Work Pressure CGPA Study Satisfaction Job Satisfaction Sleep Duration Dietary Habits Degree Have you ever had suicidal thoughts ? Work/Study Hours Financial Stress Family History of Mental Illness Depression
0 2 Male 33.0 Visakhapatnam Student 5.0 0.0 8.97 2.0 0.0 '5-6 hours' Healthy B.Pharm Yes 3.0 1.0 No 1
1 8 Female 24.0 Bangalore Student 2.0 0.0 5.90 5.0 0.0 '5-6 hours' Moderate BSc No 3.0 2.0 Yes 0
2 26 Male 31.0 Srinagar Student 3.0 0.0 7.03 5.0 0.0 'Less than 5 hours' Healthy BA No 9.0 1.0 Yes 0
3 30 Female 28.0 Varanasi Student 3.0 0.0 5.59 2.0 0.0 '7-8 hours' Moderate BCA Yes 4.0 5.0 Yes 1
4 32 Female 25.0 Jaipur Student 4.0 0.0 8.13 3.0 0.0 '5-6 hours' Moderate M.Tech Yes 1.0 1.0 No 0
In [15]:
# List the column labels; several contain spaces or punctuation, so they must
# be accessed with bracket syntax (e.g. data['Work/Study Hours']).
data.columns
Out[15]:
Index(['id', 'Gender', 'Age', 'City', 'Profession', 'Academic Pressure',
       'Work Pressure', 'CGPA', 'Study Satisfaction', 'Job Satisfaction',
       'Sleep Duration', 'Dietary Habits', 'Degree',
       'Have you ever had suicidal thoughts ?', 'Work/Study Hours',
       'Financial Stress', 'Family History of Mental Illness', 'Depression'],
      dtype='object')
In [16]:
# Summary statistics (count, mean, std, min, quartiles, max).
# With default arguments only the numeric columns are summarised.
data.describe()
Out[16]:
id Age Academic Pressure Work Pressure CGPA Study Satisfaction Job Satisfaction Work/Study Hours Depression
count 27901.000000 27901.000000 27901.000000 27901.000000 27901.000000 27901.000000 27901.000000 27901.000000 27901.000000
mean 70442.149421 25.822300 3.141214 0.000430 7.656104 2.943837 0.000681 7.156984 0.585499
std 40641.175216 4.905687 1.381465 0.043992 1.470707 1.361148 0.044394 3.707642 0.492645
min 2.000000 18.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 35039.000000 21.000000 2.000000 0.000000 6.290000 2.000000 0.000000 4.000000 0.000000
50% 70684.000000 25.000000 3.000000 0.000000 7.770000 3.000000 0.000000 8.000000 1.000000
75% 105818.000000 30.000000 4.000000 0.000000 8.920000 4.000000 0.000000 10.000000 1.000000
max 140699.000000 59.000000 5.000000 5.000000 10.000000 5.000000 4.000000 12.000000 1.000000
In [17]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()
Out[17]:
id                                       0
Gender                                   0
Age                                      0
City                                     0
Profession                               0
Academic Pressure                        0
Work Pressure                            0
CGPA                                     0
Study Satisfaction                       0
Job Satisfaction                         0
Sleep Duration                           0
Dietary Habits                           0
Degree                                   0
Have you ever had suicidal thoughts ?    0
Work/Study Hours                         0
Financial Stress                         0
Family History of Mental Illness         0
Depression                               0
dtype: int64
In [18]:
# Remove rows that contain any missing value. The result is assigned back to
# `data` rather than using inplace=True, so the step is an explicit
# reassignment and can be chained with other transformations.
# The null count in the previous cell is zero for every column, so no rows
# are dropped for this dataset.
data = data.dropna()
In [19]:
# Show the dtype pandas inferred for each column.
# NOTE(review): 'Financial Stress' is reported as object although the previewed
# values look numeric (1.0, 2.0, ...) — presumably some entries are not
# parseable as numbers; confirm before using it in numeric analysis.
data.dtypes
Out[19]:
id                                         int64
Gender                                    object
Age                                      float64
City                                      object
Profession                                object
Academic Pressure                        float64
Work Pressure                            float64
CGPA                                     float64
Study Satisfaction                       float64
Job Satisfaction                         float64
Sleep Duration                            object
Dietary Habits                            object
Degree                                    object
Have you ever had suicidal thoughts ?     object
Work/Study Hours                         float64
Financial Stress                          object
Family History of Mental Illness          object
Depression                                 int64
dtype: object
In [20]:
# Drop Age outliers using the 1.5 * IQR rule.
Q1, Q3 = data['Age'].quantile([0.25, 0.75])
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_age_outlier = (data['Age'] < lower_fence) | (data['Age'] > upper_fence)
# NOTE: this rebinds `data`, so re-running the cell recomputes the quartiles
# on the already-filtered rows.
data = data[~is_age_outlier]
In [21]:
# Display the Gender column of the filtered frame.
data["Gender"]
Out[21]:
0          Male
1        Female
2          Male
3        Female
4        Female
          ...  
27896    Female
27897      Male
27898      Male
27899    Female
27900      Male
Name: Gender, Length: 27889, dtype: object
In [22]:
# Tally how many rows carry each Gender value, most frequent first.
data["Gender"].value_counts()
Out[22]:
Gender
Male      15543
Female    12346
Name: count, dtype: int64
In [25]:
# Preview the last five rows of the filtered frame.
data.tail(n=5)
Out[25]:
id Gender Age City Profession Academic Pressure Work Pressure CGPA Study Satisfaction Job Satisfaction Sleep Duration Dietary Habits Degree Have you ever had suicidal thoughts ? Work/Study Hours Financial Stress Family History of Mental Illness Depression
27896 140685 Female 27.0 Surat Student 5.0 0.0 5.75 5.0 0.0 '5-6 hours' Unhealthy 'Class 12' Yes 7.0 1.0 Yes 0
27897 140686 Male 27.0 Ludhiana Student 2.0 0.0 9.40 3.0 0.0 'Less than 5 hours' Healthy MSc No 0.0 3.0 Yes 0
27898 140689 Male 31.0 Faridabad Student 3.0 0.0 6.61 4.0 0.0 '5-6 hours' Unhealthy MD No 12.0 2.0 No 0
27899 140690 Female 18.0 Ludhiana Student 5.0 0.0 6.88 2.0 0.0 'Less than 5 hours' Healthy 'Class 12' Yes 10.0 5.0 No 1
27900 140699 Male 27.0 Patna Student 4.0 0.0 9.24 1.0 0.0 'Less than 5 hours' Healthy BCA Yes 2.0 3.0 Yes 1

Data Visualization¶

Line Graph¶

Students Depression¶

In [1]:
# Line plot of the mean Age at each Work Pressure level.
# Passing only xlabel/ylabel (as before) relabels the axes but still draws
# every numeric column against the row index, so the picture did not show
# what its labels claimed. Select the two columns explicitly instead.
# Requires `data` from the loading/cleaning cells: running this cell on a
# fresh kernel raises NameError.
age_by_pressure = data.groupby("Work Pressure")["Age"].mean()
age_by_pressure.plot(kind="line", title="students depression", xlabel="Work Pressure", ylabel="Mean Age")
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[1], line 1
----> 1 data.plot(kind = "line", title = "students depression", xlabel = "Work Pressure", ylabel = "Age")
      2 data

NameError: name 'data' is not defined

Scatter plot¶

In [41]:
# Scatter of Age (x) against Profession (y), one point per row.
data.plot.scatter(x="Age", y="Profession", title="students depression")
Out[41]:
<Axes: title={'center': 'students depression'}, xlabel='Age', ylabel='Profession'>
No description has been provided for this image
In [31]:
import matplotlib.pyplot as plt

# Job Satisfaction vs Work Pressure, drawn as one separate figure per gender.
for gender in data["Gender"].unique():
    gender_rows = data.loc[data["Gender"] == gender]
    fig, ax = plt.subplots()
    ax.scatter(gender_rows["Job Satisfaction"], gender_rows["Work Pressure"], label=gender)
    ax.set_xlabel("Job Satisfaction")
    ax.set_ylabel("Work Pressure")
    ax.legend()
    # show() sits inside the loop, so each gender gets its own figure
    # rather than being overlaid on shared axes.
    plt.show()
No description has been provided for this image
No description has been provided for this image
In [34]:
# Bar Graph
In [32]:
import matplotlib.pyplot as plt

# Bar chart of the number of rows for each Gender value.
gender_counts = data['Gender'].value_counts()
fig, ax = plt.subplots()
gender_counts.plot(kind='bar', ax=ax)
ax.set_title("Number of Participants by Gender")
ax.set_xlabel("Gender")
ax.set_ylabel("Count")
plt.show()
No description has been provided for this image
In [35]:
import matplotlib.pyplot as plt

# Bar chart of the number of rows for each Profession value.
ax = data['Profession'].value_counts().plot.bar()
ax.set_title('Profession Distribution')
ax.set_xlabel('Profession')
ax.set_ylabel('Count')
# Tilt the category labels so longer profession names stay legible.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()
No description has been provided for this image
In [33]:
# Profession tally drawn on a wider 10x6 figure.
profession_count = data['Profession'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
profession_count.plot(kind='bar', ax=ax, rot=45)  # rot=45 tilts the x tick labels
ax.set(title='Number of People by Profession', xlabel='Profession', ylabel='Count')
plt.show()
No description has been provided for this image
In [36]:
import pandas as pd
import matplotlib.pyplot as plt

# Average of the Depression column for every distinct Age, shown as bars.
grouped = data.groupby("Age")["Depression"].mean()
ax = grouped.plot.bar()
ax.set_xlabel("Age")
ax.set_ylabel("Depression")
ax.set_title("Student Depression")
plt.show()
No description has been provided for this image
In [ ]:
 
In [ ]: