< Home
Session2: Tools Orientation 20/11/2025¶
Syntax for Markdown in Jupyter Notebook Click here
Numpy¶
-Numerical Python library and used for fast mathematical operations, arrays, matrices
In [1]:
import numpy as np
arr = np.array([1, 2, 3, 4])
print(arr + 5)
[6 7 8 9]
Matplotlib¶
Library used to create charts and graphs
In [2]:
import matplotlib.pyplot as plt
marks = [80, 70, 90, 85]
students = ["A", "B", "C", "D"]
plt.bar(students, marks)
plt.title("Student Marks")
plt.show()
Pandas¶
-Used for data analysis and uses DataFrame (table-like data)
In [3]:
import pandas as pd
data = {
"Name": ["Karma", "Sonam"],
"Marks": [85, 92]
}
df = pd.DataFrame(data)
print(df)
Name Marks 0 Karma 85 1 Sonam 92
Assignment-01¶
In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.sankey import Sankey
import plotly.graph_objects as go
In [5]:
# Load CSV
df = pd.read_csv('datasets/youtube_video.csv')
# === 1. Scatter Plot: Views vs Likes ===
plt.figure()
plt.scatter(df['view_count'], df['like_count'])
plt.xlabel('View Count')
plt.ylabel('Like Count')
plt.title('Views vs Likes')
plt.show()
# === 2. Bar Chart: Top 10 Videos by View Count ===
top = df.nlargest(10, 'view_count')
plt.figure()
plt.bar(np.arange(len(top)), top['view_count'])
plt.xticks(np.arange(len(top)), top['title'], rotation=90)
plt.ylabel('View Count')
plt.title('Top 10 Videos by View Count')
plt.tight_layout()
plt.show()
/tmp/ipykernel_2246/622102809.py:20: UserWarning: Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all Axes decorations. plt.tight_layout()
In [6]:
# Load CSV
df = pd.read_csv('datasets/youtube_video.csv')
# Convert published_date to datetime
df['published_date'] = pd.to_datetime(df['published_date'], errors='coerce')
# =========================================================
# 1. Histogram of View Counts
# =========================================================
plt.figure()
plt.hist(df['view_count'].dropna())
plt.xlabel('View Count')
plt.ylabel('Frequency')
plt.title('Distribution of View Counts')
plt.show()
# =========================================================
# 2. Scatter Plot – Likes vs Comments
# =========================================================
plt.figure()
plt.scatter(df['like_count'], df['comment_count'])
plt.xlabel('Like Count')
plt.ylabel('Comment Count')
plt.title('Likes vs Comments')
plt.show()
# =========================================================
# 3. Line Plot – Views Over Time
# =========================================================
df_sorted = df.sort_values('published_date')
plt.figure()
plt.plot(df_sorted['published_date'], df_sorted['view_count'])
plt.xlabel('Published Date')
plt.ylabel('View Count')
plt.title('Views Over Time')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
Sankey Diagram¶
In [7]:
# Load your CSV
df = pd.read_csv('datasets/youtube_video.csv')
# Aggregate numbers
total_views = df['view_count'].sum()
total_likes = df['like_count'].sum()
total_comments = df['comment_count'].sum()
# Matplotlib Sankey Diagram
plt.figure(figsize=(10, 6))
sankey = Sankey(unit=None) # no units displayed
sankey.add(
flows=[total_views, -total_likes, 0], # Views → Likes
labels=['Total Views', 'Total Likes', ''],
orientations=[0, 0, 0]
)
sankey.add(
flows=[total_likes, -total_comments], # Likes → Comments
labels=['', 'Total Comments'],
orientations=[0, 0],
prior=0, # connect to the previous sankey group
connect=(1, 0)
)
diagrams = sankey.finish()
plt.title("YouTube Engagement Flow: Views → Likes → Comments")
plt.show()
<Figure size 1000x600 with 0 Axes>
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
In [8]:
df = pd.read_csv('datasets/youtube_video.csv')
total_views = df['view_count'].sum()
total_likes = df['like_count'].sum()
total_comments = df['comment_count'].sum()
nodes = ["Views", "Likes", "Comments"]
source = [0, 1]
target = [1, 2]
value = [total_likes, total_comments]
fig = go.Figure(data=[go.Sankey(
node=dict(
label=nodes,
pad=25,
thickness=20
),
link=dict(
source=source,
target=target,
value=value
)
)])
fig.update_layout(title_text="YouTube Engagement Flow (Views → Likes → Comments)", font_size=12)
fig