[Your-Name-Here] - Fab Futures - Data Science
Home About
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from io import StringIO

# Inline copy of the presentation-assessment sheet, one row per student.
# The Exploration, Communication and Creativity columns hold ordinal ratings
# ("Need improvement" / "Good" / "Very Good"); the last column is a free-text
# remark, double-quoted whenever it contains a comma.
# NOTE(review): the rows appear to pair real student names with evaluative
# remarks — confirm this may be published, or anonymise the Name column.
csv_text = """Sl. No.,Name,Attendance,Topic,Exploration,Communication,Creativity,Any Additional Comment
1,Ashish Wakley,TRUE,Climate Change,Good,Good,Very Good,very confident but need to explore more.
2,Dorji Tshewang,TRUE,Climate Change,Good,Good,Good,"Limited only to textbook, need to be confident and explore more"
3,Jigme Namgyel,TRUE,Climate Change,Need improvement,Need improvement,Need improvement,"Oral presentation, Most of the time he is silence and hardly speak "
4,Kelden Drukda,TRUE,Climate Change,Need improvement,Need improvement,Very Good,"Didnot see effort on exploration, poor in communication"
5,Kinga Tobgay,TRUE,Climate Change,Good,Good,Need improvement,Well prepared but need to explore more on the given topic.
6,Kinley Dendup,TRUE,Climate Change,Good,Good,Need improvement,Well prepared but need to explore more on the given topic.
7,Kinley Tshering,TRUE,Climate Change,Good,Need improvement,Need improvement,Poor communication skills.
8,Lekden Thujee Drakpa,TRUE,Climate Change,Need improvement,Need improvement,Need improvement,Did not explore for information.Got few points from the sister.Very poor communication.(noding and in words only)
9,Pema Namgay,TRUE,Climate Change,Very Good,Very Good,Very Good,Exploration done well and mentioned the sources . Language very fluent and confident
10,Rinzin Dorji,TRUE,Climate Change,Good,Need improvement,Need improvement,oral presentation/ responding in Dzongkha and lack of fluency.
""" 
# Parse the inline CSV into one row per assessed student.
df = pd.read_csv(StringIO(csv_text))

# Encode the ordinal ratings as integers. A label that is missing from the map
# becomes NaN and is dropped before any statistic is computed.
rating_map = {"Need improvement": 1, "Good": 2, "Very Good": 3}
df["Creativity_score"] = df["Creativity"].map(rating_map)
creativity_scores = df["Creativity_score"].dropna().values

npts = len(creativity_scores)
if npts == 0:
    # Fail with a clear message instead of NaN statistics and an empty plot.
    raise ValueError("no Creativity rating could be mapped to a score")
mean = np.mean(creativity_scores)
stddev = np.std(creativity_scores)  # population standard deviation (ddof=0)

# The scores only take the integer levels of rating_map, so use one unit-wide
# bin centred on each level. Using the number of points as the bin count split
# the three levels over many narrow, mostly empty bins, which inflated the bar
# densities and made them incomparable with the Gaussian curve.
levels = sorted(rating_map.values())
bin_edges = np.arange(levels[0], levels[-1] + 2) - 0.5

plt.figure(figsize=(8,5))

plt.hist(creativity_scores, bins=bin_edges, density=True, alpha=0.6, color='skyblue', edgecolor='black')

# Rug marks on the x axis show where the individual scores sit.
plt.plot(creativity_scores, 0*creativity_scores, '|', ms=20, color='black')

# Normal density with the sample mean and standard deviation, drawn over +/-3 sigma.
xi = np.linspace(mean-3*stddev, mean+3*stddev, 100)
yi = np.exp(-(xi-mean)**2/(2*stddev**2))/np.sqrt(2*np.pi*stddev**2)
plt.plot(xi, yi, 'r', lw=2, label='Gaussian fit')

plt.xlabel("Creativity score")
plt.ylabel("Density")
plt.title("Histogram of Creativity scores with Gaussian overlay")
plt.legend()
plt.show()
No description has been provided for this image
In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from io import StringIO

# Inline copy of the presentation-assessment sheet, one row per student.
# The Exploration, Communication and Creativity columns hold ordinal ratings
# ("Need improvement" / "Good" / "Very Good"); the last column is a free-text
# remark, double-quoted whenever it contains a comma.
# NOTE(review): the rows appear to pair real student names with evaluative
# remarks — confirm this may be published, or anonymise the Name column.
csv_text = """Sl. No.,Name,Attendance,Topic,Exploration,Communication,Creativity,Any Additional Comment
1,Ashish Wakley,TRUE,Climate Change,Good,Good,Very Good,very confident but need to explore more.
2,Dorji Tshewang,TRUE,Climate Change,Good,Good,Good,"Limited only to textbook, need to be confident and explore more"
3,Jigme Namgyel,TRUE,Climate Change,Need improvement,Need improvement,Need improvement,"Oral presentation, Most of the time he is silence and hardly speak "
4,Kelden Drukda,TRUE,Climate Change,Need improvement,Need improvement,Very Good,"Didnot see effort on exploration, poor in communication"
5,Kinga Tobgay,TRUE,Climate Change,Good,Good,Need improvement,Well prepared but need to explore more on the given topic.
6,Kinley Dendup,TRUE,Climate Change,Good,Good,Need improvement,Well prepared but need to explore more on the given topic.
7,Kinley Tshering,TRUE,Climate Change,Good,Need improvement,Need improvement,Poor communication skills.
8,Lekden Thujee Drakpa,TRUE,Climate Change,Need improvement,Need improvement,Need improvement,Did not explore for information.Got few points from the sister.Very poor communication.(noding and in words only)
9,Pema Namgay,TRUE,Climate Change,Very Good,Very Good,Very Good,Exploration done well and mentioned the sources . Language very fluent and confident
10,Rinzin Dorji,TRUE,Climate Change,Good,Need improvement,Need improvement,oral presentation/ responding in Dzongkha and lack of fluency.
"""  

# Parse the inline CSV into one row per assessed student.
df = pd.read_csv(StringIO(csv_text))

# Encode the ordinal ratings as integers; unmapped labels become NaN and are dropped.
rating_map = {"Need improvement": 1, "Good": 2, "Very Good": 3}
df["Creativity_score"] = df["Creativity"].map(rating_map)
scores = df["Creativity_score"].dropna().values

# Seed the global generator so the resampling below, and therefore the figure,
# is the same on every run.
np.random.seed(10)

trials = 100
points = np.arange(1, len(scores)+1)  # sample sizes from 1 to number of students
means = np.zeros((trials, len(points)))

# For every sample size n, draw `trials` samples of n scores with replacement
# in a single call and keep the mean of each sample (one row per trial).
for i, n in enumerate(points):
    means[:, i] = np.random.choice(scores, size=(trials, n), replace=True).mean(axis=1)

# Centre and spread of the simulated sample means at each sample size.
mean_estimates = np.mean(means, axis=0)
std_estimates = np.std(means, axis=0)

plt.figure(figsize=(8,5))

plt.errorbar(points, mean_estimates, yerr=std_estimates, fmt='k-o', capsize=5, label='estimated')

# Expected one-sigma band of the sample mean: sigma / sqrt(n) around the mean
# of the scores. The population standard deviation (ddof=0) is used because
# the samples are drawn from exactly these scores.
mean_score = np.mean(scores)
std_score = np.std(scores)
plt.plot(points, mean_score + std_score/np.sqrt(points), 'r', label='calculated')
plt.plot(points, mean_score - std_score/np.sqrt(points), 'r')

# Overlay every individual trial so the narrowing of the spread is visible.
for i, n in enumerate(points):
    plt.plot(np.full(trials, n), means[:, i], 'o', markersize=3, alpha=0.5)

plt.xlabel("number of samples averaged")
plt.ylabel("mean estimates of Creativity")
plt.title("Mean estimates of Creativity scores vs sample size")
plt.legend()
plt.show()
No description has been provided for this image
In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from io import StringIO

# Inline copy of the presentation-assessment sheet, one row per student.
# The Exploration, Communication and Creativity columns hold ordinal ratings
# ("Need improvement" / "Good" / "Very Good"); the last column is a free-text
# remark, double-quoted whenever it contains a comma.
# NOTE(review): the rows appear to pair real student names with evaluative
# remarks — confirm this may be published, or anonymise the Name column.
csv_text = """Sl. No.,Name,Attendance,Topic,Exploration,Communication,Creativity,Any Additional Comment
1,Ashish Wakley,TRUE,Climate Change,Good,Good,Very Good,very confident but need to explore more.
2,Dorji Tshewang,TRUE,Climate Change,Good,Good,Good,"Limited only to textbook, need to be confident and explore more"
3,Jigme Namgyel,TRUE,Climate Change,Need improvement,Need improvement,Need improvement,"Oral presentation, Most of the time he is silence and hardly speak "
4,Kelden Drukda,TRUE,Climate Change,Need improvement,Need improvement,Very Good,"Didnot see effort on exploration, poor in communication"
5,Kinga Tobgay,TRUE,Climate Change,Good,Good,Need improvement,Well prepared but need to explore more on the given topic.
6,Kinley Dendup,TRUE,Climate Change,Good,Good,Need improvement,Well prepared but need to explore more on the given topic.
7,Kinley Tshering,TRUE,Climate Change,Good,Need improvement,Need improvement,Poor communication skills.
8,Lekden Thujee Drakpa,TRUE,Climate Change,Need improvement,Need improvement,Need improvement,Did not explore for information.Got few points from the sister.Very poor communication.(noding and in words only)
9,Pema Namgay,TRUE,Climate Change,Very Good,Very Good,Very Good,Exploration done well and mentioned the sources . Language very fluent and confident
10,Rinzin Dorji,TRUE,Climate Change,Good,Need improvement,Need improvement,oral presentation/ responding in Dzongkha and lack of fluency.
"""  

# Parse the inline CSV into one row per assessed student.
df = pd.read_csv(StringIO(csv_text))

# Encode the ordinal ratings as integers; unmapped labels become NaN.
rating_map = {"Need improvement": 1, "Good": 2, "Very Good": 3}
df["Exploration_score"] = df["Exploration"].map(rating_map)
df["Creativity_score"] = df["Creativity"].map(rating_map)
df["Communication_score"] = df["Communication"].map(rating_map)

# (Exploration, Creativity) pairs, keeping only rows where both were mapped.
csv_points = df[["Exploration_score","Creativity_score"]].dropna().values

# Reference cloud: 100 correlated Gaussian points, seeded for reproducibility.
np.random.seed(10)
mean_sim = [2,2]
cov_sim = [[0.8,0.3],[0.3,0.5]]
sim_points = np.random.multivariate_normal(mean_sim,cov_sim,100)

all_points = np.vstack((csv_points, sim_points))

# Per-axis mean and population standard deviation (ddof=0) of the CSV scores.
csv_mean = np.mean(csv_points, axis=0)
csv_std = np.std(csv_points, axis=0)

# Principal axes of the simulated cloud. The covariance matrix is symmetric,
# so eigh applies; column k of evecs is the unit eigenvector that belongs to
# evals[k]. Each half-axis is that eigenvector scaled by sqrt(evals[k]), i.e.
# one standard deviation along the principal direction.
sim_mean = np.mean(sim_points, axis=0)
sim_cov = np.cov(sim_points,rowvar=False)
evals, evecs = np.linalg.eigh(sim_cov)
dx0 = evecs[0,0]*np.sqrt(evals[0])
dy0 = evecs[1,0]*np.sqrt(evals[0])
dx1 = evecs[0,1]*np.sqrt(evals[1])
dy1 = evecs[1,1]*np.sqrt(evals[1])

# Each list describes two line segments; the None entry breaks the line between them.
# CSV data: axis-aligned +/-1 sigma cross through the mean.
csv_rect_x = [csv_mean[0]-csv_std[0],csv_mean[0]+csv_std[0],None,csv_mean[0],csv_mean[0]]
csv_rect_y = [csv_mean[1],csv_mean[1],None,csv_mean[1]-csv_std[1],csv_mean[1]+csv_std[1]]
# Simulated data: cross along the principal axes. Every segment runs from
# mean - (dx, dy) to mean + (dx, dy), so x and y must use the same sign order.
sim_rect_x = [sim_mean[0]-dx0,sim_mean[0]+dx0,None,sim_mean[0]-dx1,sim_mean[0]+dx1]
sim_rect_y = [sim_mean[1]-dy0,sim_mean[1]+dy0,None,sim_mean[1]-dy1,sim_mean[1]+dy1]

plt.figure(figsize=(8,8))

plt.scatter(csv_points[:,0], csv_points[:,1], c='blue', alpha=0.7, s=70, label='CSV Data')

plt.scatter(sim_points[:,0], sim_points[:,1], c='orange', alpha=0.4, s=50, label='Simulated')

plt.scatter(csv_mean[0], csv_mean[1], c='darkblue', marker='X', s=120, label='CSV Mean')
plt.scatter(sim_mean[0], sim_mean[1], c='darkorange', marker='X', s=120, label='Simulated Mean')

plt.plot(csv_rect_x, csv_rect_y, 'b--', linewidth=2)
plt.plot(sim_rect_x, sim_rect_y, 'orange', linewidth=2)

plt.xlabel("Exploration score", fontsize=14)
plt.ylabel("Creativity score", fontsize=14)
plt.title("Variance and Covariance Visualization", fontsize=16)
plt.grid(True, linestyle='--', alpha=0.5)
plt.legend()
plt.tight_layout()
plt.show()

# np.cov uses the sample normalisation (ddof=1), unlike csv_std above.
print("CSV covariance matrix (Exploration vs Creativity):")
print(np.cov(csv_points,rowvar=False))
print("\nSimulated covariance matrix:")
print(sim_cov)
No description has been provided for this image
CSV covariance matrix (Exploration vs Creativity):
[[0.4        0.15555556]
 [0.15555556 0.9       ]]

Simulated covariance matrix:
[[0.82690113 0.24279527]
 [0.24279527 0.40468789]]
In [6]:
import numpy as np
import matplotlib.pyplot as plt

# Discretisation shared by the distributions built in this cell: the number of
# bins and the interval over which the bin positions are spread evenly.
xmin, xmax = -4, 4
nbins = 256
x = np.linspace(xmin, xmax, num=nbins)

# log2(nbins) is the bit count printed next to the bin count.
print(f"{nbins} bins = {np.log2(nbins):.0f} bits")

def entropy(dist):
    """Return the Shannon entropy, in bits, of a discrete distribution.

    Parameters
    ----------
    dist : array-like
        Probabilities of any shape. They are used as given; the function does
        not normalise them.

    Returns
    -------
    float
        -sum(p * log2(p)) over the strictly positive entries of ``dist``.
        A distribution with no uncertainty yields +0.0.
    """
    probs = np.asarray(dist, dtype=float)  # also accept plain lists/tuples
    positives = probs[probs > 0]  # avoid log(0); empty bins contribute nothing
    # Subtract from +0.0 rather than negating: negating a zero sum gives -0.0,
    # which would be formatted as "-0.00" in the plot titles.
    return 0.0 - np.sum(positives * np.log2(positives))

# Three reference distributions over the same nbins bins, each summing to 1.

# Every bin equally likely.
uniform = np.full(nbins, 1.0 / nbins)

# Gaussian weights evaluated at the bin positions, then normalised.
mean, std = 0, 1
gauss_weights = np.exp(-(x - mean)**2 / (2 * std**2))
normal = gauss_weights / gauss_weights.sum()

# All of the probability in the middle bin.
onehot = (np.arange(nbins) == nbins // 2).astype(float)

fig, axs = plt.subplots(3, 1, figsize=(10, 8), sharex=True)
width = (xmax - xmin) / nbins

# One panel per distribution; the title reports its entropy.
panels = [
    (uniform, 'skyblue', f"Uniform distribution entropy: {entropy(uniform):.2f} bits"),
    (normal, 'orange', f"Gaussian distribution entropy: {entropy(normal):.2f} bits"),
    (onehot, 'green', f"One-hot distribution entropy: {entropy(onehot):.2f} bits"),
]
for ax, (dist, color, title) in zip(axs, panels):
    ax.bar(x, dist, width=width, color=color)
    ax.set_title(title)
    ax.grid(True, linestyle='--', alpha=0.5)

# The axis labels go on the bottom panel only.
axs[-1].set_xlabel("x")
axs[-1].set_ylabel("Probability")
plt.tight_layout()
plt.show()
256 bins = 8 bits
No description has been provided for this image
In [10]:
import numpy as np
import matplotlib.pyplot as plt

# Compact, non-scientific printing for the covariance matrices shown below.
np.set_printoptions(suppress=True, precision=1)

# Fixed seed so every random draw in this cell is repeatable.
np.random.seed(10)

# Histogram resolution per axis and size of the large samples.
nbins = 256
npts = int(1e5)
print(f"{nbins} bins\n")

def entropy(dist):
    """Return the Shannon entropy, in bits, of a discrete distribution.

    Parameters
    ----------
    dist : array-like
        Probabilities of any shape (a 1-D marginal or a 2-D joint table).
        They are used as given; the function does not normalise them.

    Returns
    -------
    float
        -sum(p * log2(p)) over the strictly positive entries of ``dist``.
        A distribution with no uncertainty yields +0.0.
    """
    probs = np.asarray(dist, dtype=float)  # also accept plain lists/tuples
    positives = probs[probs > 0]  # avoid log(0); empty bins contribute nothing
    # Subtract from +0.0 rather than negating: negating a zero sum gives -0.0,
    # which would be formatted with a minus sign.
    return 0.0 - np.sum(positives * np.log2(positives))

def entropy2(dist):
    """Return the entropy, in bits, of a joint (2-D) distribution.

    Kept as a separate name for existing callers. The computation is the same
    as ``entropy``, which already handles arrays of any shape, so this simply
    delegates instead of duplicating the implementation.
    """
    return entropy(dist)

def information(x, y):
    """Estimate the mutual information of two samples from their histograms.

    Both samples are binned with the module-level ``nbins`` (per axis), the
    counts are normalised to probabilities, and the result is
    H(x) + H(y) - H(x, y), with the marginal entropies from ``entropy`` and
    the joint entropy from ``entropy2``.

    Parameters
    ----------
    x, y : array-like
        Paired one-dimensional samples.

    Returns
    -------
    float
        The histogram-based mutual-information estimate.
    """
    # Occupancy counts of the two marginals and of the joint grid.
    counts_x = np.histogram(x, nbins)[0]
    counts_y = np.histogram(y, nbins)[0]
    counts_xy = np.histogram2d(x, y, [nbins, nbins])[0]

    # Entropies of the normalised counts.
    marginal_bits = entropy(counts_x / counts_x.sum()) + entropy(counts_y / counts_y.sum())
    joint_bits = entropy2(counts_xy / counts_xy.sum())

    return marginal_bits - joint_bits

def _report_and_plot(x, y, label, markersize, alpha, equal_axes=False):
    """Print the covariance of a paired sample and scatter-plot it.

    The same report was previously written out once per data set; this helper
    keeps the four reports identical by construction.

    Parameters
    ----------
    x, y : array-like
        Paired one-dimensional samples.
    label : str
        Capitalised name of the data set. It is used as-is in the plot title
        and lower-cased in the printed covariance heading.
    markersize, alpha : float
        Marker size and opacity of the scatter plot.
    equal_axes : bool, optional
        If True, use equal axis scaling.

    Returns
    -------
    tuple
        ``(covar, info)``: the 2x2 covariance matrix of (x, y) and the value
        returned by ``information(x, y)``.
    """
    covar = np.cov(np.c_[x, y], rowvar=False)
    print(f"{len(x):.0e} points")
    print(f"{label.lower()} covariance:\n{covar}")
    info = information(x, y)
    plt.figure(figsize=(6,6))
    plt.plot(x, y, 'o', markersize=markersize, alpha=alpha)
    plt.title(f"{label} mutual information: {info:.1f} bits")
    if equal_axes:
        plt.axis('equal')
    plt.grid(True)
    plt.show()
    return covar, info

# Uniform distribution (large): independent x and y
xuniform = np.random.uniform(-1, 1, npts)
yuniform = np.random.uniform(-1, 1, npts)
covar, I = _report_and_plot(xuniform, yuniform, "Uniform", markersize=1, alpha=0.3)

# Uniform distribution (small): same construction with 100x fewer points, so
# the nbins x nbins joint histogram is only sparsely populated
xuniform = np.random.uniform(-1, 1, npts//100)
yuniform = np.random.uniform(-1, 1, npts//100)
covar, I = _report_and_plot(xuniform, yuniform, "Uniform", markersize=3, alpha=0.7)

# Circular distribution: points on a circle of radius r at uniformly random angles
angles = np.random.uniform(0, 2*np.pi, npts)
r = 1
xcircle = r * np.cos(angles)
ycircle = r * np.sin(angles)
covar, I = _report_and_plot(xcircle, ycircle, "Circle", markersize=1, alpha=0.3, equal_axes=True)

# Linear distribution: y is identical to x
xlinear = np.random.uniform(-1, 1, npts)
ylinear = xlinear
covar, I = _report_and_plot(xlinear, ylinear, "Linear", markersize=1, alpha=0.3)
256 bins

1e+05 points
uniform covariance:
[[0.3 0. ]
 [0.  0.3]]
No description has been provided for this image
1e+03 points
uniform covariance:
[[ 0.3 -0. ]
 [-0.   0.3]]
No description has been provided for this image
1e+05 points
circle covariance:
[[ 0.5 -0. ]
 [-0.   0.5]]
No description has been provided for this image
1e+05 points
linear covariance:
[[0.3 0.3]
 [0.3 0.3]]
No description has been provided for this image
In [ ]: