Types¶
open source¶
- freely available
- cross-platform
- extensible
- community support can be challenging
- will be used in this class
commercially supported¶
- someone responsible to call
- can be open or closed source
closed source¶
- does not depend on volunteer developers
- may rely on security through obscurity, which is less reliable
- can be more difficult to extend
Programming¶
language types¶
interpreted¶
- an interpreter executes commands as they are entered
- more interactive
- slower
compiled¶
- a compiler converts a program into machine instructions
- less interactive
- faster
processor types¶
CPU¶
- small numbers of powerful processor cores
- implements complex algorithms
GPU¶
- large numbers of simple cores
- accelerates parallel algorithms
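As a quick check of which processor types are available, JAX (installed under package management below) can list the compute devices it sees; this is a minimal sketch, assuming a working JAX install.
In [ ]:
#
# list the compute devices JAX can see (GPU/TPU if present, otherwise CPU)
#
import jax
for d in jax.devices():
    print(d.platform,d.device_kind)
print(f"default backend: {jax.default_backend()}")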
JavaScript¶
- leading language for Web scripting
- just-in-time compilation can approach native-code performance
In [1]:
%%javascript
console.log('hello world')
Rust¶
- prevents classes of security and stability bugs found in other languages
- buffer overflows, memory leaks, race conditions, ...
- can compile to WebAssembly
In [2]:
1+1
Out[2]:
2
In [3]:
'''
this is an example
of a few ways to calculate pi
'''
import math # bring in the math package
print(f"pi = {math.pi}") # pre-defined pi variable
print(f"2*arcsin(1) = {2*math.asin(1)}") # find pi from trig function
def calc_pi(N): # define function to calculate pi
    pi = 0
    for i in range(1,N): # loop over index of terms
        pi += 0.5/((i-0.75)*(i-0.25))
    return pi
calc_pi_result = [] # empty list to accumulate results
for n in range(1,7): # loop over pi calculation size
    N = 10**n
    calc_pi_result.append([N,calc_pi(N)])
print("[[N,calc_pi(N)],...]")
print(calc_pi_result)
pi = 3.141592653589793
2*arcsin(1) = 3.141592653589793
[[N,calc_pi(N)],...]
[[10, 3.0860798011238324], [100, 3.136542180744826], [1000, 3.1410921531206317], [10000, 3.1415426485893203], [100000, 3.1415876535398177], [1000000, 3.141592153589402]]
Jupyter¶
- notebooks (like this)
- cells, kernels, results
- code
- Markdown
- raw
- documentation, getting started, collaboration
- Lab
- Web interface server
- Hub
- multi-user server
- Binder
- computing environment server
- Book
- book formatting
- Lite
- runs entirely in a browser, using WebAssembly
- ipympl
- interactivity
- bqplot
- data visualization
- pythreejs
- 3D graphics
- VS Code
- code editor
- ipywidgets
- user interface elements
In [4]:
import ipywidgets as widgets
def button_update():
    global count
    output.clear_output()
    count += 1
    print(f"button pressed {count} times")
def button_handler(self):
    with output:
        button_update()
button = widgets.Button(description='click me')
output = widgets.Output()
count = 0
button.on_click(button_handler)
display(button,output)
Button(description='click me', style=ButtonStyle())
Output()
package management¶
- pip
- Conda, Miniconda
- conda create -n environment_name python=3.13
- conda activate environment_name
- conda install -c conda-forge jax
- conda install -c conda-forge numpy
- conda install -c conda-forge scipy
- conda install -c conda-forge numba
- conda install -c conda-forge matplotlib
- conda install -c conda-forge ipympl
- conda install -c conda-forge ipywidgets
- conda install -c conda-forge jupyterlab
- conda install -c conda-forge pandas
- conda install -c conda-forge scikit-learn
- conda install -c conda-forge seaborn
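As a quick check that an environment actually contains these packages, installed versions can be queried from Python with the standard-library importlib.metadata; a minimal sketch (the package list is just a subset of the installs above):
In [ ]:
#
# report installed versions of a few of the packages listed above
#
from importlib.metadata import version, PackageNotFoundError
for pkg in ['numpy','scipy','matplotlib','jax','numba','ipywidgets']:
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")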
git¶
- distributed version control
- tutorial
- jupyter integration
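Git is usually run from a terminal, but it can also be driven from a notebook cell with IPython shell escapes (dedicated extensions such as jupyterlab-git provide tighter integration); a minimal sketch, assuming git is installed and the notebook is inside a repository:
In [ ]:
#
# run git from a notebook cell with shell escapes
#
!git status --short
!git log --oneline -5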
In [5]:
import numpy as np
N = 10000000
i = np.arange(1,(N+1))
print("array:\n")
print(f" shape: {i.shape}")
print(f" start: {i[0:10]}")
print(f" type: {i.dtype}")
pi = np.sum(0.5/((i-0.75)*(i-.25)))
print(f"pi ~= {pi}")
array:

 shape: (10000000,)
 start: [ 1  2  3  4  5  6  7  8  9 10]
 type: int64
pi ~= 3.1415926035897934
scikit-learn¶
- a wide range of machine learning routines
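As a small illustration of these routines, the sketch below fits a logistic-regression classifier on synthetic data (made up here, not tied to anything else in these notes) and reports its test accuracy:
In [ ]:
#
# fit and score a simple classifier on synthetic data
#
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
X,y = make_classification(n_samples=1000,n_features=10,random_state=0)
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0)
model = LogisticRegression().fit(X_train,y_train)
print(f"test accuracy: {model.score(X_test,y_test):.3f}")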
Jax¶
- accelerates Python and NumPy code on CPUs, GPUs, and TPUs
- jit: compilation
- significant speedup over interpreted code
- vmap: vectorization
- replaces loops with more efficient operations over arrays
- pmap: parallelization
- distributes work over multiple processors
- grad: automatic differentiation
- used for model training (a short grad/vmap sketch follows this list)
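These transformations compose with ordinary Python functions; a minimal sketch of grad and vmap on a toy function (jit is timed in the Performance section below):
In [ ]:
#
# automatic differentiation and vectorization of a toy function
#
import jax
import jax.numpy as jnp
def f(x): # simple scalar function
    return jnp.sin(x)**2
df = jax.grad(f) # the derivative is itself a function
print(df(1.0)) # 2*sin(1)*cos(1)
xs = jnp.linspace(0.0,1.0,5)
print(jax.vmap(df)(xs)) # evaluate the derivative over an array without a loop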
In [6]:
import matplotlib.pyplot as plt
import numpy as np
calc_pi_array = np.array(calc_pi_result)
#
# linear plot
#
plt.plot(calc_pi_array[:,0],math.pi-calc_pi_array[:,1],'o-')
plt.xlabel('pi calculation points')
plt.ylabel('pi error')
plt.show()
#
# log plot
#
plt.plot(calc_pi_array[:,0],math.pi-calc_pi_array[:,1],'o-')
plt.xlabel('pi calculation points')
plt.ylabel('pi error')
plt.xscale('log')
plt.yscale('log')
plt.show()
live updating¶
In [7]:
#
# enable interactive features
#
%matplotlib ipympl
#
# imports
#
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
#
# set up interactive plot
#
l = 15.5
k = 1
x = np.arange(-l,l,.1)
y = np.sin(k*x)/(k*x)
plt.ion()
fig,ax = plt.subplots()
fig.canvas.header_visible = False
line, = ax.plot(x,y)
plt.title('sin(kx)/(kx)')
plt.show()
#
# handle slider changes
#
def slider_handler(change):
    k = change['new']
    y = np.sin(k*x)/(k*x)
    line.set_ydata(y)
    fig.canvas.draw_idle()
#
# add slider
#
slider = widgets.FloatSlider(value=1,min=0.01,max=10.0,step=0.1,
                             description='adjust k:',
                             continuous_update=True,
                             orientation='horizontal',
                             readout_format='.1f',)
slider.observe(slider_handler,names='value')
display(slider)
FloatSlider(value=1.0, description='adjust k:', max=10.0, min=0.01, readout_format='.1f')
Vega-Altair¶
- declarative rather than imperative visualization (a small sketch follows)
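A minimal sketch of the declarative style, reusing the calc_pi_result list computed above (and assuming the altair package is installed alongside pandas); columns of a dataframe are mapped to encodings rather than drawn with plotting commands:
In [ ]:
#
# declarative chart: map dataframe columns to encodings
#
import math
import altair as alt
import pandas as pd
df = pd.DataFrame(calc_pi_result,columns=['points','estimate'])
df['error'] = math.pi-df['estimate']
alt.Chart(df).mark_line(point=True).encode(
    x=alt.X('points',scale=alt.Scale(type='log')),
    y=alt.Y('error',scale=alt.Scale(type='log')))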
Performance¶
benchmarking¶
- AWS EC2 g6e.4xlarge
- AMD EPYC 7R13 Processor, 16 vCPU
- NVIDIA L40S, 18176 CUDA cores
- MFlops:
- Jax CPU version: 594
- Jax Jit CPU version: 5983
- Jax GPU version: 4132
- Jax Jit GPU version: 825455
- Numba version: 3942
- Numba parallel version: 63296
In [8]:
import time
NPTS = 10000000
print("\nPython version:")
pi = 0
start_time = time.time()
for i in range(1,(NPTS+1)):
    pi += 0.5/((i-0.75)*(i-0.25))
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("NPTS = %d, pi = %f"%(NPTS,pi))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
Python version:
NPTS = 10000000, pi = 3.141593
time = 2.271767, estimated MFlops = 22.009303
In [9]:
import numpy as np
import time
NPTS = 10000000
print("\nNumPy version:")
start_time = time.time()
i = np.arange(1,(NPTS+1),dtype=float)
pi = np.sum(0.5/((i-0.75)*(i-0.25)))
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("NPTS = %d, pi = %f"%(NPTS,pi))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
NumPy version:
NPTS = 10000000, pi = 3.141593
time = 0.136608, estimated MFlops = 366.011083
In [10]:
import jax
import jax.numpy as jnp
import time
NPTS = 10000000
#
a = 0.5
b = 0.75
c = 0.25
#
# alternate compilation values to prevent caching
#
a0 = 0.6
b0 = 0.7
c0 = 0.2
#
print("\nrun Jax version:")
#
def jax_calcpi(a,b,c):
    i = jnp.arange(1,(NPTS+1),dtype=float)
    pi = jnp.sum(a/((i-b)*(i-c)))
    return pi
start_time = time.time()
pi = jax_calcpi(a0,b0,c0).block_until_ready()
end_time = time.time()
print("time = %f"%(end_time-start_time))
#
print("\ntime Jax version:")
#
start_time = time.time()
pi = jax_calcpi(a,b,c).block_until_ready()
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("NPTS = %d, pi = %f"%(NPTS,pi))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
print("\ncompile and run Jax Jit version:")
#
jax_jit_calcpi = jax.jit(jax_calcpi)
start_time = time.time()
pi = jax_jit_calcpi(a0,b0,c0).block_until_ready()
end_time = time.time()
print("time = %f"%(end_time-start_time))
#
print("\ntime Jax Jit version:")
#
start_time = time.time()
pi = jax_jit_calcpi(a,b,c).block_until_ready()
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("NPTS = %d, pi = %f"%(NPTS,pi))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
run Jax version:
time = 0.324914

time Jax version:
NPTS = 10000000, pi = 3.141593
time = 0.156081, estimated MFlops = 320.345588

compile and run Jax Jit version:
time = 1.290077

time Jax Jit version:
NPTS = 10000000, pi = 3.141593
time = 0.018329, estimated MFlops = 2727.896147
In [11]:
from numba import jit
import time
NPTS = 10000000
print("\nNumba version:")
import time
NPTS = 100000000
@jit(nopython=True)
def calc():
    pi = 0
    for i in range(1,(NPTS+1)):
        pi += 0.5/((i-0.75)*(i-0.25))
    return pi
pi = calc() # first call to compile the function
start_time = time.time()
pi = calc() # second call uses the cached compilation
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("NPTS = %d, pi = %f"%(NPTS,pi))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
print("\nNumba parallel version:")
from numba import njit,prange
NPTS = 10000000000
@njit(parallel=True,fastmath=True)
def calc():
    pi = 0
    for i in prange(1,(NPTS+1)):
        pi += 0.5/((i-0.75)*(i-0.25))
    return pi
pi = calc() # first call to compile the function
start_time = time.time()
pi = calc() # second call uses the cached compilation
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
print("NPTS = %d, pi = %f"%(NPTS,pi))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
Numba version:
NPTS = 100000000, pi = 3.141593
time = 0.257533, estimated MFlops = 1941.501801

Numba parallel version:
NPTS = 10000000000, pi = 3.141593
time = 3.916974, estimated MFlops = 12764.954808
Data¶
files¶
- types: CSV, XML, YAML, binary, ...
- a whole file can be read at once if it fits in the computer's memory
- larger files must be accessed by streaming or through a database (see the sketch below)
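For the streaming case, a minimal sketch using pandas (installed above) that processes a CSV in fixed-size chunks instead of loading it whole; 'big.csv' is a hypothetical placeholder file:
In [ ]:
#
# stream a large CSV in chunks instead of reading it all at once
#
import pandas as pd
total = 0
rows = 0
for chunk in pd.read_csv('big.csv',header=None,chunksize=1000): # 'big.csv' is a placeholder
    total += chunk.values.sum() # accumulate a running statistic chunk by chunk
    rows += len(chunk)
print(f"rows: {rows}, sum: {total}")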
In [12]:
import os
import numpy as np
N = 100
os.makedirs('datasets',exist_ok=True) # make sure the output directory exists
vfloat = np.ones((N,N),dtype=np.float64)
vint = np.ones((N,N),dtype=np.int8)
np.save('datasets/vfloat.npy',vfloat)
np.save('datasets/vint.npy',vint)
np.savetxt('datasets/vfloat.csv',vfloat,delimiter=',')
np.savetxt('datasets/vint.csv',vint,delimiter=',')
print(f"write files: {os.listdir('datasets')}")
print("file sizes:")
print(f" float 64 binary: {os.path.getsize('datasets/vfloat.npy'):.1e}")
print(f" int 8 binary: {os.path.getsize('datasets/vint.npy'):.1e}")
print(f" float 64 text: {os.path.getsize('datasets/vfloat.csv'):.1e}")
print(f" int 8 binary text: {os.path.getsize('datasets/vint.csv'):.1e}")
print("read files:")
vfloat = np.load('datasets/vfloat.npy')
vint = np.load('datasets/vint.npy')
print(f" float 64 binary shape: {vfloat.shape} type: {vfloat.dtype}")
print(f" int 8 binary shape: {vint.shape} type; {vint.dtype}")
vfloat = np.loadtxt('datasets/vfloat.csv',delimiter=',')
vint = np.loadtxt('datasets/vint.csv',delimiter=',')
print(f" float 64 text shape: {vfloat.shape} type: {vfloat.dtype}")
print(f" int 8 text shape: {vint.shape} type: {vint.dtype}")
write files: ['MNIST', 'vint.npy', 'vint.csv', 'vfloat.csv', 'vfloat.npy']
file sizes:
 float 64 binary: 8.0e+04
 int 8 binary: 1.0e+04
 float 64 text: 2.5e+05
 int 8 text: 2.5e+05
read files:
 float 64 binary shape: (100, 100) type: float64
 int 8 binary shape: (100, 100) type: int8
 float 64 text shape: (100, 100) type: float64
 int 8 text shape: (100, 100) type: float64
(c) Neil Gershenfeld for Fab Futures, 12/14/25