< Home
Week 4: Final presentation¶
Description of data set¶
My data set was collected using an animal-borne multisensor tag, known as the Mixed-DTAG, in collaboration the University of St Andrews. The tag was attached to a male killer whale in Vestmannaeyjar on 30 June 2023. Deployment ID: oo23_181a. Data from the tag were pre-processed and downsampled in Matlab to create a multivariate time series at 1 Hz resolution.
Reference for data set:
- Selbmann et al. (in review). Aversive behavioural responses of killer whales to sounds of long-finned pilot whales. Sci. Rep.
Variables¶
- Time in UTC
- Latitude in decimal degrees
- Longitude in decimal degrees
- Depth in m
- Pitch in radians
- Roll in radians
- Heading in radians (relative to true North)
- Acceleration in g, x
- Acceleration in g, y
- Acceleration in g, z
Load data set¶
import pandas as pd
import math
df = pd.read_csv("datasets/data_oo23_181a.csv", sep=",") # import as data frame
# Keep every 10th row for speed (row splicing) and reset index
df = df.iloc[::10].reset_index(drop=True)
#print(df)
# Convert radians to degrees
df[['pitch','roll','head']] = df[['pitch','roll','head']]/math.pi*180
import numpy as np
import matplotlib.pyplot as plt
# Convert to numpy array but skip the time stamps in column 0
#data = np.array(df) # use only with numerical data
data = df.iloc[:, 1:].to_numpy(dtype=float)
t = np.arange(data.shape[0])/3600*10 # time in hours since start
# Plot dive profile
plt.figure(figsize=(6, 4))
plt.subplot(3,1,1)
plt.plot(t, -data[:,2], '-', linewidth = .5)
plt.ylabel('Depth (m)')
# Plot pitch and roll angles
plt.subplot(3,1,2)
plt.plot(t, data[:,3], 'g-', linewidth = .5)
plt.ylabel('Pitch (deg)')
plt.subplot(3,1,3)
plt.plot(t, data[:,4], 'r-', linewidth = .5)
plt.xlabel('Time (h)')
plt.ylabel('Roll (deg)')
plt.tight_layout()
#plt.show()
Track plot¶
# Note: uses a simple conversion from lat/lon to x,y that is OK for short tracks
# reference latitude (midpoint)
lat0 = np.mean(data[:,0])
# approximate meters per degree
meters_per_deg_lat = 111_320
meters_per_deg_lon = 111_320 * np.cos(np.radians(lat0))
# convert to Cartesian coordinates
x = (data[:,1] - data[0,1]) * meters_per_deg_lon
y = (data[:,0] - data[0,0]) * meters_per_deg_lat
# Plot the track
pmin = 25 # min depth to plot
plt.figure(figsize=(10, 6))
plt.scatter(x[data[:,2]>pmin]/1000, y[data[:,2]>pmin]/1000, c=data[data[:,2]>pmin,2], cmap="jet", s=30)
plt.plot(x/1000, y/1000, 'k-', linewidth=1)
plt.axis('equal') # keeps scale consistent
plt.title("Track of killer whale oo23_181a")
plt.xlabel("x (km)")
plt.ylabel("y (km)")
plt.grid(True, alpha=0.3)
plt.colorbar(label="depth (m)")
plt.show()
x = df["pitch"].values # pulling data from df not array, .values to ensure ndim=1
y = df["ax"].values
xmin = min(x)
xmax = max(x)
npts = 100
coeff1 = np.polyfit(x,y,1) # fit 1st-order polynomial
coeff2 = np.polyfit(x,y,2) # fit 2nd-order polynomial
coeff3 = np.polyfit(x,y,3) # fit 3rd-order polynomial
xfit = np.arange(xmin,xmax,(xmax-xmin)/npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit) # evaluate fit
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit) # evaluate fit
pfit3 = np.poly1d(coeff3)
yfit3 = pfit3(xfit) # evaluate fit
print(f"fit coefficients: {coeff3}")
plt.plot(x,y,'o')
plt.plot(xfit,yfit1,'g-',label='1st order')
plt.plot(xfit,yfit2,'r-',label='2nd order')
plt.plot(xfit,yfit3,'y-',label='3rd order')
plt.xlabel("Pitch (degrees)")
plt.ylabel("Ax (g)")
plt.legend()
plt.show()
fit coefficients: [-7.69397245e-07 9.77174679e-07 1.72519368e-02 -9.37386999e-05]
3rd order closely approximated the sigmoidal relationship between x-axis acceleration and pitch. Probably should fit an arctan function here.
Pair plot¶
Let's explore this data further using a pair plot. Plotted on top are the two correlation coefficients, Pearson's r and Spearman's rho, and the mutual information based on the histogram method.
import seaborn as sns
from scipy.stats import spearmanr
# Generate pairplot
g = sns.pairplot(
df[['depth','pitch','roll','head','ax','ay','az']],
corner=True, # remove upper triangle
aspect=1.3,
plot_kws=dict(s=12, alpha=0.6)
)
# Function to annotate correlations
def corr_coefficient(x, y, **kwargs):
r = np.corrcoef(x, y)[0,1]
rho, p = spearmanr(x, y)
ax = plt.gca()
ax.annotate(
f"r: {r:.2f} ρ: {rho:.2f}",
xy=(0.5, 0.9),
xycoords=ax.transAxes,
ha="center", va="center",
fontsize=14, fontweight="bold"
)
# Functions to annotate mutual information
nbins = 256 # no. histogram bins
def entropy(dist):
index = np.where(dist > 0) # 0 log(0) = 0
positives = dist[index]
return -np.sum(positives*np.log2(positives))
def entropy2(dist):
indexx,indexy = np.where(dist > 0) # 0 log(0) = 0
positives = dist[indexx,indexy]
return -np.sum(positives*np.log2(positives))
def information(x,y, **kwargs):
xhist,xedges = np.histogram(x,nbins)
xdist = xhist/np.sum(xhist)
yhist,yedges = np.histogram(y,nbins)
ydist = yhist/np.sum(yhist)
xyhist,xedges,yedges = np.histogram2d(x,y,[nbins,nbins])
xydist = xyhist/np.sum(xyhist)
Hx = entropy(xdist)
Hy = entropy(ydist)
Hxy = entropy2(xydist)
I = Hx+Hy-Hxy
ax = plt.gca()
ax.annotate(
f"{I:.1f} bits",
xy=(0.5, 0.1),
xycoords=ax.transAxes,
ha="center", va="center",
fontsize=14, fontweight="bold"
)
# Apply functions to lower triangle
g.map_lower(corr_coefficient)
g.map_lower(information)
plt.show()
The pair plot shows various types of distributions (long tailed, centred, uniform) and relationships including nonlinear ones and one perfectly monotonic. Below the same plot but with orange showing data for depths deeper than 25 m.
# Create categorical variable with deep vs shallow
df = df.assign(deep=df[['depth']]>25)
df["deep"] = df["deep"].map({True: ">25m", False: "<=25m"}).astype("category")
#print(df.columns) # column names
# Pair Plot
# Generate pairplot
sns.pairplot(
df[['depth','pitch','roll','head','ax','ay','az','deep']], hue='deep',
corner=True, # remove upper triangle
aspect=1.3,
plot_kws=dict(s=12, alpha=0.6)
)
plt.show()
Looks like at depth the specific acceleration is higher, probably due to the whale feeding on herring.
import matplotlib.pyplot as plt
from scipy.spatial import Voronoi,voronoi_plot_2d
import numpy as np
import time
## y = -depth and x = pitch, to save time
eps = 1e-12
y = -data[:,2]
x = data[:,3]
# Gaussuam mixture model parameters
npts = 8892
nclusters = 2
nsteps = 25
nplot = 100
np.random.seed(0)
# choose starting points and initialize
indices = np.random.uniform(low=0,high=len(x),size=nclusters).astype(int)
mux = x[indices]
muy = y[indices]
varx = (np.max(x)-np.min(x))**2
vary = (np.max(y)-np.min(y))**2
pc = np.ones(nclusters)/nclusters
# do E-M iterations
#
for i in range(nsteps):
#
# construct matrices
#
xm = np.outer(x,np.ones(nclusters))
ym = np.outer(y,np.ones(nclusters))
muxm = np.outer(np.ones(npts),mux)
muym = np.outer(np.ones(npts),muy)
varxm = np.outer(np.ones(npts),varx)
varym = np.outer(np.ones(npts),varx)
pcm = np.outer(np.ones(npts),pc)
#
# use model to update probabilities
#
pvgc = (1/np.sqrt(2*np.pi*varxm))*\
np.exp(-(xm-muxm)**2/(2*varxm))*\
(1/np.sqrt(2*np.pi*varym))*\
np.exp(-(ym-muym)**2/(2*varym))
pvc = pvgc*np.outer(np.ones(npts),pc)
pcgv = pvc/np.outer(np.sum(pvc,1),np.ones(nclusters))
#
# use probabilities to update model
#
pc = np.sum(pcgv,0)/npts
mux = np.sum(xm*pcgv,0)/(npts*pc)
muy = np.sum(ym*pcgv,0)/(npts*pc)
varx = 0.1+np.sum((xm-muxm)**2*pcgv,0)/(npts*pc)
vary = 0.1+np.sum((ym-muym)**2*pcgv,0)/(npts*pc)
# Plot data with GMM means and sds
fig,ax = plt.subplots()
plt.plot(x,y,'.')
plt.errorbar(mux,muy,xerr=np.sqrt(varx),yerr=np.sqrt(vary),fmt='r.',markersize=20)
plt.autoscale()
plt.xlabel("Pitch (deg)")
plt.ylabel("Depth (m)")
plt.show()
# plot predicted probability surface
xplot = np.linspace(np.min(x),np.max(x),nplot)
yplot = np.linspace(np.min(y),np.max(y),nplot)
(X,Y) = np.meshgrid(xplot,yplot)
p = np.zeros((nplot,nplot))
for c in range(nclusters):
p += np.exp(-(X-mux[c])**2/(2*varx[c]))/np.sqrt(2*np.pi*varx[c])\
*np.exp(-(Y-muy[c])**2/(2*vary[c]))/np.sqrt(2*np.pi*vary[c])\
*pc[c]
fig, ax = plt.subplots(subplot_kw={"projection":"3d"})
#eps = 1e-12
#ax.plot_surface(X, Y, np.log(p + eps))
ax.plot_surface(X, Y, p)
ax.set_ylabel("depth (m)")
ax.set_xlabel("pitch (deg)")
#plt.title("log(probability)")
plt.show()
These are timeseries data so probability density represents "time spent". The whale spent most of the time near the surface where its pitch orientation was generally close to 0. At deeper depths the distribution of the pitch angles is less centred and more uniform, so includes steeper ascent and descent angles.