Class 5: Probability
I revisited the Westfjords ocean temperature data (logged on the island of Æðey) and investigated their probability distribution. Let's plot them first:
In [3]:
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
from datetime import datetime
import matplotlib.pyplot as plt
# Load the logged sea-temperature record: the "time" column holds the
# timestamps as text and the "value" column holds the temperature readings.
df = pd.read_csv('datasets/ocean-temperature-westfjords-2024/cmems_obs-ins_nws_phybgcwav_mynrt_na_irr_1763979809373_noheader.csv')
time = df["time"]
temperature = df["value"]

# Parse every timestamp in one vectorised call. pd.to_datetime accepts
# ISO-8601 strings with or without a trailing 'Z', whereas
# datetime.fromisoformat rejects the 'Z' suffix on Python < 3.11.
# The result is stored as timezone-naive UTC so it lines up directly with the
# timezone-naive np.datetime64 axis limits used when plotting.
parsed_time = pd.to_datetime(time, utc=True).dt.tz_localize(None)
# Draw the full temperature record in twelve stacked panels, one per calendar
# month of 2024. Every panel plots the same series; set_xlim selects the month.
fig, axs = plt.subplots(12, 1, layout='constrained', figsize=(6, 30))

# Month boundaries 2024-01 ... 2025-01 (13 edges -> 12 consecutive windows).
# Author's note on the January window:
#   lims = [2024-01-02T00:00:00.000Z, 2024-01-31T23:50:00.000Z]
month_edges = np.arange('2024-01', '2025-02', dtype='datetime64[M]')
lims = list(zip(month_edges[:-1], month_edges[1:]))

for nn, ax in enumerate(axs):
    # A fresh locator/formatter pair is created for each panel's x axis.
    locator = mdates.AutoDateLocator(minticks=3, maxticks=7)
    formatter = mdates.ConciseDateFormatter(locator)
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(formatter)
    ax.plot(parsed_time, temperature)
    ax.set_xlim(lims[nn])
    # Label each panel explicitly: plt.ylabel() only affects the current
    # (last-created) axes, which left the other eleven panels unlabelled.
    ax.set_ylabel("Temperature [°C]")

axs[0].set_title('Æðey sea temperature 2024')
plt.show()
I used Neil's code for density estimation on my ocean temperature data and calculated the mean and standard deviation instead of setting them manually:
In [28]:
# Histogram-based density estimate of the temperature readings, annotated
# with the sample mean and standard deviation computed from the data.

# npts only scales the bin count (npts//50) and the rug-marker size (npts/20);
# it is not the number of samples. Presumably carried over from the
# density-estimation example this cell was adapted from.
npts = 1000
mean = np.mean(temperature)
stddev = np.std(temperature)

# density=True normalises the bars so the histogram integrates to 1.
plt.hist(temperature, bins=npts//50, density=True)
# Rug plot: one tick per reading along y = 0.
plt.plot(temperature, 0*temperature, '|', ms=npts/20)
# Annotation positions are in data coordinates (x in temperature units,
# y in density units).
plt.text(6, 0.23, 'Mean = ' + str(round(mean, 3)))
plt.text(6, 0.22, 'Standard deviation = ' + str(round(stddev, 3)))
plt.title('Histogram of Æðey ocean temperature 2024')
plt.xlabel('Temperature [°C]')
# With density=True the y axis is a probability density, not a count.
plt.ylabel('Probability density')
plt.show()
In [ ]: