Density estimation
Gaussian Mixture Models (GMM)
In [2]:
# Density estimation of one country's soybean yield with a Gaussian Mixture
# Model: load the data, select the country, fit the mixture, and overlay the
# fitted density on a normalised histogram of the observed yields.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.mixture import GaussianMixture

# --- Configuration ----------------------------------------------------------
# NOTE(review): absolute, machine-specific path; consider making it relative
# to a configurable data directory.
DATA_PATH = "/home/jovyan/work/jeogeorge/datasets/soybean_yield_country.parquet"
country_name = "United States of America"  # select the country to show
N_COMPONENTS = 2  # number of Gaussians in the mixture
N_BINS = 60       # histogram bins
N_GRID = 100      # points at which the fitted density is evaluated
SEED = 0          # fixes the GMM initialisation so re-runs give the same fit

# --- Load and select --------------------------------------------------------
df = pd.read_parquet(DATA_PATH)

# Keep the rows of the chosen country that carry a yield value.
# The previous mask AND-ed the country test with the numeric "Yield" column
# itself, which pandas does not interpret as "has a value".
# NOTE(review): assumes the intent was to drop missing yields -- confirm.
sel = df[(df["Country"] == country_name) & df["Yield"].notna()]
if sel.empty:
    raise ValueError(f"No yield records found for country {country_name!r}")

# Per-year views of the selection (kept available for later cells).
groups = sel.groupby("year")
years = sorted(sel["year"].unique())
data_by_year = [groups.get_group(yr)["Yield"].values for yr in years]
sel_median = groups["Yield"].median()
y = sel_median.values  # median yield per year, indexed like sel_median

# Flat sample of every yield observation for the country.
yieldusa = sel["Yield"]
x = yieldusa.values
xmin = x.min()
xmax = x.max()
npts = xmax - xmin + 1
ncenters = npts

# --- Gaussian Mixture Models (GMM) ------------------------------------------
gmm = GaussianMixture(n_components=N_COMPONENTS, random_state=SEED)
gmm.fit(x.reshape(-1, 1))  # scikit-learn expects a 2-D (n_samples, n_features) array
means = gmm.means_
weights = gmm.weights_
# A bare expression in the middle of a cell is not displayed, so print explicitly.
print("GMM component means:", means.ravel())
print("GMM component weights:", weights)

# Evaluate the fitted density on an even grid spanning the observed range.
xs = np.linspace(xmin, xmax, N_GRID).reshape(-1, 1)
logprob = gmm.score_samples(xs)  # log-density at each grid point
pdf = np.exp(logprob)

# --- Plot -------------------------------------------------------------------
fig, ax = plt.subplots()
ax.hist(x, bins=N_BINS, density=True, label="Observed yield")
ax.plot(xs, pdf, label=f"GMM density ({N_COMPONENTS} components)")
ax.set_title(f"Soybean yield density: {country_name}")
ax.set_xlabel("Yield")
ax.set_ylabel("Density")
ax.legend()
plt.show()
In [ ]: