Jeoffrey George - Fab Futures - Data Science
Home About

< Week 3 - Probability distribution ...Week 4 - Transform >

Density estimation¶

Gaussian Mixture Models (GMM)¶

In [2]:
# Density estimation of soybean yield for one country with a Gaussian
# Mixture Model (GMM): load the dataset, select the country's rows, fit a
# mixture to the yield values, and overlay the fitted density on a
# normalised histogram of those values.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.mixture import GaussianMixture

# --- Configuration ---------------------------------------------------------
DATA_PATH = "/home/jovyan/work/jeogeorge/datasets/soybean_yield_country.parquet"
country_name = "United States of America"  # select the country to show
N_COMPONENTS = 2  # number of Gaussians in the mixture
SEED = 0          # fixes the GMM initialisation so re-runs give the same fit
N_BINS = 60       # histogram bins
N_GRID = 100      # points at which the fitted density is evaluated

# --- Load and select -------------------------------------------------------
df = pd.read_parquet(DATA_PATH)

# Explicit mask: keep rows whose yield is present and non-zero. Using the
# numeric column itself as a boolean mask relies on implicit casting and its
# result depends on the column dtype, so the condition is spelled out here.
has_yield = df["Yield"].notna() & (df["Yield"] != 0)
sel = df[(df["Country"] == country_name) & has_yield]
if sel.empty:
    raise ValueError(f"No usable 'Yield' rows found for country {country_name!r}")

# --- Per-year summaries ----------------------------------------------------
# Not used by the GMM fit below; kept so other cells can reuse them.
groups = sel.groupby("year")
years = sorted(sel["year"].unique())
data_by_year = [groups.get_group(yr)["Yield"].values for yr in years]

sel_median = groups["Yield"].median()
median_years = sel_median.index.to_numpy()  # years matching the medians in `y`
y = sel_median.values                       # median yield per year

# --- Values to model -------------------------------------------------------
yieldusa = sel["Yield"]
x = yieldusa.to_numpy(dtype=float)  # 1-D array of yield values for the fit

xmin = x.min()
xmax = x.max()
npts = xmax - xmin + 1
ncenters = npts

# --- Gaussian Mixture Model (GMM) -------------------------------------------
if x.size < N_COMPONENTS:
    raise ValueError(
        f"Need at least {N_COMPONENTS} samples to fit the mixture, got {x.size}"
    )

# scikit-learn expects a 2-D (n_samples, n_features) array, hence the reshape.
gmm = GaussianMixture(n_components=N_COMPONENTS, random_state=SEED)
gmm.fit(x.reshape(-1, 1))

means = gmm.means_
weights = gmm.weights_

# A bare `means, weights` expression only displays when it is the last line
# of a cell, so print the fitted parameters explicitly.
print("component means:", means.ravel())
print("component weights:", weights)

# --- Fitted density vs. histogram ------------------------------------------
xs = np.linspace(xmin, xmax, N_GRID).reshape(-1, 1)
logprob = gmm.score_samples(xs)  # log-density of the mixture at each grid point
pdf = np.exp(logprob)

fig, ax = plt.subplots()
ax.hist(x, bins=N_BINS, density=True, alpha=0.6, label="observed yield")
ax.plot(xs, pdf, label=f"GMM fit ({N_COMPONENTS} components)")
ax.set(
    title=f"Soybean yield density - {country_name}",
    xlabel="Yield",
    ylabel="Probability density",
)
ax.legend()
plt.show()
No description has been provided for this image
In [ ]: