path_hdr25 = "./datasets/hdr25.csv"
#!pip install pycountry  #install library ”pycountry”
import pandas as pd #import Panda Library
import numpy as np  #import Numpy
import pycountry
import requests
import json
import matplotlib.pyplot as plt
#HDI
df_hdr25 = pd.read_csv(path_hdr25, encoding='utf-8', encoding_errors='ignore') #df = data flame = read dataset path "path_hdr25" defined above with panda 
df_hdr25.fillna(0, inplace=True) #replace N/A to 0 
#Lab list
url = 'https://api.fablabs.io/0/labs.json'
r = requests.get(url)
data = r.json()
df_lablist = pd.DataFrame(data)
#CountryCodeMarge
def alpha2_to_alpha3(code2):
    country = pycountry.countries.get(alpha_2=code2.upper())
    return country.alpha_3 if country else None
df_lablist['ccd3'] = df_lablist['country_code'].apply(alpha2_to_alpha3)
#CountLabNo
df_labcount = (df_lablist.groupby('ccd3').agg(lab_count=('id', 'count')).reset_index())
#CountLabKind
df_kindcount = (df_lablist.groupby(['ccd3', 'kind_name']).agg(kind_count=('id', 'count')).reset_index())
#Marge HDI with Lab(Number)
df_labcount = df_lablist.groupby('country_code').agg(lab_count=('id', 'count')).reset_index()
df_labcount['ccd3'] = df_labcount['country_code'].apply(alpha2_to_alpha3)
df_labno_hdi = pd.merge(df_labcount,df_hdr25,left_on='ccd3',right_on='iso3',how='left')
#Marge HDI with Lab(bykind)
df_kind_hdi = pd.merge(df_kindcount,df_hdr25,left_on='ccd3',right_on='iso3',how='left')
#Encoding
from sklearn.preprocessing import LabelEncoder
#Encoding for HDI x Lab Kind
le = LabelEncoder()
encoded_country = le.fit_transform(df_labno_hdi['country'].values)
decoded_country = le.inverse_transform(encoded_country)
#Encoding for HDI x Lab Number list
df_labno_hdi['country_encoded'] = encoded_country
encoded = le.fit_transform(df_kind_hdi['kind_name'].values)
decoded = le.inverse_transform(encoded)
df_kind_hdi['kind_name_encoded'] = encoded
encoded_country = le.fit_transform(df_kind_hdi['country'].values)
decoded_country = le.inverse_transform(encoded_country)
df_kind_hdi['country_encoded'] = encoded_country

Collecting pycountry
  Using cached pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Using cached pycountry-24.6.1-py3-none-any.whl (6.3 MB)
Installing collected packages: pycountry
Successfully installed pycountry-24.6.1

cols = df_labno_hdi.columns
cols2023 = []
for item in cols:
    if '2023' in item:
        cols2023.append(item)
#cols2023
#[c for c in cols2023 if 'rank' in c.lower()]
cols2023 = [c for c in cols2023 if 'rank' not in c.lower()]
cols2023 = [c for c in cols2023 if 'hdi' not in c.lower()]
cols2023

['le_2023',
 'eys_2023',
 'mys_2023',
 'gnipc_2023',
 'gdi_group_2023',
 'gdi_2023',
 'le_f_2023',
 'eys_f_2023',
 'mys_f_2023',
 'gni_pc_f_2023',
 'le_m_2023',
 'eys_m_2023',
 'mys_m_2023',
 'gni_pc_m_2023',
 'coef_ineq_2023',
 'loss_2023',
 'ineq_le_2023',
 'ineq_edu_2023',
 'ineq_inc_2023',
 'gii_2023',
 'mmr_2023',
 'abr_2023',
 'se_f_2023',
 'se_m_2023',
 'pr_f_2023',
 'pr_m_2023',
 'lfpr_f_2023',
 'lfpr_m_2023',
 'co2_prod_2023',
 'mf_2023',
 'pop_total_2023']

from scipy.stats import spearmanr
import numpy as np
import seaborn as sns

cols = ['lab_count'] + cols2023
df_fil = df_labno_hdi[cols].dropna()

cormatrix = pd.DataFrame(index=cols,columns=cols,dtype=float)
p_val_matrix = pd.DataFrame(index=cols,columns=cols,dtype=float)

for colx in cols:
    for coly in cols:
        if colx == coly:
            cormatrix.loc[colx,coly] = 1.0
            p_val_matrix.loc[colx,coly] = 1.0
        elif(pd.isna(cormatrix.loc[colx,coly])):
            corr_coef,p_value = spearmanr(df_fil[colx],df_fil[coly])

            cormatrix.loc[colx,coly] = corr_coef
            cormatrix.loc[coly,colx] = corr_coef
            p_val_matrix.loc[colx,coly] = p_value
            p_val_matrix.loc[coly,colx] = p_value

plt.figure(figsize=(10,8))
sns.heatmap(p_val_matrix,annot=True,annot_kws={"size": 6},cmap="binary_r",vmin=0.0,vmax=0.05)
plt.title("p-value of HDI indicators significantly correlated with the number of labs")
plt.tight_layout()

plt.figure(figsize=(10,8))
sns.heatmap(cormatrix,annot=True,annot_kws={"size": 6}, cmap="coolwarm",vmin=-1.0,vmax=1.0)
plt.title("co-efficience of HDI indicators significantly correlated with the number of labs")
plt.tight_layout()

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

cols_pca = cols2023

df_pca = df_labno_hdi[cols_pca].dropna()

#標準化
scaler = StandardScaler()
Xscale = scaler.fit_transform(df_pca)

pca = PCA()
Xpca = pca.fit_transform(Xscale)

plt.plot(pca.explained_variance_ratio_, 'o-')
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.title('PCA explained variance (HDI + lab_count)')
plt.show()

loadings = pd.DataFrame(
    pca.components_.T,
    index=cols_pca,
    columns=[f'PC{i+1}' for i in range(len(cols_pca))]
)

loadings

summary = pd.DataFrame({
    'PC1_loading': loadings['PC1'],
    'PC1_abs': loadings['PC1'].abs(),
    'PC2_loading': loadings['PC2'],
    'PC2_abs': loadings['PC2'].abs()
})

summary_sorted = summary.sort_values(
    by=['PC1_abs', 'PC2_abs'],
    ascending=False
)

summary_sorted

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd

# PCAに使う元データ
X = df_labno_hdi[cols2023].dropna()

# 標準化
X_std = StandardScaler().fit_transform(X)

# PCA（PC1, PC2）
pca = PCA(n_components=2)
PC = pca.fit_transform(X_std)

# PC1, PC2 を明示的に DataFrame 化
df_pca = pd.DataFrame(
    PC,
    columns=['PC1', 'PC2'],
    index=X.index
)

df_pca.head()

from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=3, random_state=42)
df_pca['cluster'] = kmeans.fit_predict(df_pca[['PC1', 'PC2']])

import matplotlib.pyplot as plt

plt.figure(figsize=(8, 6))

for c in sorted(df_pca['cluster'].unique()):
    subset = df_pca[df_pca['cluster'] == c]
    plt.scatter(
        subset['PC1'],
        subset['PC2'],
        label=f'Cluster {c}',
        alpha=0.7
    )

plt.axhline(0, color='gray', linewidth=0.5)
plt.axvline(0, color='gray', linewidth=0.5)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('K-means clustering on PC1 × PC2')
plt.legend()
plt.show()

cluster_summary = (
    df_pca
    .groupby('cluster')[['PC1', 'PC2']]
    .mean()
)

cluster_summary

hdi_cols = [
    'le_2023','gni_pc_f_2023','gni_pc_m_2023','gii_2023','mys_2023', 'eys_2023', 'se_f_2023', 'se_m_2023'
]

cluster_hdi = (
    df_labno_hdi
    .join(df_pca['cluster'])
    .groupby('cluster')[hdi_cols]
    .mean()
)

cluster_hdi

# PCデータと LabNo を結合
df_plot = df_pca.join(df_labno_hdi['lab_count'])

plt.figure(figsize=(8, 6))

scatter = plt.scatter(
    df_plot['PC1'],
    df_plot['PC2'],
    c=df_plot['cluster'],
    s=df_plot['lab_count'] * 3,  # サイズ調整
    alpha=0.7
)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Clusters on PC1×PC2 (size = Lab count)')
plt.colorbar(scatter, label='Cluster')
plt.show()

# cluster 情報を country_encoded をキーに付与
df_kind_plot = df_kind_hdi.join(
    df_pca['cluster'],
    on='country_encoded'
)

#df_kind_plot.head()

df_kind_cluster = (
    df_kind_plot
    .groupby(['cluster', 'kind_name'])
    .agg(kind_count=('kind_count', 'sum'))
    .reset_index()
)

df_kind_cluster.head()

import seaborn as sns
import matplotlib.pyplot as plt

# cluster × kind
heat = df_kind_cluster.pivot_table(
    index='cluster',
    columns='kind_name',
    values='kind_count',
    aggfunc='sum',
    fill_value=0
)

plt.figure(figsize=(10, 5))
sns.heatmap(
    heat,
    cmap='Blues',
    annot=True,
    fmt='d'
)

plt.title('Lab kind distribution by cluster')
plt.xlabel('Lab kind')
plt.ylabel('Cluster')
plt.tight_layout()
plt.show()

import pandas as pd
import numpy as np

# cluster × kind のピボット
radar_df = df_kind_cluster.pivot_table(
    index='cluster',
    columns='kind_name',
    values='kind_count',
    aggfunc='sum',
    fill_value=0
)

radar_ratio = radar_df.div(radar_df.sum(axis=1), axis=0)
#radar_ratio

import matplotlib.pyplot as plt

labels = radar_ratio.columns.tolist()
num_vars = len(labels)

angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()
angles += angles[:1]  # 閉じる

plt.figure(figsize=(7, 7))
ax = plt.subplot(111, polar=True)

for cluster in radar_ratio.index:
    values = radar_ratio.loc[cluster].tolist()
    values += values[:1]  # 閉じる

    ax.plot(angles, values, label=f'Cluster {cluster}')
    ax.fill(angles, values, alpha=0.15)

ax.set_thetagrids(np.degrees(angles[:-1]), labels)
ax.set_title('Cluster characteristics by Lab kind', y=1.08)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))

plt.tight_layout()
plt.show()

	PC1	PC2	PC3	PC4	PC5	PC6	PC7	PC8	PC9	PC10	...	PC22	PC23	PC24	PC25	PC26	PC27	PC28	PC29	PC30	PC31
le_2023	0.211800	-0.054955	0.006493	-0.178181	0.088710	0.125428	-0.209398	-0.040073	-0.050286	-0.068270	...	0.072414	-0.054780	-0.002740	0.027417	0.011697	-0.029523	-0.003097	-0.021003	-0.267493	0.767548
eys_2023	0.199153	0.141232	-0.052108	-0.127472	0.175753	-0.058357	-0.092404	0.358083	0.130728	0.086997	...	0.061719	0.015184	0.001479	-0.010426	0.027925	0.710214	0.375476	-0.022960	0.097672	0.058061
mys_2023	0.214473	0.006789	-0.102758	0.142446	-0.003577	-0.155231	0.080404	0.056790	-0.191360	0.007087	...	0.031260	0.065197	-0.017072	0.019097	0.021783	0.124301	-0.411460	0.298799	0.584668	0.215839
gnipc_2023	0.191551	0.085989	0.311239	-0.156832	-0.089751	-0.144176	0.042655	-0.105132	0.006519	-0.151343	...	-0.070319	-0.088483	-0.130204	0.116873	-0.006921	-0.346723	0.577168	0.174528	0.356150	0.117540
gdi_group_2023	-0.185174	-0.121411	0.104656	-0.259199	0.012978	-0.116093	0.153079	0.263856	-0.167125	0.020977	...	0.002893	0.032880	0.030471	0.005649	0.040057	0.004752	-0.007849	0.007023	-0.001145	0.000579
gdi_2023	0.172972	0.101036	-0.092019	0.351664	0.045146	0.049023	-0.232165	-0.299690	0.150610	0.077034	...	-0.090648	-0.228916	-0.023554	-0.253552	0.036217	-0.015029	-0.010701	-0.004781	0.000700	0.003182
le_f_2023	0.214184	-0.057746	-0.046537	-0.134580	0.082264	0.131882	-0.207145	-0.081447	-0.052691	-0.082047	...	0.381496	-0.220258	0.075606	0.459603	0.123931	0.010116	0.002004	0.029977	0.111404	-0.399892
eys_f_2023	0.204225	0.117513	-0.049402	-0.094016	0.161991	-0.034451	-0.121027	0.284088	0.123326	0.117720	...	0.335340	0.028528	0.150843	0.187961	-0.023761	-0.373693	-0.201076	0.010092	-0.066587	-0.027507
mys_f_2023	0.216016	-0.016660	-0.085728	0.162607	-0.021453	-0.114133	0.055653	0.014378	-0.173312	0.029969	...	-0.119614	0.431618	0.067314	0.284097	-0.429098	-0.041008	0.210853	-0.130655	-0.277633	-0.112213
gni_pc_f_2023	0.188344	0.166816	0.244324	-0.144821	-0.087688	-0.192127	0.043147	-0.139370	0.099030	-0.251464	...	0.328864	0.289853	0.448030	-0.359057	0.054396	0.075061	-0.158537	-0.067321	-0.104746	-0.037604
le_m_2023	0.204494	-0.054475	0.065139	-0.221224	0.095273	0.119134	-0.205319	0.002865	-0.054447	-0.053656	...	-0.235828	0.091328	-0.073948	-0.421900	-0.066270	0.006675	-0.006167	0.004656	0.150428	-0.377984
eys_m_2023	0.187834	0.161909	-0.034279	-0.162293	0.185281	-0.091832	-0.055358	0.439334	0.130307	0.055079	...	-0.327149	-0.025556	-0.116786	-0.142105	0.002612	-0.346965	-0.170464	0.010135	-0.035148	-0.029404
mys_m_2023	0.209152	0.024037	-0.105907	0.122087	0.011932	-0.202141	0.102435	0.100470	-0.215926	-0.013003	...	0.048938	-0.345747	-0.018386	-0.211191	0.372756	-0.082446	0.208608	-0.175804	-0.298061	-0.099397
gni_pc_m_2023	0.192575	0.055091	0.308025	-0.173328	-0.085467	-0.134957	0.045311	-0.084388	-0.023452	-0.134691	...	-0.249247	-0.247305	-0.339802	0.279940	-0.047213	0.278223	-0.413045	-0.107376	-0.249491	-0.080412
coef_ineq_2023	-0.205367	0.193879	0.040937	-0.096187	0.155514	-0.165244	-0.140803	-0.053936	-0.161983	0.044801	...	0.307971	0.007371	-0.334281	-0.117718	-0.219805	-0.060613	-0.028691	-0.631231	0.262905	0.064554
loss_2023	-0.203884	0.196734	0.034615	-0.095790	0.163762	-0.168585	-0.152879	-0.063007	-0.176856	0.056773	...	0.269453	0.041571	-0.310835	-0.156941	-0.139635	0.032893	0.039637	0.640933	-0.313486	-0.087280
ineq_le_2023	-0.213720	0.099523	0.110893	0.055542	-0.078768	-0.136643	0.158494	0.063754	0.041715	0.001256	...	0.101555	-0.295551	0.161374	0.034672	0.202189	0.002004	0.008778	-0.001407	-0.007282	0.007519
ineq_edu_2023	-0.202323	0.023040	0.145253	-0.214685	0.077493	-0.008584	-0.104349	0.030095	-0.001373	0.065298	...	-0.304065	0.088321	0.323305	0.176316	0.177154	0.008218	-0.004761	-0.008746	0.022703	0.011082
ineq_inc_2023	-0.151130	0.284860	-0.083913	-0.011183	0.286608	-0.213959	-0.334815	-0.223849	-0.357488	0.095566	...	-0.269799	0.025173	0.300998	0.133351	0.173642	0.019662	-0.003998	-0.025387	0.033826	0.013032
gii_2023	-0.214163	-0.058405	0.042103	0.070494	0.030231	-0.032609	-0.091483	0.104190	-0.063198	0.156086	...	0.078698	-0.064246	-0.085838	0.069039	-0.018941	-0.004886	-0.014632	0.001773	0.000314	-0.003756
mmr_2023	-0.183510	0.135600	0.186668	0.078934	-0.079896	-0.257394	0.203237	0.219475	0.118925	-0.077244	...	-0.002338	-0.030935	0.018976	-0.009940	0.000143	-0.002311	-0.008775	0.007249	0.003992	0.000823
abr_2023	-0.196339	0.169508	0.070765	0.133710	-0.087780	-0.022122	-0.049254	0.027478	-0.089443	0.036309	...	-0.035396	0.058323	0.013913	0.001315	-0.013755	-0.005584	-0.000556	0.004609	-0.000464	0.000634
se_f_2023	0.207002	-0.011452	-0.091240	0.171110	0.008456	-0.173565	0.152437	0.011241	-0.208683	0.116087	...	0.030055	0.405034	-0.288274	0.071368	0.528742	-0.029021	-0.002613	-0.045493	-0.025512	-0.009393
se_m_2023	0.196818	0.002286	-0.106410	0.138410	0.061998	-0.266237	0.195780	0.068459	-0.259803	0.107390	...	-0.040579	-0.372078	0.287526	-0.052573	-0.440338	0.019849	0.000717	0.040736	0.023116	0.005792
pr_f_2023	0.051320	0.477575	-0.062820	-0.145855	-0.121967	0.342388	0.188762	-0.028203	0.119573	0.554718	...	0.000298	-0.038520	0.013267	0.002095	0.013645	-0.002242	0.000325	-0.004334	-0.002242	-0.003434
pr_m_2023	-0.050419	-0.434407	0.154754	0.205723	0.169486	-0.310497	-0.326334	0.095291	0.380100	0.287414	...	0.016607	0.002973	0.014322	-0.000762	0.019518	0.001530	0.005550	-0.003564	0.000059	-0.002595
lfpr_f_2023	0.052178	0.427480	0.215121	0.440462	0.072315	-0.023562	-0.051331	0.040155	0.303265	-0.145436	...	-0.017974	0.092558	-0.055638	0.179293	-0.035640	0.018610	-0.006549	0.012702	-0.001853	-0.005622
lfpr_m_2023	-0.016457	-0.004394	0.411251	0.329300	0.238040	0.488852	-0.075762	0.329465	-0.361805	-0.203493	...	0.027442	-0.030683	0.054720	-0.064918	0.021354	-0.000239	-0.000577	-0.005976	-0.004564	0.001873
co2_prod_2023	0.126752	-0.205013	0.396698	0.061701	-0.075351	0.091620	0.118572	-0.044025	-0.181995	0.526929	...	0.112939	0.044967	0.051364	-0.072707	-0.018676	0.014877	-0.013233	-0.007028	-0.004834	-0.000051
mf_2023	0.153236	0.023010	0.424826	-0.066648	-0.028695	-0.128031	-0.086131	-0.230458	-0.023354	0.207786	...	-0.044999	-0.012890	0.002123	0.014181	0.012104	0.003351	-0.010162	-0.000140	-0.004417	0.001626
pop_total_2023	-0.018477	-0.077157	0.038714	-0.046263	0.763339	0.053504	0.514550	-0.278595	0.169128	-0.022413	...	-0.006534	0.011732	-0.016792	0.017295	0.005762	-0.003451	0.001148	0.000565	-0.000762	0.000693

	PC1_loading	PC1_abs	PC2_loading	PC2_abs
mys_f_2023	0.216016	0.216016	-0.016660	0.016660
mys_2023	0.214473	0.214473	0.006789	0.006789
le_f_2023	0.214184	0.214184	-0.057746	0.057746
gii_2023	-0.214163	0.214163	-0.058405	0.058405
ineq_le_2023	-0.213720	0.213720	0.099523	0.099523
le_2023	0.211800	0.211800	-0.054955	0.054955
mys_m_2023	0.209152	0.209152	0.024037	0.024037
se_f_2023	0.207002	0.207002	-0.011452	0.011452
coef_ineq_2023	-0.205367	0.205367	0.193879	0.193879
le_m_2023	0.204494	0.204494	-0.054475	0.054475
eys_f_2023	0.204225	0.204225	0.117513	0.117513
loss_2023	-0.203884	0.203884	0.196734	0.196734
ineq_edu_2023	-0.202323	0.202323	0.023040	0.023040
eys_2023	0.199153	0.199153	0.141232	0.141232
se_m_2023	0.196818	0.196818	0.002286	0.002286
abr_2023	-0.196339	0.196339	0.169508	0.169508
gni_pc_m_2023	0.192575	0.192575	0.055091	0.055091
gnipc_2023	0.191551	0.191551	0.085989	0.085989
gni_pc_f_2023	0.188344	0.188344	0.166816	0.166816
eys_m_2023	0.187834	0.187834	0.161909	0.161909
gdi_group_2023	-0.185174	0.185174	-0.121411	0.121411
mmr_2023	-0.183510	0.183510	0.135600	0.135600
gdi_2023	0.172972	0.172972	0.101036	0.101036
mf_2023	0.153236	0.153236	0.023010	0.023010
ineq_inc_2023	-0.151130	0.151130	0.284860	0.284860
co2_prod_2023	0.126752	0.126752	-0.205013	0.205013
lfpr_f_2023	0.052178	0.052178	0.427480	0.427480
pr_f_2023	0.051320	0.051320	0.477575	0.477575
pr_m_2023	-0.050419	0.050419	-0.434407	0.434407
pop_total_2023	-0.018477	0.018477	-0.077157	0.077157
lfpr_m_2023	-0.016457	0.016457	-0.004394	0.004394

	PC1	PC2
cluster
0	-1.054291	-0.738622
1	3.779757	0.186801
2	-6.750372	0.720682

	le_2023	gni_pc_f_2023	gni_pc_m_2023	gii_2023	mys_2023	eys_2023	se_f_2023	se_m_2023
cluster
0.0	73.585140	9354.713982	18257.990229	0.354953	8.946904	13.194642	58.170347	61.481492
1.0	80.067339	38069.804750	59809.293607	0.109177	12.125226	16.528256	88.319294	89.972756
2.0	63.848536	2852.471504	4994.721266	0.560929	4.544651	10.023278	19.750143	33.082322

	cluster	kind_name	kind_count
0	0.0	fab_lab	517
1	0.0	mini_fab_lab	94
2	0.0	mobile	8
3	1.0	fab_lab	1149
4	1.0	mini_fab_lab	169

Presentation¶

Select Data for analysis¶

Spearman¶

PCA¶

What are PC1 and PC2?¶

K-mean Clustering¶

Additional to see the relationship between lab kind.¶