import pandas as pd  # tabular data loading
import numpy as np  # polynomial fitting
import matplotlib.pyplot as plt  # plotting

path_hdr25 = "./datasets/hdr25.csv"  # path of the dataset CSV

# x-range over which the fitted curves are drawn, and the number of samples.
xmin = 50
xmax = 90
noise = 0.05  # not used in this cell
npts = 100

# Load the dataset into a DataFrame; bytes that are not valid UTF-8 are
# skipped instead of raising.
df_hdr25 = pd.read_csv(path_hdr25, encoding='utf-8', encoding_errors='ignore')

# Pick the rows for the fit BEFORE the zero-fill below: a missing value that
# has been replaced by 0 would otherwise enter the least-squares fit as a
# genuine data point and bias the coefficients.
fit_rows = df_hdr25[['le_2023', 'gnipc_2023']].dropna()

# The rest of the file reads df_hdr25 after this cell, so the frame itself is
# still zero-filled exactly as before.
df_hdr25.fillna(0, inplace=True)

x = fit_rows['le_2023']
y = fit_rows['gnipc_2023']

# np.polyfit returns coefficients with the highest power first.
coeff1 = np.polyfit(x, y, 1)  # first-order fit:  y = a*x + b        -> [a, b]
coeff2 = np.polyfit(x, y, 2)  # second-order fit: y = a*x**2 + b*x + c -> [a, b, c]
xfit = np.linspace(xmin, xmax, npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit)  # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit)  # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")

# Scatter of the data with both fitted curves on top.
plt.figure()
plt.plot(x, y, 'o')
plt.plot(xfit, yfit1, 'g-', label='linear')
plt.plot(xfit, yfit2, 'r-', label='quadratic')
plt.legend()
plt.show()
print(coeff1)  # a and b of y = a*x + b
print(coeff2)  # a, b and c of y = a*x**2 + b*x + c

first-order fit coefficients: [   2603.24055225 -166404.62042288]
second-order fit coefficients: [ 1.42730702e+02 -1.79948755e+04  5.69471814e+05]

[   2603.24055225 -166404.62042288]
[ 1.42730702e+02 -1.79948755e+04  5.69471814e+05]

import matplotlib.pyplot as plt

# x-range over which the fitted curves are drawn, and the number of samples.
xmin = 0
xmax = 15
noise = 0.05  # not used in this cell
npts = 100

x = df_hdr25['mys_2023']
y = df_hdr25['gnipc_2023']

# df_hdr25 was zero-filled right after loading, so a 0 in either column stands
# for a value that was missing in the CSV. Leaving those rows in would feed
# artificial zeros into the least-squares fit.
# NOTE(review): assumes neither column holds a genuine 0 - confirm against the data.
present = (x != 0) & (y != 0)
x = x[present]
y = y[present]

# np.polyfit returns coefficients with the highest power first.
coeff1 = np.polyfit(x, y, 1)  # first-order fit:  y = a*x + b        -> [a, b]
coeff2 = np.polyfit(x, y, 2)  # second-order fit: y = a*x**2 + b*x + c -> [a, b, c]
xfit = np.linspace(xmin, xmax, npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit)  # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit)  # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")

# Scatter of the data with both fitted curves on top.
plt.figure()
plt.plot(x, y, 'o')
plt.plot(xfit, yfit1, 'g-', label='linear')
plt.plot(xfit, yfit2, 'r-', label='quadratic')
plt.legend()
plt.show()
print(coeff1)  # a and b of y = a*x + b
print(coeff2)  # a, b and c of y = a*x**2 + b*x + c

first-order fit coefficients: [  5108.64521224 -22137.39363599]
second-order fit coefficients: [  580.90019247 -4314.39551824  9446.25545114]

[  5108.64521224 -22137.39363599]
[  580.90019247 -4314.39551824  9446.25545114]

import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pycountry  #import library

# Download the lab list as JSON and load it into a DataFrame.
url = 'https://api.fablabs.io/0/labs.json'
# Bound the wait so a stalled connection cannot hang the run indefinitely.
r = requests.get(url, timeout=30)
# Stop here on an HTTP error status instead of trying to parse an error body.
r.raise_for_status()
data = r.json()
df_lablist = pd.DataFrame(data)

# Two-letter country code column of the lab list.
country_code = df_lablist['country_code']

def alpha2_to_alpha3(code2):
    """Convert a two-letter country code to its three-letter code.

    Args:
        code2: Two-letter (alpha-2) country code. Letter case and surrounding
            whitespace are ignored. Non-string values, such as None or the
            NaN pandas uses for a missing cell, are accepted.

    Returns:
        The three-letter (alpha-3) code of the matching pycountry entry, or
        None when code2 is not a string or no country matches.
    """
    # Missing cells arrive as None/NaN, which have no .upper(); treat them as
    # "no match" instead of raising AttributeError.
    if not isinstance(code2, str):
        return None
    country = pycountry.countries.get(alpha_2=code2.strip().upper())
    return country.alpha_3 if country else None

# Plain Python list of the two-letter codes, one entry per row of df_lablist.
ccd = country_code.to_list()

# Three-letter code for every entry of ccd, in the same order; entries that
# alpha2_to_alpha3 cannot match become None.
ccd3 = [alpha2_to_alpha3(code2) for code2 in ccd]

# One row per two-letter country code with the number of non-null lab ids.
df_labno = (
    df_lablist.groupby('country_code')['id']
    .count()
    .reset_index(name='lab_count')
)
# Add the three-letter code so the table can be joined on df_hdr25's 'iso3'.
df_labno['ccd3'] = df_labno['country_code'].map(alpha2_to_alpha3)

# Left join: every lab country is kept; df_hdr25 columns are attached where
# ccd3 equals iso3.
df_merge2 = df_labno.merge(df_hdr25, how='left', left_on='ccd3', right_on='iso3')

x = df_merge2['hdi_2023']
y = df_merge2['lab_count']

# Keep only rows with a usable value on both axes:
#  - NaN appears where the left join found no df_hdr25 row for a lab country;
#  - 0 appears where the value was missing in the CSV, because df_hdr25 was
#    zero-filled right after loading. isfinite() alone lets those through.
# NOTE(review): assumes hdi_2023 is never genuinely 0 - confirm against the data.
mask = np.isfinite(x) & np.isfinite(y) & (x != 0)
x = x[mask]
y = y[mask]

# Draw the fitted curves over the observed x-range.
xmin = x.min()
xmax = x.max()
noise = 0.05  # not used in this cell
npts = 100

xplot = np.linspace(xmin, xmax, npts)  # same grid as xfit; not used below
# np.polyfit returns coefficients with the highest power first.
coeff1 = np.polyfit(x, y, 1)  # fit first-order polynomial
coeff2 = np.polyfit(x, y, 2)  # fit second-order polynomial
xfit = np.linspace(xmin, xmax, npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit)  # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit)  # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")

# Scatter of the data with both fitted curves on top.
plt.figure()
plt.plot(x, y, 'o')
plt.plot(xfit, yfit1, 'g-', label='linear')
plt.plot(xfit, yfit2, 'r-', label='quadratic')
plt.legend()
plt.show()

first-order fit coefficients: [ 76.67519719 -39.02056718]
second-order fit coefficients: [ 139.66602023 -124.63886809   29.98633067]

#print(x.isna().sum())
#print(x.dtype)
#print(y.dtype)
#print(df_merge2['hdi_2023'].dtype)
#print(x.describe())
#print(y.describe())

Week2-1: Fitting

Graph for HDI

Life Expectancy and Gross National Income Per Capita

Mean Years of Schooling and Gross National Income Per Capita

Graph for HDI and Lab No