< Home
Week2-1: Fitting¶
I tried Neil's code with my data.
Graph for HDI¶
Life Expectancy and Gross National Income Per Capita¶
In [21]:
path_hdr25 = "./datasets/hdr25.csv" #define dataset path
import pandas as pd #import Panda Library
import numpy as np #import Numpy
import matplotlib.pyplot as plt
xmin = 50
xmax = 90
noise = 0.05
npts = 100
df_hdr25 = pd.read_csv(path_hdr25, encoding='utf-8', encoding_errors='ignore') #df = data flame = read dataset path "path_hdr25" defined above with panda
df_hdr25.fillna(0, inplace=True) #replace N/A to 0
x= df_hdr25['le_2023']
y = df_hdr25['gnipc_2023']
coeff1 = np.polyfit(x,y,1) # fit first-order polynomial :polyfitで関数を表示
coeff2 = np.polyfit(x,y,2) # fit second-order polynomial
xfit = np.linspace(xmin,xmax,npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit) # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")
plt.figure()
plt.plot(x,y,'o')
plt.plot(xfit,yfit1,'g-',label='linear')
plt.plot(xfit,yfit2,'r-',label='quadratic')
plt.legend()
plt.show()
print(coeff1) #y=b+ax のaとb
print(coeff2) #y=ax(2乗)++bx+c のaとbとc
first-order fit coefficients: [ 2603.24055225 -166404.62042288] second-order fit coefficients: [ 1.42730702e+02 -1.79948755e+04 5.69471814e+05]
[ 2603.24055225 -166404.62042288] [ 1.42730702e+02 -1.79948755e+04 5.69471814e+05]
Mean Years of Schooling and Gross National Income Per Capita¶
In [25]:
import matplotlib.pyplot as plt
xmin = 0
xmax = 15
noise = 0.05
npts = 100
x= df_hdr25['mys_2023']
y = df_hdr25['gnipc_2023']
coeff1 = np.polyfit(x,y,1) # fit first-order polynomial :polyfitで関数を表示
coeff2 = np.polyfit(x,y,2) # fit second-order polynomial
xfit = np.linspace(xmin,xmax,npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit) # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")
plt.figure()
plt.plot(x,y,'o')
plt.plot(xfit,yfit1,'g-',label='linear')
plt.plot(xfit,yfit2,'r-',label='quadratic')
plt.legend()
plt.show()
print(coeff1) #y=b+ax のaとb
print(coeff2) #y=ax(2乗)++bx+c のaとbとc
first-order fit coefficients: [ 5108.64521224 -22137.39363599] second-order fit coefficients: [ 580.90019247 -4314.39551824 9446.25545114]
[ 5108.64521224 -22137.39363599] [ 580.90019247 -4314.39551824 9446.25545114]
Graph for HDI and Lab No¶
In [66]:
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pycountry #import library
url = 'https://api.fablabs.io/0/labs.json'
r = requests.get(url)
data = r.json()
df_lablist = pd.DataFrame(data)
country_code = df_lablist['country_code']
def alpha2_to_alpha3(code2): #define function(関数)”alpha2_to_alpha”, "code2" is a variable(変数) for 2 letter code (alpha2_to_alpha3 という関数を定義している,code2は「2文字の国コード」を受け取るための変数)
country = pycountry.countries.get(alpha_2=code2.upper()) #code2を大文字に変換し、pycountryから国コード合致しているものを探す。このことをcountryと定義
return country.alpha_3 if country else None #if found the country, return 3 letter code, otherwise retrun None 合致すれば3文字コードを返す、なければNoneと返す
ccd = country_code.to_list() #make Series(列)"country_code"in Panda to Python list(Panda上のcountry_code列を通常のPythonリストに変換、このことをccdと定義)
ccd3 = [] #prepare empty list (to put "3 letter code")3文字コードを入れるための空のリストを用意
for c in ccd: #ccd(=リスト化されたcountry_code(2桁)) の中身を1つずつ c に取り出す (ループ)
cc = alpha2_to_alpha3(c) #2文字コード c を、先ほど作った関数「alpha2_to_alpha3」(受け取った2文字コードに対してpycountryから合致しているものを探し、合致すれば3文字コードを返す)で3文字コードに変換
ccd3.append(cc) #変換した結果を ccd3 リストに追加
df_labno = df_lablist.groupby('country_code').agg(lab_count=('id', 'count')).reset_index()
df_labno['ccd3'] = df_labno['country_code'].apply(alpha2_to_alpha3)
df_merge2 = pd.merge(df_labno,df_hdr25,left_on='ccd3',right_on='iso3',how='left')
x= df_merge2['hdi_2023']
y = df_merge2['lab_count']
mask = np.isfinite(x) & np.isfinite(y)
x = x[mask]
y = y[mask]
xmin = x.min()
xmax = x.max()
noise = 0.05
npts = 100
xplot = np.linspace(xmin, xmax, npts)
coeff1 = np.polyfit(x,y,1) # fit first-order polynomial
coeff2 = np.polyfit(x,y,2) # fit second-order polynomial
xfit = np.linspace(xmin,xmax,npts)
pfit1 = np.poly1d(coeff1)
yfit1 = pfit1(xfit) # evaluate first-order fit
print(f"first-order fit coefficients: {coeff1}")
pfit2 = np.poly1d(coeff2)
yfit2 = pfit2(xfit) # evaluate second-order fit
print(f"second-order fit coefficients: {coeff2}")
plt.figure()
plt.plot(x,y,'o')
plt.plot(xfit,yfit1,'g-',label='linear')
plt.plot(xfit,yfit2,'r-',label='quadratic')
plt.legend()
plt.show()
first-order fit coefficients: [ 76.67519719 -39.02056718] second-order fit coefficients: [ 139.66602023 -124.63886809 29.98633067]
In [64]:
#print(x.isna().sum())
#print(x.dtype)
#print(y.dtype)
#print(df_merge2['hdi_2023'].dtype)
#print(x.describe())
#print(y.describe())
In [ ]: