Naldi Carrion - Fab Lab ESAN - Fab Futures - Data Science

Week 2 - 1st Class: Fitting¶

The aim of this class was to learn the concept of model fitting: finding a function that represents the data points as well as possible, in one of two ways:

  • Interpolation: the curve passes exactly through every point.
  • Approximation/regression: the curve follows the points as closely as possible, but does not necessarily pass through all of them.

We can use Python for:

  • Linear fitting
  • Polynomial fitting
  • The difference between underfitting and overfitting
  • Using scipy.optimize.curve_fit (a minimal sketch follows this list)
  • Visualizing outputs
  • Interpreting metrics
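
The list above mentions scipy.optimize.curve_fit, which the exercises below do not actually use. As a minimal sketch (my own illustration, not class code, assuming the same kind of noisy linear data generated later in this notebook), curve_fit takes a model function and returns its best-fit parameters:

In [ ]:
import numpy as np
from scipy.optimize import curve_fit

# Model function: a straight line with slope a and intercept b
def linear(x, a, b):
    return a * x + b

# Noisy data, like the artificial dataset created below
x = np.linspace(-1, 1, 10)
y = x + 0.1 * np.random.randn(10)

# curve_fit returns the optimal parameters and their covariance matrix
params, cov = curve_fit(linear, x, y)
print("a, b =", params)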

The higher the degree of a polynomial, the more it can bend to "fit" the data. A very complex polynomial can pass through all the points (exact interpolation), but it becomes unstable. This introduces the concept of overfitting, which results in a useless model.
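
As a quick demonstration (my own sketch, not class code): with 10 points, a degree-9 polynomial passes exactly through every point (interpolation) but can oscillate wildly between them, while a degree-1 fit only approximates the points:

In [ ]:
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(-1, 1, 10)
y = x + 0.1 * np.random.randn(10)

# Degree 9 on 10 points: exact interpolation, unstable between the points
p_interp = np.polyfit(x, y, 9)
# Degree 1: a regression that only approximates the points
p_approx = np.polyfit(x, y, 1)

xp = np.linspace(-1, 1, 200)
plt.scatter(x, y, label='Data')
plt.plot(xp, np.polyval(p_interp, xp), label='Degree 9 (interpolation)')
plt.plot(xp, np.polyval(p_approx, xp), label='Degree 1 (regression)')
plt.legend()
plt.show()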

Using Python for Model Fitting¶

The class uses Python with libraries such as numpy, matplotlib, and pandas.

  • Numpy: Used for numerical math, especially with arrays (vectors and matrices). We can use it for:

    • Creating number sequences (np.linspace).
    • Performing mathematical operations very quickly.
    • Generating noise, distributions, and random values.
    • Working with matrices (foundation of machine learning).
    • Performing linear algebra: multiplications, eigenvalues, etc.
    • Converting data into machine-readable formats.
  • Matplotlib: The basic library for drawing graphs in Python. We can use it for:

    • Graphing points, lines, bars, pie charts, and histograms.
    • Visualizing data before modeling.
    • Comparing model results.
    • Viewing errors, dispersion, and trends.
    • Documenting your experimental or scientific process.
  • Pandas: A Python library designed to work with tabular data. We can use it to read (see the sketch after this list):

    • Excel spreadsheets
    • SQL tables
    • CSV files
    • Survey responses
    • Sensor datasets
    • Lab files
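
Only the CSV case appears later in this notebook; the other sources are read in a similar way. A minimal sketch, with hypothetical file and table names:

In [ ]:
import sqlite3
import pandas as pd

# CSV file (the format used later in this notebook); "data.csv" is a placeholder name
df_csv = pd.read_csv("data.csv", sep=";")

# Excel spreadsheet (requires an Excel engine such as openpyxl)
df_xls = pd.read_excel("data.xlsx")

# SQL table, read through a standard DB-API connection
conn = sqlite3.connect("lab.db")
df_sql = pd.read_sql("SELECT * FROM measurements", conn)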

I will use them in the following exercise.

Artificial Data Creation¶

I ran code to create random data:

  • x → creates 10 points between -1 and 1, using np.linspace(-1, 1, 10).
  • y → generates values that approximately follow the function y = x, plus noise. 0.1 * np.random.randn(10) generates 10 random numbers from a normal distribution (mean 0, variance 1), multiplied by 0.1 so that the "noise" is small.
In [16]:
import numpy as np

x = np.linspace(-1, 1, 10)
y = x + 0.1*np.random.randn(10)

print(x)
print(y)
[-1.         -0.77777778 -0.55555556 -0.33333333 -0.11111111  0.11111111
  0.33333333  0.55555556  0.77777778  1.        ]
[-0.88833741 -0.76938407 -0.50452391 -0.3598181  -0.13191568  0.18267083
  0.32884573  0.42656282  0.70169181  1.01129203]

And we can also plot both arrays (x, y) using the matplotlib library:

In [17]:
import matplotlib.pyplot as plt

x = np.linspace(-1, 1, 10)
y = x + 0.1*np.random.randn(10)

plt.scatter(x, y)
plt.show()
[Figure: scatter plot of the generated (x, y) points]

Fitting a Polynomial¶

We need to find the coefficients of a polynomial of degree deg that best fits the points. Reusing the previous example, we create a 10-point dataset with the np.linspace command. Then we call the polynomial function p = np.polyfit(x, y, deg), where we define the polynomial degree. Next I create a finer set of x values to draw the polynomial smoothly. Finally, to plot both the original data and the polynomial, we use plt.scatter(...) and plt.plot(...).

In [23]:
import numpy as np
import matplotlib.pyplot as plt

# 1. Data from the previous example
x = np.linspace(-1, 1, 10)
y = x + 0.1*np.random.randn(10)

# 2. Fit a degree-1 polynomial
deg = 1
p = np.polyfit(x, y, deg)

# 3. Create a finer set of x values to draw a smooth curve
xp = np.linspace(-1, 1, 100)
yp = np.polyval(p, xp)

# 4. Plot
plt.figure(figsize=(8,5))
plt.scatter(x, y, color='blue', label='Original Data')
plt.plot(xp, yp, color='red', label='Polynomial fit (degree 1)')
plt.legend()
plt.title("Polynomial Adjust grade 1")
plt.xlabel("x")
plt.ylabel("y")

plt.show()
[Figure: original data with the degree-1 polynomial fit]
In [27]:
import numpy as np
import matplotlib.pyplot as plt

# 1. Data from the previous example
x = np.linspace(-1, 1, 10)
y = x + 0.1*np.random.randn(10)

# 2. Fit a degree-5 polynomial
deg = 5
p = np.polyfit(x, y, deg)

# 3. Create a finer set of x values to draw a smooth curve
xp = np.linspace(-1, 1, 100)
yp = np.polyval(p, xp)

# 4. Plot
plt.figure(figsize=(8,5))
plt.scatter(x, y, color='blue', label='Original Data')
plt.plot(xp, yp, color='red', label='Polynomial fit (degree 5)')
plt.legend()
plt.title("Polynomial Adjust grade 5")
plt.xlabel("x")
plt.ylabel("y")

plt.show()
[Figure: original data with the degree-5 polynomial fit]

Trying the Libraries with My Data¶

I will use my thesis pilot data to test some relationships, trying to summarize the main visual analysis from my PhD pilot survey with entrepreneurs. It focuses on three latent variables:

  • Frugality (FRUG)
  • Entrepreneurial Bricolage (BRIC)
  • Innovative Behaviour (INNOV)

The goal is to create clear, reproducible plots and simple models that I can use in my final presentation.

1. Setup and libraries¶

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import statsmodels.api as sm
import networkx as nx

from sklearn.cross_decomposition import PLSRegression

# Optional: make plots a bit larger by default
plt.rcParams['figure.figsize'] = (8, 5)

# Show plots inside the notebook (Jupyter)
%matplotlib inline

2. Loading the pilot survey data¶

The dataset comes from the pilot survey with entrepreneurs and is stored as a CSV file.

  • File: datasets/2nd_Class_Assignmt_Data/Entrepreneurs.csv
  • Separator: semicolon (;)

Each row is one entrepreneur; each column is an item or variable (socio-demographic, FRUG, BRIC, INNOV, etc.).
In [6]:
import pandas as pd

df = pd.read_csv("datasets/2nd_Class_Assignmt_Data/Entrepreneurs.csv", sep=";")
df.head()
Out[6]:
Marca temporal NOM GEN EAGE FOUND CAGE1 AFOUND CBASED CSECT EEXP ... INNOV2 INNOV3 INNOV4 CAGE2 TECHBS ETEAM EAOS SEEDF OPERF INCC
0 4/4/2025 18:10:28 iFurniture 2 35 1 2 1 2 9 1 ... 4 2 4 1 1 1 1 1 1 1
1 4/6/2025 13:09:46 Salvy Natural - Indes Perú 2 37 1 2 1 2 12 1 ... 5 5 5 1 1 1 1 0 0 0
2 4/7/2025 16:07:37 AVR Technology 1 23 1 2 1 2 15 0 ... 4 4 4 0 1 1 1 1 1 1
3 4/7/2025 21:49:59 AIO SENSORS 1 32 1 1 1 3 9 0 ... 4 4 4 0 1 1 1 0 1 1
4 4/8/2025 17:54:07 Face Me 1 30 1 2 1 3 5 0 ... 4 4 4 1 1 0 1 1 1 1

5 rows × 41 columns

In [4]:
# Loading the original dataset
df = pd.read_csv("datasets/2nd_Class_Assignmt_Data/Entrepreneurs.csv", sep=";")

print("Shape of full dataset:", df.shape)
df.head()
Shape of full dataset: (39, 41)
Out[4]:
Marca temporal NOM GEN EAGE FOUND CAGE1 AFOUND CBASED CSECT EEXP ... INNOV2 INNOV3 INNOV4 CAGE2 TECHBS ETEAM EAOS SEEDF OPERF INCC
0 4/4/2025 18:10:28 iFurniture 2 35 1 2 1 2 9 1 ... 4 2 4 1 1 1 1 1 1 1
1 4/6/2025 13:09:46 Salvy Natural - Indes Perú 2 37 1 2 1 2 12 1 ... 5 5 5 1 1 1 1 0 0 0
2 4/7/2025 16:07:37 AVR Technology 1 23 1 2 1 2 15 0 ... 4 4 4 0 1 1 1 1 1 1
3 4/7/2025 21:49:59 AIO SENSORS 1 32 1 1 1 3 9 0 ... 4 4 4 0 1 1 1 0 1 1
4 4/8/2025 17:54:07 Face Me 1 30 1 2 1 3 5 0 ... 4 4 4 1 1 0 1 1 1 1

5 rows × 41 columns

3. Filtering active entrepreneurs¶

I keep only the respondents who continue actively working on their venture, i.e., those who answered that they are still working on their business initiative (AFOUND = 1):

  • Column AFOUND: "¿Continúas impulsando (trabajando/desarrollando) tu emprendimiento/proyecto?" ("Do you continue pushing forward (working on/developing) your venture/project?")
    • Sí = 1 (still working on the venture)
    • No = 0 (not active anymore)

Thus, I need to distinguish the entrepreneurs who answered "Yes" and write instructions to split the data for the analysis. Hence I create a data frame (df_active) that keeps only the rows whose column labeled "AFOUND" contains the number 1. The result, shown in the following cell, reduces the responses under analysis from the 39 that make up my pilot survey to 37. This creates a filtered dataset df_active that I use for the rest of the analysis.

I needed to create a boolean mask: a true/false flag over the AFOUND column that selects the rows meeting the condition. I also add .copy() to avoid any confusion with the first dataframe.

  • Here is the first line of code I used: df_active = df[df["AFOUND"] == 1].copy()
  • Then I added a line to display the result: df_active.head(), df_active.shape
In [9]:
df_active = df[df["AFOUND"] == 1].copy()
df_active.head(), df_active.shape
Out[9]:
(      Marca temporal                          NOM  GEN  EAGE  FOUND  CAGE1  \
 0  4/4/2025 18:10:28                  iFurniture     2    35      1      2   
 1  4/6/2025 13:09:46  Salvy Natural - Indes Perú     2    37      1      2   
 2  4/7/2025 16:07:37               AVR Technology    1    23      1      2   
 3  4/7/2025 21:49:59                 AIO SENSORS     1    32      1      1   
 4  4/8/2025 17:54:07                      Face Me    1    30      1      2   
 
    AFOUND  CBASED  CSECT  EEXP  ...  INNOV2  INNOV3  INNOV4  CAGE2  TECHBS  \
 0       1       2      9     1  ...       4       2       4      1       1   
 1       1       2     12     1  ...       5       5       5      1       1   
 2       1       2     15     0  ...       4       4       4      0       1   
 3       1       3      9     0  ...       4       4       4      0       1   
 4       1       3      5     0  ...       4       4       4      1       1   
 
    ETEAM  EAOS  SEEDF  OPERF  INCC  
 0      1     1      1      1     1  
 1      1     1      0      0     0  
 2      1     1      1      1     1  
 3      1     1      0      1     1  
 4      0     1      1      1     1  
 
 [5 rows x 41 columns],
 (37, 41))
In [7]:
# Filter only active founders (AFOUND == 1)
df_active = df[df["AFOUND"] == 1].copy()

print("Shape of active-founders dataset:", df_active.shape)
df_active.head()
Shape of active-founders dataset: (37, 41)
Out[7]:
Marca temporal NOM GEN EAGE FOUND CAGE1 AFOUND CBASED CSECT EEXP ... INNOV2 INNOV3 INNOV4 CAGE2 TECHBS ETEAM EAOS SEEDF OPERF INCC
0 4/4/2025 18:10:28 iFurniture 2 35 1 2 1 2 9 1 ... 4 2 4 1 1 1 1 1 1 1
1 4/6/2025 13:09:46 Salvy Natural - Indes Perú 2 37 1 2 1 2 12 1 ... 5 5 5 1 1 1 1 0 0 0
2 4/7/2025 16:07:37 AVR Technology 1 23 1 2 1 2 15 0 ... 4 4 4 0 1 1 1 1 1 1
3 4/7/2025 21:49:59 AIO SENSORS 1 32 1 1 1 3 9 0 ... 4 4 4 0 1 1 1 0 1 1
4 4/8/2025 17:54:07 Face Me 1 30 1 2 1 3 5 0 ... 4 4 4 1 1 0 1 1 1 1

5 rows × 41 columns

4. Building latent-variable mean scores¶

The questionnaire includes multiple Likert-scale items for each construct:

  • Frugality: FRUG1 to FRUG7
  • Bricolage: BRIC1 to BRIC8
  • Innovative Behaviour: INNOV1 to INNOV4

I compute a simple mean score per respondent for each construct, creating:

  • FRUG_mean
  • BRIC_mean
  • INNOV_mean
In [5]:
import pandas as pd

df = pd.read_csv("datasets/2nd_Class_Assignmt_Data/Entrepreneurs.csv", sep=";")
df_active = df[df["AFOUND"] == 1].copy()
df_active.head(), df_active.shape

# Item lists
frug_items  = [f'FRUG{i}' for i in range(1, 8)]
bric_items  = [f'BRIC{i}' for i in range(1, 9)]
innov_items = [f'INNOV{i}' for i in range(1, 5)]

# Mean scores
df_active['FRUG_mean']  = df_active[frug_items].mean(axis=1)
df_active['BRIC_mean']  = df_active[bric_items].mean(axis=1)
df_active['INNOV_mean'] = df_active[innov_items].mean(axis=1)

df_active[['FRUG_mean','BRIC_mean','INNOV_mean']].head()
Out[5]:
FRUG_mean BRIC_mean INNOV_mean
0 5.000000 4.125 3.25
1 3.571429 4.750 5.00
2 4.000000 4.000 4.00
3 4.000000 4.375 4.00
4 4.285714 4.375 4.00

5. Distributions of latent-variable scores¶

To see how the constructs are distributed among active entrepreneurs, I plot histograms for each mean score.

In [7]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))

# FRUG
plt.subplot(3, 1, 1)
plt.hist(df_active['FRUG_mean'], bins=10, edgecolor='black')
plt.title("Frugality (FRUG_mean)")

# BRIC
plt.subplot(3, 1, 2)
plt.hist(df_active['BRIC_mean'], bins=10, edgecolor='black')
plt.title("Bricolage (BRIC_mean)")

# INNOV
plt.subplot(3, 1, 3)
plt.hist(df_active['INNOV_mean'], bins=10, edgecolor='black')
plt.title("Innovation (INNOV_mean)")

plt.tight_layout()
plt.show()
[Figure: histograms of FRUG_mean, BRIC_mean and INNOV_mean]

6. Pairwise relationships (scatter plots)¶

Next, I visualize the pairwise relationships between the constructs:

  1. Frugality → Bricolage
  2. Bricolage → Innovation
  3. Frugality → Innovation
In [8]:
plt.scatter(df_active['FRUG_mean'], df_active['BRIC_mean'])
plt.xlabel("Frugality (FRUG_mean)")
plt.ylabel("Bricolage (BRIC_mean)")
plt.title("Scatterplot: Frugality → Bricolage")
plt.show()
[Figure: scatterplot Frugality → Bricolage]
In [9]:
plt.scatter(df_active['BRIC_mean'], df_active['INNOV_mean'])
plt.xlabel("Bricolage (BRIC_mean)")
plt.ylabel("Innovation (INNOV_mean)")
plt.title("Scatterplot: Bricolage → Innovation")
plt.show()
[Figure: scatterplot Bricolage → Innovation]
In [10]:
plt.scatter(df_active['FRUG_mean'], df_active['INNOV_mean'])
plt.xlabel("Frugality (FRUG_mean)")
plt.ylabel("Innovation (INNOV_mean)")
plt.title("Scatterplot: Frugality → Innovation")
plt.show()
[Figure: scatterplot Frugality → Innovation]
Now we need to apply fitting to these relationships.

7. Simple linear fits and R²¶

For each relationship, I estimate a simple linear regression using numpy.polyfit:

  • I obtain the line: y = a * x + b
  • I compute and report R² manually, as R² = 1 − SS_res / SS_tot.
  • Fitting: Frugality → Bricolage
In [11]:
import numpy as np
import matplotlib.pyplot as plt

# Variables
x = df_active['FRUG_mean']
y = df_active['BRIC_mean']

# Linear fit
coef = np.polyfit(x, y, 1)
model = np.poly1d(coef)

# Plot
plt.scatter(x, y)
plt.plot(x, model(x), color='red')
plt.xlabel("Frugality (FRUG_mean)")
plt.ylabel("Bricolage (BRIC_mean)")
plt.title("Linear Fit: Frugality → Bricolage")
plt.show()

# Print model
coef
[Figure: linear fit Frugality → Bricolage]
Out[11]:
array([-0.0251913 ,  4.41082375])
  • Next, we calculate R²:
In [12]:
y_pred = model(x)
SS_res = np.sum((y - y_pred)**2)
SS_tot = np.sum((y - np.mean(y))**2)
R2 = 1 - (SS_res / SS_tot)
R2
Out[12]:
np.float64(0.0014589990012413567)
  • Fitting: Bricolage → Innovation
In [13]:
x = df_active['BRIC_mean']
y = df_active['INNOV_mean']

coef = np.polyfit(x, y, 1)
model = np.poly1d(coef)

plt.scatter(x, y)
plt.plot(x, model(x), color='red')
plt.xlabel("Bricolage (BRIC_mean)")
plt.ylabel("Innovation (INNOV_mean)")
plt.title("Linear Fit: Bricolage → Innovation")
plt.show()

# R²
y_pred = model(x)
SS_res = np.sum((y - y_pred)**2)
SS_tot = np.sum((y - np.mean(y))**2)
R2 = 1 - (SS_res / SS_tot)
R2
[Figure: linear fit Bricolage → Innovation]
Out[13]:
np.float64(0.11605670441386895)
  • Fitting: Frugality → Innovation
In [14]:
x = df_active['FRUG_mean']
y = df_active['INNOV_mean']

coef = np.polyfit(x, y, 1)
model = np.poly1d(coef)

plt.scatter(x, y)
plt.plot(x, model(x), color='red')
plt.xlabel("Frugality (FRUG_mean)")
plt.ylabel("Innovation (INNOV_mean)")
plt.title("Linear Fit: Frugality → Innovation")
plt.show()

# R²
y_pred = model(x)
SS_res = np.sum((y - y_pred)**2)
SS_tot = np.sum((y - np.mean(y))**2)
R2 = 1 - (SS_res / SS_tot)
R2
[Figure: linear fit Frugality → Innovation]
Out[14]:
np.float64(0.07432234697273854)
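
The three fits above repeat the same polyfit and R² steps. A small helper (my own refactor, not part of the class code) would avoid the duplication:

In [ ]:
import numpy as np

def linear_fit_r2(x, y):
    """Fit y = a*x + b and return the fitted model and its R²."""
    model = np.poly1d(np.polyfit(x, y, 1))
    y_pred = model(x)
    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    return model, 1 - ss_res / ss_tot

# Example use with the pilot data:
# model, r2 = linear_fit_r2(df_active['FRUG_mean'], df_active['INNOV_mean'])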

8. Internal reliability – Cronbach’s alpha¶

To check the internal consistency of each scale, I compute Cronbach's alpha, α = (k / (k − 1)) · (1 − Σ item variances / variance of the total score), where k is the number of items:

  • Values around 0.70 – 0.80 are typically considered acceptable in early-stage research.
In [16]:
import numpy as np

def cronbach_alpha(df_items):
    df_items = df_items.dropna()  # drop missing rows, just in case
    item_vars = df_items.var(axis=0, ddof=1)
    total_var = df_items.sum(axis=1).var(ddof=1)
    n_items = df_items.shape[1]
    alpha = (n_items / (n_items - 1)) * (1 - item_vars.sum() / total_var)
    return alpha
In [17]:
frug_items  = [f'FRUG{i}' for i in range(1, 8)]
bric_items  = [f'BRIC{i}' for i in range(1, 9)]
innov_items = [f'INNOV{i}' for i in range(1, 5)]
In [18]:
alpha_frug = cronbach_alpha(df_active[frug_items])
alpha_frug
Out[18]:
np.float64(0.7970463064738873)
In [19]:
alpha_bric = cronbach_alpha(df_active[bric_items])
alpha_bric
Out[19]:
np.float64(0.7196649161227224)
In [20]:
alpha_innov = cronbach_alpha(df_active[innov_items])
alpha_innov
Out[20]:
np.float64(0.7246906066139176)

9. Simple path diagram (visual summary)¶

Using the coefficients from the linear fits, I draw a simple path diagram with:

  • Nodes: Frugality, Bricolage, Innovation
  • Directed arrows: using the path coefficients from the linear fits.
In [21]:
import networkx as nx
import matplotlib.pyplot as plt

G = nx.DiGraph()

G.add_edge("Frugality", "Bricolage", weight=-0.025)
G.add_edge("Bricolage", "Innovation", weight=0.116)
G.add_edge("Frugality", "Innovation", weight=-0.074)

pos = {
    "Frugality": (0, 1),
    "Bricolage": (1, 1),
    "Innovation": (1, 0)
}

plt.figure(figsize=(8,6))
nx.draw(G, pos, with_labels=True, node_size=4000, node_color="lightblue", font_size=12, arrowsize=20)

labels = {
    ("Frugality", "Bricolage"): "-0.025",
    ("Bricolage", "Innovation"): "0.116",
    ("Frugality", "Innovation"): "-0.074"
}

nx.draw_networkx_edge_labels(G, pos, edge_labels=labels, font_color='red')
plt.title("Path Model: Frugality, Bricolage and Innovation")
plt.show()
[Figure: path diagram of Frugality, Bricolage and Innovation]

10. Moderation model: FRUG × BRIC¶

To explore a simple moderation, I create an interaction term:

  • interaction = FRUG_mean * BRIC_mean

Then I estimate an Ordinary Least Squares (OLS) model:

Y = β₀ + β₁·X₁ + β₂·X₂ + β₃·(X₁·X₂) + ε

INNOV_mean = β₀ + β₁·FRUG_mean + β₂·BRIC_mean + β₃·(FRUG_mean · BRIC_mean) + ε

This is mainly exploratory in the pilot stage.

In [22]:
import statsmodels.api as sm
import pandas as pd

# Create the interaction term
df_active['interaction'] = df_active['FRUG_mean'] * df_active['BRIC_mean']

# Independent variables
X = df_active[['FRUG_mean', 'BRIC_mean', 'interaction']]
X = sm.add_constant(X)

# Dependent variable
y = df_active['INNOV_mean']

# Fit the linear model
model = sm.OLS(y, X).fit()
print(model.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:             INNOV_mean   R-squared:                       0.215
Model:                            OLS   Adj. R-squared:                  0.144
Method:                 Least Squares   F-statistic:                     3.012
Date:                Sun, 30 Nov 2025   Prob (F-statistic):             0.0439
Time:                        05:17:11   Log-Likelihood:                -33.254
No. Observations:                  37   AIC:                             74.51
Df Residuals:                      33   BIC:                             80.95
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const         -10.1494     11.465     -0.885      0.382     -33.476      13.177
FRUG_mean       2.9300      2.812      1.042      0.305      -2.790       8.650
BRIC_mean       3.6296      2.679      1.355      0.185      -1.822       9.081
interaction    -0.7554      0.658     -1.149      0.259      -2.093       0.582
==============================================================================
Omnibus:                        2.448   Durbin-Watson:                   1.809
Prob(Omnibus):                  0.294   Jarque-Bera (JB):                1.966
Skew:                          -0.562   Prob(JB):                        0.374
Kurtosis:                       2.896   Cond. No.                     2.21e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.21e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

11. PLS-style analysis (FRUG, BRIC, INNOV)¶

As a very simple PLS-SEM style check, I:

  1. Build latent variable scores as means: FRUG_LV, BRIC_LV, INNOV_LV
  2. Estimate:
    • Model 1: FRUG → BRIC
    • Model 2: FRUG + BRIC → INNOV

This is just an exploratory step to see the direction and intensity of paths.

In [27]:
from sklearn.cross_decomposition import PLSRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# 1. Define the items
frug_items  = [f'FRUG{i}' for i in range(1, 8)]
bric_items  = [f'BRIC{i}' for i in range(1, 9)]
innov_items = [f'INNOV{i}' for i in range(1, 5)]

# 2. Standardize the indicators
scaler = StandardScaler()
df_z = pd.DataFrame(
    scaler.fit_transform(df_active[frug_items + bric_items + innov_items]),
    columns = frug_items + bric_items + innov_items
)

# 3. LVs as standardized means
df_active['FRUG_LV']  = df_z[frug_items].mean(axis=1)
df_active['BRIC_LV']  = df_z[bric_items].mean(axis=1)
df_active['INNOV_LV'] = df_z[innov_items].mean(axis=1)

# 4. DataFrame without NaN
df_pls = df_active[['FRUG_LV', 'BRIC_LV', 'INNOV_LV']].dropna()

print("N original:", len(df_active))
print("N sin NaN (para PLS):", len(df_pls))

# ========== PLS MODEL 1: FRUG → BRIC ==========
X1 = df_pls[['FRUG_LV']].values           # (n, 1)
Y1 = df_pls[['BRIC_LV']].values           # (n, 1)

pls1 = PLSRegression(n_components=1)
pls1.fit(X1, Y1)

# ========== PLS MODEL 2: FRUG + BRIC → INNOV ==========
X2 = df_pls[['FRUG_LV', 'BRIC_LV']].values   # (n, 2)
Y2 = df_pls[['INNOV_LV']].values            # (n, 1)

pls2 = PLSRegression(n_components=1)
pls2.fit(X2, Y2)

print("\nShape coef_ pls2:", pls2.coef_.shape)
print("Matriz coef_ pls2:\n", pls2.coef_)

coef_frug = pls2.coef_[0, 0]  # FRUG → INNOV
coef_bric = pls2.coef_[0, 1]  # BRIC → INNOV

print("\nPath coefficients:")
print("FRUG → BRIC:",  pls1.coef_[0, 0])
print("FRUG → INNOV:", coef_frug)
print("BRIC → INNOV:", coef_bric)

print("\nR²:")
print("R² BRIC  =", pls1.score(X1, Y1))
print("R² INNOV =", pls2.score(X2, Y2))
N original: 37
N without NaN (for PLS): 35

Shape coef_ pls2: (1, 2)
coef_ matrix pls2:
 [[-0.27767944  0.41300091]]

Path coefficients:
FRUG → BRIC: -0.07999765255262897
FRUG → INNOV: -0.2776794381003554
BRIC → INNOV: 0.41300091261055083

R²:
R² BRIC  = 0.010180442019163016
R² INNOV = 0.17567543689134357
In [28]:
df_active[['FRUG_LV', 'BRIC_LV', 'INNOV_LV']].isna().sum()
Out[28]:
FRUG_LV     2
BRIC_LV     2
INNOV_LV    2
dtype: int64
In [29]:
df_active[df_active[['FRUG_LV','BRIC_LV','INNOV_LV']].isna().any(axis=1)]
Out[29]:
Marca temporal NOM GEN EAGE FOUND CAGE1 AFOUND CBASED CSECT EEXP ... SEEDF OPERF INCC FRUG_mean BRIC_mean INNOV_mean interaction FRUG_LV BRIC_LV INNOV_LV
37 7/25/2025 19:15:51 Glexco Robotics and Automation 1 55 1 1 1 3 15 0 ... 0 0 0 4.428571 4.00 5.0 17.714286 NaN NaN NaN
38 7/27/2025 3:40:23 UMA 2 25 1 2 1 2 3 0 ... 0 1 1 4.285714 4.75 3.0 20.357143 NaN NaN NaN

2 rows × 48 columns

In [30]:
df_active.loc[[37, 38], frug_items + bric_items + innov_items]
Out[30]:
FRUG1 FRUG2 FRUG3 FRUG4 FRUG5 FRUG6 FRUG7 BRIC1 BRIC2 BRIC3 BRIC4 BRIC5 BRIC6 BRIC7 BRIC8 INNOV1 INNOV2 INNOV3 INNOV4
37 5 5 4 3 4 5 5 4 4 3 4 5 4 4 4 5 5 5 5
38 5 5 4 3 4 4 5 5 5 5 5 3 5 5 5 5 1 5 1
In [31]:
df_active.loc[[37, 38], frug_items].std(axis=1)
df_active.loc[[37, 38], bric_items].std(axis=1)
df_active.loc[[37, 38], innov_items].std(axis=1)  # only this last expression is displayed below
Out[31]:
37    0.000000
38    2.309401
dtype: float64
In [32]:
# Build the LVs as simple item means
df_active['FRUG_LV']  = df_active[frug_items].mean(axis=1)
df_active['BRIC_LV']  = df_active[bric_items].mean(axis=1)
df_active['INNOV_LV'] = df_active[innov_items].mean(axis=1)

# Now there should be no NaN
df_active[['FRUG_LV', 'BRIC_LV', 'INNOV_LV']].isna().sum()
# should be 0 for all three

# And the PLS now uses all 37 cases:
df_pls = df_active[['FRUG_LV', 'BRIC_LV', 'INNOV_LV']]  # no dropna needed
In [33]:
from sklearn.cross_decomposition import PLSRegression
import numpy as np

X1 = df_pls[['FRUG_LV']].values
Y1 = df_pls[['BRIC_LV']].values
pls1 = PLSRegression(n_components=1).fit(X1, Y1)

X2 = df_pls[['FRUG_LV', 'BRIC_LV']].values
Y2 = df_pls[['INNOV_LV']].values
pls2 = PLSRegression(n_components=1).fit(X2, Y2)

print("FRUG → BRIC:",  pls1.coef_[0, 0])
print("FRUG → INNOV:", pls2.coef_[0, 0])
print("BRIC → INNOV:", pls2.coef_[0, 1])
print("R² BRIC =",  pls1.score(X1, Y1))
print("R² INNOV =", pls2.score(X2, Y2))
FRUG → BRIC: -0.02519129956878327
FRUG → INNOV: -0.2969265226888666
BRIC → INNOV: 0.5626020708431478
R² BRIC = 0.0014589990012412457
R² INNOV = 0.18353896726416463

12. Short interpretation notes¶

  • Scale reliability: Cronbach’s alpha values are acceptable for an early-stage pilot.
  • Relationships: Bricolage tends to have a more positive link with Innovation than Frugality does.
  • Frugality: In this small sample, frugality shows a weak or even slightly negative direct association with innovation, which is interesting for my theoretical discussion.
  • Pilot nature: All results are exploratory and mainly useful to refine hypotheses and the final research design.