import matplotlib.pyplot as plt

# Sample series for a minimal line chart.
x = [1, 2, 3, 4, 5]
y = [10, 14, 12, 18, 20]

# Work with an explicit figure/axes pair rather than pyplot's implicit
# "current axes" state.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(x, y, marker="o")
ax.set(title="Simple Line Plot", xlabel="X values", ylabel="Y values")
ax.grid(True)
plt.show()

import matplotlib.pyplot as plt
import numpy as np

# 1000 samples from a standard normal distribution. The generator is not
# seeded, so the histogram differs from run to run.
data = np.random.randn(1000)

# Appearance options for the histogram, gathered in one place.
hist_style = {"bins": 100, "color": "pink", "alpha": 0.7}

fig, ax = plt.subplots(figsize=(10, 7))
ax.hist(data, **hist_style)
ax.set_title("Histogram of Random Data")
plt.show()

import pandas as pd

# The CSV is semicolon-delimited. Reading it with sep="," collapses every
# field into a single column, so columns such as "GEN" and "EAGE" cannot be
# selected by name afterwards. Use ";" as the separator.
df = pd.read_csv("datasets/2nd_Class_Assignmt_Data/Entrepreneurs.csv", sep=";")
df.head()

import pandas as pd

# Load the semicolon-delimited entrepreneurs dataset and preview the first rows.
entrepreneurs_csv = "datasets/2nd_Class_Assignmt_Data/Entrepreneurs.csv"
df = pd.read_csv(entrepreneurs_csv, sep=";")
df.head()

import matplotlib.pyplot as plt

# Histogram of the non-missing "EAGE" values using 12 equal-width bins.
ages = df["EAGE"].dropna()

fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(ages, bins=12, color="teal", edgecolor="black", alpha=0.7)
ax.set_title("Distribution of Entrepreneur Age", fontsize=14)
ax.set_xlabel("Age")
ax.set_ylabel("Frequency")
ax.grid(True, alpha=0.3)
plt.show()

import matplotlib.pyplot as plt
import numpy as np

# Bin edges: 5-year bands from 20 up to 55.
# NOTE: values outside the outermost edges fall in no bin and are not drawn.
bins = list(range(20, 60, 5))

fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(df["EAGE"].dropna(), bins=bins, color="teal", edgecolor="black", alpha=0.7)

# Reuse the bin edges as the x-axis tick positions.
ax.set_xticks(bins)

ax.set_title("Distribution of Entrepreneur Age", fontsize=14)
ax.set_xlabel("Age (bin edges)")
ax.set_ylabel("Frequency")
ax.grid(True, alpha=0.3)
plt.show()

# Numeric "GEN" code -> readable label (1: Male, 2: Female, 3: Other).
gender_map = dict(enumerate(("Male", "Female", "Other"), start=1))

# Store the translated labels in a new column; codes missing from the mapping
# end up as NaN.
df["GEN_label"] = df["GEN"].map(gender_map)

# Number of rows per label (NaN is excluded by value_counts' default).
gender_label_column = df["GEN_label"]
gender_counts = gender_label_column.value_counts()
gender_counts

GEN_label
Male      27
Female    12
Name: count, dtype: int64

import matplotlib.pyplot as plt

# Bar chart with one bar per gender label, in the order value_counts returned.
bar_palette = ["#4B8BBE", "#F89C74", "#9C27B0"]

fig, ax = plt.subplots(figsize=(7, 5))
ax.bar(
    gender_counts.index,
    gender_counts.values,
    color=bar_palette,
    edgecolor="black",
)

ax.set(title="Entrepreneurs by Gender", xlabel="Gender", ylabel="Count")
ax.title.set_fontsize(14)
# Horizontal guide lines only, kept faint.
ax.grid(axis="y", alpha=0.3)

plt.show()

import matplotlib.pyplot as plt

# Pie chart of the gender counts, drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(6, 6))

# Push every wedge slightly away from the centre.
wedge_offsets = [0.03] * len(gender_counts)
wedge_colors = ["#4B8BBE", "#F89C74", "#9C27B0"]

ax.pie(
    gender_counts,
    labels=gender_counts.index,
    autopct="%1.1f%%",  # print each share as a percentage
    startangle=90,  # first wedge starts at the top
    colors=wedge_colors,
    explode=wedge_offsets,
)

ax.set_title("Gender Distribution of Entrepreneurs", fontsize=14)
plt.show()

# Step 1: translate the numeric gender codes into text labels.
gender_map = dict(zip((1, 2, 3), ("Male", "Female", "Other")))
df["GEN_label"] = df["GEN"].map(gender_map)

# Step 2: count the rows in each category.
gender_counts = df["GEN_label"].value_counts()

# Step 3: pie chart.
import matplotlib.pyplot as plt

pie_options = {
    "labels": gender_counts.index,
    "autopct": "%1.1f%%",
    "startangle": 90,
    "colors": ["#4B8BBE", "#F89C74", "#9C27B0"],
    "explode": [0.05] * len(gender_counts),  # small cosmetic gap between wedges
}

plt.figure(figsize=(6, 6))
plt.pie(gender_counts, **pie_options)
plt.title("Gender Distribution of Entrepreneurs", fontsize=14)
plt.show()

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[1], line 3
      1 # 1. Mapear los valores numéricos de género a etiquetas de texto
      2 gender_map = {1: "Male", 2: "Female", 3: "Other"}
----> 3 df["GEN_label"] = df["GEN"].map(gender_map)
      5 # 2. Obtener el conteo de cada categoría
      6 gender_counts = df["GEN_label"].value_counts()

NameError: name 'df' is not defined

import pandas as pd

# Load the semicolon-delimited dataset into `df` and preview the first rows.
df = pd.read_csv(
    "datasets/2nd_Class_Assignmt_Data/Entrepreneurs.csv",
    delimiter=";",
)
df.head()

# 1. Map the numeric gender codes to text labels.
gender_map = {
    1: "Male",
    2: "Female",
    3: "Other",
}
df["GEN_label"] = df["GEN"].map(gender_map)

# 2. Count the rows in each category.
gender_counts = df["GEN_label"].value_counts()

# 3. Pie chart.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(6, 6))

# Same small offset for every wedge, purely cosmetic.
explode_offsets = [0.05 for _ in range(len(gender_counts))]

ax.pie(
    gender_counts,
    labels=gender_counts.index,
    autopct="%1.1f%%",
    startangle=90,
    colors=["#4B8BBE", "#F89C74", "#9C27B0"],
    explode=explode_offsets,
)

ax.set_title("Gender Distribution of Entrepreneurs", fontsize=14)
plt.show()

	Marca temporal;NOM;GEN;EAGE;FOUND;CAGE1;AFOUND;CBASED;CSECT;EEXP;EEDUC;INVT;MNGEXP;WEXP;SEBCK;FRUG1;FRUG2;FRUG3;FRUG4;FRUG5;FRUG6;FRUG7;BRIC1;BRIC2;BRIC3;BRIC4;BRIC5;BRIC6;BRIC7;BRIC8;INNOV1;INNOV2;INNOV3;INNOV4;CAGE2;TECHBS;ETEAM;EAOS;SEEDF;OPERF;INCC
0	4/4/2025 18:10:28;iFurniture ;2;35;1;2;1;2;9;1...
1	4/6/2025 13:09:46;Salvy Natural - Indes Perú ;...
2	4/7/2025 16:07:37;AVR Technology;1;23;1;2;1;2;...
3	4/7/2025 21:49:59;AIO SENSORS ;1;32;1;1;1;3;9;...
4	4/8/2025 17:54:07;Face Me;1;30;1;2;1;3;5;0;1;1...

	Marca temporal	NOM	GEN	EAGE	FOUND	CAGE1	AFOUND	CBASED	CSECT	EEXP	...	INNOV2	INNOV3	INNOV4	CAGE2	TECHBS	ETEAM	EAOS	SEEDF	OPERF	INCC
0	4/4/2025 18:10:28	iFurniture	2	35	1	2	1	2	9	1	...	4	2	4	1	1	1	1	1	1	1
1	4/6/2025 13:09:46	Salvy Natural - Indes Perú	2	37	1	2	1	2	12	1	...	5	5	5	1	1	1	1	0	0	0
2	4/7/2025 16:07:37	AVR Technology	1	23	1	2	1	2	15	0	...	4	4	4	0	1	1	1	1	1	1
3	4/7/2025 21:49:59	AIO SENSORS	1	32	1	1	1	3	9	0	...	4	4	4	0	1	1	1	0	1	1
4	4/8/2025 17:54:07	Face Me	1	30	1	2	1	3	5	0	...	4	4	4	1	1	0	1	1	1	1

Week 1 - 2nd Class: Tools

Using Python to visualize Data

Trying Matplotlib

Trying Matplotlib with my Data