# Metrics (captured notebook output, kept for reference):
#
#               precision  recall  f1-score  support
#            0      0.783   0.837     0.809       43
#            1      0.562   0.474     0.514       19
#     accuracy                        0.726       62
#    macro avg      0.673   0.655     0.662       62
# weighted avg      0.715   0.726     0.719       62
#
# (2025-09-30T12:48:28 image/svg+xml Matplotlib v3.10.6 — exported figure metadata)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.decomposition import PCA

# ===== 1) Load the CSV =====
df = pd.read_csv('https://raw.githubusercontent.com/marcelademartini/Machine-Learning-1/refs/heads/main/Testing.csv')

# Pick the target column: 'Outcome' when present, otherwise the last column.
target = 'Outcome' if 'Outcome' in df.columns else df.columns[-1]

# Features/label split; one-hot encode any categorical features.
features = df.drop(columns=[target])
X = pd.get_dummies(features, drop_first=True)
y = df[target]

# Encode a non-numeric label as integer codes.
if not np.issubdtype(y.dtype, np.number):
    y = pd.factorize(y)[0]

# Impute missing values with per-column medians.
X = X.fillna(X.median(numeric_only=True))

# ===== 2) Split + scale =====
# Stratify only when there is more than one class in the label.
stratify_on = y if len(np.unique(y)) > 1 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_on
)

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# ===== 3) Fit KNN =====
k = 3
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train_s, y_train)
y_pred = knn.predict(X_test_s)

# ===== 4) Metrics =====
acc = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred, digits=3))


# ===== Helper: emit a figure as inline SVG on stdout =====
def print_svg_current_fig():
    """Serialize the active Matplotlib figure to stdout as SVG, then close it."""
    with StringIO() as sink:
        plt.savefig(sink, format="svg", transparent=True, bbox_inches="tight")
        svg_markup = sink.getvalue()
    print(svg_markup)
    plt.close()

# ===== 5) Confusion matrix (test set) =====
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(5, 4), dpi=120)
plt.imshow(cm, interpolation='nearest')
plt.title("Matriz de Confusão (teste)")
plt.xlabel("Predito")
plt.ylabel("Real")
# Annotate every cell with its count (row-major, same layout as imshow).
for row, col in np.ndindex(cm.shape):
    plt.text(col, row, str(cm[row, col]), ha="center", va="center")
plt.colorbar()
print_svg_current_fig()

# ===== 6) 2D (PCA) decision-boundary visualization =====
if X_train.shape[1] >= 2:
    pca = PCA(n_components=2, random_state=42)
    X_train_2d = pca.fit_transform(X_train_s)
    X_test_2d = pca.transform(X_test_s)

    # Refit KNN in the projected 2D space purely for visualization.
    knn_viz = KNeighborsClassifier(n_neighbors=k).fit(X_train_2d, y_train)

    # Dense grid over the training projection, padded by 0.5 on each side.
    step = 0.05
    pad = 0.5
    x_lo, x_hi = X_train_2d[:, 0].min() - pad, X_train_2d[:, 0].max() + pad
    y_lo, y_hi = X_train_2d[:, 1].min() - pad, X_train_2d[:, 1].max() + pad
    xx, yy = np.meshgrid(np.arange(x_lo, x_hi, step), np.arange(y_lo, y_hi, step))
    Z = knn_viz.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure(figsize=(6, 5), dpi=120)
    plt.contourf(xx, yy, Z, alpha=0.30)
    plt.scatter(X_train_2d[:, 0], X_train_2d[:, 1], c=y_train, s=20, marker='o', label='treino')
    plt.scatter(X_test_2d[:, 0], X_test_2d[:, 1], c=y_test, s=40, marker='x', label='teste')
    plt.title(f"Fronteira de Decisão (PCA 2D) — KNN k={k}")
    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.legend(loc="best")
    print_svg_current_fig()

# (2025-09-30T12:48:28 image/svg+xml Matplotlib v3.10.6 — exported figure metadata)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import StringIO
from sklearn.cluster import KMeans

# Use the CSV as the data source (first two numeric columns, to keep the same plot).
df = pd.read_csv('https://raw.githubusercontent.com/marcelademartini/Machine-Learning-1/refs/heads/main/Testing.csv')
X_num = df.select_dtypes(include=[np.number]).dropna()

if X_num.shape[1] >= 2:
    X = X_num.iloc[:, :2].to_numpy()
else:
    # With a single numeric column, duplicate it so a 2D plot is still possible.
    single = X_num.iloc[:, 0].to_numpy().reshape(-1, 1)
    X = np.hstack([single, single])


plt.figure(figsize=(12, 10))

# Run K-Means (fixed seed for reproducible cluster assignments).
kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=100, random_state=42)
labels = kmeans.fit_predict(X)

# Plot results: points colored by cluster, centroids as red stars.
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', s=50)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            c='red', marker='*', s=200, label='Centroids')
plt.title('K-Means Clustering Results')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()

# Emit the figure as inline SVG on stdout, then close it so the figure
# doesn't linger in pyplot's registry (was previously never closed).
buffer = StringIO()
plt.savefig(buffer, format="svg", transparent=True)
print(buffer.getvalue())
plt.close()