Importando Librerias Básicas de Python¶

In [ ]:
# Importando librerias básicas
import pandas as pd
import geopandas as gpd
import numpy as np
In [ ]:
dfcorales = pd.read_csv("Corales Veracruz.csv")
dfcorales = pd.DataFrame(dfcorales)

Importando Archivo CSV con registros de especímenes del orden Cheloniidae¶

In [ ]:
# Importacion achivo csv con observaciones de ejemplares familia cheloniidae (Tortugas Marinas)
cheloniidae = pd.read_csv("cheloniidae.csv")
dfchelonidos = pd.DataFrame(cheloniidae)
#display(dfchelonidos)
#dfchelonidos.isna().sum()

Removiendo valores Nulos o NaN¶

In [ ]:
# Se remueven los NAN en la columna "taxon_genus_name"
dfchelonidos = dfchelonidos.loc[dfchelonidos["taxon_genus_name"].notna()]
#dfchelonidos1.isna().sum()

Dataframe del archivo Cheloniidae¶

In [ ]:
dfchelonidos.dtypes
Out[ ]:
0
id int64
observed_on object
user_id int64
latitude float64
longitude float64
taxon_phylum_name object
taxon_class_name object
taxon_order_name object
taxon_family_name object
taxon_genus_name object
taxon_species_name object

Convirtiendo "observed_on" objeto a cadena de tiempo¶

In [ ]:
from datetime import date
from datetime import datetime
import time
In [ ]:
dfchelonidos["observed_on"] = pd.to_datetime(dfchelonidos["observed_on"])
In [ ]:
dfchelonidos1 = dfchelonidos

Insertando columnas para "Year" y "Month"¶

In [ ]:
dfchelonidos1.insert(1,"Year", 3)
dfchelonidos1['Year'] = dfchelonidos1['observed_on'].dt.year
dfchelonidos1.insert(2, "Month", 1)
dfchelonidos1["Month"] = dfchelonidos1["observed_on"].dt.month
#dfchelonidos1.head()

Se separan las especies¶

In [ ]:
dfchelonidos1.describe()
Out[ ]:
id Year Month observed_on user_id latitude longitude
count 3.220000e+02 322.000000 322.000000 322 3.220000e+02 322.000000 322.000000
mean 8.682040e+07 2019.531056 6.409938 2020-01-09 18:06:42.484472064 2.348665e+06 19.889498 -96.327058
min 5.552830e+05 2007.000000 1.000000 2007-03-24 00:00:00 3.853000e+03 18.005020 -97.799078
25% 2.096800e+07 2018.000000 4.000000 2018-05-20 06:00:00 8.502600e+04 19.041485 -97.213413
50% 7.438455e+07 2019.000000 7.000000 2019-10-19 12:00:00 1.346550e+06 20.033715 -96.572456
75% 1.543041e+08 2022.000000 8.000000 2022-06-13 06:00:00 4.421974e+06 20.712820 -96.002179
max 2.345715e+08 2024.000000 12.000000 2024-07-28 00:00:00 8.290759e+06 22.379514 -94.032332
std 7.026009e+07 3.019247 2.544243 NaN 2.608908e+06 1.249098 1.087251
In [ ]:
# Se generan data frames independientes para cada género taxonómico

chelonia = dfchelonidos1[dfchelonidos1["taxon_genus_name"] == "Chelonia"]
lepidochelys = dfchelonidos1[dfchelonidos1["taxon_genus_name"] == "Lepidochelys"]
caretta = dfchelonidos1[dfchelonidos1["taxon_genus_name"] == "Caretta"]
eretmochelys = dfchelonidos1[dfchelonidos1["taxon_genus_name"] == "Eretmochelys"]

Se genera un Geopandas DataFrame¶

In [ ]:
# Se Genera un GeoDataFrame para los diferentes generos taxonomicos
chelonia_points = gpd.points_from_xy(chelonia.longitude, chelonia.latitude)
gdfchelonia = gpd.GeoDataFrame(chelonia, geometry = chelonia_points)
#gdfchelonia

lepidochelys_points = gpd.points_from_xy(lepidochelys.longitude, lepidochelys.latitude)
gdflepidochelys = gpd.GeoDataFrame(lepidochelys, geometry = lepidochelys_points)
# gdflepidochelys

caretta_points = gpd.points_from_xy(caretta.longitude, caretta.latitude)
gdfcaretta = gpd.GeoDataFrame(caretta, geometry = caretta_points)
# gdfcaretta

eretmochelys_points = gpd.points_from_xy(eretmochelys.longitude, eretmochelys.latitude)
gdferetmochelys = gpd.GeoDataFrame(eretmochelys, geometry = eretmochelys_points)
# gdferetmochelys
In [ ]:
# Se genera un GeoDataFrame con el archivo de corales
dfcorales_points = gpd.points_from_xy(dfcorales.longitude, dfcorales.latitude)

gdfcorales = gpd.GeoDataFrame(dfcorales, geometry = dfcorales_points)
#gdfcorales.describe()

Se importan librerias de graficado¶

In [ ]:
# Importar libreria de graficado
from matplotlib import pyplot as plt

Se importan archivo shp de los estados Unidos Mexicanos¶

In [ ]:
# Importando shape file de los EUM y se asignan los nombres de cada entidad federativa

eum = gpd.read_file(f"dest23gw.shp")
eum["area"] = eum.area
eum["boundary"] = eum.boundary
eum["centroid"] = eum.centroid

entidad = ["Aguascalientes", "Baja California", "Baja California Sur", "Campeche", "Coahuila de Zaragoza", "Colima", "Chiapas"
, "Chihuahua", "CDMX", "Durango", "Guanajuato", "Guerrero", "Hidalgo", "Jalisco", "México", "Michoacán de Ocampo", "Morelos",
"Nayarit", "Nuevo León", "Oaxaca", "Puebla", "Querétaro","Quintana Roo", "San Luis Pototsí", "Sinaloa", "Sonora", "Tabasco",
"Tamaulipas", "Tlaxcala","Veracruz de Ignacio de la Llave", "Yucatán", "Zacatecas"]

estados = eum
a = 0
for a in range(32):
 estados.loc[a, ("provincia")] = entidad[a]
#display(estados)

Seleccionando la entidad federativa Veracruz de Ignacio de la Llave¶

In [ ]:
# El archivo de corales comprende los arrecifes coralinos en la entidad federativa de Veracruz
veracruz = estados[estados.provincia == "Veracruz de Ignacio de la Llave"]
In [ ]:
# Se grafican en mapa los arrecifes de coral del estado de Veracruz
lienzo = veracruz.plot(figsize =(10, 10), color="cyan")
gdfcorales.plot(ax = lienzo, color = "green", marker = "*", label = "Arrecifes Coralinos")
gdfchelonia.plot(ax = lienzo, color ="red", marker = "v", label ="Chelonia mydas")
gdflepidochelys.plot(ax = lienzo, color = "yellow", marker = "<", label = "Lepidochelys kempii")
gdfcaretta.plot(ax = lienzo, color ="black", marker =">", label ="Caretta caretta")
gdferetmochelys.plot(ax = lienzo, color ="purple", marker = "o", label = "Eretmochelys imbricata")
lienzo.set_ylabel("Latitud Norte")
lienzo.set_xlabel("Longitud")
plt.title("Observaciones de Cheloniidae obtenidas de iNaturalis Mexico")
plt.legend()
plt.grid()
plt.savefig("Observaciones.png")
plt.show()
No description has been provided for this image

seaborn librerias¶

In [ ]:
# Usando seaborn utilerias
import seaborn as sns

Histograma de las especies de Chelonidos¶

In [ ]:
# Tamaño del gráfico
#plt.figure(figsize=(6, 6))

fig, (ax0, ax1, ax2) = plt.subplots(nrows = 1, ncols = 3, figsize =(14,5))

# Crear un histograma
ax0.hist(dfchelonidos1['taxon_genus_name'], facecolor = "green")
ax1.hist(dfchelonidos1["Year"], facecolor = "red", bins = 18, edgecolor = "black", rwidth= 0.8)
ax2.hist(dfchelonidos1['Month'], facecolor = "orange", bins = 12, edgecolor = "black" , rwidth= 0.8)

# Título del grafico
ax0.set_title('Histograma Géneros')
ax0.set_xlabel('Género')
ax0.set_ylabel('Ocurrencia')

ax1.set_title('Histograma por Año')
ax1.set_xlabel('Año')
ax1.set_ylabel('Ocurrencia')

ax2.set_title("Histograma por Mes")
ax2.set_xlabel("Mes")
ax2.set_ylabel("Ocurrencia")
plt.savefig("Histograma.png")
plt.show()
No description has been provided for this image

Mapa de las observaciones Año / Mes¶

In [ ]:
plt.figure(figsize=(6, 6))
ax = sns.scatterplot(data=dfchelonidos1, x='Year', y='Month', hue='taxon_genus_name', palette='viridis', alpha=0.8)
ax.set(xlabel='Año', ylabel='Mes del Año')
plt.savefig("Dsitribucion.png")
plt.title("Distribucion Chelonidae en Veracruz iNaturalist Mexico")
Out[ ]:
Text(0.5, 1.0, 'Distribucion Chelonidae en Veracruz iNaturalist Mexico')
No description has been provided for this image

Grafico Seaborn Pairplot de los datos del archivo¶

In [ ]:
plt.figure(figsize=(12, 16))
ax1 = sns.pairplot(dfchelonidos1, hue ="taxon_genus_name", x_vars=["Year", "Month", "longitude"], y_vars=["latitude", "Year"])
plt.savefig("Pairplot.png")
<Figure size 1200x1600 with 0 Axes>
No description has been provided for this image

Opciones de regresiones en seaborn¶

Se opta por un análisis de los datos de "Year" y "latitude"

In [ ]:
ax = sns.lmplot(data= dfchelonidos1, x="Year", y="latitude", hue="taxon_genus_name")
plt.title("Regresion lineal por taxon_genus_name")
plt.savefig("Reglineal1.png")
plt.show()
No description has been provided for this image
In [ ]:
sns.regplot(data= dfchelonidos1, x="Year", y="latitude", order = 3, color= "green")
plt.title("Regresion Lineal Polinomial orden 3")
plt.savefig("Polinonreg.png")
plt.show()
No description has been provided for this image

Cargando librerias de AI. Implementando la regresion lineal, polinomial, etcétera¶

In [ ]:
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict

Regresión Lineal¶

In [ ]:
X = np.array(dfchelonidos1['Year']).reshape(-1, 1)
y = np.array(dfchelonidos1['latitude']).reshape(-1, 1)

# 80 % Datos para entrenamiento y 20 % para el test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 42)
In [ ]:
from sklearn.linear_model import LinearRegression

lin_df = LinearRegression()
lin_df.fit(X_train, y_train)

y_pred = lin_df.predict(X_test)                                     # Predict Linear Model
accuracy_score = lin_df.score(X_test, y_test)                       # Accuracy score
print("Linear Regression Model Accuracy Score: " + "{:.1%}".format(accuracy_score))

from sklearn.metrics import mean_squared_error,r2_score

print("R2 Score: " +"{:.3}".format(r2_score(y_test, y_pred)));
Linear Regression Model Accuracy Score: 3.0%
R2 Score: 0.0303
In [ ]:
plt.scatter(X_test, y_test, color='r')
plt.plot(X_test, y_pred, color='g')
plt.title("Regresión Lineal")
plt.savefig("Reglineal2.png")
plt.show()
No description has been provided for this image

Regresion Lineal Multiple¶

In [ ]:
mlin_df = LinearRegression()
mlin_df = mlin_df.fit(X_train, y_train)
mlin_df.intercept_       # constant b0
mlin_df.coef_            # variable coefficient
Out[ ]:
array([[0.07342474]])
In [ ]:
y_pred = mlin_df.predict(X_train)                                      # predict Multi linear Reg model
rmse = np.sqrt(mean_squared_error(y_train, mlin_df.predict(X_train)))
print("RMSE Score for Test set: " +"{:.2}".format(rmse))
print("R2 Score for Test set: " +"{:.3}".format(r2_score(y_train, y_pred)));      # this is test error score
RMSE Score for Test set: 1.2
R2 Score for Test set: 0.0305
In [ ]:
# cross validation method is giving better and clear result

cross_val_score(mlin_df, X, y, cv=10, scoring = 'r2').mean()
Out[ ]:
-0.3748731049698884
In [ ]:
mlin_df.score(X_train, y_train)      # r2 value
Out[ ]:
0.030492579815917464
In [ ]:
np.sqrt(-cross_val_score(mlin_df,
                X_train,
                y_train,
                cv=10,
                scoring = 'neg_mean_squared_error')).mean()
Out[ ]:
1.2170443161570703
In [ ]:
# Finally draw figure of Multiple Linear Regression Model

import operator

plt.scatter(X_train, y_train, s=100)

# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(X_train,y_pred), key=sort_axis)
X_test, y_pred = zip(*sorted_zip)
plt.plot(X_train, y_train, color='g')
#plt.plot(X_train, y_pred, color ="green")
plt.xlabel("Año")
plt.ylabel("Latitud Norte")
plt.title("Regresión Lineal Múltiple")
plt.savefig("Multilineal.png")
plt.show()
No description has been provided for this image

Decision Tree Regressor¶

In [ ]:
from sklearn.tree import DecisionTreeRegressor

dt_reg = DecisionTreeRegressor(random_state= 0)          # create  DecisionTreeReg with sklearn
dt_reg.fit(X_train,y_train)
Out[ ]:
DecisionTreeRegressor(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeRegressor(random_state=0)
In [ ]:
dt_predict = dt_reg.predict(X_train)
#dt_predict.mean()
In [ ]:
plt.scatter(X_train,y_train, color="red")                           # scatter draw
X_grid = np.arange(min(np.array(X_train)),max(np.array(X_train)), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.plot(X_grid,dt_reg.predict(X_grid),color="g")                 # line draw
plt.xlabel("Año")
plt.ylabel("Latitud Norte")
plt.title("Decision Tree Model")
plt.savefig("Desiciontreechart.png")
plt.show()
<ipython-input-68-060ab9508324>:2: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  X_grid = np.arange(min(np.array(X_train)),max(np.array(X_train)), 0.01)
No description has been provided for this image
In [ ]:
rmse = np.sqrt(mean_squared_error(y_train,dt_predict))
r2 = r2_score(y_train,dt_predict)
print("RMSE Score for Test set: " +"{:.2}".format(rmse))
print("R2 Score for Test set: " +"{:.2}".format(r2))
RMSE Score for Test set: 1.1
R2 Score for Test set: 0.17
In [ ]:
# predicting a new value
A = 2012
# test the output by changing values, like 3750
y_pred = dt_reg.predict([[A]])

# print the predicted price
print("Predicción de Latitud Norte: % d\n"% y_pred, "Año:   ", A)
Predicción de Latitud Norte:  19
 Año:    2012
<ipython-input-72-17c74fe7f4db>:7: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  print("Predicción de Latitud Norte: % d\n"% y_pred, "Año:   ", A)

Generando diagrama de flujo o árbol de desiciones

In [ ]:
# import export_graphviz
from sklearn.tree import export_graphviz

# export the decision tree to a tree.dot file
# for visualizing the plot easily anywhere
export_graphviz(dt_reg, out_file ='Decisiontree.dot',
			feature_names =['Año'])
In [ ]:
import pydot

(graph,) = pydot.graph_from_dot_file('Decisiontree.dot')
graph.write_png('Decisiontree.png')

Regresion Polinomial¶

In [ ]:
from sklearn.preprocessing import PolynomialFeatures

poly_df = PolynomialFeatures(degree = 4)
transform_poly = poly_df.fit_transform(X_train)

linreg2 = LinearRegression()
linreg2.fit(transform_poly,y_train)

polynomial_predict = linreg2.predict(transform_poly)
In [ ]:
rmse = np.sqrt(mean_squared_error(y_train,polynomial_predict))
r2 = r2_score(y_train,polynomial_predict)
print("RMSE Score for Test set: " +"{:.2}".format(rmse))
print("R2 Score for Test set: " +"{:.2}".format(r2))
RMSE Score for Test set: 1.2
R2 Score for Test set: 0.046
In [ ]:
plt.scatter(X_train, y_train, s=50, color = "green")
# sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(X_train,polynomial_predict), key=sort_axis)
X_train, polynomial_predict = zip(*sorted_zip)
plt.plot(X_train, polynomial_predict, color='m')
plt.xlabel("Año")
plt.ylabel("Latitud Norte")
plt.title("Regresión Polinomial grado 4")
plt.savefig("Polinomial.png")
plt.show()
No description has been provided for this image

Random Forest Regressor¶

In [ ]:
from sklearn.ensemble import RandomForestRegressor

rf_reg = RandomForestRegressor(n_estimators=5, random_state=0)
rf_reg.fit(X_train,y_train)
rf_predict = rf_reg.predict(X_train)
#rf_predict.mean()
/usr/local/lib/python3.10/dist-packages/sklearn/base.py:1152: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return fit_method(estimator, *args, **kwargs)
In [ ]:
plt.scatter(X_train,y_train, color="red")                           # scatter draw
X_grid = np.arange(min(np.array(X_train)),max(np.array(X_train)), 0.01)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.plot(X_grid,rf_reg.predict(X_grid),color="b")                 # line draw
plt.xlabel("Año")
plt.ylabel("Latitud Norte")
plt.title("Random Forest Regressor")
plt.savefig("Randomforest.png")
plt.show()
<ipython-input-46-efe0b1bcaed6>:2: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  X_grid = np.arange(min(np.array(X_train)),max(np.array(X_train)), 0.01)
No description has been provided for this image
In [ ]:
mse = np.sqrt(mean_squared_error(y_train,rf_predict))
r2 = r2_score(y_train,rf_predict)
print("RMSE Score for Test set: " +"{:.2}".format(rmse))
print("R2 Score for Test set: " +"{:.2}".format(r2))
RMSE Score for Test set: 1.2
R2 Score for Test set: 0.043

Algoritmo K-Means¶

In [ ]:
#Algoritmo K-Means
from sklearn.cluster import KMeans

head = ["Year", "latitude"]

   # Nuevo DataFrame
df2 = dfchelonidos1[head]

# Cantidad de grupos a producir
groups=3

# Generación y entrenamiento del modelo.
kmeans = KMeans(n_clusters=groups).fit(df2)

# Uso del modelo con los datos.
y_kmeans = kmeans.predict(df2)

#Graficación de los resultados
plt.scatter(df2['Year'], df2['latitude'], c=y_kmeans, s=20, cmap='viridis')
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=70, alpha=0.5)
plt.ylabel('Latitude Norte')
plt.xlabel('Año')
plt.title("Primera aproximación al valor de k en el clustering 3 grupos")
plt.savefig("Kmeans1st.png")
plt.show()
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
No description has been provided for this image
In [ ]:
y_kmeans
Out[ ]:
array([1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 0, 2, 2, 0, 0, 0, 2,
       2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 1, 2,
       0, 0, 1, 1, 1, 2, 0, 0, 2, 2, 2, 2, 1, 0, 1, 0, 0, 0, 2, 0, 0, 1,
       0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int32)

Centros de Aglomeramiento¶

In [ ]:
# Imprimiendo las coordenadas de "centers" obtenidos por el método K-Means
print("Coordenadas de centros de aglomeramiento seleccionado 3 grupos (Año y Latitud):", "\n", centers)
Coordenadas de centros de aglomeramiento seleccionado 3 grupos (Año y Latitud): 
 [[2018.32335329   19.55299184]
 [2022.37692308   20.2762124 ]
 [2012.8          20.12644468]]

Seleccion de K por el Elbow Method¶

In [ ]:
# DataFrame df2 a arreglo lineal usando librerias Numpy
X0 = df2.to_numpy()

# Se calula el mejor k con el Algoritmo del Codo.
# calculamos la distorcion para diferentes k

# Se calcula el error cuadrado con k diferentes (distorción)
distortions = []
for i in range(1, 11):
    km = KMeans(
        n_clusters=i, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0
    )
    km.fit(X0)
    distortions.append(km.inertia_)

# plot
plt.plot(range(1, 11), distortions, marker='o')
plt.xlabel('Número de agrupamientos')
plt.ylabel('Distorción')
plt.title("Selección del k por el método del codo")
plt.savefig("Kmean_elbow.png")
plt.show()
No description has been provided for this image

Centros de agrupamientos despues de seleccion K-means¶

In [ ]:
# Cantidad de grupos a producir
groups=4

# Generación y entrenamiento del modelo.
kmeans = KMeans(n_clusters=groups).fit(df2)

# Uso del modelo con los datos.
y_kmeans = kmeans.predict(df2)

#Graficación de los resultados
plt.scatter(df2['Year'], df2['latitude'], c=y_kmeans, s=20, cmap='viridis')
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=70, alpha=0.5)
plt.ylabel('Latitude Norte')
plt.xlabel('Año')
plt.title("Centros de aglomeramiento o clustering con un valor k = 4")
plt.savefig("Kmeans2d.png")
plt.show()
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
No description has been provided for this image
In [ ]:
print("Coordenadas de los centros de agrupamiento con un k =4 (Año y Latitud)", "\n", centers)
Coordenadas de los centros de agrupamiento con un k =4 (Año y Latitud) 
 [[2012.8          20.12644468]
 [2019.93333333   20.34035097]
 [2022.97802198   20.2110721 ]
 [2017.67326733   19.07240433]]

Agglomerative Clustering¶

In [ ]:
from sklearn.cluster import AgglomerativeClustering

# define the Agglomerative Clustering model
model = AgglomerativeClustering(n_clusters=4)
# fit model and predict clusters
yhat = model.fit_predict(X0)
# retrieve unique clusters
clusters = np.unique(yhat)
# create scatter plot for samples from each cluster
for cluster in clusters:
 # get row indexes for samples with this cluster
 row_ix = np.where(yhat == cluster)
 # create scatter of these samples
 plt.scatter(X0[row_ix, 0], X0[row_ix, 1])
# show the plot
plt.xlabel("Año")
plt.ylabel("Latitud Norte")
plt.title("Gráfico de Aglomeramiento")
plt.savefig("Clusteraglo.png")
plt.show()
No description has been provided for this image

Propagación de Afinidad en agrupamientos¶

In [ ]:
# Affinity propagation
from sklearn import metrics
from sklearn.cluster import AffinityPropagation

af = AffinityPropagation(random_state=0)
af.fit(X0)
cluster_centers_indices = af.cluster_centers_indices_
labels = af.labels_
centroids = af.cluster_centers_
n_clusters_ = len(cluster_centers_indices)
print("Estimated number of clusters: %d" % n_clusters_)

# plot del resultado



colors = plt.cycler("color", plt.cm.viridis(np.linspace(0, 1, n_clusters_)))

for k, col in zip(range(n_clusters_), colors):
    class_members = labels == k
    cluster_center = X0[cluster_centers_indices[k]]
    plt.scatter(
        X0[class_members, 0], X0[class_members, 1], color=col["color"], marker="."
    )
    plt.scatter(
        cluster_center[0], cluster_center[1], s=14, color=col["color"], marker="o"
    )
    for x in X0[class_members]:
        plt.plot(
            [cluster_center[0], x[0]], [cluster_center[1], x[1]], color=col["color"]
        )

plt.title("Número de clusters: %d" % n_clusters_)
plt.xlabel("Año")
plt.ylabel("Latitud Norte")
plt.savefig("Afinitycluster.png")
plt.show()
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_affinity_propagation.py:142: ConvergenceWarning: Affinity propagation did not converge, this model may return degenerate cluster centers and labels.
  warnings.warn(
Estimated number of clusters: 19
No description has been provided for this image
In [ ]:
display(centroids)
array([[2016.        ,   20.09333514],
       [2007.        ,   18.59238795],
       [2017.        ,   18.11792346],
       [2011.        ,   20.12794831],
       [2018.        ,   18.10142267],
       [2019.        ,   20.50839907],
       [2018.        ,   20.32091736],
       [2019.        ,   19.46657916],
       [2014.        ,   19.44047591],
       [2020.        ,   20.16581915],
       [2020.        ,   18.56772893],
       [2021.        ,   20.53207824],
       [2018.        ,   22.18332345],
       [2014.        ,   22.14812337],
       [2023.        ,   18.89283089],
       [2021.        ,   22.179694  ],
       [2023.        ,   22.02307634],
       [2023.        ,   20.39710079],
       [2024.        ,   20.17598433]])

Nearest Neighbors Classification¶

Insertando columna en el DataFrame "chelonidos1" para asignar valor a cada taxon_genus_name:

1 = caretta
2 = chelonia
3 = eretmochelys
4 = lepidochelys

In [ ]:
dfchelonidos1.insert(11, "target", 0)

dfchelonidos1.loc[(dfchelonidos1["taxon_genus_name"] == "Chelonia"), "target"]  = 1
dfchelonidos1.loc[(dfchelonidos1["taxon_genus_name"] == "Caretta"), "target"]  = 2
dfchelonidos1.loc[(dfchelonidos1["taxon_genus_name"] == "Eretmochelys"), "target"]  = 3
dfchelonidos1.loc[(dfchelonidos1["taxon_genus_name"] == "Lepidochelys"), "target"]  = 4
In [ ]:
from sklearn.model_selection import train_test_split

X = dfchelonidos1[[ "Year", "latitude"]]
y = dfchelonidos1.target
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size= 0.80, stratify=y, random_state=10)
In [ ]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

clf = Pipeline(
    steps=[("scaler", StandardScaler()), ("knn", KNeighborsClassifier(n_neighbors=11))]
)
In [ ]:
import matplotlib.pyplot as plt

from sklearn.inspection import DecisionBoundaryDisplay

_, axs = plt.subplots(ncols=2, figsize=(12, 5))

target_names = ["Caretta" , "Chelonia", "Eretmochelys", "Lepidochelys"]

for ax, weights in zip(axs, ("uniform", "distance")):
    clf.set_params(knn__weights=weights).fit(X_train, y_train)
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X_test,
        response_method="predict",
        plot_method="pcolormesh",
       # xlabel=dfchelonidos1.feature_names[4],
       # ylabel=dfchelonidos1.feature_names[3],
        xlabel = "Año",
        ylabel = "Latitud Norte",
        shading="auto",
        alpha=0.5,
        ax=ax,
    )
    scatter = disp.ax_.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y, edgecolors="k")
    disp.ax_.legend(
        scatter.legend_elements()[0],
        #iris.target_names,
        #dfchelonidos1["taxon_genus_name"],
        target_names,
        loc="lower left",
        title="Classes",
    )
    _ = disp.ax_.set_title(
        f"4-Class classification\n(k={clf[-1].n_neighbors}, weights={weights!r})"
    )

plt.show()
No description has been provided for this image