[Machine_learning]Elastic Net regression

Recarrdo 2023. 10. 16. 17:02
코드

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import ElasticNet, ElasticNetCV
import numpy as np

# Load the raw dataset from disk.
data = pd.read_csv('c:/users/hjhjhj/desktop/shell.csv')

# Binary target: 1 where STATUS_CONF is exactly "validated", 0 for anything else.
# A vectorized comparison replaces the per-row lambda; the result is cast to
# int64 so the column keeps an integer dtype.
data['STATUS_BINARY'] = (data['STATUS_CONF'] == "validated").astype('int64')

# One-hot encode the remaining non-numeric columns. STATUS_CONF is dropped because
# STATUS_BINARY is derived from it; IP_INFO is excluded from the features as well.
# drop_first=True omits the first level of each encoded column.
data_dummies = pd.get_dummies(data.drop(columns=['STATUS_CONF', 'IP_INFO']), drop_first=True)

# Split features from the target, then hold out 20% of the rows as a test set.
# random_state is fixed so the split is reproducible.
X = data_dummies.drop('STATUS_BINARY', axis=1)
y = data_dummies['STATUS_BINARY']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter search: ElasticNetCV tries the l1_ratio and alpha candidates below
# with 5-fold cross-validation on the training split and keeps the best pair
# (exposed afterwards as elastic_cv.alpha_ and elastic_cv.l1_ratio_).
elastic_cv = ElasticNetCV(
    l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
    alphas=np.logspace(-6, 6, 13),  # 13 log-spaced values from 1e-6 to 1e6
    cv=5,
    max_iter=10000,
).fit(X_train, y_train)

# Fit a standalone ElasticNet using the alpha and l1_ratio chosen by the CV search
# (same max_iter as the search), then measure its error on the held-out test split.
# NOTE(review): ElasticNetCV may already hold a model refit on the training data
# with these settings, which would make this second fit redundant — confirm.
elastic_model = ElasticNet(
    alpha=elastic_cv.alpha_,
    l1_ratio=elastic_cv.l1_ratio_,
    max_iter=10000,
).fit(X_train, y_train)
elastic_y_pred = elastic_model.predict(X_test)
elastic_mse = mean_squared_error(y_true=y_test, y_pred=elastic_y_pred)

# Report the selected hyperparameters and the test-set error in a single call;
# sep="\n" puts each message on its own line.
print(
    f"Optimal alpha for Elastic Net regression: {elastic_cv.alpha_}",
    f"Optimal l1_ratio for Elastic Net regression: {elastic_cv.l1_ratio_}",
    f"MSE for the test set using Elastic Net regression: {elastic_mse:.6f}",
    sep="\n",
)