Source code for graforvfl.network.gfo_rvfl_tuner

#!/usr/bin/env python
# Created by "Thieu" at 22:28, 02/04/2025 ----------%                                                                               
#       Email: nguyenthieu2102@gmail.com            %                                                    
#       Github: https://github.com/thieu1995        %                         
# --------------------------------------------------%

import itertools
import numpy as np
import random
from graforvfl.shared.scorer import get_all_classification_metrics, get_all_regression_metrics
from sklearn.model_selection import train_test_split
from graforvfl.network.gfo_rvfl_cv import GfoRvflCV


[docs]class GfoRvflTuner: """ Hyperparameter tuner for the metaheuristic algorithm used in GfoRvflCV. Parameters ---------- problem_type : str, default="regression" The type of problem you are trying to solve (regression or classification) bounds : list, default=None The boundary for parameters of RVFL network. cv : int, default=None The k fold cross-validation method. scoring : str The name of objective for the problem, also depend on the problem is classification and regression. optim : str or instance of Optimizer class (from Mealpy library), default = "BaseGA" The Metaheuristic Algorithm that use to solve the feature selection problem. Current supported list, please check it here: https://github.com/thieu1995/mealpy. If a custom optimizer is passed, make sure it is an instance of `Optimizer` class. optim_param_grid : dict Dictionary of hyperparameter ranges for the metaheuristic algorithm. If `dict` is passed, make sure it has at least `epoch` and `pop_size` parameters. scoring : str, default="MSE" The evaluation metric used to compare different optimization settings. cv : int, default=None Number of cross-validation folds. search_type : str, default="random" - "grid" for exhaustive grid search. - "random" for randomized search. n_iter : int, default=10 Number of random search iterations (only used when search_type="random"). seed: int, default=None Determines random number generation for weights and bias initialization. Pass an int for reproducible results across multiple function calls. verbose : bool, default=False Whether to print progress messages to stdout. mode : str, optional Mode for optimization (default is 'single'). n_workers : int, optional Number of workers for parallel processing (default is None). termination : any, optional Termination criteria for optimization (default is None). Attributes ---------- best_optim_params : dict The best found hyperparameters for the metaheuristic optimizer. best_score : float The best evaluation score. best_searcher : GfoRvflCV The best trained model using the optimized metaheuristic parameters. """ SUPPORTED_CLS_METRICS = get_all_classification_metrics() SUPPORTED_REG_METRICS = get_all_regression_metrics() def __init__(self, problem_type="regression", bounds=None, optim="OriginalWOA", optim_param_grid=None, scoring="MSE", cv=None, search_type="random", n_iter=10, seed=None, verbose=True, mode='single', n_workers=None, termination=None, **kwargs): self.problem_type = problem_type self.bounds = bounds self.optim = optim self.optim_param_grid = optim_param_grid self.scoring = scoring self.cv = cv self.search_type = search_type self.n_iter = n_iter self.seed = seed self.verbose = verbose self.generator = np.random.default_rng(seed) self.mode = mode self.n_workers = n_workers self.termination = termination self.best_optim_params = None self.best_searcher = None self.kwargs = kwargs if problem_type == "regression": self.minmax = self.SUPPORTED_REG_METRICS[scoring] else: self.minmax = self.SUPPORTED_CLS_METRICS[scoring] if self.minmax == "min": self.best_score = np.inf else: self.best_score = -np.inf def _get_param_combinations(self): """Generate parameter combinations based on search type.""" param_keys = list(self.optim_param_grid.keys()) param_values = list(self.optim_param_grid.values()) if self.search_type == "grid": return [dict(zip(param_keys, values)) for values in itertools.product(*param_values)] elif self.search_type == "random": random.seed(self.seed) return [ {k: random.choice(v) for k, v in self.optim_param_grid.items()} for _ in range(self.n_iter) ] else: raise ValueError("search_type must be 'grid' or 'random'.")
[docs] def fit(self, X, y): """Optimize the metaheuristic parameters for GfoRvflCV.""" param_combinations = self._get_param_combinations() for idx, optim_params in enumerate(param_combinations): if self.verbose: print(f"Testing {idx+1}/{len(param_combinations)}: {optim_params}") # Clone base model and update optimization parameters model = GfoRvflCV(problem_type=self.problem_type, bounds=self.bounds, optim=self.optim, optim_params=optim_params, scoring=self.scoring, cv=self.cv, seed=self.seed, verbose=self.verbose, mode=self.mode, n_workers=self.n_workers, termination=self.termination, **self.kwargs) # Perform cross-validation scores = [] for _ in range(self.cv): X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=1.0/self.cv, random_state=self.seed) model.fit(X_train, y_train) score = model.best_estimator.score(X_val, y_val) scores.append(score) avg_score = np.mean(scores) # Update best parameters if ((self.minmax == "max" and avg_score > self.best_score) or (self.minmax == "min" and avg_score < self.best_score)): self.best_score = avg_score self.best_optim_params = optim_params self.best_searcher = model if self.verbose: print(f"Best optimizer parameters: {self.best_optim_params}") print(f"Best score: {self.best_score}") return self
[docs] def predict(self, X): """Predict using the best found estimator.""" if self.best_searcher is None: raise ValueError("Tuner has not been fitted yet. Call fit() first.") return self.best_searcher.predict(X)
[docs] def score(self, X, y): """Evaluate the best model on given data.""" if self.best_searcher is None: raise ValueError("Tuner has not been fitted yet. Call fit() first.") return self.best_searcher.score(X, y)