diff --git a/CHANGES.md b/CHANGES.md index 909bebc..8a15902 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +# version 0.17.2 + +- `self.n_classes_ = len(np.unique(y))` # for compatibility with sklearn + # version 0.17.1 - `preprocess`ing for all `LazyDeep*` diff --git a/nnetsauce/boosting/adaBoostClassifier.py b/nnetsauce/boosting/adaBoostClassifier.py index 0b190f5..bd79c27 100644 --- a/nnetsauce/boosting/adaBoostClassifier.py +++ b/nnetsauce/boosting/adaBoostClassifier.py @@ -248,6 +248,7 @@ def fit(self, X, y, sample_weight=None, **kwargs): # training n, p = X.shape self.n_classes = len(np.unique(y)) + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn if sample_weight is None: w_m = np.repeat(1.0 / n, n) diff --git a/nnetsauce/custom/customClassifier.py b/nnetsauce/custom/customClassifier.py index 489dfe1..ecdfbe9 100644 --- a/nnetsauce/custom/customClassifier.py +++ b/nnetsauce/custom/customClassifier.py @@ -187,6 +187,7 @@ def fit(self, X, y, sample_weight=None, **kwargs): """ output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn # if sample_weights, else: (must use self.row_index) if sample_weight is not None: diff --git a/nnetsauce/deep/deepClassifier.py b/nnetsauce/deep/deepClassifier.py index 4ef2a95..79958de 100644 --- a/nnetsauce/deep/deepClassifier.py +++ b/nnetsauce/deep/deepClassifier.py @@ -101,6 +101,8 @@ def fit(self, X, y): if isinstance(X, np.ndarray): X = pd.DataFrame(X) + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn + # init layer self.stacked_obj = CustomClassifier( obj=self.stacked_obj, diff --git a/nnetsauce/glm/glmClassifier.py b/nnetsauce/glm/glmClassifier.py index bcba564..78348db 100644 --- a/nnetsauce/glm/glmClassifier.py +++ b/nnetsauce/glm/glmClassifier.py @@ -221,6 +221,8 @@ def fit(self, X, y, **kwargs): y ), "y must contain only integers" # change is_factor and subsampling everywhere + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn + self.beta_ = None n, p = X.shape diff --git a/nnetsauce/lazypredict/lazydeepClassifier.py b/nnetsauce/lazypredict/lazydeepClassifier.py index 6564ded..a935125 100644 --- a/nnetsauce/lazypredict/lazydeepClassifier.py +++ b/nnetsauce/lazypredict/lazydeepClassifier.py @@ -240,7 +240,7 @@ def fit(self, X_train, X_test, y_train, y_test): categorical_low, categorical_high = get_card_split( X_train, categorical_features ) - + if self.preprocess is True: preprocessor = ColumnTransformer( transformers=[ @@ -301,72 +301,49 @@ def fit(self, X_train, X_test, y_train, y_test): ) ] ) - + if self.preprocess is True: - - for name, model in tqdm(self.classifiers): # do parallel exec - - other_args = ( + + for name, model in tqdm(self.classifiers): # do parallel exec + + other_args = ( {} ) # use this trick for `random_state` too --> refactor - try: - if ( - "n_jobs" in model().get_params().keys() - and name.find("LogisticRegression") == -1 - ): - other_args["n_jobs"] = self.n_jobs - except Exception: - pass - - start = time.time() - - try: - if "random_state" in model().get_params().keys(): - layer_clf = CustomClassifier( - obj=model(random_state=self.random_state), - n_hidden_features=self.n_hidden_features, - activation_name=self.activation_name, - a=self.a, - nodes_sim=self.nodes_sim, - bias=self.bias, - dropout=self.dropout, - direct_link=self.direct_link, - n_clusters=self.n_clusters, - cluster_encode=self.cluster_encode, - type_clust=self.type_clust, - type_scaling=self.type_scaling, - col_sample=self.col_sample, - row_sample=self.row_sample, - seed=self.seed, - backend=self.backend, - ) + try: + if ( + "n_jobs" in model().get_params().keys() + and name.find("LogisticRegression") == -1 + ): + other_args["n_jobs"] = self.n_jobs + except Exception: + pass - else: - layer_clf = CustomClassifier( - obj=model(), - n_hidden_features=self.n_hidden_features, - activation_name=self.activation_name, - a=self.a, - nodes_sim=self.nodes_sim, - bias=self.bias, - dropout=self.dropout, - direct_link=self.direct_link, - n_clusters=self.n_clusters, - cluster_encode=self.cluster_encode, - type_clust=self.type_clust, - type_scaling=self.type_scaling, - col_sample=self.col_sample, - row_sample=self.row_sample, - seed=self.seed, - backend=self.backend, - ) + start = time.time() - layer_clf.fit(X_train, y_train) + try: + if "random_state" in model().get_params().keys(): + layer_clf = CustomClassifier( + obj=model(random_state=self.random_state), + n_hidden_features=self.n_hidden_features, + activation_name=self.activation_name, + a=self.a, + nodes_sim=self.nodes_sim, + bias=self.bias, + dropout=self.dropout, + direct_link=self.direct_link, + n_clusters=self.n_clusters, + cluster_encode=self.cluster_encode, + type_clust=self.type_clust, + type_scaling=self.type_scaling, + col_sample=self.col_sample, + row_sample=self.row_sample, + seed=self.seed, + backend=self.backend, + ) - for _ in range(self.n_layers): - layer_clf = deepcopy( - CustomClassifier( - obj=layer_clf, + else: + layer_clf = CustomClassifier( + obj=model(), n_hidden_features=self.n_hidden_features, activation_name=self.activation_name, a=self.a, @@ -383,119 +360,99 @@ def fit(self, X_train, X_test, y_train, y_test): seed=self.seed, backend=self.backend, ) + + layer_clf.fit(X_train, y_train) + + for _ in range(self.n_layers): + layer_clf = deepcopy( + CustomClassifier( + obj=layer_clf, + n_hidden_features=self.n_hidden_features, + activation_name=self.activation_name, + a=self.a, + nodes_sim=self.nodes_sim, + bias=self.bias, + dropout=self.dropout, + direct_link=self.direct_link, + n_clusters=self.n_clusters, + cluster_encode=self.cluster_encode, + type_clust=self.type_clust, + type_scaling=self.type_scaling, + col_sample=self.col_sample, + row_sample=self.row_sample, + seed=self.seed, + backend=self.backend, + ) + ) + + pipe = Pipeline( + [ + ("preprocessor", preprocessor), + ("classifier", layer_clf), + ] ) - pipe = Pipeline( - [ - ("preprocessor", preprocessor), - ("classifier", layer_clf), - ]) - - pipe.fit(X_train, y_train) - self.models[name] = pipe - y_pred = pipe.predict(X_test) - accuracy = accuracy_score(y_test, y_pred, normalize=True) - b_accuracy = balanced_accuracy_score(y_test, y_pred) - f1 = f1_score(y_test, y_pred, average="weighted") - try: - roc_auc = roc_auc_score(y_test, y_pred) + pipe.fit(X_train, y_train) + self.models[name] = pipe + y_pred = pipe.predict(X_test) + accuracy = accuracy_score(y_test, y_pred, normalize=True) + b_accuracy = balanced_accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred, average="weighted") + try: + roc_auc = roc_auc_score(y_test, y_pred) + except Exception as exception: + roc_auc = None + if self.ignore_warnings is False: + print("ROC AUC couldn't be calculated for " + name) + print(exception) + names.append(name) + Accuracy.append(accuracy) + B_Accuracy.append(b_accuracy) + ROC_AUC.append(roc_auc) + F1.append(f1) + TIME.append(time.time() - start) + if self.custom_metric is not None: + custom_metric = self.custom_metric(y_test, y_pred) + CUSTOM_METRIC.append(custom_metric) + if self.verbose > 0: + if self.custom_metric is not None: + print( + { + "Model": name, + "Accuracy": accuracy, + "Balanced Accuracy": b_accuracy, + "ROC AUC": roc_auc, + "F1 Score": f1, + self.custom_metric.__name__: custom_metric, + "Time taken": time.time() - start, + } + ) + else: + print( + { + "Model": name, + "Accuracy": accuracy, + "Balanced Accuracy": b_accuracy, + "ROC AUC": roc_auc, + "F1 Score": f1, + "Time taken": time.time() - start, + } + ) + if self.predictions: + predictions[name] = y_pred except Exception as exception: - roc_auc = None if self.ignore_warnings is False: - print("ROC AUC couldn't be calculated for " + name) + print(name + " model failed to execute") print(exception) - names.append(name) - Accuracy.append(accuracy) - B_Accuracy.append(b_accuracy) - ROC_AUC.append(roc_auc) - F1.append(f1) - TIME.append(time.time() - start) - if self.custom_metric is not None: - custom_metric = self.custom_metric(y_test, y_pred) - CUSTOM_METRIC.append(custom_metric) - if self.verbose > 0: - if self.custom_metric is not None: - print( - { - "Model": name, - "Accuracy": accuracy, - "Balanced Accuracy": b_accuracy, - "ROC AUC": roc_auc, - "F1 Score": f1, - self.custom_metric.__name__: custom_metric, - "Time taken": time.time() - start, - } - ) - else: - print( - { - "Model": name, - "Accuracy": accuracy, - "Balanced Accuracy": b_accuracy, - "ROC AUC": roc_auc, - "F1 Score": f1, - "Time taken": time.time() - start, - } - ) - if self.predictions: - predictions[name] = y_pred - except Exception as exception: - if self.ignore_warnings is False: - print(name + " model failed to execute") - print(exception) - - - else: # no preprocessing - - for name, model in tqdm(self.classifiers): # do parallel exec - start = time.time() - try: - if "random_state" in model().get_params().keys(): - layer_clf = CustomClassifier( - obj=model(random_state=self.random_state), - n_hidden_features=self.n_hidden_features, - activation_name=self.activation_name, - a=self.a, - nodes_sim=self.nodes_sim, - bias=self.bias, - dropout=self.dropout, - direct_link=self.direct_link, - n_clusters=self.n_clusters, - cluster_encode=self.cluster_encode, - type_clust=self.type_clust, - type_scaling=self.type_scaling, - col_sample=self.col_sample, - row_sample=self.row_sample, - seed=self.seed, - backend=self.backend, - ) - else: - layer_clf = CustomClassifier( - obj=model(), - n_hidden_features=self.n_hidden_features, - activation_name=self.activation_name, - a=self.a, - nodes_sim=self.nodes_sim, - bias=self.bias, - dropout=self.dropout, - direct_link=self.direct_link, - n_clusters=self.n_clusters, - cluster_encode=self.cluster_encode, - type_clust=self.type_clust, - type_scaling=self.type_scaling, - col_sample=self.col_sample, - row_sample=self.row_sample, - seed=self.seed, - backend=self.backend, - ) - - layer_clf.fit(X_train, y_train) + else: # no preprocessing - for _ in range(self.n_layers): - layer_clf = deepcopy( - CustomClassifier( - obj=layer_clf, + for name, model in tqdm(self.classifiers): # do parallel exec + start = time.time() + try: + if "random_state" in model().get_params().keys(): + layer_clf = CustomClassifier( + obj=model(random_state=self.random_state), n_hidden_features=self.n_hidden_features, activation_name=self.activation_name, a=self.a, @@ -512,63 +469,106 @@ def fit(self, X_train, X_test, y_train, y_test): seed=self.seed, backend=self.backend, ) - ) - # layer_clf.fit(X_train, y_train) + else: + layer_clf = CustomClassifier( + obj=model(), + n_hidden_features=self.n_hidden_features, + activation_name=self.activation_name, + a=self.a, + nodes_sim=self.nodes_sim, + bias=self.bias, + dropout=self.dropout, + direct_link=self.direct_link, + n_clusters=self.n_clusters, + cluster_encode=self.cluster_encode, + type_clust=self.type_clust, + type_scaling=self.type_scaling, + col_sample=self.col_sample, + row_sample=self.row_sample, + seed=self.seed, + backend=self.backend, + ) - layer_clf.fit(X_train, y_train) + layer_clf.fit(X_train, y_train) + + for _ in range(self.n_layers): + layer_clf = deepcopy( + CustomClassifier( + obj=layer_clf, + n_hidden_features=self.n_hidden_features, + activation_name=self.activation_name, + a=self.a, + nodes_sim=self.nodes_sim, + bias=self.bias, + dropout=self.dropout, + direct_link=self.direct_link, + n_clusters=self.n_clusters, + cluster_encode=self.cluster_encode, + type_clust=self.type_clust, + type_scaling=self.type_scaling, + col_sample=self.col_sample, + row_sample=self.row_sample, + seed=self.seed, + backend=self.backend, + ) + ) - self.models[name] = layer_clf - y_pred = layer_clf.predict(X_test) - accuracy = accuracy_score(y_test, y_pred, normalize=True) - b_accuracy = balanced_accuracy_score(y_test, y_pred) - f1 = f1_score(y_test, y_pred, average="weighted") - try: - roc_auc = roc_auc_score(y_test, y_pred) + # layer_clf.fit(X_train, y_train) + + layer_clf.fit(X_train, y_train) + + self.models[name] = layer_clf + y_pred = layer_clf.predict(X_test) + accuracy = accuracy_score(y_test, y_pred, normalize=True) + b_accuracy = balanced_accuracy_score(y_test, y_pred) + f1 = f1_score(y_test, y_pred, average="weighted") + try: + roc_auc = roc_auc_score(y_test, y_pred) + except Exception as exception: + roc_auc = None + if self.ignore_warnings is False: + print("ROC AUC couldn't be calculated for " + name) + print(exception) + names.append(name) + Accuracy.append(accuracy) + B_Accuracy.append(b_accuracy) + ROC_AUC.append(roc_auc) + F1.append(f1) + TIME.append(time.time() - start) + if self.custom_metric is not None: + custom_metric = self.custom_metric(y_test, y_pred) + CUSTOM_METRIC.append(custom_metric) + if self.verbose > 0: + if self.custom_metric is not None: + print( + { + "Model": name, + "Accuracy": accuracy, + "Balanced Accuracy": b_accuracy, + "ROC AUC": roc_auc, + "F1 Score": f1, + self.custom_metric.__name__: custom_metric, + "Time taken": time.time() - start, + } + ) + else: + print( + { + "Model": name, + "Accuracy": accuracy, + "Balanced Accuracy": b_accuracy, + "ROC AUC": roc_auc, + "F1 Score": f1, + "Time taken": time.time() - start, + } + ) + if self.predictions: + predictions[name] = y_pred except Exception as exception: - roc_auc = None if self.ignore_warnings is False: - print("ROC AUC couldn't be calculated for " + name) + print(name + " model failed to execute") print(exception) - names.append(name) - Accuracy.append(accuracy) - B_Accuracy.append(b_accuracy) - ROC_AUC.append(roc_auc) - F1.append(f1) - TIME.append(time.time() - start) - if self.custom_metric is not None: - custom_metric = self.custom_metric(y_test, y_pred) - CUSTOM_METRIC.append(custom_metric) - if self.verbose > 0: - if self.custom_metric is not None: - print( - { - "Model": name, - "Accuracy": accuracy, - "Balanced Accuracy": b_accuracy, - "ROC AUC": roc_auc, - "F1 Score": f1, - self.custom_metric.__name__: custom_metric, - "Time taken": time.time() - start, - } - ) - else: - print( - { - "Model": name, - "Accuracy": accuracy, - "Balanced Accuracy": b_accuracy, - "ROC AUC": roc_auc, - "F1 Score": f1, - "Time taken": time.time() - start, - } - ) - if self.predictions: - predictions[name] = y_pred - except Exception as exception: - if self.ignore_warnings is False: - print(name + " model failed to execute") - print(exception) if self.custom_metric is None: scores = pd.DataFrame( diff --git a/nnetsauce/lazypredict/lazydeepRegressor.py b/nnetsauce/lazypredict/lazydeepRegressor.py index 2de2eb7..e090d0f 100644 --- a/nnetsauce/lazypredict/lazydeepRegressor.py +++ b/nnetsauce/lazypredict/lazydeepRegressor.py @@ -91,7 +91,7 @@ class LazyDeepRegressor(Custom, RegressorMixin): estimators: list, optional (default='all') list of Estimators names or just 'all' (default='all') - preprocess: bool + preprocess: bool preprocessing is done when set to True n_jobs : int, when possible, run in parallel @@ -264,7 +264,7 @@ def fit(self, X_train, X_test, y_train, y_test): ] if self.preprocess is True: - + for name, model in tqdm(self.regressors): # do parallel exec start = time.time() try: @@ -305,7 +305,7 @@ def fit(self, X_train, X_test, y_train, y_test): row_sample=self.row_sample, seed=self.seed, backend=self.backend, - ) + ) for _ in range(self.n_layers): layer_regr = deepcopy( @@ -331,9 +331,13 @@ def fit(self, X_train, X_test, y_train, y_test): layer_regr.fit(X_train, y_train) - pipe = Pipeline(steps=[("preprocessor", preprocessor), - ("regressor", layer_regr)]) - + pipe = Pipeline( + steps=[ + ("preprocessor", preprocessor), + ("regressor", layer_regr), + ] + ) + pipe.fit(X_train, y_train) self.models[name] = pipe @@ -376,7 +380,7 @@ def fit(self, X_train, X_test, y_train, y_test): print(name + " model failed to execute") print(exception) - else: # no preprocessing + else: # no preprocessing for name, model in tqdm(self.regressors): # do parallel exec start = time.time() diff --git a/nnetsauce/multitask/multitaskClassifier.py b/nnetsauce/multitask/multitaskClassifier.py index 8d132e7..c1b8ec9 100644 --- a/nnetsauce/multitask/multitaskClassifier.py +++ b/nnetsauce/multitask/multitaskClassifier.py @@ -189,6 +189,8 @@ def fit(self, X, y, sample_weight=None, **kwargs): assert mx.is_factor(y), "y must contain only integers" + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn + output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) self.n_classes_ = len(np.unique(y)) diff --git a/nnetsauce/multitask/simplemultitaskClassifier.py b/nnetsauce/multitask/simplemultitaskClassifier.py index d55ca7c..d547c2e 100644 --- a/nnetsauce/multitask/simplemultitaskClassifier.py +++ b/nnetsauce/multitask/simplemultitaskClassifier.py @@ -105,9 +105,9 @@ def fit(self, X, y, sample_weight=None, **kwargs): assert mx.is_factor(y), "y must contain only integers" - self.scaled_X_ = self.X_scaler_.fit_transform(X) + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn - self.n_classes_ = len(np.unique(y)) + self.scaled_X_ = self.X_scaler_.fit_transform(X) # multitask response Y = mo.one_hot_encode2(y, self.n_classes_) diff --git a/nnetsauce/randombag/randomBagClassifier.py b/nnetsauce/randombag/randomBagClassifier.py index 061f9f1..3d9417f 100644 --- a/nnetsauce/randombag/randomBagClassifier.py +++ b/nnetsauce/randombag/randomBagClassifier.py @@ -200,6 +200,8 @@ def fit(self, X, y, **kwargs): assert mx.is_factor(y), "y must contain only integers" + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn + # training self.n_classes = len(np.unique(y)) diff --git a/nnetsauce/ridge2/ridge2Classifier.py b/nnetsauce/ridge2/ridge2Classifier.py index 9ce34c8..abade93 100644 --- a/nnetsauce/ridge2/ridge2Classifier.py +++ b/nnetsauce/ridge2/ridge2Classifier.py @@ -323,6 +323,8 @@ def fit(self, X, y, solver="L-BFGS-B", **kwargs): assert mx.is_factor(y), "y must contain only integers" + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn + output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) self.n_classes = len(np.unique(y)) diff --git a/nnetsauce/ridge2/ridge2MultitaskClassifier.py b/nnetsauce/ridge2/ridge2MultitaskClassifier.py index 77bff67..b183fb7 100644 --- a/nnetsauce/ridge2/ridge2MultitaskClassifier.py +++ b/nnetsauce/ridge2/ridge2MultitaskClassifier.py @@ -178,6 +178,8 @@ def fit(self, X, y, **kwargs): assert mx.is_factor(y), "y must contain only integers" + self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn + output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs) n_X, p_X = X.shape diff --git a/setup.py b/setup.py index 14ba274..dbc0ae9 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from codecs import open from os import path -__version__ = '0.17.1' +__version__ = '0.17.2' # get the dependencies and installs here = path.abspath(path.dirname(__file__))