Merge pull request #58 from Techtonique/sample-weight
Sample weight + VAR and VECM
thierrymoudiki committed Sep 18, 2024
2 parents 7812488 + eb8a224 commit 0046c5b
Showing 16 changed files with 239 additions and 148 deletions.
4 changes: 2 additions & 2 deletions CHANGES.md
@@ -1,8 +1,8 @@
- # version 0.24.0
+ # version 0.24.4

- Update `LazyDeepMTS`: **No more `LazyMTS`** class, instead, you can use `LazyDeepMTS` with `n_layers=1`
- Specify forecasting horizon in `LazyDeepMTS` (see updated docs and examples/lazy_mts_horizon.py)
- - New class `ClassicalMTS` for classical models (for now VAR and VECM adapted from statsmodels) in multivariate time series forecasting (not available in `LazyDeepMTS` yet)
+ - New class `ClassicalMTS` for classical models (for now VAR and VECM adapted from statsmodels) in multivariate time series forecasting
- [`partial_fit`](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html#sklearn.linear_model.SGDClassifier.partial_fit) for `CustomClassifier` and `CustomRegressor` (see the sketch after this list)
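
As a quick illustration of the `ClassicalMTS` and `partial_fit` entries above, here is a minimal sketch (not part of this commit). The `model` argument of `ClassicalMTS` and its `predict(h=...)` signature are assumptions, since the constructor is not shown in this diff; the `partial_fit` call assumes the wrapper forwards to the underlying estimator's own `partial_fit`, as the code changes further down suggest:

import nnetsauce as ns
import numpy as np
import statsmodels.api as sm
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import SGDClassifier

# ClassicalMTS on a small macro dataset (argument names assumed, see note above)
mdata = sm.datasets.macrodata.load_pandas().data[["realgdp", "realcons", "realinv"]]
mts = ns.ClassicalMTS(model="VAR")      # "VECM" would be the other adapted model
mts.fit(np.log(mdata).diff().dropna())  # stationarize, then fit
preds = mts.predict(h=5)                # 5-step-ahead forecast

# partial_fit on CustomClassifier, wrapping an incremental scikit-learn estimator
X, y = load_breast_cancer(return_X_y=True)
clf = ns.CustomClassifier(obj=SGDClassifier())
clf.fit(X[:400], y[:400])               # initial fit on a first batch
clf.partial_fit(X[400:], y[400:])       # incremental update on the remaining rows
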

# version 0.23.1
3 changes: 3 additions & 0 deletions Makefile
@@ -80,6 +80,9 @@ build-site: docs ## export mkdocs website to a folder
	cp -rf nnetsauce-docs/* ../../Pro_Website/Techtonique.github.io/nnetsauce
	find . -name '__pycache__' -exec rm -fr {} +

+ run-custom: ## run all custom examples with one command
+ 	find examples -maxdepth 2 -name "*custom*.py" -exec python3 {} \;

run-examples: ## run all examples with one command
	find examples -maxdepth 2 -name "*.py" -exec python3 {} \;
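
With the new target, `make run-custom` executes only the example scripts whose filenames contain "custom", mirroring the existing `make run-examples` target that runs every example.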

25 changes: 24 additions & 1 deletion examples/custom_deep_classification.py
@@ -33,7 +33,30 @@

print(clf.score(X_test, y_test))

print("Example 2 - conformal")
print("Example 2 - not conformal with weights")

+ load_models = [load_breast_cancer, load_iris, load_wine]
+
+ for model in load_models:
+
+     data = model()
+     X = data.data
+     y = data.target
+     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=13)
+
+     obj = SVC()
+
+     clf = ns.DeepClassifier(obj, n_layers=2, verbose=1, n_clusters=2, n_hidden_features=2)
+
+     start = time()
+     clf.fit(X_train, y_train, sample_weight=np.random.rand(X_train.shape[0]))
+     print(f"\nElapsed: {time() - start} seconds\n")
+
+     preds = clf.predict(X_test)
+
+     print(clf.score(X_test, y_test))
+
+ print("Example 3 - conformal")

for model in load_models:

92 changes: 0 additions & 92 deletions examples/custom_deep_classification2.py

This file was deleted.

67 changes: 67 additions & 0 deletions examples/custom_with_weights.py
@@ -0,0 +1,67 @@
import nnetsauce as ns
import numpy as np
import os
from sklearn.datasets import load_breast_cancer, load_diabetes
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

print("Example 1 - classification")

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

clf = ns.CustomClassifier(obj=LogisticRegression())

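# inverse class-frequency weights: each class contributes equally in total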
n_zeros = np.sum(y_train == 0)
n_ones = np.sum(y_train == 1)
weights = np.where(y_train == 0, 1/n_zeros, 1/n_ones)

clf.fit(X_train, y_train, sample_weight=weights)

print(clf.score(X_test, y_test))

clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))

clf = ns.DeepClassifier(obj=LogisticRegression())

clf.fit(X_train, y_train, sample_weight=weights)

print(clf.score(X_test, y_test))

clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))

print("Example 2 - regression")

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

reg = ns.CustomRegressor(obj=RandomForestRegressor())

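# purely illustrative random weights, to exercise the sample_weight code path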
weights = np.random.rand(X_train.shape[0])

reg.fit(X_train, y_train, sample_weight=weights)

print(reg.score(X_test, y_test))

reg.fit(X_train, y_train)

print(reg.score(X_test, y_test))

reg = ns.DeepRegressor(obj=RandomForestRegressor())

reg.fit(X_train, y_train, sample_weight=weights)

print(reg.score(X_test, y_test))

reg.fit(X_train, y_train)

print(reg.score(X_test, y_test))


4 changes: 2 additions & 2 deletions examples/lazy_mts_horizon.py
@@ -24,7 +24,7 @@
data = np.log(mdata).diff().dropna()

n = data.shape[0]
- max_idx_train = np.floor(n*0.9)
+ max_idx_train = np.floor(n*0.4)
training_index = np.arange(0, max_idx_train)
testing_index = np.arange(max_idx_train, n)
df_train = data.iloc[training_index,:]
@@ -147,7 +147,7 @@
print(f"\n ----- Example 9 ----- \n")

regr_mts5 = ns.LazyDeepMTS(verbose=1, ignore_warnings=False, custom_metric=None,
-                          lags = 20, n_hidden_features=7, n_clusters=2,
+                          n_hidden_features=7, n_clusters=2,
                           #type_pi = "gaussian",
                           show_progress=False, preprocess=False,
                           h=5, )
4 changes: 2 additions & 2 deletions nnetsauce/boosting/adaBoostClassifier.py
@@ -286,7 +286,7 @@ def fit(self, X, y, sample_weight=None, **kwargs):

        for m in range(self.n_estimators):
            preds = base_learner.fit(
-                X, y, sample_weight=np.ravel(w_m, order="C"), **kwargs
+                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict(X)

            self.base_learners_.update(
@@ -344,7 +344,7 @@ def fit(self, X, y, sample_weight=None, **kwargs):

        for m in range(self.n_estimators):
            probs = base_learner.fit(
-                X, y, sample_weight=np.ravel(w_m, order="C"), **kwargs
+                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict_proba(X)

            np.clip(
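Both hunks above replace `np.ravel(w_m, order="C")` with the terser method form `w_m.ravel()`. The two are equivalent for the default C (row-major) order; a standalone check (not part of the diff):

import numpy as np

w_m = np.random.rand(5, 1)  # per-sample boosting weights kept as a column vector

flat_fn = np.ravel(w_m, order="C")  # function form with explicit row-major order
flat_method = w_m.ravel()           # method form; C order is already the default

assert np.array_equal(flat_fn, flat_method)  # same values, same ordering
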
21 changes: 15 additions & 6 deletions nnetsauce/custom/customClassifier.py
@@ -191,6 +191,9 @@ def fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -201,16 +204,21 @@
"""

output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn
self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn

if self.level is not None:
self.obj = PredictionSet(
obj=self.obj, method=self.pi_method, level=self.level
)

# if sample_weights, else: (must use self.row_index)
if sample_weight is not None:
self.obj.fit(
scaled_Z,
output_y,
sample_weight=np.ravel(sample_weight, order="C")[
sample_weight=sample_weight[
self.index_row_
],
].ravel(),
# **kwargs
)

@@ -234,6 +242,9 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Subset of target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -252,9 +263,7 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
            self.obj.partial_fit(
                scaled_Z,
                output_y,
-                sample_weight=np.ravel(sample_weight, order="C")[
-                    self.index_row_
-                ],
+                sample_weight=sample_weight[self.index_row_].ravel(),
                # **kwargs
            )
        except:
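In both methods, the rewritten code indexes `sample_weight` with `self.index_row_` before flattening, keeping the weights aligned with the rows that the internal training-set preparation actually retains (the regressor diff below also fixes the old `self.index_row` attribute name). A standalone sketch of that alignment, with `index_row_` stood in by a plain NumPy index array:

import numpy as np

rng = np.random.default_rng(0)
sample_weight = rng.random(10)       # one weight per original training row
index_row_ = np.array([0, 2, 3, 7])  # rows retained by preprocessing

aligned = sample_weight[index_row_].ravel()  # what the new code hands to obj.fit
assert aligned.shape == (4,)
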
26 changes: 15 additions & 11 deletions nnetsauce/custom/customRegressor.py
@@ -176,6 +176,9 @@ def fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -188,24 +191,24 @@

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

+        if self.level is not None:
+            self.obj = PredictionInterval(
+                obj=self.obj, method=self.pi_method, level=self.level
+            )
+
        # if sample_weights, else: (must use self.row_index)
        if sample_weight is not None:
            self.obj.fit(
                scaled_Z,
                centered_y,
-                sample_weight=np.ravel(sample_weight, order="C")[
-                    self.index_row
-                ],
+                sample_weight=sample_weight[
+                    self.index_row_
+                ].ravel(),
                **kwargs
            )

            return self

-        if self.level is not None:
-            self.obj = PredictionInterval(
-                obj=self.obj, method=self.pi_method, level=self.level
-            )
-
        self.obj.fit(scaled_Z, centered_y, **kwargs)

        self.X_ = X
@@ -225,6 +228,9 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Subset of target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -243,9 +249,7 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
            self.obj.partial_fit(
                scaled_Z,
                centered_y,
-                sample_weight=np.ravel(sample_weight, order="C")[
-                    self.index_row
-                ],
+                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs
            )
        except:
Expand Down
(Diff truncated here: the remaining changed files in this commit are not rendered.)