precision vignettes

rrrrn · Nov 8, 2023 · 6466531 · 6466531
1 parent 01916fc
commit 6466531
Show file tree

Hide file tree

Showing 13 changed files with 301 additions and 113 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,5 @@ inst
 /Meta/
 renv
 ^\.
+.Rprofile
+^renv
diff --git a/LICENSE.md b/LICENSE.md
@@ -1,6 +1,6 @@
 # MIT License
 
-Copyright (c) 2023 mmetrics authors
+Copyright (c) 2023 mmetrics Rui Nie
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/R/binary_acc.R b/R/binary_acc.R
@@ -0,0 +1,40 @@
+#' binary_acc
+#'
+#' @description Calculate the binary class accuracy for a given predicted set of
+#' values and corresponding targets
+#'
+#' @param preds Predicted label or predicted probability between 0 and 1,
+#' same shape as target label
+#' @param target Target label
+#' @param threshold The numerical cut-off between 0 and 1 to transform
+#' predicted probability into binary predicted labels
+#' @param multidim_average Average model: global-average across all accuracies,
+#' samplewise-average across the all but the first dimensions (calculated
+#' independently for each sample)
+#'
+#' @return Binary accuracy for preds and target, with format dictated by
+#' multidim_average command.
+#'
+#' @export
+#'
+#' @examples
+#' binary_acc(c(0.8, 0.2), c(1,1), 0.3)
+#' binary_acc(c(1,1), c(0,1))
+binary_acc <- function(preds, target, threshold=0.5, multidim_average = "global"){
+
+  stopifnot(dim(preds)==dim(target))
+
+  # transform probability into labels if necessary
+  if(is.numeric(preds)&(!is.integer(preds))){
+    preds <- as.numeric(preds>threshold)
+  }
+
+  cfs_mtx <- confusion_scores(preds, target, multidim_average)
+
+  if(multidim_average == "global"){
+    return(sum(diag(cfs_mtx$matrix))/sum(cfs_mtx$matrix))
+  }
+  else{
+    return((cfs_mtx$tp+cfs_mtx$tn)/(cfs_mtx$tp+cfs_mtx$tn+cfs_mtx$fn+cfs_mtx$fp))
+  }
+}
diff --git a/R/binary_precision.R b/R/binary_precision.R
@@ -0,0 +1,38 @@
+#' binary_precision
+#'
+#' @description Calculate the binary classtypeification precision for a given predicted set of
+#' values and corresponding targets. In other words, this function estimate how accurate
+#' the true prediction value by the model is.
+#'
+#' @param preds Predicted labels or predicted probability between 0 and 1,
+#' same shape as target label
+#' @param target Target label
+#' @param threshold The numerical cut-off between 0 and 1 to transform
+#' predicted probability into binary predicted labels
+#' @param multidim_average Average model: global-average across all accuracies,
+#' samplewise-average across the all but the first dimensions (calculated
+#' independently for each sample)
+#'
+#' @return Binary precision value for preds and target, with format dictated by
+#' multidim_average command.
+#'
+#' @export
+#'
+#' @examples
+#' binary_precision(c(0.8, 0.2), c(1,1), 0.3)
+#' binary_precision(c(1,1), c(0,1))
+binary_precision <- function(preds, target, threshold=0.5, multidim_average = "global"){
+
+  stopifnot(dim(preds)==dim(target))
+
+  # transform probability into labels when necessary
+  if(is.numeric(preds)&(!is.integer(preds))){
+    preds <- as.numeric(preds>=threshold)
+  }
+
+  cfs_mtx <- confusion_scores(preds, target, multidim_average)
+  if(any((cfs_mtx$tp+cfs_mtx$fp==0))){
+    warning("NaN generated due to lack of positively predicted labels")
+  }
+  return((cfs_mtx$tp)/(cfs_mtx$tp+cfs_mtx$fp))
+}
diff --git a/R/confusion_scores.R b/R/confusion_scores.R
@@ -31,8 +31,14 @@ confusion_scores <- function(preds, target, multidim_average="global"){
   }
   else{
   # compute confusion matrix value
-  ele1 = unique(ele_all)[1]
-  ele2 = unique(ele_all)[2]
+    if(is.logical(ele_all)){
+      ele1 = TRUE
+      ele2 = FALSE
+    }
+    else{
+      ele1 = unique(ele_all)[1]
+      ele2 = unique(ele_all)[2]
+    }
   tp = ((target == preds) & (target == ele1))
   fn = ((target != preds) & (target == ele1))
   fp = ((target != preds) & (target == ele2))
@@ -79,8 +85,12 @@ confusion_scores <- function(preds, target, multidim_average="global"){
 #' multiclass_confusion_scores(y_pred, y_target, classtype="A")
 multiclass_confusion_scores <- function(preds, target, classtype=NULL,
                                         multidim_average = "global"){
-
-  ele_all <- factor(unique(c(target, preds))) # element in the union of two vec
+  if(!is.factor(preds)){
+    ele_all <- factor(unique(c(target, as.vector(preds)))) # element in the union of two vec
+  }
+  else{
+    ele_all <- unique(c(levels(target), levels(preds1)))
+  }
   stopifnot(length(ele_all)>0)
   stopifnot(length(target)==length(preds))
 

diff --git a/R/accuracy.R → R/multiclass_acc.R b/R/accuracy.R → R/multiclass_acc.R
@@ -1,43 +1,3 @@
-#' binary_acc
-#'
-#' @description Calculate the binary class accuracy for a given predicted set of
-#' values and corresponding targets
-#'
-#' @param preds Predicted label or predicted probability between 0 and 1,
-#' same shape as target label
-#' @param target Target label
-#' @param threshold The numerical cut-off between 0 and 1 to transform
-#' predicted probability into binary predicted labels
-#' @param multidim_average Average model: global-average across all accuracies,
-#' samplewise-average across the all but the first dimensions (calculated
-#' independently for each sample)
-#'
-#' @return Binary accuracy for preds and target, with format dictated by
-#' multidim_average command.
-#'
-#' @export
-#'
-#' @examples
-#' binary_acc(c(0.8, 0.2), c(1,1), 0.3)
-#' binary_acc(c(1,1), c(0,1))
-binary_acc <- function(preds, target, threshold=0.5, multidim_average = "global"){
-
-  stopifnot(dim(preds)==dim(target))
-
-  # transform probability into labels if necessary
-  if(is.numeric(preds)&(!is.integer(preds))){
-    preds <- as.numeric(preds>threshold)
-  }
-
-  cfs_mtx <- confusion_scores(preds, target, multidim_average)
-
-  if(multidim_average == "global"){
-    return(sum(diag(cfs_mtx$matrix))/sum(cfs_mtx$matrix))
-  }
-  else{
-    return((cfs_mtx$tp+cfs_mtx$tn)/(cfs_mtx$tp+cfs_mtx$tn+cfs_mtx$fn+cfs_mtx$fp))
-  }
-}
 
 #' multiclass_acc
 #'
@@ -74,7 +34,12 @@ multiclass_acc <- function(preds, target, multidim_average = "global",
     preds = apply(preds, 1:(length(dim(preds))-1), which.max)
   }
 
-  ele_all = unique(c(preds, target))
+  if(!is.factor(preds)){
+    ele_all <- factor(unique(c(target, as.vector(preds)))) # element in the union of two vec
+  }
+  else{
+    ele_all <- unique(c(levels(target), levels(preds1)))
+  }
   num_class = length(ele_all)
 
   stopifnot(dim(preds)[1]==dim(target)[1])
@@ -89,24 +54,24 @@ multiclass_acc <- function(preds, target, multidim_average = "global",
       target = datamtx[(n+1):(2*n)]
     }
     else{
-      n = ncol(datamtx)/2
-      preds = datamtx[,1:n]
-      target = datamtx[,(n+1):(2*n)]
+      n <- ncol(datamtx)/2
+      preds <- datamtx[,1:n]
+      target <- datamtx[,(n+1):(2*n)]
     }
 
     if(average=="micro"){
       return(mean(preds==target))
     }
     else if(average=="macro"){
-      label_acc = numeric(num_class)
+      label_acc <- numeric(num_class)
       # label-wise accuracy calculation
       for(i in 1:num_class){
-        targetnew <- target[target==ele_all[i]]
-        predsnew <- preds[target==ele_all[i]]
+        predsnew = preds[target==ele_all[i]]
+        targetnew = target[target==ele_all[i]]
         label_acc[i] <- ifelse(length(targetnew==predsnew)>0, mean(targetnew==predsnew),0)
       }
+      return(mean(label_acc))
     }
-    return(mean(label_acc))
   }
 
   if(multidim_average=="global"){

diff --git a/R/precision.R → R/multiclass_precision.R b/R/precision.R → R/multiclass_precision.R
@@ -1,42 +1,3 @@
-#' binary_precision
-#'
-#' @description Calculate the binary classtypeification precision for a given predicted set of
-#' values and corresponding targets. In other words, this function estimate how accurate
-#' the true prediction value by the model is.
-#'
-#' @param preds Predicted labels or predicted probability between 0 and 1,
-#' same shape as target label
-#' @param target Target label
-#' @param threshold The numerical cut-off between 0 and 1 to transform
-#' predicted probability into binary predicted labels
-#' @param multidim_average Average model: global-average across all accuracies,
-#' samplewise-average across the all but the first dimensions (calculated
-#' independently for each sample)
-#'
-#' @return Binary precision value for preds and target, with format dictated by
-#' multidim_average command.
-#'
-#' @export
-#'
-#' @examples
-#' binary_precision(c(0.8, 0.2), c(1,1), 0.3)
-#' binary_precision(c(1,1), c(0,1))
-binary_precision <- function(preds, target, threshold=0.5, multidim_average = "global"){
-
-  stopifnot(dim(preds)==dim(target))
-
-  # transform probability into labels when necessary
-  if(is.numeric(preds)&(!is.integer(preds))){
-    preds <- as.numeric(preds>threshold)
-  }
-
-  cfs_mtx <- confusion_scores(preds, target, multidim_average)
-  if(any((cfs_mtx$tp+cfs_mtx$fp==0))){
-    warning("NaN generated due to lack of positively predicted labels")
-  }
-  return((cfs_mtx$tp)/(cfs_mtx$tp+cfs_mtx$fp))
-}
-
 #' multiclass_precision
 #'
 #' @description Calculate the multiclass precision value for a given predicted set of
@@ -105,6 +66,10 @@ multiclass_precision <-function(preds, target, multidim_average = "global",
     }
     else if(average=="macro"){
       label_prec = numeric(num_class)
+      if(!any(ele_all %in% c(preds, target))){
+        preds = ele_all[preds]
+        target = ele_all[target]
+      }
       # label-wise accuracy calculation
       for(i in 1:num_class){
         cfsmtx <- multiclass_confusion_scores(preds, target, classtype=ele_all[i])

diff --git a/man/binary_acc.Rd b/man/binary_acc.Rd
diff --git a/man/binary_precision.Rd b/man/binary_precision.Rd
diff --git a/man/multiclass_acc.Rd b/man/multiclass_acc.Rd
diff --git a/man/multiclass_precision.Rd b/man/multiclass_precision.Rd