From 6466531db3237ec1bc871aeca84d55e51952130a Mon Sep 17 00:00:00 2001
From: ruinie
Date: Tue, 7 Nov 2023 22:42:21 -0500
Subject: [PATCH] precision vignettes

---
 .gitignore                                |   2 +
 LICENSE.md                                |   2 +-
 R/binary_acc.R                            |  40 ++++++++
 R/binary_precision.R                      |  38 +++++++
 R/confusion_scores.R                      |  18 +++-
 R/{accuracy.R => multiclass_acc.R}        |  61 +++--------
 R/{precision.R => multiclass_precision.R} |  43 +-------
 man/binary_acc.Rd                         |   2 +-
 man/binary_precision.Rd                   |   2 +-
 man/multiclass_acc.Rd                     |   2 +-
 man/multiclass_precision.Rd               |   2 +-
 vignettes/Tutorial.Rmd                    |  83 ++++++++++++++-
 vignettes/Tutorial.html                   | 119 +++++++++++++++++++---
 13 files changed, 301 insertions(+), 113 deletions(-)
 create mode 100644 R/binary_acc.R
 create mode 100644 R/binary_precision.R
 rename R/{accuracy.R => multiclass_acc.R} (60%)
 rename R/{precision.R => multiclass_precision.R} (67%)

diff --git a/.gitignore b/.gitignore
index 31c62d2..ef8ff2a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@ inst
 /Meta/
 renv
 ^\.
+.Rprofile
+^renv

diff --git a/LICENSE.md b/LICENSE.md
index 28935ad..b5f47a8 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,6 +1,6 @@
 # MIT License
 
-Copyright (c) 2023 mmetrics authors
+Copyright (c) 2023 Rui Nie
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/R/binary_acc.R b/R/binary_acc.R
new file mode 100644
index 0000000..e2c881b
--- /dev/null
+++ b/R/binary_acc.R
@@ -0,0 +1,40 @@
+#' binary_acc
+#'
+#' @description Calculate the binary classification accuracy for a given set of
+#' predicted values and their corresponding targets
+#'
+#' @param preds Predicted labels, or predicted probabilities between 0 and 1,
+#' with the same shape as the target labels
+#' @param target Target labels
+#' @param threshold The numerical cut-off between 0 and 1 used to transform
+#' predicted probabilities into binary predicted labels
+#' @param multidim_average Averaging mode: "global" averages across all
+#' samples at once; "samplewise" averages across all but the first dimension
+#' (calculated independently for each sample)
+#'
+#' @return Binary accuracy for preds and target, in the format dictated by
+#' the multidim_average argument.
+#'
+#' @export
+#'
+#' @examples
+#' binary_acc(c(0.8, 0.2), c(1,1), 0.3)
+#' binary_acc(c(1,1), c(0,1))
+binary_acc <- function(preds, target, threshold=0.5, multidim_average = "global"){
+
+  stopifnot(dim(preds)==dim(target))
+
+  # transform probabilities into labels if necessary
+  if(is.numeric(preds)&(!is.integer(preds))){
+    preds <- as.numeric(preds>threshold)
+  }
+
+  cfs_mtx <- confusion_scores(preds, target, multidim_average)
+
+  if(multidim_average == "global"){
+    return(sum(diag(cfs_mtx$matrix))/sum(cfs_mtx$matrix))
+  }
+  else{
+    return((cfs_mtx$tp+cfs_mtx$tn)/(cfs_mtx$tp+cfs_mtx$tn+cfs_mtx$fn+cfs_mtx$fp))
+  }
+}
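A minimal usage sketch of the thresholding behaviour above (plain R, hand-checked against the function body: probabilities are cut at `threshold` and then scored like hard labels):

```r
# with threshold 0.3, c(0.8, 0.2) becomes the label vector c(1, 0),
# so exactly one of the two predictions matches the all-ones target
binary_acc(c(0.8, 0.2), c(1, 1), threshold = 0.3)   # 0.5

# equivalent manual check on the thresholded labels
mean(as.numeric(c(0.8, 0.2) > 0.3) == c(1, 1))      # 0.5
```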
diff --git a/R/binary_precision.R b/R/binary_precision.R
new file mode 100644
index 0000000..f5daa4c
--- /dev/null
+++ b/R/binary_precision.R
@@ -0,0 +1,38 @@
+#' binary_precision
+#'
+#' @description Calculate the binary classification precision for a given set of
+#' predicted values and their corresponding targets. In other words, this
+#' function estimates how reliable the model's positive predictions are.
+#'
+#' @param preds Predicted labels, or predicted probabilities between 0 and 1,
+#' with the same shape as the target labels
+#' @param target Target labels
+#' @param threshold The numerical cut-off between 0 and 1 used to transform
+#' predicted probabilities into binary predicted labels
+#' @param multidim_average Averaging mode: "global" averages across all
+#' samples at once; "samplewise" averages across all but the first dimension
+#' (calculated independently for each sample)
+#'
+#' @return Binary precision value for preds and target, in the format dictated
+#' by the multidim_average argument.
+#'
+#' @export
+#'
+#' @examples
+#' binary_precision(c(0.8, 0.2), c(1,1), 0.3)
+#' binary_precision(c(1,1), c(0,1))
+binary_precision <- function(preds, target, threshold=0.5, multidim_average = "global"){
+
+  stopifnot(dim(preds)==dim(target))
+
+  # transform probabilities into labels when necessary
+  if(is.numeric(preds)&(!is.integer(preds))){
+    preds <- as.numeric(preds>=threshold)
+  }
+
+  cfs_mtx <- confusion_scores(preds, target, multidim_average)
+  if(any((cfs_mtx$tp+cfs_mtx$fp==0))){
+    warning("NaN generated due to lack of positively predicted labels")
+  }
+  return((cfs_mtx$tp)/(cfs_mtx$tp+cfs_mtx$fp))
+}

diff --git a/R/confusion_scores.R b/R/confusion_scores.R
index d3dbbf6..7e4907f 100644
--- a/R/confusion_scores.R
+++ b/R/confusion_scores.R
@@ -31,8 +31,14 @@ confusion_scores <- function(preds, target, multidim_average="global"){
   }
   else{
     # compute confusion matrix value
-    ele1 = unique(ele_all)[1]
-    ele2 = unique(ele_all)[2]
+    if(is.logical(ele_all)){
+      ele1 = TRUE
+      ele2 = FALSE
+    }
+    else{
+      ele1 = unique(ele_all)[1]
+      ele2 = unique(ele_all)[2]
+    }
     tp = ((target == preds) & (target == ele1))
     fn = ((target != preds) & (target == ele1))
     fp = ((target != preds) & (target == ele2))
@@ -79,8 +85,12 @@ confusion_scores <- function(preds, target, multidim_average="global"){
 #' multiclass_confusion_scores(y_pred, y_target, classtype="A")
 multiclass_confusion_scores <- function(preds, target, classtype=NULL,
                                         multidim_average = "global"){
-
-  ele_all <- factor(unique(c(target, preds))) # element in the union of two vec
+  if(!is.factor(preds)){
+    ele_all <- factor(unique(c(target, as.vector(preds)))) # elements in the union of the two vectors
+  }
+  else{
+    ele_all <- unique(c(levels(target), levels(preds)))
+  }
 
   stopifnot(length(ele_all)>0)
   stopifnot(length(target)==length(preds))
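A sketch of the new logical-input branch above: with logical vectors, `TRUE` is always taken as the positive class, so the four cell counts are unambiguous. This assumes the returned list exposes scalar counts in `"global"` mode, which is what `binary_acc` and `binary_precision` rely on.

```r
preds  <- c(TRUE, TRUE, FALSE, FALSE)
target <- c(TRUE, FALSE, TRUE, FALSE)
cs <- confusion_scores(preds, target)
# one observation lands in each cell of the 2x2 table
c(tp = cs$tp, fp = cs$fp, fn = cs$fn, tn = cs$tn)
```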
diff --git a/R/accuracy.R b/R/multiclass_acc.R
similarity index 60%
rename from R/accuracy.R
rename to R/multiclass_acc.R
index a87cd62..2f55a6f 100644
--- a/R/accuracy.R
+++ b/R/multiclass_acc.R
@@ -1,43 +1,3 @@
-#' binary_acc
-#'
-#' @description Calculate the binary class accuracy for a given predicted set of
-#' values and corresponding targets
-#'
-#' @param preds Predicted label or predicted probability between 0 and 1,
-#' same shape as target label
-#' @param target Target label
-#' @param threshold The numerical cut-off between 0 and 1 to transform
-#' predicted probability into binary predicted labels
-#' @param multidim_average Average model: global-average across all accuracies,
-#' samplewise-average across the all but the first dimensions (calculated
-#' independently for each sample)
-#'
-#' @return Binary accuracy for preds and target, with format dictated by
-#' multidim_average command.
-#'
-#' @export
-#'
-#' @examples
-#' binary_acc(c(0.8, 0.2), c(1,1), 0.3)
-#' binary_acc(c(1,1), c(0,1))
-binary_acc <- function(preds, target, threshold=0.5, multidim_average = "global"){
-
-  stopifnot(dim(preds)==dim(target))
-
-  # transform probability into labels if necessary
-  if(is.numeric(preds)&(!is.integer(preds))){
-    preds <- as.numeric(preds>threshold)
-  }
-
-  cfs_mtx <- confusion_scores(preds, target, multidim_average)
-
-  if(multidim_average == "global"){
-    return(sum(diag(cfs_mtx$matrix))/sum(cfs_mtx$matrix))
-  }
-  else{
-    return((cfs_mtx$tp+cfs_mtx$tn)/(cfs_mtx$tp+cfs_mtx$tn+cfs_mtx$fn+cfs_mtx$fp))
-  }
-}
 
 #' multiclass_acc
 #'
@@ -74,7 +34,12 @@ multiclass_acc <- function(preds, target, multidim_average = "global",
     preds = apply(preds, 1:(length(dim(preds))-1), which.max)
   }
 
-  ele_all = unique(c(preds, target))
+  if(!is.factor(preds)){
+    ele_all <- factor(unique(c(target, as.vector(preds)))) # elements in the union of the two vectors
+  }
+  else{
+    ele_all <- unique(c(levels(target), levels(preds)))
+  }
   num_class = length(ele_all)
 
   stopifnot(dim(preds)[1]==dim(target)[1])
@@ -89,24 +54,24 @@
     target = datamtx[(n+1):(2*n)]
   }
   else{
-    n = ncol(datamtx)/2
-    preds = datamtx[,1:n]
-    target = datamtx[,(n+1):(2*n)]
+    n <- ncol(datamtx)/2
+    preds <- datamtx[,1:n]
+    target <- datamtx[,(n+1):(2*n)]
   }
   if(average=="micro"){
     return(mean(preds==target))
   }
   else if(average=="macro"){
-    label_acc = numeric(num_class)
+    label_acc <- numeric(num_class)
+    if(!any(ele_all %in% c(preds, target))){
+      # cbind() coerces factors to their integer codes; map the codes back
+      # to labels, mirroring the same guard in multiclass_precision()
+      preds <- ele_all[preds]
+      target <- ele_all[target]
+    }
     # label-wise accuracy calculation
     for(i in 1:num_class){
-      targetnew <- target[target==ele_all[i]]
-      predsnew <- preds[target==ele_all[i]]
+      predsnew <- preds[target==ele_all[i]]
+      targetnew <- target[target==ele_all[i]]
       label_acc[i] <- ifelse(length(targetnew==predsnew)>0, mean(targetnew==predsnew),0)
     }
+    return(mean(label_acc))
   }
-  return(mean(label_acc))
 }
 
 if(multidim_average=="global"){
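The micro/macro distinction implemented above, hand-rolled on a toy example (a sketch of the logic, not the package API): micro pools every prediction into a single accuracy, while macro averages the per-class accuracies computed over the samples whose target is that class.

```r
preds  <- c("a", "a", "b", "b")
target <- c("a", "b", "b", "b")

mean(preds == target)                 # micro: 3/4

classes <- unique(target)
per_class <- sapply(classes, function(k) mean(preds[target == k] == k))
mean(per_class)                       # macro: (1 + 2/3) / 2 = 5/6
```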
diff --git a/R/precision.R b/R/multiclass_precision.R
similarity index 67%
rename from R/precision.R
rename to R/multiclass_precision.R
index bd8331a..74f23db 100644
--- a/R/precision.R
+++ b/R/multiclass_precision.R
@@ -1,42 +1,3 @@
-#' binary_precision
-#'
-#' @description Calculate the binary classtypeification precision for a given predicted set of
-#' values and corresponding targets. In other words, this function estimate how accurate
-#' the true prediction value by the model is.
-#'
-#' @param preds Predicted labels or predicted probability between 0 and 1,
-#' same shape as target label
-#' @param target Target label
-#' @param threshold The numerical cut-off between 0 and 1 to transform
-#' predicted probability into binary predicted labels
-#' @param multidim_average Average model: global-average across all accuracies,
-#' samplewise-average across the all but the first dimensions (calculated
-#' independently for each sample)
-#'
-#' @return Binary precision value for preds and target, with format dictated by
-#' multidim_average command.
-#'
-#' @export
-#'
-#' @examples
-#' binary_precision(c(0.8, 0.2), c(1,1), 0.3)
-#' binary_precision(c(1,1), c(0,1))
-binary_precision <- function(preds, target, threshold=0.5, multidim_average = "global"){
-
-  stopifnot(dim(preds)==dim(target))
-
-  # transform probability into labels when necessary
-  if(is.numeric(preds)&(!is.integer(preds))){
-    preds <- as.numeric(preds>threshold)
-  }
-
-  cfs_mtx <- confusion_scores(preds, target, multidim_average)
-  if(any((cfs_mtx$tp+cfs_mtx$fp==0))){
-    warning("NaN generated due to lack of positively predicted labels")
-  }
-  return((cfs_mtx$tp)/(cfs_mtx$tp+cfs_mtx$fp))
-}
-
 #' multiclass_precision
 #'
 #' @description Calculate the multiclass precision value for a given predicted set of
@@ -105,6 +66,10 @@ multiclass_precision <-function(preds, target, multidim_average = "global",
   }
   else if(average=="macro"){
     label_prec = numeric(num_class)
+    if(!any(ele_all %in% c(preds, target))){
+      preds = ele_all[preds]
+      target = ele_all[target]
+    }
     # label-wise accuracy calculation
     for(i in 1:num_class){
       cfsmtx <- multiclass_confusion_scores(preds, target, classtype=ele_all[i])
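And the corresponding macro precision, hand-rolled the same way (a sketch: each class is scored one-versus-rest as tp / (tp + fp), then the per-class values are averaged):

```r
preds  <- c("a", "a", "b", "b")
target <- c("a", "b", "b", "b")

prec <- sapply(unique(target), function(k)
  sum(preds == k & target == k) / sum(preds == k))
mean(prec)                            # (1/2 + 1) / 2 = 0.75
```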
diff --git a/man/binary_acc.Rd b/man/binary_acc.Rd
index 0eec736..f0a0168 100644
--- a/man/binary_acc.Rd
+++ b/man/binary_acc.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/accuracy.R
+% Please edit documentation in R/binary_acc.R
 \name{binary_acc}
 \alias{binary_acc}
 \title{binary_acc}

diff --git a/man/binary_precision.Rd b/man/binary_precision.Rd
index aff8022..515300e 100644
--- a/man/binary_precision.Rd
+++ b/man/binary_precision.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/precision.R
+% Please edit documentation in R/binary_precision.R
 \name{binary_precision}
 \alias{binary_precision}
 \title{binary_precision}

diff --git a/man/multiclass_acc.Rd b/man/multiclass_acc.Rd
index 302fc69..e902144 100644
--- a/man/multiclass_acc.Rd
+++ b/man/multiclass_acc.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/accuracy.R
+% Please edit documentation in R/multiclass_acc.R
 \name{multiclass_acc}
 \alias{multiclass_acc}
 \title{multiclass_acc}

diff --git a/man/multiclass_precision.Rd b/man/multiclass_precision.Rd
index 030962a..f983bfc 100644
--- a/man/multiclass_precision.Rd
+++ b/man/multiclass_precision.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/precision.R
+% Please edit documentation in R/multiclass_precision.R
 \name{multiclass_precision}
 \alias{multiclass_precision}
 \title{multiclass_precision}

diff --git a/vignettes/Tutorial.Rmd b/vignettes/Tutorial.Rmd
index a81a805..42403e6 100644
--- a/vignettes/Tutorial.Rmd
+++ b/vignettes/Tutorial.Rmd
@@ -19,6 +19,7 @@ knitr::opts_chunk$set(
 The `mmetrics` package contains functions for assessing the classification performance of statistical models, in particular when multi-class classification problems or different ways of averaging the metric computations are of interest. This vignette walks through the package's accuracy, confusion-score, and precision functions on a worked example.
 
 ```{r setup}
+rm(list = ls())
 library(mmetrics)
 library(NHANES)
 library(randomForest)
 ```
 
 A range of options for customizing the classification metric computations is available to users of the package. To demonstrate these functions, data from the NHANES package will be used.
 In particular, we formulate our problem as building a predictive classification model with random forests.
 The goal is to predict `Depressed` (a categorical variable) from `SleepHrsNight`, `BMI`, `Poverty` and `PhysActive`, as well as the demographic variables `Age` and `Gender`.
 
+## Model fitting
+
 First we filter out participants with NA values and partition the dataset into training and testing sets.
 
 ```{r}
@@ -43,18 +46,88 @@
 test_id <- seq(1, n)[!(seq(1,n) %in% train_id)]
 
 train_df <- df[train_id, ]
 test_df <- df[test_id, 2:ncol(df)]
-target <- df[test_id, 1][[1]]
+target <- df[test_id, 1]
 ```
 
-Then we fit a multinomial logistic regression model on training dataset and obtain its prediction on test dataset for model performance assessment.
+Oftentimes we are interested in fitting more than one model and comparing their performance, in the hope of obtaining a more predictive model. Suppose, then, that we fit two random forest models with different numbers of trees to the same classification task.
 
 ```{r}
 set.seed(345)
-model <- randomForest(Depressed~., train_df)
-preds_prob <- predict(model, test_df, type = "prob")
-preds <- predict(model, test_df)
+model1 <- randomForest(Depressed~., train_df, ntree=10, replace=FALSE)
+preds_prob1 <- predict(model1, test_df, type = "prob")
+preds1 <- predict(model1, test_df)
 ```
+
+```{r}
+set.seed(345)
+model2 <- randomForest(Depressed~., train_df, ntree=200, replace=FALSE)
+preds_prob2 <- predict(model2, test_df, type = "prob")
+preds2 <- predict(model2, test_df)
+```
+
+## Model performance assessment
+
+### Accuracy
+
+If a particular class is of interest, `binary_acc` can be applied to calculate class-specific accuracy scores in a one-versus-the-rest fashion.
+
+```{r}
+e_s1 = binary_acc(preds1=="Most", target=="Most"); e_r1 = binary_acc(preds2=="Most", target=="Most")
+e_s2 = binary_acc(preds1=="Several", target=="Several"); e_r2 = binary_acc(preds2=="Several", target=="Several")
+e_s3 = binary_acc(preds1=="None", target=="None"); e_r3 = binary_acc(preds2=="None", target=="None")
+data.frame(model1 = c(e_s1, e_s2, e_s3), model2 = c(e_r1, e_r2, e_r3), row.names = c("Most", "Several", "None"))
+```
+
+To obtain an overall measure of classification accuracy, `multiclass_acc` offers two ways of averaging performance across classes: `micro` computes a single global average over all predictions, while `macro` averages the per-class accuracies. When the ground-truth classes are imbalanced, analysts should consider the `macro` average to mitigate the bias introduced by the dominant class. Note that the one-versus-the-rest decomposition used above no longer applies in the multiclass calculation.
+
+```{r}
+table(target)
+
+mic_e_s = multiclass_acc(preds1, target)
+mac_e_s = multiclass_acc(preds1, target, average = "macro")
+
+mic_e_r = multiclass_acc(preds2, target)
+mac_e_r = multiclass_acc(preds2, target, average = "macro")
+
+data.frame(model1 = c(mic_e_s, mac_e_s), model2 = c(mic_e_r, mac_e_r), row.names = c("Micro Average", "Macro Average"))
+```
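+
+To see why the two averages can differ, consider a degenerate classifier that always predicts the dominant class `"None"`. A minimal sketch (reusing `target` and the factor levels of `preds1` from above): its micro average is simply the prevalence of `"None"`, while its macro average is far lower because the two minority classes score zero.
+
+```{r}
+preds_const <- factor(rep("None", length(preds1)), levels = levels(preds1))
+multiclass_acc(preds_const, target)
+multiclass_acc(preds_const, target, average = "macro")
+```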
+
+### Confusion Scores
+
+To compute confusion matrices and the corresponding scores, we can use either `confusion_scores` to obtain a binary or class-specific result, or `multiclass_confusion_scores` to get the contingency table or class-specific confusion matrices.
+
+```{r}
+## class specific
+mtx1 = confusion_scores(preds1=="Most", target=="Most")$matrix
+mtx2 = multiclass_confusion_scores(preds1, target, classtype = "Most")$matrix
+stopifnot(mtx1==mtx2); mtx1
+```
+
+If no class is specified, `multiclass_confusion_scores` returns the multiclass contingency table.
+
+```{r}
+multiclass_confusion_scores(preds1, target)
+```
+
+### Precision scores
+
+To see how reliable a model's positive predictions are, `binary_precision` carries out the computation in the binary case. Predicted probabilities can also serve as input, with a customizable threshold used to turn them into labels (just as for the accuracy scores above). Here we demonstrate probability input for a precision score computed in a one-versus-the-rest sense.
+
+```{r}
+binary_precision(preds_prob1[,"None"], target=="None", threshold=0.40)
+```
+
+Model performance in terms of precision can be similarly summarized with the micro or macro averaging method.
+
+```{r}
+mic_p_s = multiclass_precision(preds1, target, average = "micro")
+mac_p_s = multiclass_precision(preds1, target, average = "macro")
+
+mic_p_r = multiclass_precision(preds2, target, average = "micro")
+mac_p_r = multiclass_precision(preds2, target, average = "macro")
+
+data.frame(model1 = c(mic_p_s, mac_p_s), model2 = c(mic_p_r, mac_p_r), row.names = c("Micro Average", "Macro Average"))
+```
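+
+Finally, note that precision is undefined when a model makes no positive predictions at all; `binary_precision` then returns `NaN` with a warning, which is worth checking for before averaging. A minimal sketch with toy probabilities that never clear the cut-off:
+
+```{r}
+# tp + fp == 0 here, so the result is NaN and a warning is raised
+binary_precision(c(0.1, 0.2), c(1, 0), threshold = 0.9)
+```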

diff --git a/vignettes/Tutorial.html b/vignettes/Tutorial.html
index 72985e9..2192c38 100644
--- a/vignettes/Tutorial.html
+++ b/vignettes/Tutorial.html
[Tutorial.html: re-knitted copy of Tutorial.Rmd; the HTML markup is omitted here. The rendered results of the updated chunks were:]

library(randomForest)
#> randomForest 4.7-1.1
#> Type rfNews() to see new features/changes/bug fixes.

data.frame(model1 = c(e_s1, e_s2, e_s3), model2 = c(e_r1, e_r2, e_r3), row.names = c("Most", "Several", "None"))
#>            model1    model2
#> Most    0.9595142 0.9651822
#> Several 0.8348178 0.8291498
#> None    0.1854251 0.1829960

table(target)
#> target
#>    None Several    Most 
#>     983     185      67

data.frame(model1 = c(mic_e_s, mac_e_s), model2 = c(mic_e_r, mac_e_r), row.names = c("Micro Average", "Macro Average"))
#>                  model1    model2
#> Micro Average 0.8680162 0.8753036
#> Macro Average 0.0000000 0.0000000

stopifnot(mtx1==mtx2); mtx1
#>      [,1] [,2]
#> [1,]   21    4
#> [2,]   46 1164

multiclass_confusion_scores(preds1, target)
#>          target
#> preds     None Several Most
#>   None     967      98   40
#>   Several   15      84    6
#>   Most       1       3   21

binary_precision(preds_prob1[,"None"], target=="None", threshold=0.40)
#> [1] 0.9130435

data.frame(model1 = c(mic_p_s, mac_p_s), model2 = c(mic_p_r, mac_p_r), row.names = c("Micro Average", "Macro Average"))
#>                  model1    model2
#> Micro Average 0.8680162 0.8753036
#> Macro Average 0.8383710 0.8998470