diff --git a/DESCRIPTION b/DESCRIPTION index 10126bc..ef56ee1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ShrinkCovMat Type: Package Title: Shrinkage Covariance Matrix Estimators -Version: 1.4.7 +Version: 2.0.0 Authors@R: person(given = "Anestis", family = "Touloumis", diff --git a/R/shrinkcovmat.R b/R/shrinkcovmat.R index f9cf169..9f3a329 100644 --- a/R/shrinkcovmat.R +++ b/R/shrinkcovmat.R @@ -10,8 +10,10 @@ #' with diagonal elements the corresponding sample variances), and (c) the #' \code{identity} matrix. #' -#' The rows of the data matrix \code{data} correspond to variables and the -#' columns to subjects. +#' The rows of the data matrix \code{data} correspond to variables/features and +#' the columns to subjects. +#' +#' To select the target covariance matrix see \code{\link{targetselection}}. #' #' @param data a numeric matrix containing the data. #' @param centered a logical indicating if the mean vector is the zero vector. diff --git a/R/shrinkcovmat.equal.R b/R/shrinkcovmat.equal.R index 00ecf81..df418f5 100644 --- a/R/shrinkcovmat.equal.R +++ b/R/shrinkcovmat.equal.R @@ -30,11 +30,11 @@ NULL #' @rdname ShrinkCovMat-deprecated #' @section \code{shrinkcovmat.equal}: -#' For \code{shrinkcovmat.equal}, use \code{\link{shrinkcovmat}}. +#' For \code{shrinkcovmat.equal}, use \code{\link{shrinkcovmat}} and set \code{target = "spherical"}. 
#' #' @export shrinkcovmat.equal <- function(data, centered = FALSE) { # nolint - .Deprecated(msg = "Use instead function 'shrinkcovmat' with argument 'target' equal to 'sphericity'") + .Deprecated(msg = "Use instead function 'shrinkcovmat' with argument 'target' equal to 'spherical'") if (!is.matrix(data)) data <- as.matrix(data) p <- nrow(data) n <- ncol(data) diff --git a/R/shrinkcovmat.identity.R b/R/shrinkcovmat.identity.R index 786969c..00ce7c5 100644 --- a/R/shrinkcovmat.identity.R +++ b/R/shrinkcovmat.identity.R @@ -27,7 +27,7 @@ NULL #' @rdname ShrinkCovMat-deprecated #' @section \code{shrinkcovmat.identity}: -#' For \code{shrinkcovmat.identity}, use \code{\link{shrinkcovmat}}. +#' For \code{shrinkcovmat.identity}, use \code{\link{shrinkcovmat}} and set \code{target = "identity"}. #' #' @export shrinkcovmat.identity <- function(data, centered = FALSE) { # nolint diff --git a/R/shrinkcovmat.unequal.R b/R/shrinkcovmat.unequal.R index 277f74d..6b753ed 100644 --- a/R/shrinkcovmat.unequal.R +++ b/R/shrinkcovmat.unequal.R @@ -30,12 +30,12 @@ NULL #' @rdname ShrinkCovMat-deprecated #' @section \code{shrinkcovmat.unequal}: -#' For \code{shrinkcovmat.unequal}, use \code{\link{shrinkcovmat}}. +#' For \code{shrinkcovmat.unequal}, use \code{\link{shrinkcovmat}} and set \code{target = "diagonal"}. 
#' #' @export shrinkcovmat.unequal <- function(data, centered = FALSE) { # nolint - .Deprecated(msg = "Use instead function 'shrinkcovmat' with argument 'target' equal to 'diagonality'") + .Deprecated(msg = "Use instead function 'shrinkcovmat' with argument 'target' equal to 'diagonal'") if (!is.matrix(data)) data <- as.matrix(data) p <- nrow(data) n <- ncol(data) diff --git a/README.Rmd b/README.Rmd index d9745ff..0bc2982 100644 --- a/README.Rmd +++ b/README.Rmd @@ -8,7 +8,7 @@ references: given: Anestis container-title: Computational Statistics \& Data Analysis volume: 83 - URL: 'https://doi.org/10.1016/j.csda.2014.10.018' + URL: 'https://www.sciencedirect.com/science/article/pii/S0167947314003107' page: 251-261 type: article-journal issued: @@ -82,6 +82,7 @@ Calculation of the corresponding optimal shrinkage intensities is discussed in @ The utility function `targetselection` is designed to ease the selection of the target matrix. This is based on empirical observation by inspecting the estimated optimal intensities and the range and average of the sample variances. + ## Example Consider the colon cancer data example analyzed in @Touloumis2015. The data consists of two tissue groups: the normal tissue group and the tumor tissue group. ```{r} diff --git a/README.md b/README.md index 177e19b..72a2fe5 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # ShrinkCovMat: Shrinkage Covariance Matrix Estimators [![Github -version](https://img.shields.io/badge/GitHub%20-1.4.6-green.svg)](%22commits/master%22) +version](https://img.shields.io/badge/GitHub%20-2.0.0-green.svg)](%22commits/master%22) [![R-CMD-check](https://github.com/AnestisTouloumis/ShrinkCovMat/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/AnestisTouloumis/ShrinkCovMat/actions/workflows/R-CMD-check.yaml) [![Project Status: Active The project has reached a stable, usable state and is being actively @@ -181,7 +181,8 @@ estimated_covariance_tumor Touloumis A (2015). 
"Nonparametric Stein-type Shrinkage Covariance Matrix Estimators in High-Dimensional Settings." _Computational - Statistics & Data Analysis_, -10. R package version 1.4.6. + Statistics & Data Analysis_, *83*, 251-261. + . A BibTeX entry for LaTeX users is @@ -190,9 +191,9 @@ estimated_covariance_tumor author = {Anestis Touloumis}, year = {2015}, journal = {Computational Statistics & Data Analysis}, - number = {2}, - note = {R package version 1.4.6}, - pages = {-10}, + volume = {83}, + pages = {251-261}, + url = {https://www.sciencedirect.com/science/article/pii/S0167947314003107}, } # References @@ -203,8 +204,8 @@ estimated_covariance_tumor Touloumis, A. (2015) [Nonparametric Stein-type Shrinkage Covariance Matrix Estimators in High-Dimensional -Settings](https://doi.org/10.1016/j.csda.2014.10.018). *Computational -Statistics & Data Analysis*, **83**, 251–261. +Settings](https://www.sciencedirect.com/science/article/pii/S0167947314003107). +*Computational Statistics & Data Analysis*, **83**, 251–261. 
diff --git a/inst/CITATION b/inst/CITATION index 8589ee2..7479f4b 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,12 +1,10 @@ -note <- sprintf("R package version %s", meta$Version) - bibentry(bibtype = "Article", header = "To cite 'ShrinkCovMat' in publications, please use:", title = "Nonparametric Stein-type Shrinkage Covariance Matrix Estimators in High-Dimensional Settings", author = as.person("Anestis Touloumis"), - year = 2015, + year = "2015", journal= "Computational Statistics & Data Analysis", - volume= 83, - note = note, - pages= {251-261} + volume= "83", + pages= "251-261", + url = "https://www.sciencedirect.com/science/article/pii/S0167947314003107" ) diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index 90ba626..cc04222 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -2,10 +2,11 @@ \title{NEWS file for the \pkg{ShrinkCovMat} package} -\section{Changes in Version 1.4.7 (2023-07-10)}{ +\section{Changes in Version 2.0.0 (2023-07-11)}{ \subsection{NEW FEATURES}{ \itemize{ \item{Added function \code{shrinkcovmat} to obtain the shrinkage estimates.} + \item{Added vignette.} } } \subsection{MINOR CHANGES}{ diff --git a/man/ShrinkCovMat-deprecated.Rd b/man/ShrinkCovMat-deprecated.Rd index fdae8a1..bba50c0 100644 --- a/man/ShrinkCovMat-deprecated.Rd +++ b/man/ShrinkCovMat-deprecated.Rd @@ -22,17 +22,17 @@ The functions listed below are deprecated and will be defunct in } \section{\code{shrinkcovmat.equal}}{ -For \code{shrinkcovmat.equal}, use \code{\link{shrinkcovmat}}. +For \code{shrinkcovmat.equal}, use \code{\link{shrinkcovmat}} and set \code{target = "spherical"}. } \section{\code{shrinkcovmat.identity}}{ -For \code{shrinkcovmat.identity}, use \code{\link{shrinkcovmat}}. +For \code{shrinkcovmat.identity}, use \code{\link{shrinkcovmat}} and set \code{target = "identity"}. } \section{\code{shrinkcovmat.unequal}}{ -For \code{shrinkcovmat.unequal}, use \code{\link{shrinkcovmat}}. 
+For \code{shrinkcovmat.unequal}, use \code{\link{shrinkcovmat}} and set \code{target = "diagonal"}. } \keyword{internal} diff --git a/man/shrinkcovmat.Rd b/man/shrinkcovmat.Rd index de06181..05289b9 100644 --- a/man/shrinkcovmat.Rd +++ b/man/shrinkcovmat.Rd @@ -37,8 +37,10 @@ variances), the \code{diagonal} sample covariance matrix (the diagonal matrix with diagonal elements the corresponding sample variances), and (c) the \code{identity} matrix. -The rows of the data matrix \code{data} correspond to variables and the -columns to subjects. +The rows of the data matrix \code{data} correspond to variables/features and +the columns to subjects. + +To select the target covariance matrix see \code{\link{targetselection}}. } \examples{ data("colon") diff --git a/vignettes/Shrink_Covariance_Matrix.Rmd b/vignettes/Shrink_Covariance_Matrix.Rmd index 5aab08e..515764a 100644 --- a/vignettes/Shrink_Covariance_Matrix.Rmd +++ b/vignettes/Shrink_Covariance_Matrix.Rmd @@ -14,9 +14,6 @@ knitr::opts_chunk$set( ) ``` -```{r setup} -library("ShrinkCovMat") -``` # Sample Covariance Matrix @@ -39,7 +36,6 @@ A simple solution is to consider covariance estimators of the form where $\mathbf T$ is a known positive-definite covariance matrix and $0 < \lambda_{\mathbf T} < 1$ is the known optimal intensity. The advantages of $\mathbf S^{\ast}_{\mathbf T}$ include that it is: (i) non-singular (ii) well-conditioned, (iii) invariant to permutations of the order of the $p$ variables, (iv) consistent to departures from a multivariate normal model, (v) not necessarily sparse, (vi) expressed in closed form, and (vii) computationally cheap regardless of $p$. - In practice, the optimal shrinkage intensity $\lambda_{\mathbf T}$ is unknown and needs to be estimated by minimizing a risk function, such as the expectation of the Frobenius norm of the difference between $\mathbf S^{\ast}_{\mathbf T}$ and $\boldsymbol \Sigma$. 
This package implements the estimation procedures for $\lambda_{\mathbf T}$ described in Touloumis (2015). @@ -47,7 +43,7 @@ In practice, the optimal shrinkage intensity $\lambda_{\mathbf T}$ is unknown an Let $s^{2}_{11}, s^{2}_{22}, \ldots, s^{2}_{pp}$ be the corresponding diagonal elements of the sample covariance matrix $\mathbf S$, that is the sample variances of the $p$ features. -The diagonal target covariance matrix $\mathbf T_{D}$ is a diagonal matrix whose diagonal elements are equal to the sample variances +The `diagonal` target covariance matrix $\mathbf T_{D}$ is a diagonal matrix whose diagonal elements are equal to the sample variances \[ \mathbf T_{D} @@ -61,7 +57,7 @@ The diagonal target covariance matrix $\mathbf T_{D}$ is a diagonal matrix whose \] -The spherical target covariance matrix $\mathbf T_{S}$ is the diagonal matrix +The `spherical` target covariance matrix $\mathbf T_{S}$ is the diagonal matrix \[ \mathbf T_{S} @@ -81,7 +77,7 @@ s^2 = \frac{1}{p} \sum_{k=1}^{p} s_{kk}^{2}. \] -The identity target covariance matrix $\mathbf T_{I}$ is the $p \times p$ identity matrix +The `identity` target covariance matrix $\mathbf T_{I}$ is the $p \times p$ identity matrix \[ \mathbf T_{I} @@ -97,29 +93,89 @@ The identity target covariance matrix $\mathbf T_{I}$ is the $p \times p$ identi \] + ## Positive-definiteness of the Target Matrices -1. The identity covariance target matrix $\mathbf T_{I}$ is always positive-definite. -1. The spherical covariance target matrix is $\mathbf T_{S}$ is positive-definite provided that at least one of the $p$ sample variances is not $0$. -1. The diagonal covariance target matrix $\mathbf T_{D}$ is positive-definite provided that none of the $p$ sample variances is equal to $0$. +1. The `identity` covariance target matrix $\mathbf T_{I}$ is always positive-definite. +1. The `spherical` covariance target matrix $\mathbf T_{S}$ is positive-definite provided that at least one of the $p$ sample variances is not $0$. 
+1. The `diagonal` covariance target matrix $\mathbf T_{D}$ is positive-definite provided that none of the $p$ sample variances is equal to $0$. -An error message will be returned when $\mathbf T_{D}$ or $\mathbf T_{S}$ will not be positive-definite. +An error message will be returned when $\mathbf T_{D}$ or $\mathbf T_{S}$ will not be positive-definite. In this case, the user should either remove all the features (rows) whose sample variance is $0$ or use a different target matrix (e.g. $\mathbf T_{I}$). ## Selection of Target Matrix +In practice, to select a suitable target covariance matrix, one can inspect the optimal shrinkage intensity of the three possible target matrices. If these differ significantly, then one can choose as target matrix the one with the largest $\lambda$ value. Otherwise, the choice of the target matrix can be based on examining the $p$ sample variances. -To select the most suitable target covariance matrix, one should inspect the optimal shrinkage intensity of the three possible target matrices. If these differ significantly then one can choose as $\mathbf T$ the one with the largest $\lambda$ value. Otherwise, the choice of $\mathbf T$ should be based on examining the $p$ sample variances. - -The identity target matrix $\mathbf T_{I}$ should be chosen when all the values of the $p$ sample variances are close to $1$ +The `identity` target matrix $\mathbf T_{I}$ is sensible when all the values of the $p$ sample variances are close to $1$ \[ s^{2}_{11} \approx s^{2}_{11} \approx \ldots \approx s^{2}_{pp} \approx 1. \] -The spherical target covariance matrix $\mathbf T_{S}$ should be chosen when the range of the $p$ sample variances is small +The `spherical` target covariance matrix $\mathbf T_{S}$ is sensible when the range of the $p$ sample variances is small \[ s^{2}_{11} \approx s^{2}_{22} \approx \ldots \approx s^{2}_{pp}. 
\] -The diagonal target covariance matrix $\mathbf T_{D}$ is suitable when the values of the $p$ sample variances vary significantly. +The `diagonal` target covariance matrix $\mathbf T_{D}$ is sensible when the values of the $p$ sample variances vary significantly. + +Hence, the target matrix selection should be based on inspecting the optimal shrinkage intensities and the range and average of the $p$ sample variances. + + +# Example +The colon cancer data, analyzed in Touloumis (2015), consists of two tissue groups: the normal tissue group and the tumor tissue group. + +```{r} +library("ShrinkCovMat") +data("colon") +normal_group <- colon[, 1:40] +dim(normal_group) +tumor_group <- colon[, 41:62] +dim(tumor_group) +``` + +For each of the $`r ncol(normal_group)`$ subjects in the normal group, their gene expression levels were measured for $`r nrow(normal_group)`$ genes. To select the target matrix for the covariance matrix of the normal group, we use the function `targetselection`: + +```{r} +targetselection(normal_group) +``` + +The estimated optimal shrinkage intensity for the `spherical` matrix is slightly larger than the other two. In addition, the sample variances appear to be of similar magnitude and their average is smaller than $1$. Thus, the `spherical` matrix seems to be the most appropriate target for the covariance matrix. The resulting covariance matrix estimate is: + +```{r} +estimated_covariance_normal <- shrinkcovmat(normal_group, target = "spherical") +estimated_covariance_normal +``` + +We follow a similar procedure to estimate the covariance matrix of the tumor group: +```{r} +targetselection(tumor_group) +estimated_covariance_tumor <- shrinkcovmat(tumor_group, target = "spherical") +estimated_covariance_tumor +``` + +# Compatibility + +Version 2.0.0 introduces the function `shrinkcovmat` which in the next release of `ShrinkCovMat` will replace the deprecated functions `shrinkcovmat.identity`, `shrinkcovmat.equal` and `shrinkcovmat.unequal`. 
The table below illustrates the changes: + + +```{r echo = FALSE} +package_before <- c("`shrinkcovmat.identity(data)`", + "`shrinkcovmat.equal(data)`", + "`shrinkcovmat.unequal(data)`") +package_after <- c("`shrinkcovmat(data, target = 'identity')`", + "`shrinkcovmat(data, target = 'spherical')`", + "`shrinkcovmat(data, target = 'diagonal')`") +df <- cbind(package_before, package_after) +colnames(df) <- c("Deprecated", "Replacement") +df |> + knitr::kable(caption = "Deprecated functions since v2.0.0 and their replacements in newer versions.") +``` + + +# How To Cite +```{r} +citation("ShrinkCovMat") +``` +