Merge pull request #404 from USEPA/shiny_review

Updates from Shiny review
USEPA · Feb 22, 2024 · 400b12e · 400b12e
2 parents cb2183f + 879278d
commit 400b12e
Show file tree

Hide file tree

Showing 30 changed files with 843 additions and 370 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,9 +2,10 @@ Package: TADA
 Type: Package
 Title: TADA (Tools for Automated Data Analysis) R Package
 Version: 0.0.1
-Organization: U.S. Environmental Protection Agency
 Authors@R: 
-  c(person(given = "Cristina",
+  c(person(given = "U.S. Environmental Protection Agency",
+           role = "aut"),
+    person(given = "Cristina",
            family = "Mullin",
            role = c("aut", "cre"),
            email = "mullin.cristina@epa.gov",
@@ -15,6 +16,9 @@ Authors@R:
     person(given = "Elise",
            family = "Hinman",
            role = "aut"), 
+    person(given = "Hillary",
+           family = "Marler",
+           role = "aut"), 
     person(given = "Kathleen",
            family = "Healy",
            role = "aut"), 
@@ -24,6 +28,9 @@ Authors@R:
     person(given = "Laura",
            family = "Decicco", 
            role = "ctb"),
+    person(given = "Renae",
+           family = "Myers",
+           role = "aut"), 
     person(given = "Brad",
            family = "Cooper", 
            role = "ctr"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -61,4 +61,7 @@ export(TADA_Stats)
 export(TADA_SubstituteDeprecatedChars)
 export(TADA_SummarizeColumn)
 export(TADA_TwoCharacteristicScatterplot)
+export(TADA_addPoints)
+export(TADA_addPolys)
+export(getFeatureLayer)
 importFrom(magrittr,"%>%")
diff --git a/R/Figures.R b/R/Figures.R
@@ -369,8 +369,8 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier"))
 #' @return A leaflet map that shows all sites in the data frame, where larger point sizes
 #' indicate more results collected at a site, and darker point colors indicate more
 #' characteristics measured at that site. Users can click on points on the map to see
-#' a pop-up window with exact counts for measurements, visits, and characteristics
-#' associated with each site.
+#' a pop-up window with exact counts for measurements (i.e. number of rows),
+#' visits (number of unique Activity ID's), and characteristics associated with each site.
 #'
 #' @export
 #'
@@ -381,9 +381,9 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier"))
 #' data(Data_6Tribes_5y_Harmonized)
 #'
 #' # Create maps:
-#' TADA_OverviewMap(Data_Nutrients_UT)
-#' TADA_OverviewMap(Data_NCTCShepherdstown_HUC12)
-#' TADA_OverviewMap(Data_6Tribes_5y_Harmonized)
+#' TADA:::TADA_OverviewMap(Data_Nutrients_UT)
+#' TADA:::TADA_OverviewMap(Data_NCTCShepherdstown_HUC12)
+#' TADA:::TADA_OverviewMap(Data_6Tribes_5y_Harmonized)
 #'
 TADA_OverviewMap <- function(.data) {
   suppressWarnings({
@@ -395,12 +395,11 @@ TADA_OverviewMap <- function(.data) {
       return(leaflet::addLegend(map, colors = colorAdditions, labels = labelAdditions, opacity = opacity, title = "Measurements"))
     }
 
-
     sumdat <- .data %>%
       dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure) %>%
       dplyr::summarise("Sample_Count" = length(unique(ResultIdentifier)), "Visit_Count" = length(unique(ActivityStartDate)), "Parameter_Count" = length(unique(TADA.CharacteristicName)), "Organization_Count" = length(unique(OrganizationIdentifier)))
 
-    pt_sizes <- round(quantile(sumdat$Sample_Count, probs = c(0.1, 0.25, 0.5, 0.75)), 0)
+    pt_sizes <- round(stats::quantile(sumdat$Sample_Count, probs = c(0.1, 0.25, 0.5, 0.75)), 0)
     pt_labels <- c(
       paste0("<=", pt_sizes[1]),
       paste0(">", pt_sizes[1]),
@@ -424,10 +423,25 @@ TADA_OverviewMap <- function(.data) {
       domain = sumdat$Parameter_Count
     )
 
+    # Tribal layers will load by default in the overview map, restricted by the bounding box of the current dataset
+    # They can be toggled on and off using a button (all layers work together and can't be turned on/off individually). 
+    # Colors and icons are as discussed previously (orange/tan colors and open triangle icons for points) but can be changed to match HMW if desired.
+    bbox <- sf::st_bbox(
+      c(
+        xmin = min(sumdat$TADA.LongitudeMeasure),
+        ymin = min(sumdat$TADA.LatitudeMeasure),
+        xmax = max(sumdat$TADA.LongitudeMeasure),
+        ymax = max(sumdat$TADA.LatitudeMeasure)
+      ),
+      crs = sf::st_crs(sumdat)
+    )
+    vbbox <- bbox %>%
+      as.vector()
+
     map <- leaflet::leaflet() %>%
       leaflet::addProviderTiles("Esri.WorldTopoMap", group = "World topo", options = leaflet::providerTileOptions(updateWhenZooming = FALSE, updateWhenIdle = TRUE)) %>%
       leaflet::clearShapes() %>% # get rid of whatever was there before if loading a second dataset
-      leaflet::fitBounds(lng1 = min(sumdat$TADA.LongitudeMeasure), lat1 = min(sumdat$TADA.LatitudeMeasure), lng2 = max(sumdat$TADA.LongitudeMeasure), lat2 = max(sumdat$TADA.LatitudeMeasure)) %>% # fit to bounds of data in tadat$raw
+      leaflet::fitBounds(lng1 = vbbox[1], lat1 = vbbox[2], lng2 = vbbox[3], lat2 = vbbox[4]) %>% # fit to bounds of data in tadat$raw
       leaflet.extras::addResetMapButton() %>% # button to reset to initial zoom and lat/long
       leaflet::addCircleMarkers(
         data = sumdat,
@@ -457,6 +471,17 @@ TADA_OverviewMap <- function(.data) {
         colors = "black",
         labels = site_legend$Sample_n, sizes = site_legend$Point_size * 2
       )
+    # TADA_addPolys and TADA_addPoints are in Utilities.R
+    map <- TADA_addPolys(map, AKAllotmentsUrl, "Tribes", "Alaska Allotments", bbox)
+    map <- TADA_addPolys(map, AmericanIndianUrl, "Tribes", "American Indian", bbox)
+    map <- TADA_addPolys(map, OffReservationUrl, "Tribes", "Off Reservation", bbox)
+    map <- TADA_addPolys(map, OKTribeUrl, "Tribes", "Oklahoma Tribe", bbox)
+    map <- TADA_addPoints(map, AKVillagesUrl, "Tribes", "Alaska Native Villages", bbox)
+    map <- TADA_addPoints(map, VATribeUrl, "Tribes", "Virginia Tribe", bbox)
+    map <- leaflet::addLayersControl(map,
+      overlayGroups = c("Tribes"),
+      options = leaflet::layersControlOptions(collapsed = FALSE)
+    )
     return(map)
   })
 }

diff --git a/R/Filtering.R b/R/Filtering.R
@@ -243,62 +243,63 @@ TADA_FieldValuesTable <- function(.data, field = "null", characteristicName = "n
 
 #' TADA_AnalysisDataFilter
 #'
-#' This function will filter the data set and retain only the media types
+#' This function will filter the data frame and retain only the media types
 #' selected by the user. It uses ActivityMediaSubdivisionName, AquiferName, 
-#' LocalAqfrName,ConstructionDateText, WellDepthMeasure.MeasureValue,
+#' LocalAqfrName, ConstructionDateText, WellDepthMeasure.MeasureValue,
 #' WellDepthMeasure.MeasureUnitCode, WellHoleDepthMeasure.MeasureValue, and
 #' WellHoleDepthMeasure.MeasureUnitCode to identify groundwater samples. Users 
-#' can select whether sediment, fish tissue and/or surface water should be included. in the data set. An 
-#' An additional column, TADA.AssessmentData.Flag, specifies whether each row should 
-#' be included in the assessment workflow and why. Setting clean = TRUE, means
-#' that all results not flagged for use in assessment workflow will be removed 
-#' and the TADA.AssessmentData.Flag column will not be added.
+#' can select whether sediment, groundwater and/or surface water should be included. 
+#' An additional column, TADA.UseForAnalysis.Flag, specifies whether each row should 
+#' be included in the analysis workflow and why. Setting clean = TRUE, means
+#' that all results not flagged for use in the analysis workflow will be removed 
+#' and the ADA.UseForAnalysis.Flag column will not be added.
 #' 
-#' *Need to add fish tissue to this function once new WQX profiles are available.
-#' (HRM, 1/22/4)
-#'
-#'
 #' @param .data A TADA profile object
 #' 
-#' @param clean Boolean argument; removes all results not flagged for use in
-#' assessment workflow. TADA.Media.Flag and TADA.AssessmentData.Flag 
-#' columns will not be added Default is clean = TRUE.
+#' @param clean Boolean argument; TRUE removes all results not flagged for use in
+#' analysis workflow. TADA.UseForAnalysis.Flag column displaying the media type (as
+#' determined by this function) and "Yes"/"No" will be added when clean = FALSE.
+#' Results flagged "Yes" are identified as usable for further analysis. Default = FALSE.
 #' 
 #' @param surface_water Boolean argument; specifies whether surface water
-#' results should be included in the returned data frame. Default is 
-#' surface_water = TRUE, surface water samples are retained in the data frame.
+#' results should be flagged or removed in the returned data frame. Default is 
+#' surface_water = TRUE, surface water results are identified as usable for analysis.
 #' 
 #' @param ground_water Boolean argument; specifies whether ground water
-#' results should be included in the returned data frame. Default is 
-#' ground_water = FALSE, ground water samples are  not retained in the data 
-#' frame.
+#' results should be flagged or removed in the returned data frame. Default is 
+#' ground_water = FALSE, ground water results are identified as not usable for analysis.
 #' 
 #' @param sediment Boolean argument; specifies whether sediment results should 
-#' be included in the returned data frame. Default is sediment = FALSE, 
-#' sediment samples are not retained in the data frame.
+#' be flagged or removed in the returned data frame. Default is sediment = FALSE, 
+#' sediment results are identified as not usable for analysis.
 #' 
 #' @return If clean = TRUE, returns the data frame with only the media types
-#' selected by the user. If clean = FALSE, returns the data frame with two
-#' additional columns, "TADA.Media.Flag" and "TADA.AssessmentData.Flag",
-#' indicating which results should be excluded from assessments based on user
-#' input.
+#' selected as usable (set to TRUE in function input) by the user. 
+#' If clean = FALSE, returns the data frame and an additional column, 
+#' TADA.UseForAnalysis.Flag, indicating the media type (as determined by this function)
+#' and which results should be included or excluded from assessments based on user input.
 #' 
 #' @export
 #' 
 #' @examples
-#' # Return data frame with only surface water results
 #' data(Data_6Tribes_5y_Harmonized)
-#' Data_6Tribes_Assessment <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized)
+#' # Returns data with ONLY surface water results retained and no TADA.UseForAnalysis.Flag column
+#' Data_6Tribes_Assessment1 <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized, clean = TRUE, surface_water = TRUE, ground_water = FALSE, sediment = FALSE)
+#' 
+#' # Returns data frame with ONLY surface water results identified as usable and adds TADA.UseForAnalysis.Flag column.
+#' Data_6Tribes_Assessment2 <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized, clean = FALSE, surface_water = TRUE, ground_water = FALSE, sediment = FALSE)
+#' unique(Data_6Tribes_Assessment2$TADA.UseForAnalysis.Flag)
 #' 
-#' # Return data frame with surface water results and TADA.UseForAnalysis.Flag column
-#' Data_6Tribes_Assessment <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized, clean = FALSE)
 
 TADA_AnalysisDataFilter <- function(.data, 
-                                      clean = TRUE,
-                                      surface_water = TRUE,
-                                      ground_water = FALSE,
-                                      sediment = FALSE) {
-
+                                    clean = FALSE,
+                                    surface_water = TRUE,
+                                    ground_water = FALSE,
+                                    sediment = FALSE) {
+
+  # *Need to add fish tissue to this function once new WQX profiles are available.
+  # (HRM, 1/22/4)
+
   # import MonitoringLocationTypeNames and TADA.Media.Flags
   sw.sitetypes <- utils::read.csv(system.file("extdata", "WQXMonitoringLocationTypeNameRef.csv", package = "TADA")) %>%
     dplyr::select(Name, TADA.Media.Flag) %>%
@@ -323,26 +324,23 @@ TADA_AnalysisDataFilter <- function(.data,
     # add TADA.Media.Flag for additional rows based on MonitoringLocationTypeName
     dplyr::left_join(sw.sitetypes, by = "MonitoringLocationTypeName") %>%
     dplyr::mutate(TADA.Media.Flag = ifelse(is.na(TADA.Media.Flag),
-                                                 ML.Media.Flag, TADA.Media.Flag)) %>%
+                                                 ML.Media.Flag, TADA.Media.Flag),
+                  TADA.Media.Flag = toupper(TADA.Media.Flag)) %>%
     dplyr::select(-ML.Media.Flag)
 
   print("TADA_AnalysisDataFilter: Identifying groundwater results.")
 
-  { if (surface_water == TRUE)
+  if (surface_water == TRUE) {
 
-    sur.water.data <- .data %>%
-      dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
-      dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")
+  sur.water.flag <- "Yes"
 
     print("TADA_AnalysisDataFilter: Flagging surface water results to include in assessments.")
 
     }
 
-  { if (surface_water == FALSE)
+  if (surface_water == FALSE) {
 
-    sur.water.data <- .data %>%
-      dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
-      dplyr::mutate(TADA.UseForAnalysis.Flag = "No")
+    sur.water.flag <- "Yes"
 
     print("TADA_AnalysisDataFilter: Flagging surface water results to exclude from assessments.")
 
@@ -351,86 +349,73 @@ TADA_AnalysisDataFilter <- function(.data,
 
   if (ground_water == TRUE) {
 
-    gr.water.data <- .data %>%
-      dplyr::filter(TADA.Media.Flag == "Groundwater") %>%
-      dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")
+    gr.water.flag <- "Yes"
 
     print("TADA_AnalysisDataFilter: Flagging groundwater results to include in assessments.")
 
 
   }
 
   if (ground_water == FALSE) {
-    gr.water.data <- .data %>%
-      dplyr::filter(TADA.Media.Flag == "Groundwater") %>%
-      dplyr::mutate(TADA.UseForAnalysis.Flag = "No")
+
+    gr.water.flag <- "No"
 
     print("TADA_AnalysisDataFilter: Flagging groundwater results to exclude from assessments.")
 
   }
+
   if (sediment == TRUE) {
-    sed.data <- .data %>%
-      dplyr::filter(ActivityMediaName %in% c("SEDIMENT", "Sediment", "sediment")) %>%
-      dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")
+
+    sed.flag <- "Yes"
 
     print("TADA_AnalysisDataFilter: Flagging sediment results to include in assessments.")
 
   }
 
   if (sediment == FALSE) {
-    sed.data <- .data %>%
-      dplyr::filter(ActivityMediaName %in% c("SEDIMENT", "Sediment", "sediment")) %>%
-      dplyr::mutate(TADA.UseForAnalysis.Flag = "No")
+
+    sed.flag <- "No"
 
     print("TADA_AnalysisDataFilter: Flagging sediment results to exclude from assessments.")
 
   }
 
   if (clean == TRUE) {
 
-    assessment.data <- sur.water.data %>%
-      suppressMessages(dplyr::full_join(gr.water.data)) %>%
-      suppressMessages(dplyr::full_join(sed.data)) %>%
-      dplyr::filter(TADA.UseForAnalysis.Flag == "Yes") %>%
-      dplyr::select(-TADA.UseForAnalysis.Flag, -TADA.Media.Flag) %>%
+    .data <- .data %>%
+      dplyr::mutate(TADA.UseForAnalysis.Flag = dplyr::case_when(
+        TADA.Media.Flag == "SEDIMENT" ~ paste(sed.flag, " - ", TADA.Media.Flag, sep = ""),
+        TADA.Media.Flag == "SURFACE WATER" ~ paste(sur.water.flag, " - ", TADA.Media.Flag, sep = ""),
+        TADA.Media.Flag == "GROUNDWATER" ~ paste(gr.water.flag, " - ", TADA.Media.Flag, sep = ""),
+        is.na(TADA.Media.Flag) ~ "No - OTHER",
+        !TADA.Media.Flag %in% c("SEDIMENT", "SURFACE WATER", "GROUNDWATER", "OTHER") ~ paste("No - ", TADA.Media.Flag, sep = "")
+      )) %>%
+      dplyr::filter(stringr::str_detect(TADA.UseForAnalysis.Flag, "Yes")) %>%
+      dplyr::select(c(-TADA.UseForAnalysis.Flag, -TADA.Media.Flag)) %>%
       TADA_OrderCols()
 
-    rm(sur.water.data, gr.water.data, sed.data)
-
     print("TADA_AnalysisDataFilter: Removing results flagged for exclusion from assessments.")
 
-    return(assessment.data)
+    return(.data)
 
 
   }
 
   if (clean == FALSE) {
 
-    assessment.data <- sur.water.data %>%
-      suppressMessages(dplyr::full_join(gr.water.data)) %>%
-      suppressMessages(dplyr::full_join(sed.data)) %>%
-      dplyr::mutate(TADA.UseForAnalysis.Flag = paste(TADA.UseForAnalysis.Flag, " - ", toupper(TADA.Media.Flag), sep = ""))
-
-    assessment.list <- assessment.data %>%
-      dplyr::select(ResultIdentifier) %>%
-      dplyr::pull()
-
-    other.data <- .data %>%
-      dplyr::filter(!ResultIdentifier %in% assessment.list) %>%
-      dplyr::mutate(TADA.Media.Flag = ifelse(TADA.Media.Flag == "" | is.na(TADA.Media.Flag), "OTHER", TADA.Media.Flag),
-                    TADA.UseForAnalysis.Flag = "No",
-                    TADA.UseForAnalysis.Flag = paste(TADA.UseForAnalysis.Flag, " - ", toupper(TADA.Media.Flag), sep = ""))
-
-    all.data <- assessment.data %>%
-      suppressMessages(dplyr::full_join(other.data)) %>%
+    .data <- .data %>%
+      dplyr::mutate(TADA.UseForAnalysis.Flag = dplyr::case_when(
+        TADA.Media.Flag == "SEDIMENT" ~ paste(sed.flag, " - ", TADA.Media.Flag, sep = ""),
+        TADA.Media.Flag == "SURFACE WATER" ~ paste(sur.water.flag, " - ", TADA.Media.Flag, sep = ""),
+        TADA.Media.Flag == "GROUNDWATER" ~ paste(gr.water.flag, " - ", TADA.Media.Flag, sep = ""),
+        is.na(TADA.Media.Flag) ~ "No - OTHER",
+        !TADA.Media.Flag %in% c("SEDIMENT", "SURFACE WATER", "GROUNDWATER", "OTHER") ~ paste("No - ", TADA.Media.Flag, sep = ""))) %>%
       dplyr::select(-TADA.Media.Flag) %>%
       TADA_OrderCols()
 
 
-    rm(sur.water.data, gr.water.data, sed.data, assessment.data, assessment.list)
-
     print("TADA_AnalysisDataFilter: Returning all results with TADA.UseForAnalysis.Flag column indicating if result should be used for assessments.")
 
-    return(all.data)
+    return(.data)
   }
 }