Skip to content

Commit

Permalink
Merge pull request #404 from USEPA/shiny_review
Browse files Browse the repository at this point in the history
Updates from Shiny review
  • Loading branch information
cristinamullin committed Feb 22, 2024
2 parents cb2183f + 879278d commit 400b12e
Show file tree
Hide file tree
Showing 30 changed files with 843 additions and 370 deletions.
11 changes: 9 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ Package: TADA
Type: Package
Title: TADA (Tools for Automated Data Analysis) R Package
Version: 0.0.1
Organization: U.S. Environmental Protection Agency
Authors@R:
c(person(given = "Cristina",
c(person(given = "U.S. Environmental Protection Agency",
role = "aut"),
person(given = "Cristina",
family = "Mullin",
role = c("aut", "cre"),
email = "mullin.cristina@epa.gov",
Expand All @@ -15,6 +16,9 @@ Authors@R:
person(given = "Elise",
family = "Hinman",
role = "aut"),
person(given = "Hillary",
family = "Marler",
role = "aut"),
person(given = "Kathleen",
family = "Healy",
role = "aut"),
Expand All @@ -24,6 +28,9 @@ Authors@R:
person(given = "Laura",
family = "Decicco",
role = "ctb"),
person(given = "Renae",
family = "Myers",
role = "aut"),
person(given = "Brad",
family = "Cooper",
role = "ctr"),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,7 @@ export(TADA_Stats)
export(TADA_SubstituteDeprecatedChars)
export(TADA_SummarizeColumn)
export(TADA_TwoCharacteristicScatterplot)
export(TADA_addPoints)
export(TADA_addPolys)
export(getFeatureLayer)
importFrom(magrittr,"%>%")
41 changes: 33 additions & 8 deletions R/Figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,8 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier"))
#' @return A leaflet map that shows all sites in the data frame, where larger point sizes
#' indicate more results collected at a site, and darker point colors indicate more
#' characteristics measured at that site. Users can click on points on the map to see
#' a pop-up window with exact counts for measurements, visits, and characteristics
#' associated with each site.
#' a pop-up window with exact counts for measurements (i.e. number of rows),
#' visits (number of unique Activity ID's), and characteristics associated with each site.
#'
#' @export
#'
Expand All @@ -381,9 +381,9 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier"))
#' data(Data_6Tribes_5y_Harmonized)
#'
#' # Create maps:
#' TADA_OverviewMap(Data_Nutrients_UT)
#' TADA_OverviewMap(Data_NCTCShepherdstown_HUC12)
#' TADA_OverviewMap(Data_6Tribes_5y_Harmonized)
#' TADA:::TADA_OverviewMap(Data_Nutrients_UT)
#' TADA:::TADA_OverviewMap(Data_NCTCShepherdstown_HUC12)
#' TADA:::TADA_OverviewMap(Data_6Tribes_5y_Harmonized)
#'
TADA_OverviewMap <- function(.data) {
suppressWarnings({
Expand All @@ -395,12 +395,11 @@ TADA_OverviewMap <- function(.data) {
return(leaflet::addLegend(map, colors = colorAdditions, labels = labelAdditions, opacity = opacity, title = "Measurements"))
}


sumdat <- .data %>%
dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure) %>%
dplyr::summarise("Sample_Count" = length(unique(ResultIdentifier)), "Visit_Count" = length(unique(ActivityStartDate)), "Parameter_Count" = length(unique(TADA.CharacteristicName)), "Organization_Count" = length(unique(OrganizationIdentifier)))

pt_sizes <- round(quantile(sumdat$Sample_Count, probs = c(0.1, 0.25, 0.5, 0.75)), 0)
pt_sizes <- round(stats::quantile(sumdat$Sample_Count, probs = c(0.1, 0.25, 0.5, 0.75)), 0)
pt_labels <- c(
paste0("<=", pt_sizes[1]),
paste0(">", pt_sizes[1]),
Expand All @@ -424,10 +423,25 @@ TADA_OverviewMap <- function(.data) {
domain = sumdat$Parameter_Count
)

# Tribal layers will load by default in the overview map, restricted by the bounding box of the current dataset
# They can be toggled on and off using a button (all layers work together and can't be turned on/off individually).
# Colors and icons are as discussed previously (orange/tan colors and open triangle icons for points) but can be changed to match HMW if desired.
bbox <- sf::st_bbox(
c(
xmin = min(sumdat$TADA.LongitudeMeasure),
ymin = min(sumdat$TADA.LatitudeMeasure),
xmax = max(sumdat$TADA.LongitudeMeasure),
ymax = max(sumdat$TADA.LatitudeMeasure)
),
crs = sf::st_crs(sumdat)
)
vbbox <- bbox %>%
as.vector()

map <- leaflet::leaflet() %>%
leaflet::addProviderTiles("Esri.WorldTopoMap", group = "World topo", options = leaflet::providerTileOptions(updateWhenZooming = FALSE, updateWhenIdle = TRUE)) %>%
leaflet::clearShapes() %>% # get rid of whatever was there before if loading a second dataset
leaflet::fitBounds(lng1 = min(sumdat$TADA.LongitudeMeasure), lat1 = min(sumdat$TADA.LatitudeMeasure), lng2 = max(sumdat$TADA.LongitudeMeasure), lat2 = max(sumdat$TADA.LatitudeMeasure)) %>% # fit to bounds of data in tadat$raw
leaflet::fitBounds(lng1 = vbbox[1], lat1 = vbbox[2], lng2 = vbbox[3], lat2 = vbbox[4]) %>% # fit to bounds of data in tadat$raw
leaflet.extras::addResetMapButton() %>% # button to reset to initial zoom and lat/long
leaflet::addCircleMarkers(
data = sumdat,
Expand Down Expand Up @@ -457,6 +471,17 @@ TADA_OverviewMap <- function(.data) {
colors = "black",
labels = site_legend$Sample_n, sizes = site_legend$Point_size * 2
)
# TADA_addPolys and TADA_addPoints are in Utilities.R
map <- TADA_addPolys(map, AKAllotmentsUrl, "Tribes", "Alaska Allotments", bbox)
map <- TADA_addPolys(map, AmericanIndianUrl, "Tribes", "American Indian", bbox)
map <- TADA_addPolys(map, OffReservationUrl, "Tribes", "Off Reservation", bbox)
map <- TADA_addPolys(map, OKTribeUrl, "Tribes", "Oklahoma Tribe", bbox)
map <- TADA_addPoints(map, AKVillagesUrl, "Tribes", "Alaska Native Villages", bbox)
map <- TADA_addPoints(map, VATribeUrl, "Tribes", "Virginia Tribe", bbox)
map <- leaflet::addLayersControl(map,
overlayGroups = c("Tribes"),
options = leaflet::layersControlOptions(collapsed = FALSE)
)
return(map)
})
}
Expand Down
151 changes: 68 additions & 83 deletions R/Filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -243,62 +243,63 @@ TADA_FieldValuesTable <- function(.data, field = "null", characteristicName = "n

#' TADA_AnalysisDataFilter
#'
#' This function will filter the data set and retain only the media types
#' This function will filter the data frame and retain only the media types
#' selected by the user. It uses ActivityMediaSubdivisionName, AquiferName,
#' LocalAqfrName,ConstructionDateText, WellDepthMeasure.MeasureValue,
#' LocalAqfrName, ConstructionDateText, WellDepthMeasure.MeasureValue,
#' WellDepthMeasure.MeasureUnitCode, WellHoleDepthMeasure.MeasureValue, and
#' WellHoleDepthMeasure.MeasureUnitCode to identify groundwater samples. Users
#' can select whether sediment, fish tissue and/or surface water should be included. in the data set. An
#' An additional column, TADA.AssessmentData.Flag, specifies whether each row should
#' be included in the assessment workflow and why. Setting clean = TRUE, means
#' that all results not flagged for use in assessment workflow will be removed
#' and the TADA.AssessmentData.Flag column will not be added.
#' can select whether sediment, groundwater and/or surface water should be included.
#' An additional column, TADA.UseForAnalysis.Flag, specifies whether each row should
#' be included in the analysis workflow and why. Setting clean = TRUE, means
#' that all results not flagged for use in the analysis workflow will be removed
#' and the ADA.UseForAnalysis.Flag column will not be added.
#'
#' *Need to add fish tissue to this function once new WQX profiles are available.
#' (HRM, 1/22/4)
#'
#'
#' @param .data A TADA profile object
#'
#' @param clean Boolean argument; removes all results not flagged for use in
#' assessment workflow. TADA.Media.Flag and TADA.AssessmentData.Flag
#' columns will not be added Default is clean = TRUE.
#' @param clean Boolean argument; TRUE removes all results not flagged for use in
#' analysis workflow. TADA.UseForAnalysis.Flag column displaying the media type (as
#' determined by this function) and "Yes"/"No" will be added when clean = FALSE.
#' Results flagged "Yes" are identified as usable for further analysis. Default = FALSE.
#'
#' @param surface_water Boolean argument; specifies whether surface water
#' results should be included in the returned data frame. Default is
#' surface_water = TRUE, surface water samples are retained in the data frame.
#' results should be flagged or removed in the returned data frame. Default is
#' surface_water = TRUE, surface water results are identified as usable for analysis.
#'
#' @param ground_water Boolean argument; specifies whether ground water
#' results should be included in the returned data frame. Default is
#' ground_water = FALSE, ground water samples are not retained in the data
#' frame.
#' results should be flagged or removed in the returned data frame. Default is
#' ground_water = FALSE, ground water results are identified as not usable for analysis.
#'
#' @param sediment Boolean argument; specifies whether sediment results should
#' be included in the returned data frame. Default is sediment = FALSE,
#' sediment samples are not retained in the data frame.
#' be flagged or removed in the returned data frame. Default is sediment = FALSE,
#' sediment results are identified as not usable for analysis.
#'
#' @return If clean = TRUE, returns the data frame with only the media types
#' selected by the user. If clean = FALSE, returns the data frame with two
#' additional columns, "TADA.Media.Flag" and "TADA.AssessmentData.Flag",
#' indicating which results should be excluded from assessments based on user
#' input.
#' selected as usable (set to TRUE in function input) by the user.
#' If clean = FALSE, returns the data frame and an additional column,
#' TADA.UseForAnalysis.Flag, indicating the media type (as determined by this function)
#' and which results should be included or excluded from assessments based on user input.
#'
#' @export
#'
#' @examples
#' # Return data frame with only surface water results
#' data(Data_6Tribes_5y_Harmonized)
#' Data_6Tribes_Assessment <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized)
#' # Returns data with ONLY surface water results retained and no TADA.UseForAnalysis.Flag column
#' Data_6Tribes_Assessment1 <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized, clean = TRUE, surface_water = TRUE, ground_water = FALSE, sediment = FALSE)
#'
#' # Returns data frame with ONLY surface water results identified as usable and adds TADA.UseForAnalysis.Flag column.
#' Data_6Tribes_Assessment2 <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized, clean = FALSE, surface_water = TRUE, ground_water = FALSE, sediment = FALSE)
#' unique(Data_6Tribes_Assessment2$TADA.UseForAnalysis.Flag)
#'
#' # Return data frame with surface water results and TADA.UseForAnalysis.Flag column
#' Data_6Tribes_Assessment <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized, clean = FALSE)

TADA_AnalysisDataFilter <- function(.data,
clean = TRUE,
surface_water = TRUE,
ground_water = FALSE,
sediment = FALSE) {

clean = FALSE,
surface_water = TRUE,
ground_water = FALSE,
sediment = FALSE) {

# *Need to add fish tissue to this function once new WQX profiles are available.
# (HRM, 1/22/4)

# import MonitoringLocationTypeNames and TADA.Media.Flags
sw.sitetypes <- utils::read.csv(system.file("extdata", "WQXMonitoringLocationTypeNameRef.csv", package = "TADA")) %>%
dplyr::select(Name, TADA.Media.Flag) %>%
Expand All @@ -323,26 +324,23 @@ TADA_AnalysisDataFilter <- function(.data,
# add TADA.Media.Flag for additional rows based on MonitoringLocationTypeName
dplyr::left_join(sw.sitetypes, by = "MonitoringLocationTypeName") %>%
dplyr::mutate(TADA.Media.Flag = ifelse(is.na(TADA.Media.Flag),
ML.Media.Flag, TADA.Media.Flag)) %>%
ML.Media.Flag, TADA.Media.Flag),
TADA.Media.Flag = toupper(TADA.Media.Flag)) %>%
dplyr::select(-ML.Media.Flag)

print("TADA_AnalysisDataFilter: Identifying groundwater results.")

{ if (surface_water == TRUE)
if (surface_water == TRUE) {

sur.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")
sur.water.flag <- "Yes"

print("TADA_AnalysisDataFilter: Flagging surface water results to include in assessments.")

}

{ if (surface_water == FALSE)
if (surface_water == FALSE) {

sur.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "No")
sur.water.flag <- "Yes"

print("TADA_AnalysisDataFilter: Flagging surface water results to exclude from assessments.")

Expand All @@ -351,86 +349,73 @@ TADA_AnalysisDataFilter <- function(.data,

if (ground_water == TRUE) {

gr.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Groundwater") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")
gr.water.flag <- "Yes"

print("TADA_AnalysisDataFilter: Flagging groundwater results to include in assessments.")


}

if (ground_water == FALSE) {
gr.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Groundwater") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "No")

gr.water.flag <- "No"

print("TADA_AnalysisDataFilter: Flagging groundwater results to exclude from assessments.")

}

if (sediment == TRUE) {
sed.data <- .data %>%
dplyr::filter(ActivityMediaName %in% c("SEDIMENT", "Sediment", "sediment")) %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")

sed.flag <- "Yes"

print("TADA_AnalysisDataFilter: Flagging sediment results to include in assessments.")

}

if (sediment == FALSE) {
sed.data <- .data %>%
dplyr::filter(ActivityMediaName %in% c("SEDIMENT", "Sediment", "sediment")) %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "No")

sed.flag <- "No"

print("TADA_AnalysisDataFilter: Flagging sediment results to exclude from assessments.")

}

if (clean == TRUE) {

assessment.data <- sur.water.data %>%
suppressMessages(dplyr::full_join(gr.water.data)) %>%
suppressMessages(dplyr::full_join(sed.data)) %>%
dplyr::filter(TADA.UseForAnalysis.Flag == "Yes") %>%
dplyr::select(-TADA.UseForAnalysis.Flag, -TADA.Media.Flag) %>%
.data <- .data %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = dplyr::case_when(
TADA.Media.Flag == "SEDIMENT" ~ paste(sed.flag, " - ", TADA.Media.Flag, sep = ""),
TADA.Media.Flag == "SURFACE WATER" ~ paste(sur.water.flag, " - ", TADA.Media.Flag, sep = ""),
TADA.Media.Flag == "GROUNDWATER" ~ paste(gr.water.flag, " - ", TADA.Media.Flag, sep = ""),
is.na(TADA.Media.Flag) ~ "No - OTHER",
!TADA.Media.Flag %in% c("SEDIMENT", "SURFACE WATER", "GROUNDWATER", "OTHER") ~ paste("No - ", TADA.Media.Flag, sep = "")
)) %>%
dplyr::filter(stringr::str_detect(TADA.UseForAnalysis.Flag, "Yes")) %>%
dplyr::select(c(-TADA.UseForAnalysis.Flag, -TADA.Media.Flag)) %>%
TADA_OrderCols()

rm(sur.water.data, gr.water.data, sed.data)

print("TADA_AnalysisDataFilter: Removing results flagged for exclusion from assessments.")

return(assessment.data)
return(.data)


}

if (clean == FALSE) {

assessment.data <- sur.water.data %>%
suppressMessages(dplyr::full_join(gr.water.data)) %>%
suppressMessages(dplyr::full_join(sed.data)) %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = paste(TADA.UseForAnalysis.Flag, " - ", toupper(TADA.Media.Flag), sep = ""))

assessment.list <- assessment.data %>%
dplyr::select(ResultIdentifier) %>%
dplyr::pull()

other.data <- .data %>%
dplyr::filter(!ResultIdentifier %in% assessment.list) %>%
dplyr::mutate(TADA.Media.Flag = ifelse(TADA.Media.Flag == "" | is.na(TADA.Media.Flag), "OTHER", TADA.Media.Flag),
TADA.UseForAnalysis.Flag = "No",
TADA.UseForAnalysis.Flag = paste(TADA.UseForAnalysis.Flag, " - ", toupper(TADA.Media.Flag), sep = ""))

all.data <- assessment.data %>%
suppressMessages(dplyr::full_join(other.data)) %>%
.data <- .data %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = dplyr::case_when(
TADA.Media.Flag == "SEDIMENT" ~ paste(sed.flag, " - ", TADA.Media.Flag, sep = ""),
TADA.Media.Flag == "SURFACE WATER" ~ paste(sur.water.flag, " - ", TADA.Media.Flag, sep = ""),
TADA.Media.Flag == "GROUNDWATER" ~ paste(gr.water.flag, " - ", TADA.Media.Flag, sep = ""),
is.na(TADA.Media.Flag) ~ "No - OTHER",
!TADA.Media.Flag %in% c("SEDIMENT", "SURFACE WATER", "GROUNDWATER", "OTHER") ~ paste("No - ", TADA.Media.Flag, sep = ""))) %>%
dplyr::select(-TADA.Media.Flag) %>%
TADA_OrderCols()


rm(sur.water.data, gr.water.data, sed.data, assessment.data, assessment.list)

print("TADA_AnalysisDataFilter: Returning all results with TADA.UseForAnalysis.Flag column indicating if result should be used for assessments.")

return(all.data)
return(.data)
}
}
Loading

0 comments on commit 400b12e

Please sign in to comment.