Skip to content

Commit

Permalink
Merge pull request #340 from USEPA/WQXvalidation
Browse files Browse the repository at this point in the history
Update ref files
  • Loading branch information
cristinamullin committed Sep 15, 2023
2 parents ed9a2df + d5a267c commit cfcf24c
Show file tree
Hide file tree
Showing 14 changed files with 70,797 additions and 56,973 deletions.
61 changes: 34 additions & 27 deletions R/GenerateRefTables.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
#' Update TADA Reference Files
#'
#' Convenience wrapper that calls every individual `TADA_Update*Ref()` helper
#' in a fixed sequence so that all reference tables can be refreshed with a
#' single call. The helpers are defined elsewhere in this file.
#'
#' @return Saves updated reference files. The value of the final helper call
#'   (`TADA_UpdateMeasureQualifierCodeRef()`) is passed through unchanged.
#'
TADA_UpdateAllRefs <- function() {
  # WQX characteristic validation, units, detection conditions and limits
  TADA_UpdateWQXCharValRef()
  TADA_UpdateMeasureUnitRef()
  TADA_UpdateDetCondRef()
  TADA_UpdateDetLimitRef()

  # Activity types, characteristics and measure qualifier codes
  TADA_UpdateActivityTypeRef()
  TADA_UpdateCharacteristicRef()
  TADA_UpdateMeasureQualifierCodeRef()
}

# Used to store cached WQX QAQC Characteristic Validation Reference Table
WQXCharValRef_Cached <- NULL

Expand Down Expand Up @@ -41,33 +54,26 @@ TADA_GetWQXCharValRef <- function() {
message("Falling back to (possibly outdated) internal file.")
return(utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")))
}

# filter data to include only accepted (valid) values and remove extraneous columns

# Categorize status values
notreviewed <- "Not Reviewed"
valid <- c("Accepted", "Y")
invalid <- c("Rejected", "Rejected ", "N")
nonstandard <- c("NonStandardized",
"InvalidMediaUnit",
"InvalidChar",
"MethodNeeded")

WQXcharValRef <- raw.data %>%
dplyr::select(-c(
"Domain", "Unique.Identifier", "Note.Recommendation",
"Last.Change.Date"
))
# replace "Status" values with Valid, Invalid, Unknown
WQXcharValRef$Status2 <- ifelse(WQXcharValRef$Status %in% c("Accepted"), "Valid", "Invalid")
WQXcharValRef$Status2 <- ifelse(WQXcharValRef$Status %in% c(
"NonStandardized",
"Nonstandardized",
"InvalidMediaUnit",
"InvalidChar",
"MethodNeeded"
), "Nonstandardized", WQXcharValRef$Status2)

WQXcharValRef <- WQXcharValRef %>%
dplyr::select(-Status) %>%
dplyr::rename(Status = Status2) %>%
dplyr::mutate(TADA.WQXVal.Flag = dplyr::case_when(
Status %in% notreviewed ~ "Not Reviewed",
Status %in% valid ~ "Valid",
Status %in% invalid ~ "Invalid",
Status %in% nonstandard ~ "NonStandardized",
Status %in% NA ~ "Not Reviewed",
)) %>%
dplyr::distinct()

# # Convert all NONE to NA in Value and Value.Unit columns
# WQXcharValRef = WQXcharValRef %>% dplyr::mutate(Value = replace(Value, Value%in%c("NONE"),NA),
# Value.Unit = replace(Value.Unit, Value.Unit%in%c("NONE"),NA)) %>% dplyr::distinct()
#

# Save updated table in cache
WQXCharValRef_Cached <- WQXcharValRef

Expand Down Expand Up @@ -394,7 +400,8 @@ TADA_GetActivityTypeRef <- function() {
if (is.null(raw.data)) {
message("Downloading latest Activity Type Reference Table failed!")
message("Falling back to (possibly outdated) internal file.")
return(utils::read.csv(system.file("extdata", "WQXActivityTypeRef.csv", package = "TADA")))
return(utils::read.csv(system.file("extdata", "WQXActivityTypeRef.csv",
package = "TADA")))
}

# Categorize Activity Types
Expand Down Expand Up @@ -635,8 +642,8 @@ TADA_GetMeasureQualifierCodeRef <- function() {
Code %in% overdetect ~ "Over-Detect",
Code %in% suspect ~ "Suspect",
Code %in% pass ~ "Pass",
Code %in% NA ~ "Pass",
TRUE ~ as.character("NewValue_NeedsReview")
Code %in% NA ~ "Not Reviewed",
TRUE ~ as.character("Not Reviewed")
)) %>%
dplyr::distinct()

Expand Down
46 changes: 21 additions & 25 deletions R/ResultFlagsDependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
check.data <- check.data %>%
dplyr::rename(TADA.SampleFraction.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in TADA.SampleFraction.Flag column
check.data["TADA.SampleFraction.Flag"][is.na(check.data["TADA.SampleFraction.Flag"])] <- "Nonstandardized"
# rename NA values to "Not Reviewed" in TADA.SampleFraction.Flag column
check.data["TADA.SampleFraction.Flag"][is.na(check.data["TADA.SampleFraction.Flag"])] <- "Not Reviewed"

# if all rows are "Valid", return input with flag column
if (any(c("Nonstandardized", "Invalid") %in%
if (any(c("Nonstandardized", "Invalid", "Not Reviewed") %in%
unique(check.data$TADA.SampleFraction.Flag)) == FALSE) {
if (flaggedonly == FALSE) {
print("All characteristic/fraction combinations are valid in your dataframe. Returning input dataframe with TADA.SampleFraction.Flag column for tracking.")
Expand All @@ -86,7 +86,6 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
if (flaggedonly == TRUE) {
print("This dataframe is empty because we did not find any invalid fraction/characteristic combinations in your dataframe")
empty.data <- dplyr::filter(check.data, TADA.SampleFraction.Flag == "Invalid")
# empty.data <- dplyr::select(empty.data, -TADA.SampleFraction.Flag)
empty.data <- TADA_OrderCols(empty.data)
return(empty.data)
}
Expand All @@ -103,9 +102,6 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
if (clean == TRUE & flaggedonly == FALSE) {
# filter out invalid characteristic-fraction combinations
clean.data <- dplyr::filter(check.data, TADA.SampleFraction.Flag != "Invalid")

# remove WQX.SampleFractionValidity column
# clean.data <- dplyr::select(clean.data, -TADA.SampleFraction.Flag)
clean.data <- TADA_OrderCols(clean.data)
return(clean.data)
}
Expand Down Expand Up @@ -211,11 +207,11 @@ TADA_FlagSpeciation <- function(.data, clean = c("invalid_only", "nonstandardize
check.data <- check.data %>%
dplyr::rename(TADA.MethodSpeciation.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in TADA.MethodSpeciation.Flag column
check.data["TADA.MethodSpeciation.Flag"][is.na(check.data["TADA.MethodSpeciation.Flag"])] <- "Nonstandardized"
# rename NA values to Not Reviewed in TADA.MethodSpeciation.Flag column
check.data["TADA.MethodSpeciation.Flag"][is.na(check.data["TADA.MethodSpeciation.Flag"])] <- "Not Reviewed"

# if all rows are "Valid", return input with flag column
if (any(c("Nonstandardized", "Invalid") %in%
if (any(c("Not Reviewed", "Invalid", "NonStandardized") %in%
unique(check.data$TADA.MethodSpeciation.Flag)) == FALSE) {
print("All characteristic/method speciation combinations are valid in your dataframe. Returning input dataframe with TADA.MethodSpeciation.Flag column for tracking.")
check.data <- TADA_OrderCols(check.data)
Expand All @@ -235,14 +231,14 @@ TADA_FlagSpeciation <- function(.data, clean = c("invalid_only", "nonstandardize

# when clean = "nonstandardized_only"
if (clean == "nonstandardized_only") {
# filter out only "Nonstandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "Nonstandardized")
# filter out only "NonStandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "NonStandardized")
}

# when clean = "both"
if (clean == "both") {
# filter out both "Invalid" and "Nonstandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "Nonstandardized" & TADA.MethodSpeciation.Flag != "Invalid")
# filter out both "Invalid" and "NonStandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "NonStandardized" & TADA.MethodSpeciation.Flag != "Invalid")
}

# when clean = "none"
Expand All @@ -259,8 +255,8 @@ TADA_FlagSpeciation <- function(.data, clean = c("invalid_only", "nonstandardize

# when flaggedonly = TRUE
if (flaggedonly == TRUE) {
# filter to show only invalid and/or nonstandardized characteristic-method speciation combinations
error.data <- dplyr::filter(clean.data, TADA.MethodSpeciation.Flag == "Invalid" | TADA.MethodSpeciation.Flag == "Nonstandardized")
# filter to show only invalid and/or nonStandardized characteristic-method speciation combinations
error.data <- dplyr::filter(clean.data, TADA.MethodSpeciation.Flag == "Invalid" | TADA.MethodSpeciation.Flag == "NonStandardized")
# if there are no errors
if (nrow(error.data) == 0) {
print("This dataframe is empty because either we did not find any invalid/nonstandardized characteristic-method speciation combinations or they were all filtered out")
Expand Down Expand Up @@ -364,11 +360,11 @@ TADA_FlagResultUnit <- function(.data, clean = c("invalid_only", "nonstandardize
check.data <- check.data %>%
dplyr::rename(TADA.ResultUnit.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in WQX.ResultUnitValidity column
check.data["TADA.ResultUnit.Flag"][is.na(check.data["TADA.ResultUnit.Flag"])] <- "Nonstandardized"
# rename NA values to Not Reviewed in TADA.ResultUnit.Flag column
check.data["TADA.ResultUnit.Flag"][is.na(check.data["TADA.ResultUnit.Flag"])] <- "Not Reviewed"

# if all rows are "Valid", return input with flag column
if (any(c("Nonstandardized", "Invalid") %in%
if (any(c("NonStandardized", "Invalid", "Not Reviewed") %in%
unique(check.data$TADA.ResultUnit.Flag)) == FALSE) {
print("All characteristic/unit combinations are valid in your dataframe. Returning input dataframe with TADA.ResultUnit.Flag column for tracking.")
check.data <- TADA_OrderCols(check.data)
Expand All @@ -389,13 +385,13 @@ TADA_FlagResultUnit <- function(.data, clean = c("invalid_only", "nonstandardize
# when clean = "nonstandardized_only"
if (clean == "nonstandardized_only") {
# filter out only "NonStandardized" characteristic-media-result unit combinations
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "Nonstandardized")
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "NonStandardized")
}

# when clean = "both"
if (clean == "both") {
# filter out both "Invalid" and "Nonstandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "Nonstandardized" & TADA.ResultUnit.Flag != "Invalid")
# filter out both "Invalid" and "NonStandardized" characteristic-media-result unit combinations
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "NonStandardized" & TADA.ResultUnit.Flag != "Invalid")
}

# when clean = "none"
Expand All @@ -412,11 +408,11 @@ TADA_FlagResultUnit <- function(.data, clean = c("invalid_only", "nonstandardize

# when flaggedonly = TRUE
if (flaggedonly == TRUE) {
# filter to show only invalid and/or nonstandardized characteristic-method speciation combinations
error.data <- dplyr::filter(clean.data, TADA.ResultUnit.Flag == "Invalid" | TADA.ResultUnit.Flag == "Nonstandardized")
# filter to show only "Invalid" and/or "NonStandardized" characteristic-media-result unit combinations
error.data <- dplyr::filter(clean.data, TADA.ResultUnit.Flag == "Invalid" | TADA.ResultUnit.Flag == "NonStandardized")
# if there are no errors
if (nrow(error.data) == 0) {
print("This dataframe is empty because either we did not find any invalid/nonstandardized characteristic-media-result unit combinations or they were all filtered out")
print("This dataframe is empty because either we did not find any invalid/NonStandardized characteristic-media-result unit combinations or they were all filtered out")
# error.data <- dplyr::select(error.data, -TADA.ResultUnit.Flag)
}
error.data <- TADA_OrderCols(error.data)
Expand Down
14 changes: 7 additions & 7 deletions R/ResultFlagsIndependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' @return This function adds the TADA.AnalyticalMethod.Flag to a TADA dataframe. This column
#' flags invalid CharacteristicName, ResultAnalyticalMethod/MethodIdentifier,
#' and ResultAnalyticalMethod/MethodIdentifierContext combinations in your dataframe
#' as either "Nonstandardized", "Invalid", or "Valid". When clean = FALSE and
#' as either "NonStandardized", "Invalid", or "Valid". When clean = FALSE and
#' flaggedonly = TRUE, the dataframe is filtered to show only "Invalid"
#' characteristic-analytical method combinations; the column TADA.AnalyticalMethod.Flag
#' is still appended. When clean = TRUE and flaggedonly = FALSE, "Invalid" rows
Expand Down Expand Up @@ -80,12 +80,12 @@ TADA_FlagMethod <- function(.data, clean = TRUE, flaggedonly = FALSE) {
check.data <- check.data %>%
dplyr::rename(TADA.AnalyticalMethod.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in WQX.AnalyticalMethodValidity column
check.data["TADA.AnalyticalMethod.Flag"][is.na(check.data["TADA.AnalyticalMethod.Flag"])] <- "Nonstandardized"
# rename NA values to "NonStandardized" in TADA.AnalyticalMethod.Flag column
check.data["TADA.AnalyticalMethod.Flag"][is.na(check.data["TADA.AnalyticalMethod.Flag"])] <- "NonStandardized"

if (flaggedonly == FALSE) {
# if all rows are "Valid" or NA "Nonstandardized", return input unchanged
## note: Cristina edited this on 9/19/22 to keep Nonstandardized/NA data when clean = TRUE. Now only Invalid data is removed.
# if all rows are "Valid" or NA "NonStandardized", return input unchanged
## note: Cristina edited this on 9/19/22 to keep NonStandardized/NA data when clean = TRUE. Now only Invalid data is removed.
if (any("Invalid" %in%
unique(check.data$TADA.AnalyticalMethod.Flag)) == FALSE) {
print("No invalid method/characteristic combinations in your dataframe. Returning the input dataframe with TADA.AnalyticalMethod.Flag column for tracking.")
Expand Down Expand Up @@ -355,7 +355,7 @@ TADA_FlagAboveThreshold <- function(.data, clean = TRUE, flaggedonly = FALSE) {
dplyr::mutate(TADA.ResultValueAboveUpperThreshold.Flag = dplyr::case_when(
TADA.ResultMeasureValue >= Maximum ~ as.character("Y"),
TADA.ResultMeasureValue < Maximum ~ as.character("N"),
TRUE ~ as.character("No threshold available") # this occurs when the char/unit combo is not in the table
TRUE ~ as.character("Not Reviewed") # this occurs when the char/unit combo is not in the table
))

# remove extraneous columns, fix field names
Expand Down Expand Up @@ -506,7 +506,7 @@ TADA_FlagBelowThreshold <- function(.data, clean = TRUE, flaggedonly = FALSE) {
dplyr::mutate(TADA.ResultValueBelowLowerThreshold.Flag = dplyr::case_when(
TADA.ResultMeasureValue <= Minimum ~ as.character("Y"),
TADA.ResultMeasureValue > Minimum ~ as.character("N"),
TRUE ~ as.character("No threshold available") # this occurs when the char/unit combo is not in the table
TRUE ~ as.character("Not Reviewed") # this occurs when the char/unit combo is not in the table
))

# remove extraneous columns, fix field names
Expand Down
2 changes: 1 addition & 1 deletion R/Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ utils::globalVariables(c(
"SummationSpeciationConversionFactor", "SummationNote", "NutrientGroup",
"Target.Speciation", "TADA.NearbySiteGroups", "numres", "TADA.SingleOrgDupGroupID",
"TADA.MeasureQualifierCode.Flag", "MeasureQualifierCode", "value", "Flag_Column",
"Data_NCTCShepherdstown_HUC12", "ActivityStartDateTime"
"Data_NCTCShepherdstown_HUC12", "ActivityStartDateTime", "TADA.MultipleOrgDupGroupID"
))


Expand Down
Binary file added inst/extdata/Data_6Tribes_5y.rda
Binary file not shown.
Binary file added inst/extdata/Data_6Tribes_5y_Harmonized.rda
Binary file not shown.
Binary file added inst/extdata/Data_NCTCShepherdstown_HUC12.rda
Binary file not shown.
Binary file added inst/extdata/Data_Nutrients_UT.rda
Binary file not shown.
Loading

0 comments on commit cfcf24c

Please sign in to comment.