Skip to content

Commit

Permalink
Merge pull request #371 from USEPA/cm-harmonize
Browse files Browse the repository at this point in the history
Builds consistency
  • Loading branch information
cristinamullin committed Dec 22, 2023
2 parents b710544 + 2a4211d commit deb0285
Show file tree
Hide file tree
Showing 12 changed files with 118 additions and 35 deletions.
2 changes: 1 addition & 1 deletion R/Maintenance.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ TADA_UpdateExampleData <- function() {
od_method = "as-is",
od_multiplier = "null"
)
y <- dplyr::filter(y, TADA.ResultMeasureValueDataTypes.Flag != "Blank" &
y <- dplyr::filter(y, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Applicable" &
TADA.ResultMeasureValueDataTypes.Flag != "Text" &
TADA.ResultMeasureValueDataTypes.Flag != "Coerced to NA" &
!is.na(TADA.ResultMeasureValue))
Expand Down
15 changes: 10 additions & 5 deletions R/ResultFlagsDependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ TADA_FindQCActivities <- function(.data, clean = FALSE, flaggedonly = FALSE) {
#' prepare a dataframe for quantitative analyses. Ideally, this function should
#' be run after other data cleaning, QA/QC, and harmonization steps are
#' completed using other TADA package functions, or manually. Specifically,
#' this function removes rows with "Text","Coerced to NA", and "Blank"
#' this function removes rows with "Text","Coerced to NA", and "NA - Not Applicable"
#' in the TADA.ResultMeasureValueDataTypes.Flag column, or NA in the
#' TADA.ResultMeasureValue column.
#'
Expand Down Expand Up @@ -593,7 +593,7 @@ TADA_AutoFilter <- function(.data) {
"ActivityTypeCode"
))

autofilter <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "Blank" &
autofilter <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Applicable" &
TADA.ResultMeasureValueDataTypes.Flag != "Text" &
TADA.ResultMeasureValueDataTypes.Flag != "Coerced to NA" &
!is.na(TADA.ResultMeasureValue)) # &
Expand Down Expand Up @@ -651,6 +651,10 @@ TADA_AutoFilter <- function(.data) {
#'
#' # Remove all suspect samples:
#' MeasureQualifierCode_clean <- TADA_FlagMeasureQualifierCode(Data_6Tribes_5y, clean = TRUE)
#'
#' # Remove all suspect samples and DO NOT include a new column with
#' # qualifier definitions (TADA.MeasureQualifierCode.Def):
#' MeasureQualifierCode_clean_nodefs <- TADA_FlagMeasureQualifierCode(Data_6Tribes_5y, clean = TRUE, define = FALSE)
TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FALSE, define = TRUE) {
# check .data is data.frame
TADA_CheckType(.data, "data.frame", "Input object")
Expand Down Expand Up @@ -734,9 +738,9 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
missing_codes <- paste(missing_codes, collapse = ", ")
print(paste0("MeasureQualifierCode column in dataset contains value(s) ", missing_codes, " which is/are not represented in the MeasureQualifierCode WQX domain table. These data records are placed under the TADA.MeasureQualifierCode.Flag: 'uncategorized'. Please contact TADA administrators to resolve."))
}

# rename ResultMeasureQualifier NA values to Pass in TADA.MeasureQualifierCode.Flag column, not needed?
# flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "Pass"
# where MeasureQualifierCode is NA, set TADA.MeasureQualifierCode.Flag to "NA - Not Applicable"
flag.data["TADA.MeasureQualifierCode.Flag"][is.na(flag.data["MeasureQualifierCode"])] <- "NA - Not Applicable"

# clean dataframe
# if clean = FALSE, return full dataframe
Expand Down Expand Up @@ -772,6 +776,7 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
}


final.data <- TADA_OrderCols(final.data)
# return final dataframe
return(final.data)
}
18 changes: 9 additions & 9 deletions R/Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ utils::globalVariables(c(
"Target.Speciation", "TADA.NearbySiteGroups", "numres", "TADA.SingleOrgDupGroupID",
"TADA.MeasureQualifierCode.Flag", "TADA.MeasureQualifierCode.Def", "MeasureQualifierCode", "value", "Flag_Column",
"Data_NCTCShepherdstown_HUC12", "ActivityStartDateTime", "TADA.MultipleOrgDupGroupID",
"TADA.WQXVal.Flag"
"TADA.WQXVal.Flag", "Concat", ".", "MeasureQualifierCode.Split"
))


Expand Down Expand Up @@ -150,7 +150,7 @@ TADA_AutoClean <- function(.data) {

# Move detection limit value and unit to TADA Result Measure Value and Unit columns
# Consider moving this to ID censored data in the future?
# this first row copies all over when result is blank but
# this first row copies all over when result is blank (NA) but
# TADA.DetectionQuantitationLimitMeasure.MeasureValue is not and the
# TADA.ResultMeasureValueDataTypes.Flag is not Text
# Imp note: TADA result values are NA for text even though they are not NA in the original result value
Expand All @@ -168,7 +168,7 @@ TADA_AutoClean <- function(.data) {
.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode,
.data$TADA.ResultMeasure.MeasureUnitCode)
.data$TADA.ResultMeasureValueDataTypes.Flag <- ifelse(
.data$TADA.ResultMeasureValueDataTypes.Flag == "Blank"
.data$TADA.ResultMeasureValueDataTypes.Flag == "NA - Not Applicable"
& !is.na(.data$TADA.DetectionQuantitationLimitMeasure.MeasureValue),
"Result Value/Unit Copied from Detection Limit",
.data$TADA.ResultMeasureValueDataTypes.Flag)
Expand Down Expand Up @@ -384,7 +384,7 @@ TADA_ConvertSpecialChars <- function(.data, col) {
if (is.numeric(chars.data$orig)) {
clean.data <- chars.data %>%
dplyr::mutate(flag = dplyr::case_when(
is.na(masked) ~ as.character("Blank"),
is.na(masked) ~ as.character("NA - Not Applicable"),
TRUE ~ as.character("Numeric")
))
} else {
Expand All @@ -394,8 +394,8 @@ TADA_ConvertSpecialChars <- function(.data, col) {
# of the specific type of character/data type
clean.data <- chars.data %>%
dplyr::mutate(flag = dplyr::case_when(
is.na(masked) ~ as.character("Blank"),
(masked == "ND") ~ as.character("Blank"),
is.na(masked) ~ as.character("NA - Not Applicable"),
(masked == "ND") ~ as.character("NA - Not Applicable"),
(!is.na(suppressWarnings(as.numeric(masked)) == TRUE)) ~ as.character("Numeric"),
(grepl("<", masked) == TRUE) ~ as.character("Less Than"),
(grepl(">", masked) == TRUE) ~ as.character("Greater Than"),
Expand Down Expand Up @@ -696,7 +696,7 @@ TADA_SubstituteDeprecatedChars <- function(.data) {

# merge to dataset
.data <- merge(.data, ref.table, all.x = TRUE)
# if CharacteristicName is deprecated and comparable name is not BLANK, use the provided Comparable.Name. Otherwise, keep TADA.CharacteristicName as-is.
# if CharacteristicName is deprecated and comparable name is not blank (NA), use the provided Comparable.Name. Otherwise, keep TADA.CharacteristicName as-is.
.data$TADA.CharacteristicName <- ifelse(!is.na(.data$Char_Flag) & !.data$Comparable.Name %in% c(""), .data$Comparable.Name, .data$TADA.CharacteristicName)

howmany <- length(.data$Char_Flag[!is.na(.data$Char_Flag)])
Expand Down Expand Up @@ -1221,7 +1221,7 @@ TADA_CheckRequiredFields <- function(.data) {
#' prepare a dataframe for quantitative analyses. Ideally, this function should
#' be run after other data cleaning, QA/QC, and harmonization steps are
#' completed using other TADA package functions, or manually. Specifically,
#' this function removes rows with "Text","Coerced to NA", and "Blank"
#' this function removes rows with "Text","Coerced to NA", and "NA - Not Applicable"
#' in the TADA.ResultMeasureValueDataTypes.Flag column, or NA in the
#' TADA.ResultMeasureValue column.
#'
Expand Down Expand Up @@ -1254,7 +1254,7 @@ TADA_AutoFilter <- function(.data) {
start <- dim(.data)[1]

# remove text, NAs and QC results
.data <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "Blank" &
.data <- dplyr::filter(.data, TADA.ResultMeasureValueDataTypes.Flag != "NA - Not Applicable" &
TADA.ResultMeasureValueDataTypes.Flag != "Text" &
TADA.ResultMeasureValueDataTypes.Flag != "Coerced to NA" &
TADA.ActivityType.Flag == "Non_QC" & # filter out QA/QC ActivityTypeCode's
Expand Down
Binary file modified data/Data_6Tribes_5y.rda
Binary file not shown.
Binary file modified data/Data_6Tribes_5y_Harmonized.rda
Binary file not shown.
Binary file modified data/Data_NCTCShepherdstown_HUC12.rda
Binary file not shown.
Binary file modified data/Data_Nutrients_UT.rda
Binary file not shown.
14 changes: 7 additions & 7 deletions inst/extdata/HarmonizationTemplate.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,30 @@ TADA.CharacteristicName,Target.TADA.CharacteristicName,TADA.CharacteristicNameAs
AMMONIA,NA,NA,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,AS NH3,AS N,Converts speciation and result values from AS NH3 to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,AS NH4,AS N,Converts speciation and result values from AS NH4 to AS N,0.776,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,NA,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,NA,AS N,Assumes original speciation for AMMONIA is AS NH3 and converts to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,FILTERABLE,FILTERED,"Assumes FILTERED, LAB and FILTERED, FIELD are equivalent to FILTERABLE and FILTERED. FILTERED is precise and preferred.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,FILTERABLE,FILTERED,"Assumes FILTERED, LAB and FILTERED, FIELD are equivalent to FILTERABLE and FILTERED. FILTERED is precise and preferred.",AS NH3,AS N,Converts speciation and result values from AS NH3 to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,FILTERED,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,FILTERED,NA,NA,NA,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,FILTERED,NA,NA,NA,AS N,Assumes original speciation for AMMONIA is AS NH3 and converts to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,"FILTERED, FIELD",FILTERED,"Assumes FILTERED, LAB and FILTERED, FIELD are equivalent to FILTERABLE and FILTERED. FILTERED is precise and preferred.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,"FILTERED, LAB",FILTERED,"Assumes FILTERED, LAB and FILTERED, FIELD are equivalent to FILTERABLE and FILTERED. FILTERED is precise and preferred.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,INORGANIC,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,INORGANIC,FILTERED,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,SUPERNATE,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,TOTAL,UNFILTERED,"Assumes AMMONIA, TOTAL, AS N is synonymous to AMMONIA, UNFILTERED, AS N.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,TOTAL,UNFILTERED,"Assumes AMMONIA, TOTAL, AS NH3 is synonymous to AMMONIA, UNFILTERED, AS N after conversion.",AS NH3,AS N,Converts speciation and result values from AS NH3 to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,TOTAL,UNFILTERED,"Assumes AMMONIA, TOTAL, AS NH4 is synonymous to AMMONIA, UNFILTERED, AS N after conversion.",AS NH4,AS N,Converts speciation and result values from AS NH4 to AS N,0.776,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,TOTAL,UNFILTERED,"Assumes AMMONIA, TOTAL is synonymous to AMMONIA, UNFILTERED, AS N.",NA,AS N,Assumes AS N,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,TOTAL RECOVERABLE,UNFILTERED,"Assumes AMMONIA, TOTAL RECOVERABLE, AS N is synonymous to AMMONIA, UNFILTERED, AS N.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,TOTAL SOLUBLE,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,NA,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,TOTAL SOLUBLE,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,NA,AS N,Assumes original speciation for AMMONIA is AS NH3 and converts to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,UNFILTERED,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,UNFILTERED,NA,NA,AS NH3,AS N,Converts speciation and result values from AS NH3 to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,UNFILTERED,NA,NA,NA,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,UNFILTERED,NA,NA,NA,AS N,Assumes original speciation for AMMONIA is AS NH3 and converts to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,NA,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA,NA,NA,NA,NA,NA,AS NH3,AS N,Converts speciation and result values from AS NH3 to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,AS NH3,AS N,Converts speciation and result values from AS NH3 to AS N,0.822,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,AS NH4,AS N,Converts speciation and result values from AS NH4 to AS N,0.776,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,NA,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equivalent to FILTERED. FILTERED is precise and preferred.,NA,AS N,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,"FILTERED, LAB",FILTERED,"Assumes FILTERED, LAB and FILTERED are functionally equivalent for data use.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,SUPERNATE,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIA-NITROGEN,AMMONIA,Assumes AMMONIA-NITROGEN is synonymous to AMMONIA.,TOTAL,UNFILTERED,"Assumes AMMONIA-NITROGEN, TOTAL is synonymous to AMMONIA, UNFILTERED",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
Expand All @@ -44,7 +44,7 @@ AMMONIUM,NA,NA,DISSOLVED,FILTERED,Assumes DISSOLVED and TOTAL SOLUBLE are equiva
AMMONIUM,NA,NA,FILTERABLE,FILTERED,"Assumes FILTERED, LAB and FILTERED, FIELD are equivalent to FILTERABLE and FILTERED. FILTERED is precise and preferred.",AS NH4,AS N,Converts speciation and result values from AS NH4 to AS N,0.776,UG/L,MG/L,0.001,Nitrogen
AMMONIUM,NA,NA,FILTERED,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIUM,NA,NA,"FILTERED, LAB",FILTERED,"Assumes FILTERED, LAB and FILTERED, FIELD are equivalent to FILTERABLE and FILTERED. FILTERED is precise and preferred.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIUM,NA,NA,TOTAL,UNFILTERED,"Assumes AMMONIA AND AMMONIUM, TOTAL is synonymous to AMMONIA AND AMMONIUM, UNFILTERED.",NA,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIUM,NA,NA,TOTAL,UNFILTERED,"Assumes AMMONIA AND AMMONIUM, TOTAL is synonymous to AMMONIA AND AMMONIUM, UNFILTERED.",NA,AS N,Assumes original speciation for AMMONIUM is AS NH4 and converts values to AS N,0.776,UG/L,MG/L,0.001,Nitrogen
AMMONIUM,NA,NA,UNFILTERED,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIUM ,NA,NA,FILTERABLE,FILTERED,"Assumes FILTERED, LAB and FILTERED, FIELD are equivalent to FILTERABLE and FILTERED. FILTERED is precise and preferred.",AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
AMMONIUM ,NA,NA,FILTERED,NA,NA,AS N,NA,NA,NA,UG/L,MG/L,0.001,Nitrogen
Expand Down
Loading

0 comments on commit deb0285

Please sign in to comment.