Skip to content

Commit

Permalink
Merge pull request #357 from USEPA/bugs-cm
Browse files Browse the repository at this point in the history
fixes single org bug
  • Loading branch information
cristinamullin committed Nov 14, 2023
2 parents 4dea1e5 + 120c05a commit bd319aa
Show file tree
Hide file tree
Showing 6 changed files with 70,766 additions and 70,739 deletions.
28 changes: 19 additions & 9 deletions R/ResultFlagsIndependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -1007,24 +1007,28 @@ TADA_FlagCoordinates <- function(.data,
#' that the sites within each group are within the specified distance from each other.
#'
#' @param .data TADA dataframe
#'
#' @param dist_buffer Numeric. The distance in meters below which two sites with
#' measurements at the same time on the same day of the same parameter will
#' be flagged as potential duplicates.
#'
#' @param org_hierarchy Vector of organization identifiers that acts as the
#' order in which the function should select a result as the representative
#' duplicate, based on the organization that collected the data. If left
#' blank, the function chooses the representative duplicate result at random.
#'
#'
#' @return The same input TADA dataframe with additional columns: a
#' TADA.MultipleOrgDuplicate column indicating if there is evidence that
#' results are likely duplicated due to submission of the same dataset by two
#' or more different organizations, a TADA.MultipleOrgDupGroupID column
#' containing a number unique to results that may represent duplicated
#' measurement events, a TADA.ResultSelectedMultipleOrgs column indicating
#' which rows are selected to keep (Y) and remove (N) based on the
#' org hierarchy, and a TADA.NearbySiteGroups column indicating which
#' monitoring locations are within the distance buffer from each other.
#'
#' @export
#'
#'
#' @examples
#' # Load dataset
#' dat <- TADA_DataRetrieval(startDate = "2022-09-01", endDate = "2023-05-01", statecode = "PA", sampleMedia = "Water")
Expand Down Expand Up @@ -1061,7 +1065,13 @@ TADA_FindPotentialDuplicatesMultipleOrgs <- function(.data, dist_buffer = 100, o
dplyr::ungroup()

# merge to data
dupsdat <- dplyr::left_join(dupsdat, .data)
dupsdat <- dplyr::left_join(dupsdat, .data, by = c("ActivityStartDate",
"ActivityStartTime.Time",
"TADA.CharacteristicName",
"ActivityTypeCode",
"OrganizationIdentifier",
"ResultIdentifier",
"TADA.ResultMeasureValue"))

rm(dupsprep)

Expand Down Expand Up @@ -1233,8 +1243,8 @@ TADA_FindPotentialDuplicatesSingleOrg <- function(.data) {
.data$TADA.SingleOrgDup.Flag <- "Duplicate"
# flags potential duplicates as "Duplicate" for easy filtering
.data$TADA.SingleOrgDup.Flag <- ifelse(.data$ResultIdentifier %in% picks$ResultIdentifier, "Unique", .data$TADA.SingleOrgDup.Flag)
# flags non-duplicates as passing, cm removed 10/30
# data$TADA.SingleOrgDup.Flag <- ifelse(.data$TADA.SingleOrgDupGroupID == "Not a duplicate", "Unique", .data$TADA.SingleOrgDup.Flag)
# flags non-duplicates as passing
.data$TADA.SingleOrgDup.Flag <- ifelse(.data$TADA.SingleOrgDupGroupID == "Not a duplicate", "Unique", .data$TADA.SingleOrgDup.Flag)
print(paste0(dim(dups_sum_org)[1], " groups of potentially duplicated results found in dataset. These have been placed into duplicate groups in the TADA.SingleOrgDupGroupID column and the function randomly selected one result from each group to represent a single, unduplicated value. Selected values are indicated in the TADA.SingleOrgDup.Flag as 'Unique', while duplicates are flagged as 'Duplicate' for easy filtering."))
}

Expand Down
24 changes: 19 additions & 5 deletions inst/extdata/WQXCharacteristicRef.csv
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,7 @@
"1,2,4-tribromo-1,1,2-trifluorobutane","Accepted",""
"1,2,4-trichloro-1,1,2-trifluorobutane","Accepted",""
"1,2,5,5,6,6-Hexafluoro-3,4-dimethyl-1,3-cyclohexadiene","Accepted",""
"1,2,5,6,9,10-hexabromocyclododecane","Accepted",""
"1,2,5,6-Tetrabromocyclooctane","Accepted",""
"1,2,5,6-Tetrachloronaphthalene","Accepted",""
"1,2,5-Trichloronaphthalene","Accepted",""
Expand Down Expand Up @@ -3584,7 +3585,7 @@ d","Deprecated","SULFLURAMID"
"17-H-Perfluoroheptadecanoate","Accepted",""
"17-H-Perfluoroheptadecanoic acid","Accepted",""
"17-Oxoestra-1,3,5(10)-trien-3-yl heptafluorobutanoate","Accepted",""
"17-alpha-Estradiol","Accepted",""
"17-alpha-Estradiol***retired***use 17.alpha.-Estradiol","Deprecated",""
"17-alpha-Estradiol-2,4-d2","Accepted",""
"17-alpha-Ethynylestradiol-2,4,16,16-d4","Accepted",""
"17-beta-Estradiol 17-glucuronide","Accepted",""
Expand All @@ -3607,7 +3608,7 @@ d","Deprecated","SULFLURAMID"
"17a(H),21b(H)-Hopane","Accepted",""
"17a(H)-22,29,30-Trisnorhopane","Accepted",""
"17alpha-Dihydroequilin","Accepted",""
"17alpha-Estradiol","Accepted",""
"17alpha-Estradiol***retired***use 17.alpha.-Estradiol","Deprecated",""
"17b(H),21a(H)-30-Norhopane","Accepted",""
"17b(H),21a(H)-Hopane","Accepted",""
"17b(H),21b(H)-Hopane","Accepted",""
Expand Down Expand Up @@ -5889,7 +5890,7 @@ d","Deprecated","SULFLURAMID"
"2-Propanone, hexafluoro-, hydrate","Accepted",""
"2-Propanone, mixt. with 1,1,2-trichloro-1,2,2-trifluoroethane","Accepted",""
"2-Propanyl, 1,1,1,2,3,3,3-heptafluoro-","Accepted",""
"2-Propen-1-ol","Accepted",""
"2-Propen-1-ol***retired***use Allyl Alcohol","Deprecated",""
"2-Propen-1-ol, compounds, reaction products with 1,1,1,2,2,3,3,4,4,5,5,6,6-tridecafluoro-6-iodohexane, dehydroiodinated, reaction products with epichlorohydrin-triethylenetetramine polymer","Accepted",""
"2-Propen-1-ol, reaction products with pentafluoroiodoethane-tetrafluoroethylene telomer, dehydroiodinated, reaction products with epichlorohydrin and triethylenetetramine","Accepted",""
"2-Propen-1-one, 1-cyclopropyl-","Accepted",""
Expand Down Expand Up @@ -11830,6 +11831,7 @@ d","Deprecated","SULFLURAMID"
"Crystalline quartz","Accepted",""
"Cube Resins other than rotenone","Accepted",""
"Cumene","Accepted",""
"Cumene hydroperoxide","Accepted",""
"Cumulative Precipitation Since Last Measurement","Accepted",""
"Cupric hydroxide","Accepted",""
"Cupric sulfate","Accepted",""
Expand Down Expand Up @@ -12303,7 +12305,7 @@ d","Deprecated","SULFLURAMID"
"Desthio-prothioconazole","Accepted",""
"Desulfinylfipronil","Accepted",""
"Desulfinylfipronil amide","Accepted",""
"Desvenlafaxine","Accepted",""
"Desvenlafaxine***retired***use Desmethylvenlafaxine","Deprecated",""
"Detergent, severity (choice list)","Accepted",""
"Detritus Jabs","Accepted",""
"Detritus, coarse particle organic matter","Accepted",""
Expand Down Expand Up @@ -12599,6 +12601,7 @@ d","Deprecated","SULFLURAMID"
"Dimethenamid oxanilic acid","Accepted",""
"Dimethenamid sulfinylacetic acid","Accepted",""
"Dimethenamid-P","Accepted",""
"Dimethipin","Accepted",""
"Dimethoate","Accepted",""
"Dimethoate-d6","Accepted",""
"Dimethomorph","Accepted",""
Expand Down Expand Up @@ -13305,7 +13308,7 @@ d","Deprecated","SULFLURAMID"
"Ethyl acetate","Accepted",""
"Ethyl acrylate","Accepted",""
"Ethyl butyrate","Accepted",""
"Ethyl carbamate","Accepted",""
"Ethyl carbamate***retired***use Urethane","Deprecated",""
"Ethyl cinnamate","Accepted",""
"Ethyl diethanolamine","Accepted",""
"Ethyl difluoro(1,1,2,2-tetrafluoro-2-iodoethoxy)acetate","Accepted",""
Expand Down Expand Up @@ -15098,7 +15101,12 @@ d","Deprecated","SULFLURAMID"
"Methane, (difluoromethoxy)trifluoro-","Accepted",""
"Methane, 1,1'-oxybis[1,1,1-trifluoro-","Accepted",""
"Methane, bis(difluoromethoxy)difluoro-","Accepted",""
"Methane, bromodichloronitro-","Accepted",""
"Methane, bromodiiodo-","Accepted",""
"Methane, chloro(difluoromethoxy)difluoro-","Accepted",""
"Methane, chlorodiiodo-","Accepted",""
"Methane, dibromochloronitro-","Accepted",""
"Methane, dibromoiodo-","Accepted",""
"Methane, oxybis[(difluoromethoxy)difluoro-","Accepted",""
"Methane, oxybis[difluoro-","Accepted",""
"Methanesulfonamide, 1,1,1-trifluoro-N-[(trifluoromethyl)sulfonyl]-","Accepted",""
Expand Down Expand Up @@ -17677,6 +17685,7 @@ d","Deprecated","SULFLURAMID"
"Perfluoro-N-(hexanesulfonyl)hexane-1-sulfonamide lithium","Accepted",""
"Perfluoro-N-[(trifluoromethyl)sulfonyl]butanesulfonamide sodium salt","Accepted",""
"Perfluoro-N-methyl-decahydroquinoline","Accepted",""
"Perfluoro-n-(2,3,4-13C3)butanoic acid","Accepted",""
"Perfluoro-n-[1,2,3,4,5,6,7-13C7]undecanoic acid","Accepted",""
"Perfluoro-n-[1,2,3,4,5,6-13C6]decanoic acid","Accepted",""
"Perfluoro-n-[1,2,3,4,5-13C5]pentanoic acid","Accepted",""
Expand Down Expand Up @@ -20034,6 +20043,7 @@ d","Deprecated","SULFLURAMID"
"Sodium 1-(ethyl-(2-sulfonatooxyethyl)sulfamoyl)-1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro-octane","Accepted",""
"Sodium 1-[(2,2,3,3,4,4,5,5,6,6,7,7-dodecafluoroheptyl)oxy]-3-(2,2,3,3-tetrafluoropropoxy)-2-propanol 2-sulfate","Accepted",""
"Sodium 1H, 1H, 2H, 2H-[1,2-13C2] perfluorooctyl phosphate","Accepted",""
"Sodium 1H,1H,2H,2H-perfluoro1-decanesulfonate","Accepted",""
"Sodium 1H,1H,2H,2H-perfluorodecylphosphate","Accepted",""
"Sodium 1H,1H,2H,2H-perfluorooctylphosphate","Accepted",""
"Sodium 2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9-hexadecafluorononyl sulfate","Accepted",""
Expand Down Expand Up @@ -20127,6 +20137,8 @@ d","Deprecated","SULFLURAMID"
"Sodium octyl sulfate","Accepted",""
"Sodium pentachlorophenate","Accepted",""
"Sodium perfluoro(3-{[1-(1,2-dichloro-1,2,2-trifluoroethoxy)propan-2-yl]oxy}propyl)sulfonate","Accepted",""
"Sodium perfluoro-1-(1,2,3,4-13C4)octanesulfonate","Accepted",""
"Sodium perfluoro-1-(2,3,4-13C)butanesulfonate","Accepted",""
"Sodium perfluoro-1-[1,2,3,4-18O2]octanesulfonate","Accepted",""
"Sodium perfluoro-1-ethanesulfinate","Accepted",""
"Sodium perfluoro-1-hexane[18O2]sulfonate","Accepted",""
Expand Down Expand Up @@ -21225,6 +21237,7 @@ d","Deprecated","SULFLURAMID"
"Triethyl citrate","Accepted",""
"Triethyl phosphate","Accepted",""
"Triethyl-2-acetylcitrate","Accepted",""
"Triethylamine","Accepted",""
"Triethylaminium perfluoro-2-(2-propoxypropoxy)propanoate","Accepted",""
"Triethylaminium perfluoro-2-propoxypropanoate","Accepted",""
"Triethylammonium perfluorobutane sulfonate","Accepted",""
Expand Down Expand Up @@ -21327,6 +21340,7 @@ d","Deprecated","SULFLURAMID"
"Triphenyl(3,3,4,4,5,5,6,6,7,7,8,8,8-tridecafluorooctyl)stannane","Accepted",""
"Triphenylene","Accepted",""
"Triphenyltin","Accepted",""
"Triphenyltin hydroxide","Accepted",""
"Tripleuchlanis","Accepted",""
"Tripropyl phosphate","Accepted",""
"Tripropylene glycol monopropyl ether","Accepted",""
Expand Down
49 changes: 25 additions & 24 deletions inst/extdata/WQXResultDetectionConditionRef.csv
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
"Domain","Unique.Identifier","Name","Description","Last.Change.Date","TADA.Detection_Type"
"Result Detection Condition(ResultDetectionConditionText)",15,"Above Operating Range","data valid but above Normal Operating Range reduces the chance of acceptance","4/26/2016 4:43:04 PM","Over-Detect"
"Result Detection Condition(ResultDetectionConditionText)",13,"Below Daily Detection Limit","data that was found less than defined daily detection limits","4/26/2016 4:42:54 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",12,"Below Detection Limit","data that was found less than defined detection limits","4/26/2016 4:42:49 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",14,"Below Long-term Blank-basd Dt Limit","data that was found less than defined long-term blank based detection limits","4/26/2016 4:42:59 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",20,"Below Method Detection Limit","data that was found less than defined laboratory method detection limits","4/26/2016 4:43:25 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",16,"Below Reporting Limit","data that was found less than defined laboratory reporting limits","4/26/2016 4:43:10 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",17,"Below Sample-specific Detect Limit","data that was found less than defined sample specific detection limits","4/26/2016 4:43:15 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",18,"Below System Detection Limit","data that was found less than defined system detection limits","4/26/2016 4:43:21 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",11,"Between Inst Detect and Quant Limit","between the lowest concentration at which the analyte can not only be reliably detected but at which some predefined goals for bias and imprecision and instrument.","4/26/2016 4:42:44 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",1,"Detected Not Quantified","data that was found and not quanitified within defined laboratory reporting limits or method detection limits","7/26/2006 10:57:47 AM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",21,"High Moisture","wet and allowed to ferment","2/3/2016 11:57:48 AM","Other"
"Result Detection Condition(ResultDetectionConditionText)",4,"Not Detected","data was looked for but was not observed/detected within defined laboratory reporting limits or method detection limits","7/26/2006 10:57:47 AM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",106,"Not Detected at Detection Limit","Not Detected at Detection Limit","11/6/2018 1:01:33 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",107,"Not Detected at Reporting Limit","Not Detected at Reporting Limit","11/6/2018 1:01:48 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",86,"Not Present","data that was looked for but not found and without laboratory reporting limits or method detection limits http://www.abcwua.org/Substances_Not_Found.aspx","11/30/2017 7:36:18 AM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",2,"Not Reported","data was collected but not analyzed. (data has not been quantified)","7/26/2006 10:57:47 AM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",3,"Present Above Quantification Limit","data was found greater than defined laboratory reporting limits or method detection limits. typically for colony count and rare occasions where value is above highest calibration curve point concentration.","7/26/2006 10:57:47 AM","Over-Detect"
"Result Detection Condition(ResultDetectionConditionText)",5,"Present Below Quantification Limit","data was found less than defined laboratory reporting limits or method detection limits","7/26/2006 10:57:47 AM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",10,"Reported in Raw Data (attached)","Continuous or time series sampling attached in raw data file","6/26/2015 2:48:40 AM","Other"
"Result Detection Condition(ResultDetectionConditionText)",6,"Trace","a very small quantity, especially one too small to be accurately measured.","8/5/2015 2:54:11 PM","Non-Detect"
"Result Detection Condition(ResultDetectionConditionText)",22,"Unable to Measure","Measurement or sample collection was attempted but unobtainable due to site conditions, equipment failure, or other circumstances","8/30/2023 2:03:01 PM","Other"
"Result Detection Condition(ResultDetectionConditionText)",19,"Value Decensored","Value Decensored - reconstruct or remove the objectionable values of a measurement set","12/21/2015 2:45:30 PM","Other"
NA,NA,"*Non-detect","Hard-coded legacy detection condition","8/7/2023 12:00:00 PM","Non-Detect"
Domain,Unique.Identifier,Name,Description,Last.Change.Date,TADA.Detection_Type
Result Detection Condition(ResultDetectionConditionText),15,Above Operating Range,data valid but above Normal Operating Range reduces the chance of acceptance,4/26/2016 16:43,Over-Detect
Result Detection Condition(ResultDetectionConditionText),13,Below Daily Detection Limit,data that was found less than defined daily detection limits,4/26/2016 16:42,Non-Detect
Result Detection Condition(ResultDetectionConditionText),12,Below Detection Limit,data that was found less than defined detection limits,4/26/2016 16:42,Non-Detect
Result Detection Condition(ResultDetectionConditionText),14,Below Long-term Blank-basd Dt Limit,data that was found less than defined long-term blank based detection limits,4/26/2016 16:42,Non-Detect
Result Detection Condition(ResultDetectionConditionText),20,Below Method Detection Limit,data that was found less than defined laboratory method detection limits,4/26/2016 16:43,Non-Detect
Result Detection Condition(ResultDetectionConditionText),16,Below Reporting Limit,data that was found less than defined laboratory reporting limits,4/26/2016 16:43,Non-Detect
Result Detection Condition(ResultDetectionConditionText),17,Below Sample-specific Detect Limit,data that was found less than defined sample specific detection limits,4/26/2016 16:43,Non-Detect
Result Detection Condition(ResultDetectionConditionText),18,Below System Detection Limit,data that was found less than defined system detection limits,4/26/2016 16:43,Non-Detect
Result Detection Condition(ResultDetectionConditionText),11,Between Inst Detect and Quant Limit,between the lowest concentration at which the analyte can not only be reliably detected but at which some predefined goals for bias and imprecision and instrument.,4/26/2016 16:42,Non-Detect
Result Detection Condition(ResultDetectionConditionText),1,Detected Not Quantified,data that was found and not quantified within defined laboratory reporting limits or method detection limits,7/26/2006 10:57,Non-Detect
Result Detection Condition(ResultDetectionConditionText),21,High Moisture,wet and allowed to ferment,2/3/2016 11:57,Other
Result Detection Condition(ResultDetectionConditionText),4,Not Detected,data was looked for but was not observed/detected within defined laboratory reporting limits or method detection limits,7/26/2006 10:57,Non-Detect
Result Detection Condition(ResultDetectionConditionText),106,Not Detected at Detection Limit,Not Detected at Detection Limit,11/6/2018 13:01,Non-Detect
Result Detection Condition(ResultDetectionConditionText),107,Not Detected at Reporting Limit,Not Detected at Reporting Limit,11/6/2018 13:01,Non-Detect
Result Detection Condition(ResultDetectionConditionText),86,Not Present,data that was looked for but not found and without laboratory reporting limits or method detection limits http://www.abcwua.org/Substances_Not_Found.aspx,11/30/2017 7:36,Non-Detect
Result Detection Condition(ResultDetectionConditionText),2,Not Reported,data was collected but not analyzed. (data has not been quantified),7/26/2006 10:57,Non-Detect
Result Detection Condition(ResultDetectionConditionText),3,Present Above Quantification Limit,data was found greater than defined laboratory reporting limits or method detection limits. typically for colony count and rare occasions where value is above highest calibration curve point concentration.,7/26/2006 10:57,Over-Detect
Result Detection Condition(ResultDetectionConditionText),5,Present Below Quantification Limit,data was found less than defined laboratory reporting limits or method detection limits,7/26/2006 10:57,Non-Detect
Result Detection Condition(ResultDetectionConditionText),10,Reported in Raw Data (attached),Continuous or time series sampling attached in raw data file,6/26/2015 2:48,Other
Result Detection Condition(ResultDetectionConditionText),6,Trace,"a very small quantity, especially one too small to be accurately measured.",8/5/2015 14:54,Non-Detect
Result Detection Condition(ResultDetectionConditionText),22,Unable to Measure,"Measurement or sample collection was attempted but unobtainable due to site conditions, equipment failure, or other circumstances",8/30/2023 14:03,Other
Result Detection Condition(ResultDetectionConditionText),19,Value Decensored,Value Decensored - reconstruct or remove the objectionable values of a measurement set,12/21/2015 14:45,Other
Result Detection Condition(ResultDetectionConditionText),23,Value affected by contamination,Value affected by contamination as determined for USGS NWIS data; Analyte was detected in both the environmental sample and the associated blanks.,11/9/2023 21:50,Other
NA,NA,*Non-detect,Hard-coded legacy detection condition,8/7/2023 12:00,Non-Detect
Loading

0 comments on commit bd319aa

Please sign in to comment.