library(middlesnake)
library(dmcognigen)
library(dplyr)
library(DT)
library(leaflet)
library(sf)
library(geojsonsf)
library(DT)Load Columbia County HUC12 watersheds layer from Geoserver
columbia_huc12s <- middlesnake::get_huc_by_cd(district_name = "Columbia")Load NRCS Practices spreadsheet
Dataset provided by SCC
data_path <- system.file("extdata", "20040101-20240430 NRCS Practices_edited_v3.xlsx", package = "middlesnake")
nrcs_data <- readxl::read_xlsx(data_path) %>%
janitor::clean_names()How many unique watersheds in file?
Warning: Practice code 430 has > 1 practice names .
nrcs_data %>%
cnt(practice_code,n_distinct_vars = practice_name) %>%
filter(n_practice_name>1)
#> # A tibble: 1 × 4
#> practice_code n_practice_name n n_cumulative
#> <chr> <int> <int> <int>
#> 1 430 2 1106 105975
nrcs_data %>%
filter(practice_code==430) %>%
distinct(practice_name)
#> # A tibble: 2 × 1
#> practice_name
#> <chr>
#> 1 Irrigation Pipeline
#> 2 Irrigation Water ConveyanceFilter the NRCS data to only include HUC12 in Columbia County
nrcs_columbia <- nrcs_data %>%
filter(huc12 %in% columbia_huc12s$huc12 ) %>%
rename(same_code=land_unit_has_multiple_instances_of_same_practice_code,
same_code_and_year=land_unit_has_multiple_instances_of_same_practice_code_applied_the_same_year)How many Huc12s in Columbia County?
Checking Duplicates
If the same practice code is applied multiple times on the same land unit on the same year, the data is most likely duplicated and should be removed before summing applied amounts
-NRCS Conservation Practices Data: Overview and FAQs Oct-2024
All land_unit_id have 1 distinct huc12 in Columbia
County
nrcs_columbia %>%
cnt(land_unit_id,n_distinct_vars = huc12) %>%
filter(n_huc12>1)
#> # A tibble: 0 × 4
#> # ℹ 4 variables: land_unit_id <chr>, n_huc12 <int>, n <int>, n_cumulative <int>In Columbia County we have 9084 distinct records by
land_unit_id,applied_year, and
practice_code
nrcs_columbia %>%
cnt(n_distinct_vars = c(land_unit_id,applied_year,practice_code))
#> # A tibble: 1 × 6
#> n_land_unit_id n_applied_year n_practice_code n_land_unit_id_applied_y…¹ n
#> <int> <int> <int> <int> <int>
#> 1 2408 21 112 9084 13516
#> # ℹ abbreviated name: ¹n_land_unit_id_applied_year_practice_code
#> # ℹ 1 more variable: n_cumulative <int>Check that distinct records by
land_unit_id,applied_year, and
practice_code match distinct by
applied_amount,
land_unit_id,applied_year, and
practice_code
nrcs_columbia %>%
cnt(n_distinct_vars = c(land_unit_id,applied_amount,applied_year,practice_code))
#> # A tibble: 1 × 7
#> n_land_unit_id n_applied_amount n_applied_year n_practice_code
#> <int> <int> <int> <int>
#> 1 2408 1146 21 112
#> # ℹ 3 more variables:
#> # n_land_unit_id_applied_amount_applied_year_practice_code <int>, n <int>,
#> # n_cumulative <int>
duplicated_practices <- nrcs_columbia %>%
group_by(land_unit_id,practice_code,applied_year) %>%
mutate(duplicates_id_yr_code=1:n()) %>%
ungroup() %>%
group_by(land_unit_id,practice_code,applied_year,applied_amount) %>%
mutate(duplicates_id_yr_code_amt=1:n()) %>%
ungroup()
ids_where_amt_is_different <- duplicated_practices %>%
filter(duplicates_id_yr_code>1) %>%
filter(!duplicates_id_yr_code==duplicates_id_yr_code_amt) %>%
pull(land_unit_id) %>%
unique()listings
duplicated_listings <- duplicated_practices %>%
filter(land_unit_id %in% ids_where_amt_is_different) %>%
select(land_unit_id,practice_name,applied_amount,applied_date,applied_year,starts_with("dup"), program) %>%
arrange(land_unit_id,applied_year)%>%
split(~land_unit_id)Discussion
Note: Consider applied_amount and or
program before removing/collapsing duplicates by
land_unit_id,applied_year, and
practice_code
Example 1
Could be a duplicate but amount different
DT::datatable(duplicated_listings[["{C7966B9E-B2E4-4FED-97BA-8C242B40C33A}"]])Example 2
DT::datatable(duplicated_listings[["{EA8850AA-582F-4603-8270-0963585DF9E5}"]])Example 3
Keep 77 and 102.7 ?
DT::datatable(duplicated_listings[["{FD9D5AB7-7A8E-4020-9FAA-81B392DC7EB4}"]])Example 4
Look at Upland Wildlife Habitat Management in 2010. Not considering applied amount would probably remove the record where applied amount is 1
DT::datatable(duplicated_listings[["{DAA01A27-1A9C-4CD0-8D21-64C427176BF1}"]])Example 5
Look at Forest Stand Improvement in 2023. Removing duplicates of either 1.1 or 2.9 is probably incorrect. Forest Stand Improvement 2023 sum = 4 Forest Stand Improvement 2022 sum = 4
DT::datatable(duplicated_listings[["{AE32D5A2-6BA9-4DD2-AB35-F30F548BE118}"]])Top 20 examples of most practice code duplicates
nrcs_columbia %>%
cnt(land_unit_id,applied_year,n_distinct_vars = practice_code) %>%
arrange(desc(n_practice_code)) %>%
slice(1:20)
#> # A tibble: 20 × 5
#> land_unit_id applied_year n_practice_code n n_cumulative
#> <chr> <dbl> <int> <int> <int>
#> 1 {1FCE00C8-38CF-44D1-B5CA-034… 2010 9 9 1613
#> 2 {1FCE00C8-38CF-44D1-B5CA-034… 2011 9 9 1622
#> 3 {1FCE00C8-38CF-44D1-B5CA-034… 2012 9 9 1631
#> 4 {1FCE00C8-38CF-44D1-B5CA-034… 2013 9 9 1640
#> 5 {1FCE00C8-38CF-44D1-B5CA-034… 2014 9 9 1649
#> 6 {C7AA77F0-F338-46C9-80AE-DF0… 2010 9 9 10183
#> 7 {C7AA77F0-F338-46C9-80AE-DF0… 2011 9 9 10192
#> 8 {C7AA77F0-F338-46C9-80AE-DF0… 2012 9 9 10201
#> 9 {C7AA77F0-F338-46C9-80AE-DF0… 2013 9 9 10210
#> 10 {C7AA77F0-F338-46C9-80AE-DF0… 2014 9 9 10219
#> 11 {E9FF2BE2-08D0-4025-A79C-174… 2016 8 8 12396
#> 12 {2F6D84DC-A1F0-4D37-8558-B6E… 2022 7 10 2522
#> 13 {469DB704-8FA6-4439-8213-963… 2013 7 18 3623
#> 14 {857A3952-0641-41CA-8EA0-16E… 2013 7 15 6808
#> 15 {03F66BFF-3263-43BC-BA99-D7D… 2006 6 6 203
#> 16 {0B238478-C34F-4EAC-A0CC-BDB… 2007 6 6 561
#> 17 {0CD4A764-0227-4E80-AD4C-927… 2007 6 6 653
#> 18 {131BAF71-F051-4D0D-B4FD-039… 2006 6 6 903
#> 19 {16AC9FB6-D71D-441D-A187-D99… 2006 6 6 1112
#> 20 {1A7EBCBC-7A6B-48FF-9348-9CF… 2021 6 6 1287