Skip to contents

Load Columbia County HUC12 watersheds layer from Geoserver


columbia_huc12s <- middlesnake::get_huc_by_cd(district_name = "Columbia")

Load NRCS Practices spreadsheet

Dataset provided by SCC


data_path <- system.file("extdata", "20040101-20240430 NRCS Practices_edited_v3.xlsx", package = "middlesnake")

nrcs_data <- readxl::read_xlsx(data_path) %>%
  janitor::clean_names()

How many unique watersheds in file?

nrcs_data %>%
    pull(huc12) %>%
    unique() %>%
    length()
#> [1] 1289

Warning: Practice code 430 has > 1 practice names .

nrcs_data %>%
    cnt(practice_code,n_distinct_vars = practice_name) %>%
    filter(n_practice_name>1)
#> # A tibble: 1 × 4
#>   practice_code n_practice_name     n n_cumulative
#>   <chr>                   <int> <int>        <int>
#> 1 430                         2  1106       105975
nrcs_data %>%
    filter(practice_code==430) %>%
    distinct(practice_name)
#> # A tibble: 2 × 1
#>   practice_name              
#>   <chr>                      
#> 1 Irrigation Pipeline        
#> 2 Irrigation Water Conveyance

Filter the NRCS data to only include HUC12 in Columbia County

nrcs_columbia <-  nrcs_data %>%
  filter(huc12 %in% columbia_huc12s$huc12 ) %>%
  rename(same_code=land_unit_has_multiple_instances_of_same_practice_code,
         same_code_and_year=land_unit_has_multiple_instances_of_same_practice_code_applied_the_same_year)

How many Huc12s in Columbia County?

nrcs_columbia %>%
  cnt(n_distinct_vars = huc12)
#> # A tibble: 1 × 3
#>   n_huc12     n n_cumulative
#>     <int> <int>        <int>
#> 1      36 13516        13516

Checking Duplicates

If the same practice code is applied multiple times on the same land unit on the same year, the data is most likely duplicated and should be removed before summing applied amounts

-NRCS Conservation Practices Data: Overview and FAQs Oct-2024

All land_unit_id have 1 distinct huc12 in Columbia County

nrcs_columbia %>%
    cnt(land_unit_id,n_distinct_vars = huc12) %>%
   filter(n_huc12>1)
#> # A tibble: 0 × 4
#> # ℹ 4 variables: land_unit_id <chr>, n_huc12 <int>, n <int>, n_cumulative <int>

In Columbia County we have 9084 distinct records by land_unit_id,applied_year, and practice_code

nrcs_columbia %>%
    cnt(n_distinct_vars = c(land_unit_id,applied_year,practice_code))
#> # A tibble: 1 × 6
#>   n_land_unit_id n_applied_year n_practice_code n_land_unit_id_applied_y…¹     n
#>            <int>          <int>           <int>                      <int> <int>
#> 1           2408             21             112                       9084 13516
#> # ℹ abbreviated name: ¹​n_land_unit_id_applied_year_practice_code
#> # ℹ 1 more variable: n_cumulative <int>

Check that distinct records by land_unit_id,applied_year, and practice_code match distinct by applied_amount, land_unit_id,applied_year, and practice_code

nrcs_columbia %>%
    cnt(n_distinct_vars = c(land_unit_id,applied_amount,applied_year,practice_code))
#> # A tibble: 1 × 7
#>   n_land_unit_id n_applied_amount n_applied_year n_practice_code
#>            <int>            <int>          <int>           <int>
#> 1           2408             1146             21             112
#> # ℹ 3 more variables:
#> #   n_land_unit_id_applied_amount_applied_year_practice_code <int>, n <int>,
#> #   n_cumulative <int>
duplicated_practices <- nrcs_columbia %>%
    group_by(land_unit_id,practice_code,applied_year) %>%
    mutate(duplicates_id_yr_code=1:n()) %>%
    ungroup() %>%
    group_by(land_unit_id,practice_code,applied_year,applied_amount) %>%
    mutate(duplicates_id_yr_code_amt=1:n()) %>%
    ungroup()
ids_where_amt_is_different <- duplicated_practices %>%
     filter(duplicates_id_yr_code>1) %>%
     filter(!duplicates_id_yr_code==duplicates_id_yr_code_amt) %>%
     pull(land_unit_id) %>%
     unique()

listings


duplicated_listings <- duplicated_practices %>%
    filter(land_unit_id %in% ids_where_amt_is_different) %>%
    select(land_unit_id,practice_name,applied_amount,applied_date,applied_year,starts_with("dup"), program) %>%
    arrange(land_unit_id,applied_year)%>%
    split(~land_unit_id)

Discussion

Example 1

Could be a duplicate but amount different

DT::datatable(duplicated_listings[["{C7966B9E-B2E4-4FED-97BA-8C242B40C33A}"]])

Example 2

DT::datatable(duplicated_listings[["{EA8850AA-582F-4603-8270-0963585DF9E5}"]])

Example 3

Keep 77 and 102.7 ?

DT::datatable(duplicated_listings[["{FD9D5AB7-7A8E-4020-9FAA-81B392DC7EB4}"]])

Example 4

Look at Upland Wildlife Habitat Management in 2010. Not considering applied amount would probably remove the record where applied amount is 1

DT::datatable(duplicated_listings[["{DAA01A27-1A9C-4CD0-8D21-64C427176BF1}"]])

Example 5

Look at Forest Stand Improvement in 2023. Removing duplicates of either 1.1 or 2.9 is probably incorrect. Forest Stand Improvement 2023 sum = 4 Forest Stand Improvement 2022 sum = 4

DT::datatable(duplicated_listings[["{AE32D5A2-6BA9-4DD2-AB35-F30F548BE118}"]])

Top 20 examples of most practice code duplicates

nrcs_columbia %>%
cnt(land_unit_id,applied_year,n_distinct_vars = practice_code) %>%
arrange(desc(n_practice_code)) %>%
slice(1:20)
#> # A tibble: 20 × 5
#>    land_unit_id                  applied_year n_practice_code     n n_cumulative
#>    <chr>                                <dbl>           <int> <int>        <int>
#>  1 {1FCE00C8-38CF-44D1-B5CA-034…         2010               9     9         1613
#>  2 {1FCE00C8-38CF-44D1-B5CA-034…         2011               9     9         1622
#>  3 {1FCE00C8-38CF-44D1-B5CA-034…         2012               9     9         1631
#>  4 {1FCE00C8-38CF-44D1-B5CA-034…         2013               9     9         1640
#>  5 {1FCE00C8-38CF-44D1-B5CA-034…         2014               9     9         1649
#>  6 {C7AA77F0-F338-46C9-80AE-DF0…         2010               9     9        10183
#>  7 {C7AA77F0-F338-46C9-80AE-DF0…         2011               9     9        10192
#>  8 {C7AA77F0-F338-46C9-80AE-DF0…         2012               9     9        10201
#>  9 {C7AA77F0-F338-46C9-80AE-DF0…         2013               9     9        10210
#> 10 {C7AA77F0-F338-46C9-80AE-DF0…         2014               9     9        10219
#> 11 {E9FF2BE2-08D0-4025-A79C-174…         2016               8     8        12396
#> 12 {2F6D84DC-A1F0-4D37-8558-B6E…         2022               7    10         2522
#> 13 {469DB704-8FA6-4439-8213-963…         2013               7    18         3623
#> 14 {857A3952-0641-41CA-8EA0-16E…         2013               7    15         6808
#> 15 {03F66BFF-3263-43BC-BA99-D7D…         2006               6     6          203
#> 16 {0B238478-C34F-4EAC-A0CC-BDB…         2007               6     6          561
#> 17 {0CD4A764-0227-4E80-AD4C-927…         2007               6     6          653
#> 18 {131BAF71-F051-4D0D-B4FD-039…         2006               6     6          903
#> 19 {16AC9FB6-D71D-441D-A187-D99…         2006               6     6         1112
#> 20 {1A7EBCBC-7A6B-48FF-9348-9CF…         2021               6     6         1287