The approach df [df [, base::c ('key1', 'key2')] |> base::duplicated.data.frame () |> base::which ()]
shows only the surplus rows of each duplicated key (the second and later occurrences), not the first occurrence.
You can use the following function to filter the rows whose key(s) appear more than once:
library (magrittr)
#' Find rows whose key(s) occur more than once
#'
#' Groups `df` by the key columns given in `...` and flags the rows whose key
#' is shared with at least one other row. Unlike `base::duplicated()`, which
#' marks only the second and later occurrences, the first occurrence of a
#' repeated key is reported as well.
#'
#' @name check_duprows
#'
#' @param df A data frame.
#' @param ... Key column(s); forwarded to `dplyr::group_by()` and
#'   `dplyr::arrange()`.
#' @param .show_all If `FALSE` (the default), keep only the rows whose key
#'   occurs more than once and drop the `.has_duplicated` helper column.
#'   If `TRUE`, keep every row and every helper column.
#'
#' @return `df` sorted by the key(s), with these columns added:
#'   * `.dup_count`: number of rows sharing the row's key;
#'   * `.dup_rownum`: position of the row within its key group (input order);
#'   * `.is_duplicated`: `TRUE` for every occurrence of a key after the first;
#'   * `.has_duplicated`: `TRUE` when the key occurs more than once
#'     (only present when `.show_all = TRUE`).
#'
#' @examples
#' df <- data.frame(key1 = c("a", "b", "a"), key2 = c(1, 2, 1), value = 1:3)
#' check_duprows(df, key1, key2)
#'
#' @references
#' - main: [ans-62616469](https://mcmap.net/q/329135/-find-duplicated-rows-based-on-2-columns-in-data-frame-in-r/62616469#62616469)
#' - select except: [ans-49515461](https://mcmap.net/q/331556/-dplyr-select-all-variables-except-for-those-contained-in-vector/49515461#49515461)
#' - sort/order/arrange: [ans-6871968](https://mcmap.net/q/45220/-sort-order-data-frame-rows-by-multiple-columns/6871968#6871968)
#'
check_duprows <- function(df, ..., .show_all = FALSE) {
  # Per-key size and per-key running index, computed before ungrouping.
  flagged <- df |>
    dplyr::group_by(...) |>
    dplyr::mutate(
      .dup_count = dplyr::n(),
      .dup_rownum = dplyr::row_number()
    ) |>
    dplyr::ungroup() |>
    dplyr::mutate(
      .is_duplicated = .dup_rownum > 1,
      .has_duplicated = .dup_count > 1
    )

  if (!.show_all) {
    # Only repeated keys are of interest; the flag is then constant, so drop it.
    flagged <- flagged |>
      dplyr::filter(.has_duplicated) |>
      dplyr::select(-tidyselect::all_of(".has_duplicated"))
  }

  dplyr::arrange(flagged, ...)
}
Then use it like this:
df %>% check_duprows (key1, key2, ...)
For example:
# Example data: every RIC occurs twice, and the (RIC, Date) pairs
# ABC.PA / 2011-07-03 and EFG.PA / 2011-07-04 are repeated.
df <- data.frame(
  RIC = rep(c("S1A.PA", "ABC.PA", "EFG.PA"), times = 2),
  Date = c(
    "2011-06-30 20:00:00", "2011-07-03 20:00:00", "2011-07-04 20:00:00",
    "2011-07-05 20:00:00", "2011-07-03 20:00:00", "2011-07-04 20:00:00"
  ),
  # No seed is set, so Open differs on every run.
  Open = stats::runif(6, min = 20, max = 30)
)
# List every row whose (RIC, Date) pair occurs more than once.
check_duprows(df, RIC, Date)
You can also build a de-duplicating function (a "uniquer") on top of it:
#' Keep only the first row of each key
#'
#' Flags the rows of `df` with `check_duprows()`, keeps the first occurrence
#' (in input order) of every key and removes the helper columns again.
#'
#' @param df A data frame.
#' @param ... Key column(s); forwarded to `check_duprows()`.
#'
#' @return `df` reduced to one row per key, sorted by the key(s), with its
#'   original columns only.
#'
#' @examples
#' df <- data.frame(key1 = c("a", "b", "a"), value = 1:3)
#' unique_duprows(df, key1)
unique_duprows <- function(df, ...) {
  helper_cols <- c(
    ".has_duplicated",
    ".is_duplicated",
    ".dup_count",
    ".dup_rownum"
  )

  df |>
    # `.show_all = TRUE` keeps the rows whose key is not repeated as well.
    check_duprows(..., .show_all = TRUE) |>
    dplyr::filter(!.is_duplicated) |>
    dplyr::select(-tidyselect::all_of(helper_cols))
}
# Sort by Open first: the row kept for each (RIC, Date) pair is then the one
# with the smallest Open.
df |>
  dplyr::arrange(Open) |>
  unique_duprows(RIC, Date)
It works just like a distinct function!
Demo on webr and shinylive.