2

我想找到一种与这个问题非常相似的方法：列中的值每变化一次，计数就加 1。

但是我想在 var1 = c 时重新启动计数器。目前使用的代码是 df$var2 <- with(rle(as.character(df$var1)), rep(seq_along(values), lengths))

得到的结果是下面的 var2 列（期望的结果见 “Should be” 列）：

var1 var2 Should be
   a    1   1
   a    1   1
   1    2   2
   0    3   3
   b    4   4
   b    4   4
   b    4   4
   c    5   1
   1    6   2
   1    6   2
4

2 回答 2

2

使用 data.table，您可以用 rleid 获取每个组内 var1 的游程（run-length）ID。

library(data.table)

# Convert df to a data.table so the `[i, j, by]` syntax below is available.
setDT(df)
# cumsum(var1 == "c") goes up by one at every "c" row, so each "c" opens a new
# group; rleid(var1) then numbers the runs of identical values inside that
# group, which restarts the counter at 1 on every "c".
df[, var2 := rleid(var1), by = cumsum(var1 == "c")]
df

#    var1 var2
# 1:    a    1
# 2:    a    1
# 3:    1    2
# 4:    0    3
# 5:    b    4
# 6:    b    4
# 7:    b    4
# 8:    c    1
# 9:    1    2
#10:    1    2

使用 dplyr 也可以实现：

library(dplyr)

# Number the runs of identical var1 values, restarting the count at every
# "c" row. The result contains only var1 and var2 and is not grouped.
df %>%
  # Each "c" row bumps the cumulative sum, so it opens a new counting group.
  group_by(group = cumsum(var1 == "c")) %>%
  # Within a group, add one each time var1 differs from the previous row. The
  # lag default makes the first row compare equal to itself, so the count
  # starts at 1. `1L` keeps var2 an integer.
  mutate(var2 = cumsum(var1 != lag(var1, default = first(var1))) + 1L) %>%
  # Drop the grouping and the helper column so they do not leak to callers.
  ungroup() %>%
  select(-group)

数据

# Sample data: one factor column whose runs of repeated values are numbered.
df <- data.frame(
  var1 = factor(
    c("a", "a", "1", "0", "b", "b", "b", "c", "1", "1"),
    levels = c("0", "1", "a", "b", "c")
  )
)
于 2019-12-05T00:11:37.550 回答
0

我们可以在 base R 中沿用 OP 的 rle 代码，并配合 ave 使用：

# Base R: number the runs of identical var1 values, restarting at each "c".
# ave() applies the run numbering separately within each group defined by
# cumsum(var1 == "c"); since its input is character, the result is converted
# back to integer afterwards.
df$var2 <- as.integer(ave(
  as.character(df$var1),
  cumsum(df$var1 == "c"),
  FUN = function(x) {
    runs <- rle(x)
    rep(seq_along(runs$values), runs$lengths)
  }
))
df$var2
#[1] 1 1 2 3 4 4 4 1 2 2

数据

# Sample data: one factor column whose runs of repeated values are numbered.
df <- data.frame(
  var1 = factor(
    c("a", "a", "1", "0", "b", "b", "b", "c", "1", "1"),
    levels = c("0", "1", "a", "b", "c")
  )
)
于 2019-12-05T02:54:56.953 回答