2

我正在尝试按条件 "ccode" = "Ticker"、"Date" = "Date" 和 "Time" = "Timestamp" 合并两个表。但是,如果 "Time" 不能完全匹配,则应改为参考 "Timeint"(最多提前 2 分钟)。由于 `left_join` 做不到这一点,我考虑使用 `fuzzy_left_join`。

我遇到两个问题:

  1. 我的数据存储在 BigQuery 上,用 `left_join` 可以正常工作。但是,当我尝试使用 `fuzzy_left_join` 时,收到以下错误消息:

“错误:tibble 中的所有列都必须是 1d 或 2d 对象:* 列 `col` 为 NULL”

  2. 我尝试制作了一个可重现的示例(它略有不同,因为数据不是以长度为 2 的列表形式存储的),但它同样不起作用。

这是一个可重现的示例:

# Reproducible example from the question: join call records to stock quotes on
# ticker, date, and a time window using fuzzy_left_join().
library(fuzzyjoin)
library(anytime)

# Call records. In every row Timeint is two minutes before Time.
calls.sample <- data.frame(ccode = c("MMM", "K", "A", "CAG", "PM"),
                           Date = c(20111020, 20111021, 20120102, 20110510, 20080710),
                           Time = c("09:30:00", "14:30:00", "11:00:00", "15:30:00", "13:00:00"),
                           Timeint = c("9:28:00", "14:28:00", "10:58:00", "15:28:00", "12:58:00")
                           )


# Inspect how the Time column is stored before converting it.
str(calls.sample$Time)
# NOTE(review): as_hms() appears to come from the hms package, which is not
# attached by the library() calls above (the answer's reprex attaches it
# explicitly) -- confirm hms is loaded before running this.
calls.sample$Time <- as_hms(as.character(calls.sample$Time))
calls.sample$Timeint <- as_hms(as.character(calls.sample$Timeint))


# Stock quotes. Only the MMM row has a Timestamp (9:28:00) that differs from
# the matching call's Time (09:30:00); it equals that call's Timeint instead.
stocks.sample <- data.frame(Ticker = c("MMM", "K", "A", "CAG", "PM"),
                            Date = c(20111020, 20111021, 20120102, 20110510, 20080710),
                            Timestamp = c("9:28:00", "14:30:00", "11:00:00", "15:30:00", "13:00:00"),
                            OpenPrice = c(5, 1,6,7,8))

stocks.sample$Timestamp <- as_hms(as.character(stocks.sample$Timestamp))

# Each match_fun entry is paired, in order, with the corresponding `by` entry.
fuzzy_left_join(
  calls.sample, stocks.sample,
  by = c(
    "ccode" = "Ticker", 
    "Date" = "Date", 
    "Time" = "Timestamp",
    "Timeint" = "Timestamp"
  ),
  # NOTE(review): as written, the last two comparisons require
  # Time <= Timestamp and Timeint >= Timestamp. A window of
  # Timeint <= Timestamp <= Time needs `>=` followed by `<=`, which is the
  # change the answer below makes.
  match_fun = list(`==`, `==`, `<=`, `>=`)
)

我在寻找:

ccode   Date     Time    OpenPrice
1   MMM 20111020 09:30:00 5
2     K 20111021 14:30:00 1
3     A 20120102 11:00:00 6
4   CAG 20110510 15:30:00 7
5    PM 20080710 13:00:00 8

我很感激这方面的任何帮助:)!

4

1 回答 1

0

我运行了你的代码,运行时没有报错,但匹配结果是 NA。我只改了一处:把 `match_fun` 列表中最后两项的比较运算符对调,

# Paired in order with the `by` entries: ccode == Ticker, Date == Date,
# Time >= Timestamp, Timeint <= Timestamp (Timestamp lies within [Timeint, Time]).
match_fun = list(`==`, `==`, `>=`, `<=`)

并得到了你想要的结果!

# Attach the packages used by this reprex; hms supplies as_hms().
library(fuzzyjoin, quietly = TRUE)
library(anytime, quietly = TRUE)
library(hms, quietly = TRUE)
#> Warning: package 'fuzzyjoin' was built under R version 3.6.3
#> Warning: package 'anytime' was built under R version 3.6.3

# Call records: Timeint is two minutes before Time in every row.
calls.sample <- data.frame(
  ccode = c("MMM", "K", "A", "CAG", "PM"),
  Date = c(20111020, 20111021, 20120102, 20110510, 20080710),
  Time = c("09:30:00", "14:30:00", "11:00:00", "15:30:00", "13:00:00"),
  Timeint = c("9:28:00", "14:28:00", "10:58:00", "15:28:00", "12:58:00")
)
# Convert the time columns to hms so they can be compared with >= and <=.
calls.sample[["Time"]] <- as_hms(as.character(calls.sample[["Time"]]))
calls.sample[["Timeint"]] <- as_hms(as.character(calls.sample[["Timeint"]]))

# Stock quotes keyed by ticker, date, and quote timestamp.
stocks.sample <- data.frame(
  Ticker = c("MMM", "K", "A", "CAG", "PM"),
  Date = c(20111020, 20111021, 20120102, 20110510, 20080710),
  Timestamp = c("9:28:00", "14:30:00", "11:00:00", "15:30:00", "13:00:00"),
  OpenPrice = c(5, 1, 6, 7, 8)
)
stocks.sample[["Timestamp"]] <- as_hms(
  as.character(stocks.sample[["Timestamp"]])
)

# Keep a quote when ticker and date are equal and its Timestamp falls inside
# the window [Timeint, Time]; each match_fun entry pairs with one `by` entry.
fuzzy_left_join(
  x = calls.sample,
  y = stocks.sample,
  by = c(
    "ccode" = "Ticker",
    "Date" = "Date",
    "Time" = "Timestamp",
    "Timeint" = "Timestamp"
  ),
  match_fun = list(`==`, `==`, `>=`, `<=`)
)
#>   ccode   Date.x     Time  Timeint Ticker   Date.y Timestamp OpenPrice
#> 1   MMM 20111020 09:30:00 09:28:00    MMM 20111020  09:28:00         5
#> 2     K 20111021 14:30:00 14:28:00      K 20111021  14:30:00         1
#> 3     A 20120102 11:00:00 10:58:00      A 20120102  11:00:00         6
#> 4   CAG 20110510 15:30:00 15:28:00    CAG 20110510  15:30:00         7
#> 5    PM 20080710 13:00:00 12:58:00     PM 20080710  13:00:00         8

由 reprex 包(v0.3.0)创建于 2020-10-20

于 2020-10-20T07:39:38.007 回答