R: function to evaluate an expression over a variable and create new variables filled in with logical values
`df1` (below) is an event log. Variable 1 consists of (non-unique) timestamps (POSIXct). Variables 2:4 consist of attributes of the events (factors).
I've created `df2` and `df3` to define time bins. `df2` stores the initial time and `df3` the end time of each time bin.
The question is how to expand `df1` with the variable names of `df2` (which are the same as those of `df3`) while filling in TRUE or FALSE for each event, based on whether the event belongs to one of the time bins of that variable.
In other words, if an event belongs to a time bin (as defined by `df2` and `df3`) the value is TRUE, otherwise FALSE. Each event in `df1` needs to be checked against all time bins (all pairs of elements of `df2` and `df3`), one variable (of `df2` and `df3`) at a time.
Due to the large number of variables and events, I cannot do this interactively. I would like to learn how to do this the R way, avoiding explicit loops and taking advantage of vectorization.
Data (small sampled datasets):
# Sample data for the question: an event log (df1) plus two data frames that
# hold the start (df2) and end (df3) times of the time bins. df2 and df3 share
# the same column names; element [i, j] of each describes one bin.

# Timestamp layout shared by every character column below. The conversion
# specifications are case-sensitive: %Y = 4-digit year, %H = hour (00-23),
# %M = minute, %S = second.
ts_format <- "%Y-%m-%d %H:%M:%S"

# Event log: one row per event, timestamps are not unique.
df1 <- data.frame(
  time.stamp = c(
    "2015-01-05 15:00:00", "2015-01-05 15:01:00",
    "2015-01-05 15:02:00", "2015-01-05 15:02:00",
    "2015-01-05 15:03:00", "2015-01-05 15:03:00",
    "2015-01-05 15:03:00", "2015-01-05 15:03:00"
  ),
  g.id = as.factor(c("848", "737", "848", "848", "737", "848", "737", "737"))
)
df1$time.stamp <- as.POSIXct(strptime(df1$time.stamp, ts_format))

# Bin start times, one column per variable.
df2 <- data.frame(
  m0p1 = c("2015-01-05 15:00:00", "2015-01-05 16:00:00", "2015-01-05 17:00:00"),
  m1p1 = c("2015-01-05 15:01:00", "2015-01-05 16:01:00", "2015-01-05 17:01:00"),
  m2p1 = c("2015-01-05 15:02:00", "2015-01-05 16:02:00", "2015-01-05 17:02:00"),
  m3p1 = c("2015-01-05 15:03:00", "2015-01-05 16:03:00", "2015-01-05 17:03:00")
)
# `df2[] <-` keeps the data.frame shape while converting every column.
df2[] <- lapply(df2, function(x) as.POSIXct(strptime(x, ts_format)))

# Bin end times, same layout as df2.
df3 <- data.frame(
  m0p1 = c("2015-01-05 15:01:00", "2015-01-05 16:01:00", "2015-01-05 17:01:00"),
  m1p1 = c("2015-01-05 15:02:00", "2015-01-05 16:02:00", "2015-01-05 17:02:00"),
  m2p1 = c("2015-01-05 15:03:00", "2015-01-05 16:03:00", "2015-01-05 17:03:00"),
  m3p1 = c("2015-01-05 15:04:00", "2015-01-05 16:04:00", "2015-01-05 17:04:00")
)
df3[] <- lapply(df3, function(x) as.POSIXct(strptime(x, ts_format)))
The result should look like this:
> head(df1.extended)
           time.stamp g.id  m0p1  m1p1  m2p1  m3p1
1 2015-01-05 15:00:00  848  TRUE FALSE FALSE FALSE
2 2015-01-05 15:01:00  737 FALSE  TRUE FALSE FALSE
3 2015-01-05 15:02:00  848 FALSE FALSE  TRUE FALSE
4 2015-01-05 15:02:00  848 FALSE FALSE  TRUE FALSE
5 2015-01-05 15:03:00  737 FALSE FALSE FALSE  TRUE
6 2015-01-05 15:03:00  848 FALSE FALSE FALSE  TRUE
7 2015-01-05 15:03:00  737 FALSE FALSE FALSE  TRUE
8 2015-01-05 15:03:00  737 FALSE FALSE FALSE  TRUE
Any pointers appreciated. Thanks.
You could use `foverlaps` from the data.table package:
# Flag, for every event in df1 and every bin variable (column of df2/df3),
# whether the event matches one of that variable's time bins, then spread the
# flags into one logical column per variable.
# Expects df1 (with columns time.stamp, g.id), df2 (bin starts) and df3
# (bin ends) to already exist, with df2 and df3 sharing column names.

library(reshape2)
library(data.table)

# Reshape the bin tables to long format: one row per (variable, bin).
# melt() is namespaced so the data.frame method from reshape2 is used even
# though data.table (attached later) also exports a melt().
df2 <- reshape2::melt(df2, value.name = "start")
df3 <- reshape2::melt(df3, value.name = "end")
# Row order of the two melted tables is identical, so the end times line up
# with their start times.
df2$end <- df3$end

# Convert to data.table by reference.
setDT(df1)
setDT(df2)

# foverlaps() requires an interval (two columns) on both sides; events are
# points in time, so the timestamp is duplicated to form a zero-length interval.
df1[, time.stamp2 := time.stamp]
setkey(df2, start, end)

# For each variable, overlap-join the events against that variable's bins.
# Unmatched events come back with NA in `start`, which becomes match = FALSE.
# NOTE(review): type = "start" matches only events whose timestamp equals a
# bin's start time; confirm this is the intended bin-membership rule.
res <- df2[,
  foverlaps(
    df1, .SD,
    by.x = c("time.stamp", "time.stamp2"),
    by.y = c("start", "end"),
    type = "start"
  )[, list(time.stamp, g.id, match = !is.na(start))],
  by = variable
]

# Number the events within each variable so duplicated timestamps stay on
# separate rows when casting to wide format.
res[, id := seq_len(.N), by = variable]

dcast(res, id + time.stamp + g.id ~ variable, value.var = "match")
#    id          time.stamp g.id  m0p1  m1p1  m2p1  m3p1
# 1   1 2015-01-05 15:00:00  848  TRUE FALSE FALSE FALSE
# 2   2 2015-01-05 15:01:00  737 FALSE  TRUE FALSE FALSE
# 3   3 2015-01-05 15:02:00  848 FALSE FALSE  TRUE FALSE
# 4   4 2015-01-05 15:02:00  848 FALSE FALSE  TRUE FALSE
# 5   5 2015-01-05 15:03:00  737 FALSE FALSE FALSE  TRUE
# 6   6 2015-01-05 15:03:00  848 FALSE FALSE FALSE  TRUE
# 7   7 2015-01-05 15:03:00  737 FALSE FALSE FALSE  TRUE
# 8   8 2015-01-05 15:03:00  737 FALSE FALSE FALSE  TRUE
Comments
Post a Comment