R:计算滚动窗口和不同组的协方差
我想计算我的面板数据集的滚动协方差。
数据看起来像这样:
structure(list(Name = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C"), Date = c("01.08.2018", "02.08.2018", "03.08.2018",
"04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018", "08.08.2018",
"09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018", "13.08.2018",
"14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018", "18.08.2018",
"01.08.2018", "02.08.2018", "03.08.2018", "04.08.2018", "05.08.2018",
"06.08.2018", "07.08.2018", "08.08.2018", "09.08.2018", "10.08.2018",
"11.08.2018", "12.08.2018", "13.08.2018", "14.08.2018", "15.08.2018",
"16.08.2018", "17.08.2018", "18.08.2018", "01.08.2018", "02.08.2018",
"03.08.2018", "04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018",
"08.08.2018", "09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018",
"13.08.2018", "14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018",
"18.08.2018", "19.08.2018", "20.08.2018", "21.08.2018", "22.08.2018",
"23.08.2018", "24.08.2018", "25.08.2018", "26.08.2018", "27.08.2018"
), Y = c(-0.021104989, 0.005829159, -0.005993838, 0.012343494,
0.000735194, -0.005313411, -0.023474922, -0.006807478, -0.002674863,
-0.004020429, -0.001613125, 0.005300896, 0.018783846, 0.009664223,
-0.00810932, 0.00757471, 0.008253359, 0.003760981, -0.001052405,
-0.007286345, -0.008488191, -0.002538641, -0.009791452, 0.000446472,
-0.002980809, -0.009180535, 0.008837298, -0.0027743, 0.006254873,
0.001064582, 0.00178255, 0.005754829, 0.004967109, 0.005663851,
0.012421897, 0.002893901, -0.000674701, 0.005609272, -0.002664995,
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953,
-0.004816989, -0.012074368, -0.016623213, 0.002353228, -0.003833742,
-0.018048155, -0.003742604, 0.001912499, 0.012315676, -0.011545789,
-0.008412867, -0.008541933, -0.009458445, -0.016025502, -0.012383462,
-2.21e-05, -0.014338361, 0.016535732, -0.000234764), X = c(-0.000674701,
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389,
-0.007096134, 0.003788953, -0.004816989, -0.012074368, -0.016623213,
-0.000674701, 0.005609272, -0.002664995, -0.004614426, -0.020395375,
0.000400389, -0.007096134, 0.003788953, -0.004816989, -0.012074368,
-0.016623213, -0.000674701, 0.005609272, -0.002664995, -0.004614426,
-0.020395375, 0.000400389, -0.007096134, 0.003788953, -0.004816989,
-0.012074368, -0.016623213, -0.000674701, 0.005609272, -0.002664995,
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953,
-0.004816989, -0.012074368, -0.016623213, -0.000674701, 0.005609272,
-0.002664995, -0.004614426, -0.020395375, 0.000400389, -0.007096134,
0.003788953, -0.004816989, -0.012074368, -0.016623213, -0.000674701,
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389,
-0.007096134, 0.003788953)), class = "data.frame", row.names = c(NA,
-63L))
我的真实数据每组包含超过1000个观测值。现在,我想针对每个组和每个日期,以250个观测值的滚动时间窗口计算X和Y之间的协方差。为了不损失太多观测值,我希望即使可用的先前观测值少于250个,也能得到协方差。在这种情况下,只需使用尽可能多的可用观测值,例如,对于Name A的第4行,仅使用先前的三个观测值。
目前我做到了这一步:
Data <- Data %>%
group_by(Name) %>%
mutate(Covariance=cov(X,Y))
但是,如何添加滚动时间窗口?
感谢您的帮助。
I would like to calculate the rolling covariance for my panel dataset.
The data looks like this:
structure(list(Name = c("A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C", "C",
"C", "C", "C"), Date = c("01.08.2018", "02.08.2018", "03.08.2018",
"04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018", "08.08.2018",
"09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018", "13.08.2018",
"14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018", "18.08.2018",
"01.08.2018", "02.08.2018", "03.08.2018", "04.08.2018", "05.08.2018",
"06.08.2018", "07.08.2018", "08.08.2018", "09.08.2018", "10.08.2018",
"11.08.2018", "12.08.2018", "13.08.2018", "14.08.2018", "15.08.2018",
"16.08.2018", "17.08.2018", "18.08.2018", "01.08.2018", "02.08.2018",
"03.08.2018", "04.08.2018", "05.08.2018", "06.08.2018", "07.08.2018",
"08.08.2018", "09.08.2018", "10.08.2018", "11.08.2018", "12.08.2018",
"13.08.2018", "14.08.2018", "15.08.2018", "16.08.2018", "17.08.2018",
"18.08.2018", "19.08.2018", "20.08.2018", "21.08.2018", "22.08.2018",
"23.08.2018", "24.08.2018", "25.08.2018", "26.08.2018", "27.08.2018"
), Y = c(-0.021104989, 0.005829159, -0.005993838, 0.012343494,
0.000735194, -0.005313411, -0.023474922, -0.006807478, -0.002674863,
-0.004020429, -0.001613125, 0.005300896, 0.018783846, 0.009664223,
-0.00810932, 0.00757471, 0.008253359, 0.003760981, -0.001052405,
-0.007286345, -0.008488191, -0.002538641, -0.009791452, 0.000446472,
-0.002980809, -0.009180535, 0.008837298, -0.0027743, 0.006254873,
0.001064582, 0.00178255, 0.005754829, 0.004967109, 0.005663851,
0.012421897, 0.002893901, -0.000674701, 0.005609272, -0.002664995,
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953,
-0.004816989, -0.012074368, -0.016623213, 0.002353228, -0.003833742,
-0.018048155, -0.003742604, 0.001912499, 0.012315676, -0.011545789,
-0.008412867, -0.008541933, -0.009458445, -0.016025502, -0.012383462,
-2.21e-05, -0.014338361, 0.016535732, -0.000234764), X = c(-0.000674701,
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389,
-0.007096134, 0.003788953, -0.004816989, -0.012074368, -0.016623213,
-0.000674701, 0.005609272, -0.002664995, -0.004614426, -0.020395375,
0.000400389, -0.007096134, 0.003788953, -0.004816989, -0.012074368,
-0.016623213, -0.000674701, 0.005609272, -0.002664995, -0.004614426,
-0.020395375, 0.000400389, -0.007096134, 0.003788953, -0.004816989,
-0.012074368, -0.016623213, -0.000674701, 0.005609272, -0.002664995,
-0.004614426, -0.020395375, 0.000400389, -0.007096134, 0.003788953,
-0.004816989, -0.012074368, -0.016623213, -0.000674701, 0.005609272,
-0.002664995, -0.004614426, -0.020395375, 0.000400389, -0.007096134,
0.003788953, -0.004816989, -0.012074368, -0.016623213, -0.000674701,
0.005609272, -0.002664995, -0.004614426, -0.020395375, 0.000400389,
-0.007096134, 0.003788953)), class = "data.frame", row.names = c(NA,
-63L))
My real data consists of over 1000 observations per group.
Now I would like to calculate the covariance between X and Y with a rolling time window of 250 observations for each group and for each Date. In order not to lose too many observations, I would also like to obtain a covariance even if fewer than 250 prior observations are available. In such cases, just use the maximum number of observations available; e.g., in row 4 for Name A, use only the three prior observations.
I have come so far:
Data <- Data %>%
group_by(Name) %>%
mutate(Covariance=cov(X,Y))
However, how can I add the rolling time window?
I appreciate your help a lot.
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
data:image/s3,"s3://crabby-images/d5906/d59060df4059a6cc364216c4d63ceec29ef7fe66" alt="扫码二维码加入Web技术交流群"
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(2)
您还可以使用
roll
软件包。例如,大小为4的滚动窗口可以这样实现:
You can also use the
roll
package. For example, a rolling window of size 4 can be achieved like so:
您可以使用
slide2_dbl
(来自 slider
软件包)来完成此操作,该软件包与 dplyr
配合得很好:结果
You could do this using
slide2_dbl
from the slider
package, which works nicely with dplyr
: Result