如何检查非时间序列数据的增加和减少趋势

发布于 2025-01-13 00:15:53 字数 2779 浏览 3 评论 0 原文

我有以下数据框：

dat <- structure(list(peptide_name = c("P2", "P2", "P2", "P2", "P2", 
"P2", "P4", "P4", "P4", "P4", "P4", "P4", "P1", "P1", "P1", "P1", 
"P1", "P1", "P3", "P3", "P3", "P3", "P3", "P3"), dose = c("0mM", 
"0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", 
"3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", 
"20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM"), prolif_score = c(1, 
1.174927114, 1.279883382, 1.752186589, 1.994169096, 2.358600583, 
1, 1.046454768, 1.339853301, 1.293398533, 1.026894866, 1.17264410515205, 
1, 0.928020566, 0.920308483, 1.071979434, 1.195372751, 1.524421594, 
1, 1.293233083, 1.483709273, 1.468671679, 1.192982456, 0.463659148
)), row.names = c(NA, -24L), class = c("tbl_df", "tbl", "data.frame"
))

该图如下所示：

我想要的是一个可以区分向上（P2、P1）的指标和非上升（P4 和 P3）趋势。正如您可以看到线性模型的R值用于此目的是没有用的。例如，P3 中的 R 分数为正，就像 P2 和 P1 一样。

我怎样才能在 R 中做到这一点？

这是我必须创建该图的代码：

library(tidyverse)
library(broom)
library(ggpubr)

lm_rsq_dat <- dat %>% 
    mutate(dose = as.numeric(gsub("mM", "", dose))) %>% 
    group_by(peptide_name) %>% 
    do(model = glance(lm(dose ~ prolif_score, data = .))) %>% 
    unnest(model) %>% 
    arrange(desc(adj.r.squared)) %>%
    dplyr::select(peptide_name, adj.r.squared) %>% 
    print(n = 100)  
  

# Plot --------------------------------------------------------------------
plot_dat <- dat %>% 
  left_join(lm_rsq_dat, by = "peptide_name") %>%
  mutate(r.squared = formatC(adj.r.squared, format = "e", digits = 2)) %>% 
  mutate(npeptide_name = paste0( peptide_name, " (R=", r.squared, ")")) #%>% 

nspn <- plot_dat %>% 
  dplyr::select(peptide_name, npeptide_name, adj.r.squared) %>% 
  arrange(match(peptide_name, rsq_dat$peptide_name)) %>% 
  unique() %>% 
  pull(npeptide_name)


plot_dat <- plot_dat %>% 
  mutate(npeptide_name = factor(npeptide_name, levels =   nspn))


end_dat <- plot_dat %>% 
  filter(dose == "20mM")

  ggline(plot_dat,
         y = "prolif_score", x = "dose",
         color = "npeptide_name", 
         size = 1, 
         facet.by = "npeptide_name", scales = "free_y",
         palette = get_palette("npg",  length( dat$peptide_name))
  ) +
  xlab("Dose") +
  ylab("Prolif. Score") +
  grids(linetype = "dashed")  +
  rremove("legend") + 
theme(axis.text.x=element_text(angle = 60, hjust = 0.5, vjust = 0.5, size = 12))

原文

I have the following data frame:

dat <- structure(list(peptide_name = c("P2", "P2", "P2", "P2", "P2", 
"P2", "P4", "P4", "P4", "P4", "P4", "P4", "P1", "P1", "P1", "P1", 
"P1", "P1", "P3", "P3", "P3", "P3", "P3", "P3"), dose = c("0mM", 
"0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", 
"3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", 
"20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM"), prolif_score = c(1, 
1.174927114, 1.279883382, 1.752186589, 1.994169096, 2.358600583, 
1, 1.046454768, 1.339853301, 1.293398533, 1.026894866, 1.17264410515205, 
1, 0.928020566, 0.920308483, 1.071979434, 1.195372751, 1.524421594, 
1, 1.293233083, 1.483709273, 1.468671679, 1.192982456, 0.463659148
)), row.names = c(NA, -24L), class = c("tbl_df", "tbl", "data.frame"
))

The plot looks like this:

What I want to have is an indicator that can differentiate between upward (P2, P1)
and non-upward (P4 and P3) trend. As you can see the R value of linear model used for that is not useful.
For example R score in P3 is positive just like P2 and P1.

How can I do that in R?

This is the code I have to create that plot:

library(tidyverse)
library(broom)
library(ggpubr)

lm_rsq_dat <- dat %>% 
    mutate(dose = as.numeric(gsub("mM", "", dose))) %>% 
    group_by(peptide_name) %>% 
    do(model = glance(lm(dose ~ prolif_score, data = .))) %>% 
    unnest(model) %>% 
    arrange(desc(adj.r.squared)) %>%
    dplyr::select(peptide_name, adj.r.squared) %>% 
    print(n = 100)  
  

# Plot --------------------------------------------------------------------
plot_dat <- dat %>% 
  left_join(lm_rsq_dat, by = "peptide_name") %>%
  mutate(r.squared = formatC(adj.r.squared, format = "e", digits = 2)) %>% 
  mutate(npeptide_name = paste0( peptide_name, " (R=", r.squared, ")")) #%>% 

nspn <- plot_dat %>% 
  dplyr::select(peptide_name, npeptide_name, adj.r.squared) %>% 
  arrange(match(peptide_name, rsq_dat$peptide_name)) %>% 
  unique() %>% 
  pull(npeptide_name)


plot_dat <- plot_dat %>% 
  mutate(npeptide_name = factor(npeptide_name, levels =   nspn))


end_dat <- plot_dat %>% 
  filter(dose == "20mM")

  ggline(plot_dat,
         y = "prolif_score", x = "dose",
         color = "npeptide_name", 
         size = 1, 
         facet.by = "npeptide_name", scales = "free_y",
         palette = get_palette("npg",  length( dat$peptide_name))
  ) +
  xlab("Dose") +
  ylab("Prolif. Score") +
  grids(linetype = "dashed")  +
  rremove("legend") + 
theme(axis.text.x=element_text(angle = 60, hjust = 0.5, vjust = 0.5, size = 12))

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

楠木可依 2025-01-20 00:15:53

Kendall 和 Spearman 相关性检验都评估基于等级的相关性，因此读出一个值与另一个值单调变化的程度。只需运行 cor(x, y, method = "kendall") 即可获得此信息。

library(tidyverse)
  
dat <- structure(list(peptide_name = c("P2", "P2", "P2", "P2", "P2", "P2", "P4", "P4", "P4", "P4", "P4", "P4", "P1", "P1", "P1", "P1", "P1", "P1", "P3", "P3", "P3", "P3", "P3", "P3"), dose = c("0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM"), prolif_score = c(1, 1.174927114, 1.279883382, 1.752186589, 1.994169096, 2.358600583, 1, 1.046454768, 1.339853301, 1.293398533, 1.026894866, 1.17264410515205, 1, 0.928020566, 0.920308483, 1.071979434, 1.195372751, 1.524421594, 1, 1.293233083, 1.483709273, 1.468671679, 1.192982456, 0.463659148)), row.names = c(NA, -24L), class = c("tbl_df", "tbl", "data.frame"))

dat_proc <- dat %>% 
  mutate(dose = parse_number(dose),
         unit = "nM",
         peptide = parse_number(peptide_name)) 

dat_proc %>% 
  group_split(peptide_name) %>% 
  map(~cor(.x$prolif_score, .x$dose, method = "kendall")) %>%
  map(data.frame) %>% 
  map(rename, kendall = 1) %>% 
  bind_rows(.id = "peptide") %>% 
  mutate(peptide = as.numeric(peptide)) %>% 
  left_join(dat_proc, .) %>% 
  ggplot(aes(factor(dose), prolif_score, color = kendall)) +
  geom_point() +
  geom_text(aes(x = 0, y = 2, label = paste0("kendall correlation \n coefficent = ", kendall)), hjust = -0.2) +
  geom_line(aes(group = peptide)) +
  facet_wrap(~peptide, ncol = 2)
#> Joining, by = "peptide"

^{由 reprex 包 (v2.0.1)}

Both the Kendall and Spearman correlation tests assess rank-based correlation and therefore read out on the degree to which a value is is changing monotonically with another. This can be obtained by simply running cor(x, y, method = "kendall").

library(tidyverse)
  
dat <- structure(list(peptide_name = c("P2", "P2", "P2", "P2", "P2", "P2", "P4", "P4", "P4", "P4", "P4", "P4", "P1", "P1", "P1", "P1", "P1", "P1", "P3", "P3", "P3", "P3", "P3", "P3"), dose = c("0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM"), prolif_score = c(1, 1.174927114, 1.279883382, 1.752186589, 1.994169096, 2.358600583, 1, 1.046454768, 1.339853301, 1.293398533, 1.026894866, 1.17264410515205, 1, 0.928020566, 0.920308483, 1.071979434, 1.195372751, 1.524421594, 1, 1.293233083, 1.483709273, 1.468671679, 1.192982456, 0.463659148)), row.names = c(NA, -24L), class = c("tbl_df", "tbl", "data.frame"))

dat_proc <- dat %>% 
  mutate(dose = parse_number(dose),
         unit = "nM",
         peptide = parse_number(peptide_name)) 

dat_proc %>% 
  group_split(peptide_name) %>% 
  map(~cor(.x$prolif_score, .x$dose, method = "kendall")) %>%
  map(data.frame) %>% 
  map(rename, kendall = 1) %>% 
  bind_rows(.id = "peptide") %>% 
  mutate(peptide = as.numeric(peptide)) %>% 
  left_join(dat_proc, .) %>% 
  ggplot(aes(factor(dose), prolif_score, color = kendall)) +
  geom_point() +
  geom_text(aes(x = 0, y = 2, label = paste0("kendall correlation \n coefficent = ", kendall)), hjust = -0.2) +
  geom_line(aes(group = peptide)) +
  facet_wrap(~peptide, ncol = 2)
#> Joining, by = "peptide"

^{Created on 2022-03-10 by the reprex package (v2.0.1)}

回复收藏 0 原文

~没有更多了~