R语言实现多序列比对(MSA)可视化
- 2020 年 2 月 25 日
- 筆記
# One-time setup: install every package used by the examples below.

# BiocManager itself must be installed before BiocManager::install() can run.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# BiocManager::install() resolves packages from both Bioconductor and CRAN,
# so ggmsa installs regardless of which repository currently hosts it.
BiocManager::install(c("treeio", "Biostrings", "ggtree", "ggmsa"))

# CRAN packages; ape (bionj) and ggseqlogo (geom_logo) are needed by the
# tree and sequence-logo examples further down.
install.packages(c("seqmagick", "cowplot", "ape", "ggseqlogo"))
接下来我们看下其核心函数ggmsa:

其中各参数的含义都比较直观,这里不再赘述,我们直接进入实战:
# Attach ggmsa so that ggmsa() is available.
library(ggmsa)

# Path to the example FASTA file shipped with the ggmsa package.
sequences <- system.file("extdata", "sample.fasta", package = "ggmsa")

# Plot alignment positions 320-360 with the "Clustal" colour scheme.
ggmsa(sequences, 320, 360, color = "Clustal")

其它的配色方案我们就不一一尝试了。接下来我们看下设置 font = NULL(不绘制字符,只保留背景色块)之后的样子:
# Same alignment window (positions 320-360), drawn with font = NULL and the
# "Chemistry_AA" colour scheme.
ggmsa(
  sequences,
  start = 320,
  end = 360,
  font = NULL,
  color = "Chemistry_AA"
)

然后,我们看下增加了进化树的综合绘图:
# Combine a phylogenetic tree with an alignment panel.
library(Biostrings)
library(ape)
library(ggtree)
library(ggmsa)  # provides tidy_msa() and geom_msa()

# Read the example alignment as amino-acid sequences.
x <- readAAStringSet(sequences)

# Pairwise Hamming distances, scaled by the width of the first sequence.
d <- as.dist(stringDist(x, method = "hamming") / width(x)[1])

# Build a tree from the distance matrix with BIONJ.
tree <- bionj(d)

# Base tree plot with tip labels.
p <- ggtree(tree) + geom_tiplab()

# Tidy the alignment for positions 164-213; named msa_data rather than `data`
# to avoid shadowing utils::data().
msa_data <- tidy_msa(x, 164, 213)

# Add the alignment as a facet panel next to the tree.
p +
  geom_facet(
    geom = geom_msa,
    data = msa_data,
    panel = "msa",
    font = NULL,
    color = "Chemistry_AA"
  ) +
  xlim_tree(1)

最后我们看下与序列 logo(sequence logo)结合的 MSA 绘图:
# Combine an alignment plot with a sequence logo along its top x axis.
library(Biostrings)  # provides readDNAStringSet()
library(ggmsa)
library(ggseqlogo)
library(cowplot)

# Example nucleotide FASTA file shipped with the ggmsa package.
f <- system.file("extdata", "LeaderRepeat_All.fa", package = "ggmsa")
s <- readDNAStringSet(f)

# geom_logo() is given the sequences as a plain character vector.
strings <- as.character(s)

# Alignment plot without letters, using the nucleotide colour scheme.
p1 <- ggmsa(s, font = NULL, color = "Chemistry_NT")

# Build the logo on a canvas that shares p1's x axis.
p2 <- axis_canvas(p1, axis = "x") +
  geom_logo(strings, method = "probability")

# Attach the logo above the alignment; height is passed by name instead of
# relying on positional matching after a named argument.
pp <- insert_xaxis_grob(
  p1,
  p2,
  height = grid::unit(0.05, "null"),
  position = "top"
)

ggdraw(pp)

欢迎大家学习交流!