YU Guangchuang
Models are approximations or simplications of real-world systems
((virus8:0.05,((virus6:0.21,
(((virus2:0.39,virus1):0.28):0.2,
virus3):0.26):0.54,
(virus5:0.53,virus4))):0.1):0.41):0.92):0.98,
virus7)):0.02):0.08):0.83,
((virus10:0.85,virus9)):0.5):0.98);
R
is still limitedggtree
ape
)ape
)
ape
)ape
)phylobase
)phylobase
)OutbreakTools
)phyloseq
)library(ggtree)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
rst_file <- system.file("examples/rst", package="ggtree")
mlc_file <- system.file("examples/mlc", package="ggtree")
beast_tree <- read.beast(beast_file)
codeml_tree <- read.codeml(rst_file, mlc_file)
merged_tree <- merge_tree(beast_tree, codeml_tree)
get.fields(merged_tree)
## [1] "height" "height_0.95_HPD" "height_median"
## [4] "height_range" "length" "length_0.95_HPD"
## [7] "length_median" "length_range" "posterior"
## [10] "rate" "rate_0.95_HPD" "rate_median"
## [13] "rate_range" "t" "N"
## [16] "S" "dN_vs_dS" "dN"
## [19] "dS" "N_x_dN" "S_x_dS"
## [22] "marginal_subs" "joint_subs" "marginal_AA_subs"
## [25] "joint_AA_subs"
fortify
methods for converting tree objects to tidy datadata.frame
is easy to manipulate, while tree object is hardfortify(merged_tree) %>% dplyr::as_data_frame()
## Source: local data frame [151 x 34]
##
## node parent branch.length x y label isTip
## (int) (int) (dbl) (dbl) (dbl) (chr) (lgl)
## 1 1 115 1.7132967 21.273403 42 A/Hokkaido/30-1-a/2013 TRUE
## 2 2 94 0.4987642 12.273403 19 A/New_York/334/2004 TRUE
## 3 3 149 1.1007898 13.273403 20 A/New_York/463/2005 TRUE
## 4 4 81 1.4515245 7.273403 5 A/New_York/452/1999 TRUE
## 5 5 150 0.7584041 13.273403 22 A/New_York/238/2005 TRUE
## 6 6 80 0.9862567 6.273403 4 A/New_York/523/1998 TRUE
## 7 7 85 1.0140833 8.273403 9 A/New_York/435/2000 TRUE
## 8 8 78 1.9788317 4.273403 2 A/New_York/582/1996 TRUE
## 9 9 79 1.1150139 4.273403 3 A/New_York/603/1996 TRUE
## 10 10 77 2.2734027 2.273403 1 A/New_York/657/1994 TRUE
## .. ... ... ... ... ... ... ...
## Variables not shown: branch (dbl), angle (dbl), height (dbl),
## height_0.95_HPD (chr), height_median (dbl), height_range (chr), length
## (dbl), length_0.95_HPD (chr), length_median (dbl), length_range (chr),
## posterior (dbl), rate (dbl), rate_0.95_HPD (chr), rate_median (dbl),
## rate_range (chr), t (dbl), N (dbl), S (dbl), dN_vs_dS (dbl), dN (dbl),
## dS (dbl), N_x_dN (dbl), S_x_dS (dbl), marginal_subs (chr), joint_subs
## (chr), marginal_AA_subs (chr), joint_AA_subs (chr)
df <- fortify(merged_tree)
df <- df[, c("dN_vs_dS", "dN", "dS", "rate")]
df <- na.omit(df)
df <- df[df$dN_vs_dS >=0 & df$dN_vs_dS <=1.5,] %>%
tidyr::gather(type, value, dN_vs_dS:dS)
df$type[df$type == "dN_vs_dS"] <- "dN/dS"
df$type %<>% factor(., levels=c("dN/dS", "dN", "dS"))
ggplot(df, aes(rate, value)) + geom_hex() +
facet_wrap(~type, scale="free_y")
I’m often asked how we make nice annotated tree figures for our papers. The real answer is “with creativity, practice and a lot of trial and error”. But of course the answer people are looking for is what tools do we use.
There are LOADS of different tools out there for plotting trees in various ways. I’ve listed my favourites for basic tree viewing below, but these really only work for quite simple visualisations, have differing and often non-overlapping feature sets, and are hard to customise. My solution to such problems is generally to turn to Python + R, and that is exactly the case here.
Kart Holt, https://holtlab.net/2015/10/03/plotting-trees-data/
Tools listed: Dendroscrope, FigTree, MicroReact, Interactive tree of life (iTOL) and ScripTree.
ggtree
packageggplot2
to support tree objects
ggplot2
-based tree viewersplotggphy
, plot_tree
) based on ggplot2
ggtree
packageggplot2
ggplot
function to support tree objectsgeom_tree
: extends ggplot2
to support tree structuregeom_tree
: geometric layer to support tree structurep <- ggplot(beast_tree, aes(x,y)) + geom_tree()
theme_tree
& theme_tree2
multiplot(p + theme_tree(), p + theme_tree2(), ncol=2)
ggtree
= ggplot()
+ geom_tree()
+ theme_tree()
ggtree(beast_tree)
ggtree
= ggplot()
+ geom_tree()
+ theme_tree()
ggtree(beast_tree, branch.length='none', color='slateblue',
layout='fan', open.angle=180, size=2)
ggtree
= ggplot()
+ geom_tree()
+ theme_tree()
ggtree(beast_tree, aes(color=rate), linetype='dashed')
ggplot2
can be used directly in ggtree
p <- ggtree(beast_tree)
(p + geom_point(aes(color=isTip), size=1) +
geom_text(aes(label=round(posterior*100)),
hjust=-.1, size=2.5)) %>% viewClade(node=118)
ggplo2
doesn’t support subsettingggtree
provides modified version of geometric layers to support it
(p + geom_point2(aes(subset= !is.na(posterior) & posterior > .75),
color='firebrick')) %>% viewClade(node=118)
geom_treescale
displays the tree scalep <- p + geom_treescale(x=0, y=10, offset=2)
geom_tiplab
: layer of tip labels (taxa names)p2 <- p + geom_tiplab(size=2)
p <- p + geom_tiplab(align=T, size=2, linesize=.3) + xlim(NA, 30)
multiplot(p2, p, ncol=2)
geom_hilight
: highlight clade with background colorp <- p + geom_hilight(node=118, fill='blue', alpha=.4)
geom_cladelabel
: labeling cladep <- p + geom_cladelabel(118, 'a selected clade', offset=6.5, offset.text=.5,
barsiz=3, color='darkgreen', angle=90, hjust='center')
geom_range
: displays uncertainty of branch lengthsp <- p + geom_range('height_0.95_HPD', color='red', alpha=.5)
geom_taxalink
: connecting related taxap <- p + geom_taxalink(45, 19, color='purple', arrow=arrow(length=unit(0.02, 'npc'))) +
geom_taxalink(46, 18, color='blue', arrow=arrow(length=unit(0.02, 'npc')))
## p + geom_tippoint(aes(color=grepl("Swine", label)))
tip <- get.tree(beast_tree)$tip.label
host <- rep("Human", length(tip))
host[grep("Swine", tip)] <- "Swine"
host.df <- data.frame(taxa=tip, host=host)
p %<+% host.df + geom_tippoint(aes(color=host))
library(ggtree)
beast_file <- system.file("examples/MCC_FluA_H3.tree", package="ggtree")
beast_tree <- read.beast(beast_file)
ggtree(beast_tree) + geom_treescale(x=0, y=10, offset=2) +
geom_tiplab(align=T, size=2, linesize=.3) + xlim(NA, 30) +
geom_hilight(node=118, fill='blue', alpha=.4) +
geom_cladelabel(118, 'a selected clade', offset=6.5,
offset.text=.5, barsiz=3, color='darkgreen',
angle=90, hjust='center') +
geom_range('height_0.95_HPD', color='red', alpha=.5) +
geom_taxalink(45, 19, color='purple',
arrow=arrow(length=unit(0.02, 'npc'))) +
geom_taxalink(46, 18, color='blue',
arrow=arrow(length=unit(0.02, 'npc'))) +
geom_tippoint(aes(color=grepl("Swine", label)))
p1 <- ggtree(merged_tree, aes(color=rate)) +
theme_tree2() + geom_tiplab(size=1.5) +
scale_color_continuous(high='red', low='green')
p2 <- p1 %<% rescale_tree(merged_tree, 'dN') + aes(color=dN)
multiplot(p1+xlim(NA, 25)+ggtitle('time-scaled tree'),
p2+xlim(NA, 0.07)+ggtitle('dN tree'), ncol=2)
ggplot2
R
R
merge_tree
: allows evolution evidences to be merged and comparedfortify
methods: convert tree objects into tidy data framegeom_tree
: extends ggplot2
to support tree structure