A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/boopsboops/delimtools-testing below:

GitHub - boopsboops/delimtools-testing

For code to acquire the Geophagus dataset please follow instructions in acquire-sequence-data.md.

##################
#### packages ####
##################

# load R packages
source(here::here("scripts/load-libs.R"))


##################
###### setup #####
##################

# set temporary working directory in 'temp/'
today.dir <- glue::glue('Results_{Sys.Date()}')
today.path <- here::here("temp",today.dir)
if(!dir.exists(today.path)) {dir.create(today.path,recursive=TRUE)}


##################
### load data ####
##################

# load tree and fasta files
coi.geophagus.haps.raxml.tr <- ape::read.tree(here::here("assets/coi.geophagus.haps.raxml.nwk"))
coi.geophagus.haps.beast.tr <- treeio::read.beast(here::here("assets/coi.geophagus.haps.beast.tre"))
coi.geophagus.haps.df <- readr::read_csv(here::here("assets/coi.geophagus.haps.csv"),show_col_types=FALSE)
coi.geophagus.haps.fa <- ape::read.FASTA(here::here("assets/coi.geophagus.haps.fasta"))


##################
#### run morph ###
##################

# `morph_tbl()` summarises any two vectors of individual labels and species delimitations
# here we use the species names as reported on NCBI
ncbi.df <- delimtools::morph_tbl(labels=dplyr::pull(coi.geophagus.haps.df,gbAccession),sppVector=dplyr::pull(coi.geophagus.haps.df,scientificName),delimname="ncbi")

# print the delimitation table 
ncbi.df |> delimtools::report_delim(tabulate=FALSE)
ncbi.df |> delimtools::report_delim(tabulate=TRUE)


##################
#### run gmyc ####
##################

# General Mixed Yule Coalescent model
# Monaghan et al. (2009); https://doi.org/10.1093/sysbio/syp027
# implemented in the `splits` package

# convert tree to phylo object
coi.geophagus.haps.beast.tr.phy <- treeio::as.phylo(coi.geophagus.haps.beast.tr)

# check if tree is binary (should be TRUE)
ape::is.binary(coi.geophagus.haps.beast.tr.phy)

# run gmyc
set.seed(42)
gmyc.res <- splits::gmyc(coi.geophagus.haps.beast.tr.phy,method="single",interval=c(0,5),quiet=FALSE)
summary(gmyc.res)

# make df
gmyc.df <- delimtools::gmyc_tbl(gmyc.res)
gmyc.df |> delimtools::report_delim(tabulate=FALSE)


#################
### run bgmyc ###
#################

# Bayesian General Mixed Yule Coalescent model
# Reid & Carstens (2012); https://doi.org/10.1186/1471-2148-12-196
# implemented in the `bGMYC` package

# run bgmyc
set.seed(42)
bgmyc.res.single <- bGMYC::bgmyc.singlephy(coi.geophagus.haps.beast.tr.phy,mcmc=11000,burnin=1000,thinning=100,t1=2,t2=length(coi.geophagus.haps.beast.tr.phy$tip.label),start=c(1,0.5,50))

# make df
bgmyc.df <- delimtools::bgmyc_tbl(bgmyc.res.single,ppcutoff=0.05)
bgmyc.df |> delimtools::report_delim(tabulate=FALSE)


##################
### run locmin ###
##################

# localMinima distance threshold method
# Brown et al. (2012; https://doi.org/10.1111/j.1755-0998.2011.03108.x)
# implemented in the `spider` package using genetic distance matrix

# make distance matrix in APE
mat <- ape::dist.dna(coi.geophagus.haps.fa,model="raw",pairwise.deletion=TRUE)
lmin <- spider::localMinima(as.matrix(mat))

# plot to check threshold is sensible
plot(lmin); abline(v=lmin$localMinima[1],col="red")
locmin.df <- delimtools::locmin_tbl(mat,threshold=lmin$localMinima[1])
locmin.df |> delimtools::report_delim(tabulate=FALSE)


###########################
### run locmin treedist ###
###########################

# localMinima distance threshold method
# Brown et al. (2012; https://doi.org/10.1111/j.1755-0998.2011.03108.x)
# implemented in the `spider` package using patristic tree distances

# patristic tree distances
tr.mat <- ape::cophenetic.phylo(coi.geophagus.haps.raxml.tr) |> as.dist()
tr.lmin <- spider::localMinima(tr.mat)

# plot to check threshold is sensible
plot(tr.lmin); abline(v=tr.lmin$localMinima[1],col="red")
treedist.df <- delimtools::locmin_tbl(tr.mat,threshold=tr.lmin$localMinima[1],delimname="treedist")
treedist.df |> delimtools::report_delim(tabulate=FALSE)


##################
##### run 2% #####
##################

# standard DNA barcoding distance threshold cutoff of 2%
# e.g. Ward (2009); https://doi.org/10.1111/j.1755-0998.2009.02541.x
# can be changed to any value

percent.df <- delimtools::locmin_tbl(mat,threshold=0.02,delimname="percent")
percent.df |> delimtools::report_delim(tabulate=FALSE)


##################
#### run asap ####
##################

# ASAP (Assemble Species by Automatic Partitioning_
# Puillandre et al. (2021); https://doi.org/10.1111/1755-0998.13281

# requires path to ASAP executable on your system
file.exists(here::here("software/ASAP/bin/asap"))# should be TRUE
asap.df <- delimtools::asap_tbl(infile=here::here("assets/coi.geophagus.haps.fasta"),exe=here::here("software/ASAP/bin/asap"),model=3)

# or requires path to results produced by the ASAP webserver @ https://bioinfo.mnhn.fr/abi/public/asap/asapweb.html
asap.df <- delimtools::asap_tbl(webserver=here::here("assets/asap.Partition_1.csv"))
asap.df |> delimtools::report_delim(tabulate=FALSE)


##################
#### run abgd ####
##################

# ABGD (Automatic Barcode Gap Discovery)
# Puillandre et al. (2011); https://doi.org/10.1111/j.1365-294X.2011.05239.x

# requires path to ABGD executable on your system
file.exists(here::here("software/Abgd/bin/abgd")) # should be TRUE
abgd.df <- delimtools::abgd_tbl(infile=here::here("assets/coi.geophagus.haps.fasta"),slope=0.5,exe=here::here("software/Abgd/bin/abgd"),model=3)

# or requires path to results produced by the ABGD webserver @ https://bioinfo.mnhn.fr/abi/public/abgd/abgdweb.html
abgd.df <- delimtools::abgd_tbl(webserver=here::here("assets/abgd.groupe1.txt"))
abgd.df |> delimtools::report_delim(tabulate=FALSE)


################
### run mptp ###
################

# mPTP (Multi-Rate Poisson Tree Processes)
# Kapli et al. (2017); https://doi.org/10.1093/bioinformatics/btx025

# requires path to mPTP executable on your system
file.exists(here::here("software/mptp/bin/mptp")) # should be TRUE

# estimate minimum branch lengths (minbrlen) setting
delimtools::min_brlen(tree=here::here("assets/coi.geophagus.haps.raxml.nwk"),n=10)
mptp.df <- delimtools::mptp_tbl(infile=here::here("assets/coi.geophagus.haps.raxml.nwk"),exe=here::here("software/mptp/bin/mptp"),method="single",minbrlen=0.001)

# or requires path to results produced by the mPTP webserver @ https://mptp.h-its.org/#/tree
mptp.df <- delimtools::mptp_tbl(webserver=here::here("assets/mptp.webserver.txt"))
mptp.df |> delimtools::report_delim(tabulate=FALSE)


################
### run pnet ###
################

# statistical parsimony network
# Templeton et al. (1992); https://doi.org/10.1093/genetics/132.2.619
# implemented in `haplotypes` package

pnet <- haplotypes::parsimnet(haplotypes::as.dna(as.matrix(coi.geophagus.haps.fa)),indels="sic",prob=0.95)
pnet.df <- delimtools:::parsimnet_tbl(dna=coi.geophagus.haps.fa, parsimnet=pnet)
pnet.df |> delimtools::report_delim(tabulate=FALSE)


##################
### join delims ##
##################

# combine all delimitation methods into one table
all.delims.df <- delimtools::delim_join(list(ncbi.df,pnet.df,mptp.df,gmyc.df,bgmyc.df,locmin.df,treedist.df,percent.df,asap.df,abgd.df))
all.delims.df |> delimtools::report_delim(tabulate=FALSE)
all.delims.df |> delimtools::report_delim(tabulate=TRUE)

# make consensus 
all.delims.df |> delim_consensus(n_match=5) |> delimtools::report_delim(tabulate=TRUE)

# match ratio congruence
all.delims.df |> delimtools::match_ratio() |> dplyr::arrange(desc(match_ratio)) |> knitr::kable() |> print()


##################
# subsample data #
##################

# clean and subsample
set.seed(42)
coi.geophagus.haps.df.sub <- coi.geophagus.haps.df |> 
    mutate(scientificName=str_replace_all(scientificName,"_AMX-2021","")) |> 
    mutate(scientificName=str_replace_all(scientificName,"_"," ")) |> 
    slice_sample(n=8,by=scientificName)

# subsample the delims
all.delims.df.sub <- all.delims.df |> filter(labels %in% pull(coi.geophagus.haps.df.sub,gbAccession))

# subample tips
#source(here("../delimtools/R/delim_autoplot2.R"))
coi.geophagus.haps.beast.tr.sub <- coi.geophagus.haps.beast.tr |> tidytree::keep.tip(pull(coi.geophagus.haps.df.sub,gbAccession))


##################
### plot delims ##
##################

# make tip label table
tip.tab <- coi.geophagus.haps.df.sub |> 
    dplyr::mutate(labs=glue::glue("{gbAccession} | {scientificName}")) |> 
    dplyr::select(gbAccession,labs,scientificName)

# get cols
cols <- delimtools::delim_brewer(delim=all.delims.df,package="viridisLite",palette="viridis",seed=42)
cols <- delimtools::delim_brewer(delim=all.delims.df,package="viridisLite",palette="plasma",seed=42)
cols <- delimtools::delim_brewer(delim=all.delims.df,package="RColorBrewer",palette="Set2",seed=42)
cols <- delimtools::delim_brewer(delim=all.delims.df,package="randomcoloR",seed=42)
cols <- delimtools::delim_brewer(delim=all.delims.df.sub)

# plot and save
#source(here("../delimtools/R/delim_autoplot.R"))
p <- delimtools::delim_autoplot(delim=all.delims.df.sub,tr=coi.geophagus.haps.beast.tr.sub,tbl_labs=tip.tab,col_vec=cols,hexpand=0.3,widths=c(0.4,0.1),n_match=3,delim_order=c("asap","abgd","locmin","percent","gmyc","bgmyc","mptp","ncbi","parsimnet"),consensus=TRUE)
ggplot2::ggsave(here::here(today.path,"geophagus-delimitation.pdf"),plot=p,height=500,width=400,units="mm")


# autoplot2
p <- delimtools::delim_autoplot2(delim=all.delims.df.sub, tr=coi.geophagus.haps.beast.tr.sub, consensus=TRUE, n_match= 5, tbl_labs=tip.tab, species="scientificName",hexpand= 0.1, widths= c(0.5, 0.2))
ggplot2::ggsave(here::here(today.path,"geophagus-delimitation.pdf"),plot=p,height=500,width=400,units="mm")


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4