RetroSearch Browse

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Showing content from https://github.com/dozmorovlab/HiCcompare/raw/supplemental/supplemental_files/S6_File.Rmd below:

--- title: Extended evaluation of differential chromatin interaction detection analysis using real Hi-C data author: "John Stansfield, Mikhail Dozmorov" output: pdf_document: toc: yes html_document: toc: yes always_allow_html: yes --- ```{r setup, echo=FALSE, message=FALSE, warning=FALSE} # Set up the environment library(knitr) opts_chunk$set(cache.path='cache/', fig.path='img/', cache=F, tidy=T, fig.keep='high', echo=F, dpi=100, warnings=F, message=F, comment=NA, warning=F, results='as.is', fig.width = 10, fig.height = 3) #out.width=700, library(pander) panderOptions('table.split.table', Inf) set.seed(1) library(dplyr) options(stringsAsFactors = FALSE) ``` ```{r libraries} library(chromoR) library(pROC) library(MLmetrics) library(HiTC) library(Matrix) library(GenomicRanges) library(HiCcompare) library(ggplot2) library(gridExtra) library(data.table) ``` ```{r} githubURL <- "https://github.com/dozmorovlab/HiCcompare/raw/supplemental/Supplemental_data/S6_File_data.RData" load(url(githubURL)) chr1.tab = create.hic.table(S6.dpnii.chr1, S6.mbol.chr1, chr = 'chr1', scale = FALSE) ``` ```{r, warning=FALSE, message=FALSE} ROC_on_replicate = function(hic.table, FC, plot.roc = TRUE, plot.MD = T, numChanges = 500, alpha = 0.05, diff.thresh = NA) { if (numChanges < 1) { stop('numChanges must be > 0') } # run hic_loess and get what shows up as a difference hic.table = hic_loess(hic.table, Plot = plot.MD, check.differences = T, diff.thresh = diff.thresh) # detected_first = which(hic.table$p.value < alpha) # add in true differences sample.space = 1:nrow(hic.table) #sample.space = sample.space[-detected_first] if ( numChanges > 0) { changes = sample(sample.space, numChanges) whichIF = ifelse(hic.table[changes, ]$M < 0, -1, 1) newIF1 = FC^whichIF * hic.table[changes,]$IF1 newIF1 = as.integer(round(newIF1)) hic.table[changes,]$IF1 = newIF1 hic.table = hic.table[, M := log2(IF2/IF1)] hic.table = hic.table[, 1:10, with=F] } ### normalize using other methods # convert hic.table to full matrices for other methods to work mat1 = sparse2full(hic.table[, c('start1', 'start2', 'IF1'), with=F]) mat2 = sparse2full(hic.table[, c('start1', 'start2', 'IF2'), with=F]) # chromoR start = as.numeric(colnames(mat1)) end = as.numeric(colnames(mat1)) seg = data.frame(chr = hic.table$chr1[1], start = start, end = end) # correct simulated matrices using chromoR's methods sim1.chromor = correctCIM(mat1, seg) sim2.chromor = correctCIM(mat2, seg) colnames(sim1.chromor$mCorrected) = seg$start colnames(sim2.chromor$mCorrected) = seg$start sim1.chromor = full2sparse(sim1.chromor$mCorrected) sim2.chromor = full2sparse(sim2.chromor$mCorrected) chromoR.table = create.hic.table(sim1.chromor, sim2.chromor, scale = TRUE, chr = hic.table$chr1[1]) # ICE sim1.ice = Matrix(mat1) sim2.ice = Matrix(mat2) # create HTCexp object for simulated data xgi = GRanges(seqnames = 'chr1', ranges = IRanges(start = start, end = end, names = paste('a', 1:length(start), sep=''))) ygi = GRanges(seqnames = 'chr1', ranges = IRanges(start = start, end = end, names = paste('b', 1:length(start), sep=''))) colnames(sim1.ice) <- id(xgi) rownames(sim1.ice) <- id(ygi) sim1.ice = new('HTCexp', sim1.ice, xgi, ygi) colnames(sim2.ice) <- id(xgi) rownames(sim2.ice) <- id(ygi) sim2.ice = new('HTCexp', sim2.ice, xgi, ygi) # normalize with ICE sim1.ice = normICE(sim1.ice, max_iter = 100) sim2.ice = normICE(sim2.ice, max_iter = 100) # convert to hic.table colnames(sim1.ice@intdata) = start colnames(sim2.ice@intdata) = start sim1.ice = full2sparse(as.matrix(sim1.ice@intdata)) sim2.ice = full2sparse(as.matrix(sim2.ice@intdata)) ice.table = create.hic.table(sim1.ice, sim2.ice, scale = TRUE, chr = hic.table$chr1[1]) # KR # remove any columns of 0's zeros1 = which(colSums(mat1) == 0) zeros2 = which(colSums(mat2) == 0) if (length(zeros1) > 0) { cr.mat1 = mat1[-zeros1, -zeros1] } else { cr.mat1 = mat1 } if (length(zeros2) > 0) { cr.mat2 = mat2[-zeros2, -zeros2] } else { cr.mat2 = mat2 } sim1.kr = KRnorm(cr.mat1) sim2.kr = KRnorm(cr.mat2) colnames(sim1.kr) = colnames(cr.mat1) colnames(sim2.kr) = colnames(cr.mat2) sim1.kr = full2sparse(sim1.kr) sim2.kr = full2sparse(sim2.kr) kr.table = create.hic.table(sim1.kr, sim2.kr, scale = TRUE, chr = hic.table$chr1[1]) # SCN sim1.scn = SCN(mat1) sim2.scn = SCN(mat2) sim1.scn = full2sparse(sim1.scn) sim2.scn = full2sparse(sim2.scn) scn.table = create.hic.table(sim1.scn, sim2.scn, chr=hic.table$chr1[1], scale=TRUE) # MA ma.table = create.hic.table(full2sparse(mat1), full2sparse(mat2), chr = hic.table$chr1[1], scale = TRUE) ma.table = MA_norm(ma.table) # detect differences chromoR.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] ice.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] kr.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] scn.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] loess.result = hic_loess(hic.table, Plot = plot.MD, check.differences = TRUE, diff.thresh = diff.thresh) chromoR.result = hic_diff(chromoR.table, Plot = plot.MD, diff.thresh = diff.thresh) ice.result = hic_diff(ice.table, Plot = plot.MD, diff.thresh = diff.thresh) kr.result = hic_diff(kr.table, Plot = plot.MD, diff.thresh = diff.thresh) scn.result = hic_diff(scn.table, Plot = plot.MD, diff.thresh = diff.thresh) ma.result = hic_diff(ma.table, Plot = plot.MD, diff.thresh = diff.thresh) # make pval and truth vectors loess.p = loess.result$p.value chromoR.p = chromoR.result$p.value ice.p = ice.result$p.value kr.p = kr.result$p.value scn.p = scn.result$p.value ma.p = ma.result$p.value # make truth vector - indicator for if value had fold change applied to it # normalization changes dimensions of sparse matrices so need to loop through changes to make truth vector chromoR.changes = vector() ice.changes = vector() kr.changes = vector() scn.changes = vector() if ( numChanges > 0) { for (i in 1:length(changes)) { reg1 = hic.table$start1[changes[i]] reg2 = hic.table$start2[changes[i]] chromoR.changes[i] = ifelse(!length(which(chromoR.result$start1 == reg1 & chromoR.result$start2 == reg2)), NA, which(chromoR.result$start1 == reg1 & chromoR.result$start2 == reg2)) ice.changes[i] = ifelse(!length(which(ice.result$start1 == reg1 & ice.result$start2 == reg2)), NA, which(ice.result$start1 == reg1 & ice.result$start2 == reg2)) kr.changes[i] = ifelse(!length(which(kr.result$start1 == reg1 & kr.result$start2 == reg2)), NA, which(kr.result$start1 == reg1 & kr.result$start2 == reg2)) scn.changes[i] = ifelse(!length(which(scn.result$start1 == reg1 & scn.result$start2 == reg2)), NA, which(scn.result$start1 == reg1 & scn.result$start2 == reg2)) } chromoR.changes = as.vector(na.omit(chromoR.changes)) ice.changes = as.vector(na.omit(ice.changes)) kr.changes = as.vector(na.omit(kr.changes)) scn.changes = as.vector(na.omit(scn.changes)) # make truth vectors loess.truth = rep(0, nrow(loess.result)) loess.truth[changes] = 1 chromoR.truth = rep(0, nrow(chromoR.result)) chromoR.truth[chromoR.changes] = 1 ice.truth = rep(0, nrow(ice.result)) ice.truth[ice.changes] = 1 kr.truth = rep(0, nrow(kr.result)) kr.truth[kr.changes] = 1 scn.truth = rep(0, nrow(scn.result)) scn.truth[scn.changes] = 1 ma.truth = loess.truth } else { loess.truth = rep(0, nrow(loess.result)) chromoR.truth = rep(0, nrow(chromoR.result)) ice.truth = rep(0, nrow(ice.result)) kr.truth = rep(0, nrow(kr.result)) scn.truth = rep(0, nrow(scn.result)) } # roc loess.roc = roc(response = loess.truth, predictor = loess.p) chromoR.roc = roc(response = chromoR.truth, predictor = chromoR.p) ice.roc = roc(response = ice.truth, predictor = ice.p) kr.roc = roc(response = kr.truth, predictor = kr.p) scn.roc = roc(response = scn.truth, predictor = scn.p) ma.roc = roc(response = ma.truth, predictor = ma.p) # F1 loess.F1 = F1_Score(loess.truth, ifelse(loess.p < alpha, 1, 0)) chromoR.F1 = F1_Score(chromoR.truth, ifelse(chromoR.p < alpha, 1, 0)) ice.F1 = F1_Score(ice.truth, ifelse(ice.p < alpha, 1, 0)) kr.F1 = F1_Score(kr.truth, ifelse(kr.p < alpha, 1, 0)) scn.F1 = F1_Score(scn.truth, ifelse(scn.p < alpha, 1, 0)) ma.F1 = F1_Score(ma.truth, ifelse(ma.p < alpha, 1, 0)) # AUC loess.auc = auc(loess.roc) chromoR.auc = auc(chromoR.roc) ice.auc = auc(ice.roc) kr.auc = auc(kr.roc) scn.auc = auc(scn.roc) ma.auc = auc(ma.roc) if (plot.roc) { plot_colors = c('black', 'blue', 'red', 'green', 'purple') plot(loess.roc, col = plot_colors[1], main = paste('Fold Change = ', FC, sep='')) plot(chromoR.roc, col = plot_colors[2], add=T) plot(ice.roc, col = plot_colors[3], add=T) plot(kr.roc, col = plot_colors[4], add=T) plot(scn.roc, col = plot_colors[4], add=T) legend('bottomright', inset = 0, legend = c('loess', 'chromoR', 'ICE', 'KR', 'SCN'), title = 'Normalization Method', fill = plot_colors, horiz = F) #, cex=1.5) } if (numChanges > 0) { loess.true.pos = sum(loess.p[changes] < alpha) loess.false.pos = sum(loess.p[-changes] < alpha) loess.false.neg = sum(loess.p[changes] >= alpha) loess.true.neg = length(loess.p) - loess.true.pos - loess.false.pos - loess.false.neg chromoR.true.pos = sum(chromoR.p[chromoR.changes] < alpha) chromoR.false.pos = sum(chromoR.p[-chromoR.changes] < alpha) chromoR.false.neg = sum(chromoR.p[chromoR.changes] >= alpha) chromoR.true.neg = length(chromoR.p) - chromoR.true.pos - chromoR.false.pos - chromoR.false.neg ice.true.pos = sum(ice.p[ice.changes] < alpha) ice.false.pos = sum(ice.p[-ice.changes] < alpha) ice.false.neg = sum(ice.p[ice.changes] >= alpha) ice.true.neg = length(ice.p) - ice.true.pos - ice.false.pos - ice.false.neg kr.true.pos = sum(kr.p[kr.changes] < alpha) kr.false.pos = sum(kr.p[-kr.changes] < alpha) kr.false.neg = sum(kr.p[kr.changes] >= alpha) kr.true.neg = length(kr.p) - kr.true.pos - kr.false.pos - kr.false.neg scn.true.pos = sum(scn.p[scn.changes] < alpha) scn.false.pos = sum(scn.p[-scn.changes] < alpha) scn.false.neg = sum(scn.p[scn.changes] >= alpha) scn.true.neg = length(scn.p) - scn.true.pos - scn.false.pos - scn.false.neg ma.true.pos = sum(ma.p[changes] < alpha) ma.false.pos = sum(ma.p[-changes] < alpha) ma.false.neg = sum(ma.p[changes] >= alpha) ma.true.neg = length(ma.p) - ma.true.pos - ma.false.pos - ma.false.neg } else{ loess.true.pos = 0 loess.false.pos = sum(loess.p < alpha) loess.false.neg = 0 loess.true.neg = length(loess.p) - loess.true.pos - loess.false.pos - loess.false.neg chromoR.true.pos = 0 chromoR.false.pos = sum(chromoR.p < alpha) chromoR.false.neg = 0 chromoR.true.neg = length(chromoR.p) - chromoR.true.pos - chromoR.false.pos - chromoR.false.neg ice.true.pos = 0 ice.false.pos = sum(ice.p < alpha) ice.false.neg = 0 ice.true.neg = length(ice.p) - ice.true.pos - ice.false.pos - ice.false.neg kr.true.pos = 0 kr.false.pos = sum(kr.p < alpha) kr.false.neg = 0 kr.true.neg = length(kr.p) - kr.true.pos - kr.false.pos - kr.false.neg scn.true.pos = 0 scn.false.pos = sum(scn.p < alpha) scn.false.neg = 0 scn.true.neg = length(scn.p) - scn.true.pos - scn.false.pos - scn.false.neg } # FDR loess.fdr = loess.false.pos / (loess.false.pos + loess.true.pos) chromoR.fdr = chromoR.false.pos / (chromoR.false.pos + chromoR.true.pos) ice.fdr = ice.false.pos / (ice.false.pos + ice.true.pos) kr.fdr = kr.false.pos / (kr.false.pos + kr.true.pos) scn.fdr = scn.false.pos / (scn.false.pos + scn.true.pos) ma.fdr = ma.false.pos / (ma.false.pos + ma.true.pos) # accuracy loess.acc = (loess.true.pos + loess.true.neg) / (length(loess.p)) chromoR.acc = (chromoR.true.pos + chromoR.true.neg) / (length(chromoR.p)) ice.acc = (ice.true.pos + ice.true.neg) / (length(ice.p)) kr.acc = (kr.true.pos + kr.true.neg) / (length(kr.p)) scn.acc = (scn.true.pos + scn.true.neg) / (length(scn.p)) ma.acc = (ma.true.pos + ma.true.neg) / (length(ma.p)) # Precision loess.pre = loess.true.pos / (loess.true.pos + loess.false.pos) chromoR.pre = chromoR.true.pos / (chromoR.true.pos + chromoR.false.pos) kr.pre = kr.true.pos / (kr.true.pos + kr.false.pos) ice.pre = ice.true.pos / (ice.true.pos + ice.false.pos) scn.pre = scn.true.pos / (scn.true.pos + scn.false.pos) ma.pre = ma.true.pos / (ma.true.pos + ma.false.pos) # FPR loess.fpr = loess.false.pos / (loess.false.pos + loess.true.neg) chromoR.fpr = chromoR.false.pos / (chromoR.false.pos + chromoR.true.neg) ice.fpr = ice.false.pos / (ice.false.pos + ice.true.neg) kr.fpr = kr.false.pos / (kr.false.pos + kr.true.neg) scn.fpr = scn.false.pos / (scn.false.pos + scn.true.neg) ma.fpr = ma.false.pos / (ma.false.pos + ma.true.neg) # FNR loess.fnr = loess.false.neg / (loess.true.pos + loess.false.neg) chromoR.fnr = chromoR.false.neg / (chromoR.true.pos + chromoR.false.neg) ice.fnr = ice.false.neg / (ice.true.pos + ice.false.neg) kr.fnr = kr.false.neg / (kr.true.pos + kr.false.neg) scn.fnr = scn.false.neg / (scn.true.pos + scn.false.neg) ma.fnr = ma.false.neg / (ma.true.pos + ma.false.neg) # FOR loess.for = loess.false.neg / (loess.false.neg + loess.true.neg) chromoR.for = chromoR.false.neg / (chromoR.false.neg + chromoR.true.neg) ice.for = ice.false.neg / (ice.false.neg + ice.true.neg) kr.for = kr.false.neg / (kr.false.neg + kr.true.neg) scn.for = scn.false.neg / (scn.false.neg + scn.true.neg) ma.for = ma.false.neg / (ma.false.neg + ma.true.neg) # NPV loess.npv = loess.true.neg / (loess.false.neg + loess.true.neg) chromoR.npv = chromoR.true.neg / (chromoR.false.neg + chromoR.true.neg) ice.npv = ice.true.neg / (ice.false.neg + ice.true.neg) kr.npv = kr.true.neg / (kr.false.neg + kr.true.neg) scn.npv = scn.true.neg / (scn.false.neg + scn.true.neg) ma.npv = ma.true.neg / (ma.false.neg + ma.true.neg) rates = data.frame(loess = c(loess.true.pos, loess.false.pos, loess.true.neg, loess.false.neg), chromoR = c(chromoR.true.pos, chromoR.false.pos, chromoR.true.neg, chromoR.false.neg), ice = c(ice.true.pos, ice.false.pos, ice.true.neg, ice.false.neg), kr = c(kr.true.pos, kr.false.pos, kr.true.neg, kr.false.neg), scn = c(scn.true.pos, scn.false.pos, scn.true.neg, scn.false.neg), ma = c(ma.true.pos, ma.false.pos, ma.true.neg, ma.false.neg)) TPR = apply(rates, 2, function(x) { x[1] / (x[1] + x[4]) }) SPC = apply(rates, 2, function(x) { x[3] / (x[3] + x[2]) }) total = apply(rates, 2, sum) rates = rbind(rates, total, TPR, SPC) F1 = c(loess.F1, chromoR.F1, ice.F1, kr.F1, scn.F1, ma.F1) AUC.row = c(loess.auc, chromoR.auc, ice.auc, kr.auc, scn.auc, ma.auc) FDR = c(loess.fdr, chromoR.fdr, ice.fdr, kr.fdr, scn.fdr, ma.fdr) Acc = c(loess.acc, chromoR.acc, ice.acc, kr.acc, scn.acc, ma.acc) AUC20 = c(auc(loess.roc, partial.auc = c(0.8,1)), auc(chromoR.roc, partial.auc = c(0.8,1)), auc(ice.roc, partial.auc = c(0.8,1)), auc(kr.roc, partial.auc = c(0.8,1)), auc(scn.roc, partial.auc = c(0.8,1)), auc(ma.roc, partial.auc = c(0.8,1))) precision = c(loess.pre, chromoR.pre, ice.pre, kr.pre, scn.pre, ma.pre) FPR = c(loess.fpr, chromoR.fpr, ice.fpr, kr.fpr, scn.fpr, ma.fpr) FNR = c(loess.fnr, chromoR.fnr, ice.fnr, kr.fnr, scn.fnr, ma.fnr) FOR = c(loess.for, chromoR.for, ice.for, kr.for, scn.for, ma.for) NPV = c(loess.npv, chromoR.npv, ice.npv, kr.npv, scn.npv, ma.npv) rates = rbind(rates, F1, AUC.row, AUC20, FDR, Acc, precision, FPR, FNR, FOR, NPV) rates = format(rates, scientific = FALSE) rownames(rates) = c('true positive', 'false positive', 'true negative', 'false negative', 'Total', 'TPR', 'SPC', 'F1', 'AUC', 'AUC 20%' , 'FDR', 'Accuracy', 'Precision', 'FPR', 'FNR', 'FOR', 'NPV') result = list(loess.roc = loess.roc, chromoR.roc = chromoR.roc, ice.roc = ice.roc, kr.roc = kr.roc, scn.roc = scn.roc, ma.roc = ma.roc, rates = rates, loess.F1 = loess.F1, chromoR.F1 = chromoR.F1, ice.F1 = ice.F1, kr.F1 = kr.F1, ma.F1 = ma.F1) return(result) } ``` ```{r} # plot code for barplots make_barplot = function(dat1, dat2, dat3, dat4, metric = 'AUC') { library(reshape2) tab1 = dat1$rates tab2 = dat2$rates tab3 = dat3$rates tab4 = dat4$rates v1 = tab1[metric, ] %>% as.numeric() v2 = tab2[metric, ] %>% as.numeric() v3 = tab3[metric, ] %>% as.numeric() v4 = tab4[metric, ] %>% as.numeric() new = data.frame(method = factor(rep(c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA'), 4), levels = c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA')), value = c(v1, v2, v3, v4), Fold_change = factor(c(rep('1.5', 6), rep('2', 6), rep('4', 6), rep('10', 6)), levels = c('1.5', '2', '4', '10'))) ggplot(new, aes(x = method, y = value, fill = Fold_change)) + geom_bar(stat='identity', position=position_dodge()) + ggtitle(metric) } ``` # Introduction To evaluate the performance of different normalization methods on the detection of chromatin interaction differences, controlled changes were used. Real Hi-C data is used: GM12878, Chromosome 1, MboI vs. DpnII restriction enzymes, 1Mb resolution. The dimensions of the chromatin interaction matrices are 250 x 250. The performance of the joint (`loess` and `MA`) and individual (`ChromoR`, Iterative Correction and Eigenvector decomposition - `ICE`, Knight-Ruiz - `KR`, Sequential Component Normalization - `SCN`) normalization methods at varying fold changes (1.5 by default) and varying number of controlled changes (500 by default) is investigated. The data were globally rescaled to have the same total count of interaction frequencies. Fold changes are applied to one of the datasets by up-regulating the selected IF if the difference between the datasets is positive. If the difference between the datasets at that point is negative the IF is down-regulated by the specified fold change. This method of making changes ensures that the fold change specified is actually realized on the MD plot. ## The effect of fold change ### 1.5 Fold change ```{r, cache=F} chr1.roc1 = ROC_on_replicate(chr1.tab, FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc1$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc1$rates), digits = 3)) ``` ### 2.0 Fold change ```{r, cache=F} chr1.roc2 = ROC_on_replicate(chr1.tab, FC = 2.0, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc2$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc2$rates), digits = 3)) ``` ### 4.0 Fold change ```{r, cache=F} chr1.roc3 = ROC_on_replicate(chr1.tab, FC = 4.0, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc3$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc3$rates), digits = 3)) ``` ### 10.0 fold change ```{r, cache=F} chr1.roc4 = ROC_on_replicate(chr1.tab, FC = 10.0, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc4$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc4$rates), digits = 3)) ``` ### Bar plots The bar plots show comparisons of the effect of different fold changes using fixed numbers of controlled changes on different performance metrics. ```{r} make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'AUC') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'TPR') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'SPC') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'F1') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'FDR') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'Accuracy') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'Precision') ``` ```{r} # plot code for barplots make_barplot2 = function(dat1, dat2, dat3, dat4, dat5, metric = 'AUC') { library(reshape2) tab1 = dat1$rates tab2 = dat2$rates tab3 = dat3$rates tab4 = dat4$rates tab5 = dat5$rates v1 = tab1[metric, ] %>% as.numeric() v2 = tab2[metric, ] %>% as.numeric() v3 = tab3[metric, ] %>% as.numeric() v4 = tab4[metric, ] %>% as.numeric() v5 = tab5[metric, ] %>% as.numeric() new = data.frame(method = factor(rep(c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA'), 5), levels = c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA')), value = c(v1, v2, v3, v4, v5), number_changes = factor(c(rep('1', 6), rep('100', 6), rep('200', 6), rep('1000', 6), rep('5000', 6)), levels = c('1', '100', '200', '1000', '5000'))) ggplot(new, aes(x = method, y = value, fill = number_changes)) + geom_bar(stat='identity', position=position_dodge()) + ggtitle(metric) } ``` ### Summary For the real data with changes added, `loess` was once again able to detect the most true changes compared to the other normalization methods. `loess` was clearly superior to the individual methods at the lower fold changes (1.5 and 2) with much higher TPRs compared to the other methods with TPRs below 10%. `loess` also had the lowest number of false negatives compared to the other methods. The gap in detection once again began to close at the higher fold changes between `loess`, `KR`, `SCN`, and `ICE` however `loess` still was slightly better. `ChromoR` once again proved to be the worst normalization method and had the lowest detection rates. `loess` was slightly better than `MA` normalization at the lower fold changes and at higher fold changes the methods were about equal. ## The effect of introduced number of changes A given number of interaction frequencies were increased or decreased to produce a 1.5-fold change. ### 1 change ```{r, cache=F} chr1.roc5 = ROC_on_replicate(chr1.tab, FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 1, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc5$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc5$rates), digits = 3)) ``` ### 100 changes ```{r, cache=F} chr1.roc6 = ROC_on_replicate(chr1.tab, FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 100, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc6$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc6$rates), digits = 3)) ``` ### 200 changes ```{r, cache=F} chr1.roc7 = ROC_on_replicate(chr1.tab, FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 200, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc7$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc7$rates), digits = 3)) ``` ### 1000 changes ```{r, cache=F} chr1.roc8 = ROC_on_replicate(chr1.tab, FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 1000, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc8$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc8$rates), digits = 3)) ``` ### 5000 changes ```{r, cache=F} chr1.roc9 = ROC_on_replicate(chr1.tab, FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 5000, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc9$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc9$rates), digits = 3)) ``` ### Bar plots Below are bar plots showing comparisons of the different normalization methods over the varying numbers of changes at a fixed fold change for selected metrics. ```{r} make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'AUC') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'TPR') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'SPC') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'F1') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'FDR') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'Accuracy') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'Precision') ``` ### Summary `loess` was able to detect a single true change added at a 1.5 fold change. `loess` also detected the most changes that were added for all levels changes made compared to the other methods. `loess` again had the lowest numbers of false negatives compared to the other methods. Interestingly, `ChromoR` seemed to detect more differences when a large number of changes were made (1000 and 5000) compared to `KR`, `SCN`, and `ICE`. `loess` also performed slightly better than `MA` normalization at all levels of fold changes.

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4