RetroSearch Browse

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Showing content from https://github.com/dozmorovlab/HiCcompare/raw/supplemental/supplemental_files/S5_File.Rmd below:

--- title: Extended evaluation of differential chromatin interaction detection analysis using simulated Hi-C data author: "John Stansfield, Mikhail Dozmorov" output: pdf_document: toc: yes html_document: toc: yes always_allow_html: yes --- ```{r setup, echo=FALSE, message=FALSE, warning=FALSE} # Set up the environment library(knitr) opts_chunk$set(cache.path='cache/', fig.path='img/', cache=F, tidy=T, fig.keep='high', echo=F, dpi=100, warnings=F, message=F, comment=NA, warning=F, results='as.is', fig.width = 10, fig.height = 3) #out.width=700, library(pander) panderOptions('table.split.table', Inf) set.seed(1) library(dplyr) options(stringsAsFactors = FALSE) ``` ```{r libraries} library(chromoR) library(pROC) library(MLmetrics) library(HiTC) library(Matrix) library(GenomicRanges) library(HiCcompare) library(ggplot2) library(gridExtra) library(data.table) ``` ```{r, warning=FALSE, message=FALSE} ROC_on_replicate = function(FC, plot.roc = TRUE, plot.MD = T, numChanges = 500, alpha = 0.05, nrow = 250, diff.thresh = NA) { if (numChanges < 1) { stop('numChanges must be > 0') } # generate data sim = hic_simulate(nrow=nrow, i.range = 1, j.range =1 , Plot = FALSE, scale = FALSE, diff.thresh = diff.thresh) hic.table = sim$hic.table # run hic_loess and get what shows up as a difference # add in true differences sample.space = 1:nrow(hic.table) if ( numChanges > 0) { changes = sample(sample.space, numChanges) whichIF = ifelse(hic.table[changes, ]$M < 0, -1, 1) newIF1 = FC^whichIF * hic.table[changes,]$IF1 newIF1 = (round(newIF1)) hic.table[changes,]$IF1 = newIF1 hic.table = hic.table[, M := log2(IF2/IF1)] hic.table = hic.table[, 1:10, with=F] } ### normalize using other methods # convert hic.table to full matrices for other methods to work mat1 = sparse2full(hic.table[, c('start1', 'start2', 'IF1'), with=F]) mat2 = sparse2full(hic.table[, c('start1', 'start2', 'IF2'), with=F]) # chromoR start = as.numeric(colnames(mat1)) end = as.numeric(colnames(mat1)) seg = data.frame(chr = hic.table$chr1[1], start = start, end = end) # correct simulated matrices using chromoR's methods sim1.chromor = correctCIM(mat1, seg) sim2.chromor = correctCIM(mat2, seg) colnames(sim1.chromor$mCorrected) = seg$start colnames(sim2.chromor$mCorrected) = seg$start sim1.chromor = full2sparse(sim1.chromor$mCorrected) sim2.chromor = full2sparse(sim2.chromor$mCorrected) chromoR.table = create.hic.table(sim1.chromor, sim2.chromor, scale = TRUE, chr = hic.table$chr1[1]) # ICE sim1.ice = Matrix(mat1) sim2.ice = Matrix(mat2) # create HTCexp object for simulated data xgi = GRanges(seqnames = 'chr1', ranges = IRanges(start = start, end = end, names = paste('a', 1:length(start), sep=''))) ygi = GRanges(seqnames = 'chr1', ranges = IRanges(start = start, end = end, names = paste('b', 1:length(start), sep=''))) colnames(sim1.ice) <- id(xgi) rownames(sim1.ice) <- id(ygi) sim1.ice = new('HTCexp', sim1.ice, xgi, ygi) colnames(sim2.ice) <- id(xgi) rownames(sim2.ice) <- id(ygi) sim2.ice = new('HTCexp', sim2.ice, xgi, ygi) # normalize with ICE sim1.ice = normICE(sim1.ice, max_iter = 100) sim2.ice = normICE(sim2.ice, max_iter = 100) # convert to hic.table colnames(sim1.ice@intdata) = start colnames(sim2.ice@intdata) = start sim1.ice = full2sparse(as.matrix(sim1.ice@intdata)) sim2.ice = full2sparse(as.matrix(sim2.ice@intdata)) ice.table = create.hic.table(sim1.ice, sim2.ice, scale = TRUE, chr = hic.table$chr1[1]) # KR # remove any columns of 0's zeros1 = which(colSums(mat1) == 0) zeros2 = which(colSums(mat2) == 0) if (length(zeros1) > 0) { cr.mat1 = mat1[-zeros1, -zeros1] } else { cr.mat1 = mat1 } if (length(zeros2) > 0) { cr.mat2 = mat2[-zeros2, -zeros2] } else { cr.mat2 = mat2 } sim1.kr = KRnorm(cr.mat1) sim2.kr = KRnorm(cr.mat2) colnames(sim1.kr) = colnames(cr.mat1) colnames(sim2.kr) = colnames(cr.mat2) sim1.kr = full2sparse(sim1.kr) sim2.kr = full2sparse(sim2.kr) kr.table = create.hic.table(sim1.kr, sim2.kr, scale = TRUE, chr = hic.table$chr1[1]) # SCN sim1.scn = SCN(mat1) sim2.scn = SCN(mat2) sim1.scn = full2sparse(sim1.scn) sim2.scn = full2sparse(sim2.scn) scn.table = create.hic.table(sim1.scn, sim2.scn, chr=hic.table$chr1[1], scale=TRUE) # MA ma.table = create.hic.table(full2sparse(mat1), full2sparse(mat2), chr = hic.table$chr1[1], scale = TRUE) ma.table = MA_norm(ma.table) # detect differences chromoR.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] ice.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] kr.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] scn.table[, ':=' (adj.IF1 = IF1, adj.IF2 = IF2, adj.M = M)] loess.result = hic_loess(hic.table, Plot = plot.MD, check.differences = TRUE, diff.thresh = diff.thresh) chromoR.result = hic_diff(chromoR.table, Plot = plot.MD, diff.thresh = diff.thresh) ice.result = hic_diff(ice.table, Plot = plot.MD, diff.thresh = diff.thresh) kr.result = hic_diff(kr.table, Plot = plot.MD, diff.thresh = diff.thresh) scn.result = hic_diff(scn.table, Plot = plot.MD, diff.thresh = diff.thresh) ma.result = hic_diff(ma.table, Plot = plot.MD, diff.thresh = diff.thresh) # make pval and truth vectors loess.p = loess.result$p.value chromoR.p = chromoR.result$p.value ice.p = ice.result$p.value kr.p = kr.result$p.value scn.p = scn.result$p.value ma.p = ma.result$p.value # make truth vector - indicator for if value had fold change applied to it # normalization changes dimensions of sparse matrices so need to loop through changes to make truth vector chromoR.changes = vector() ice.changes = vector() kr.changes = vector() scn.changes = vector() if ( numChanges > 0) { for (i in 1:length(changes)) { reg1 = hic.table$start1[changes[i]] reg2 = hic.table$start2[changes[i]] chromoR.changes[i] = ifelse(!length(which(chromoR.result$start1 == reg1 & chromoR.result$start2 == reg2)), NA, which(chromoR.result$start1 == reg1 & chromoR.result$start2 == reg2)) ice.changes[i] = ifelse(!length(which(ice.result$start1 == reg1 & ice.result$start2 == reg2)), NA, which(ice.result$start1 == reg1 & ice.result$start2 == reg2)) kr.changes[i] = ifelse(!length(which(kr.result$start1 == reg1 & kr.result$start2 == reg2)), NA, which(kr.result$start1 == reg1 & kr.result$start2 == reg2)) scn.changes[i] = ifelse(!length(which(scn.result$start1 == reg1 & scn.result$start2 == reg2)), NA, which(scn.result$start1 == reg1 & scn.result$start2 == reg2)) } chromoR.changes = as.vector(na.omit(chromoR.changes)) ice.changes = as.vector(na.omit(ice.changes)) kr.changes = as.vector(na.omit(kr.changes)) scn.changes = as.vector(na.omit(scn.changes)) # make truth vectors loess.truth = rep(0, nrow(loess.result)) loess.truth[changes] = 1 chromoR.truth = rep(0, nrow(chromoR.result)) chromoR.truth[chromoR.changes] = 1 ice.truth = rep(0, nrow(ice.result)) ice.truth[ice.changes] = 1 kr.truth = rep(0, nrow(kr.result)) kr.truth[kr.changes] = 1 scn.truth = rep(0, nrow(scn.result)) scn.truth[scn.changes] = 1 ma.truth = loess.truth } else { loess.truth = rep(0, nrow(loess.result)) chromoR.truth = rep(0, nrow(chromoR.result)) ice.truth = rep(0, nrow(ice.result)) kr.truth = rep(0, nrow(kr.result)) scn.truth = rep(0, nrow(scn.result)) } # roc loess.roc = roc(response = loess.truth, predictor = loess.p) chromoR.roc = roc(response = chromoR.truth, predictor = chromoR.p) ice.roc = roc(response = ice.truth, predictor = ice.p) kr.roc = roc(response = kr.truth, predictor = kr.p) scn.roc = roc(response = scn.truth, predictor = scn.p) ma.roc = roc(response = ma.truth, predictor = ma.p) # F1 loess.F1 = F1_Score(loess.truth, ifelse(loess.p < alpha, 1, 0)) chromoR.F1 = F1_Score(chromoR.truth, ifelse(chromoR.p < alpha, 1, 0)) ice.F1 = F1_Score(ice.truth, ifelse(ice.p < alpha, 1, 0)) kr.F1 = F1_Score(kr.truth, ifelse(kr.p < alpha, 1, 0)) scn.F1 = F1_Score(scn.truth, ifelse(scn.p < alpha, 1, 0)) ma.F1 = F1_Score(ma.truth, ifelse(ma.p < alpha, 1, 0)) # AUC loess.auc = auc(loess.roc) chromoR.auc = auc(chromoR.roc) ice.auc = auc(ice.roc) kr.auc = auc(kr.roc) scn.auc = auc(scn.roc) ma.auc = auc(ma.roc) if (plot.roc) { plot_colors = c('black', 'blue', 'red', 'green', 'purple') plot(loess.roc, col = plot_colors[1], main = paste('Fold Change = ', FC, sep='')) plot(chromoR.roc, col = plot_colors[2], add=T) plot(ice.roc, col = plot_colors[3], add=T) plot(kr.roc, col = plot_colors[4], add=T) plot(scn.roc, col = plot_colors[4], add=T) legend('bottomright', inset = 0, legend = c('loess', 'chromoR', 'ICE', 'KR', 'SCN'), title = 'Normalization Method', fill = plot_colors, horiz = F) #, cex=1.5) } if (numChanges > 0) { loess.true.pos = sum(loess.p[changes] < alpha) loess.false.pos = sum(loess.p[-changes] < alpha) loess.false.neg = sum(loess.p[changes] >= alpha) loess.true.neg = length(loess.p) - loess.true.pos - loess.false.pos - loess.false.neg chromoR.true.pos = sum(chromoR.p[chromoR.changes] < alpha) chromoR.false.pos = sum(chromoR.p[-chromoR.changes] < alpha) chromoR.false.neg = sum(chromoR.p[chromoR.changes] >= alpha) chromoR.true.neg = length(chromoR.p) - chromoR.true.pos - chromoR.false.pos - chromoR.false.neg ice.true.pos = sum(ice.p[ice.changes] < alpha) ice.false.pos = sum(ice.p[-ice.changes] < alpha) ice.false.neg = sum(ice.p[ice.changes] >= alpha) ice.true.neg = length(ice.p) - ice.true.pos - ice.false.pos - ice.false.neg kr.true.pos = sum(kr.p[kr.changes] < alpha) kr.false.pos = sum(kr.p[-kr.changes] < alpha) kr.false.neg = sum(kr.p[kr.changes] >= alpha) kr.true.neg = length(kr.p) - kr.true.pos - kr.false.pos - kr.false.neg scn.true.pos = sum(scn.p[scn.changes] < alpha) scn.false.pos = sum(scn.p[-scn.changes] < alpha) scn.false.neg = sum(scn.p[scn.changes] >= alpha) scn.true.neg = length(scn.p) - scn.true.pos - scn.false.pos - scn.false.neg ma.true.pos = sum(ma.p[changes] < alpha) ma.false.pos = sum(ma.p[-changes] < alpha) ma.false.neg = sum(ma.p[changes] >= alpha) ma.true.neg = length(ma.p) - ma.true.pos - ma.false.pos - ma.false.neg } else{ loess.true.pos = 0 loess.false.pos = sum(loess.p < alpha) loess.false.neg = 0 loess.true.neg = length(loess.p) - loess.true.pos - loess.false.pos - loess.false.neg chromoR.true.pos = 0 chromoR.false.pos = sum(chromoR.p < alpha) chromoR.false.neg = 0 chromoR.true.neg = length(chromoR.p) - chromoR.true.pos - chromoR.false.pos - chromoR.false.neg ice.true.pos = 0 ice.false.pos = sum(ice.p < alpha) ice.false.neg = 0 ice.true.neg = length(ice.p) - ice.true.pos - ice.false.pos - ice.false.neg kr.true.pos = 0 kr.false.pos = sum(kr.p < alpha) kr.false.neg = 0 kr.true.neg = length(kr.p) - kr.true.pos - kr.false.pos - kr.false.neg scn.true.pos = 0 scn.false.pos = sum(scn.p < alpha) scn.false.neg = 0 scn.true.neg = length(scn.p) - scn.true.pos - scn.false.pos - scn.false.neg } # FDR loess.fdr = loess.false.pos / (loess.false.pos + loess.true.pos) chromoR.fdr = chromoR.false.pos / (chromoR.false.pos + chromoR.true.pos) ice.fdr = ice.false.pos / (ice.false.pos + ice.true.pos) kr.fdr = kr.false.pos / (kr.false.pos + kr.true.pos) scn.fdr = scn.false.pos / (scn.false.pos + scn.true.pos) ma.fdr = ma.false.pos / (ma.false.pos + ma.true.pos) # accuracy loess.acc = (loess.true.pos + loess.true.neg) / (length(loess.p)) chromoR.acc = (chromoR.true.pos + chromoR.true.neg) / (length(chromoR.p)) ice.acc = (ice.true.pos + ice.true.neg) / (length(ice.p)) kr.acc = (kr.true.pos + kr.true.neg) / (length(kr.p)) scn.acc = (scn.true.pos + scn.true.neg) / (length(scn.p)) ma.acc = (ma.true.pos + ma.true.neg) / (length(ma.p)) # Precision loess.pre = loess.true.pos / (loess.true.pos + loess.false.pos) chromoR.pre = chromoR.true.pos / (chromoR.true.pos + chromoR.false.pos) kr.pre = kr.true.pos / (kr.true.pos + kr.false.pos) ice.pre = ice.true.pos / (ice.true.pos + ice.false.pos) scn.pre = scn.true.pos / (scn.true.pos + scn.false.pos) ma.pre = ma.true.pos / (ma.true.pos + ma.false.pos) # FPR loess.fpr = loess.false.pos / (loess.false.pos + loess.true.neg) chromoR.fpr = chromoR.false.pos / (chromoR.false.pos + chromoR.true.neg) ice.fpr = ice.false.pos / (ice.false.pos + ice.true.neg) kr.fpr = kr.false.pos / (kr.false.pos + kr.true.neg) scn.fpr = scn.false.pos / (scn.false.pos + scn.true.neg) ma.fpr = ma.false.pos / (ma.false.pos + ma.true.neg) # FNR loess.fnr = loess.false.neg / (loess.true.pos + loess.false.neg) chromoR.fnr = chromoR.false.neg / (chromoR.true.pos + chromoR.false.neg) ice.fnr = ice.false.neg / (ice.true.pos + ice.false.neg) kr.fnr = kr.false.neg / (kr.true.pos + kr.false.neg) scn.fnr = scn.false.neg / (scn.true.pos + scn.false.neg) ma.fnr = ma.false.neg / (ma.true.pos + ma.false.neg) # FOR loess.for = loess.false.neg / (loess.false.neg + loess.true.neg) chromoR.for = chromoR.false.neg / (chromoR.false.neg + chromoR.true.neg) ice.for = ice.false.neg / (ice.false.neg + ice.true.neg) kr.for = kr.false.neg / (kr.false.neg + kr.true.neg) scn.for = scn.false.neg / (scn.false.neg + scn.true.neg) ma.for = ma.false.neg / (ma.false.neg + ma.true.neg) # NPV loess.npv = loess.true.neg / (loess.false.neg + loess.true.neg) chromoR.npv = chromoR.true.neg / (chromoR.false.neg + chromoR.true.neg) ice.npv = ice.true.neg / (ice.false.neg + ice.true.neg) kr.npv = kr.true.neg / (kr.false.neg + kr.true.neg) scn.npv = scn.true.neg / (scn.false.neg + scn.true.neg) ma.npv = ma.true.neg / (ma.false.neg + ma.true.neg) rates = data.frame(loess = c(loess.true.pos, loess.false.pos, loess.true.neg, loess.false.neg), chromoR = c(chromoR.true.pos, chromoR.false.pos, chromoR.true.neg, chromoR.false.neg), ice = c(ice.true.pos, ice.false.pos, ice.true.neg, ice.false.neg), kr = c(kr.true.pos, kr.false.pos, kr.true.neg, kr.false.neg), scn = c(scn.true.pos, scn.false.pos, scn.true.neg, scn.false.neg), ma = c(ma.true.pos, ma.false.pos, ma.true.neg, ma.false.neg)) TPR = apply(rates, 2, function(x) { x[1] / (x[1] + x[4]) }) SPC = apply(rates, 2, function(x) { x[3] / (x[3] + x[2]) }) total = apply(rates, 2, sum) rates = rbind(rates, total, TPR, SPC) F1 = c(loess.F1, chromoR.F1, ice.F1, kr.F1, scn.F1, ma.F1) AUC.row = c(loess.auc, chromoR.auc, ice.auc, kr.auc, scn.auc, ma.auc) FDR = c(loess.fdr, chromoR.fdr, ice.fdr, kr.fdr, scn.fdr, ma.fdr) Acc = c(loess.acc, chromoR.acc, ice.acc, kr.acc, scn.acc, ma.acc) AUC20 = c(auc(loess.roc, partial.auc = c(0.8,1)), auc(chromoR.roc, partial.auc = c(0.8,1)), auc(ice.roc, partial.auc = c(0.8,1)), auc(kr.roc, partial.auc = c(0.8,1)), auc(scn.roc, partial.auc = c(0.8,1)), auc(ma.roc, partial.auc = c(0.8,1))) precision = c(loess.pre, chromoR.pre, ice.pre, kr.pre, scn.pre, ma.pre) FPR = c(loess.fpr, chromoR.fpr, ice.fpr, kr.fpr, scn.fpr, ma.fpr) FNR = c(loess.fnr, chromoR.fnr, ice.fnr, kr.fnr, scn.fnr, ma.fnr) FOR = c(loess.for, chromoR.for, ice.for, kr.for, scn.for, ma.for) NPV = c(loess.npv, chromoR.npv, ice.npv, kr.npv, scn.npv, ma.npv) rates = rbind(rates, F1, AUC.row, AUC20, FDR, Acc, precision, FPR, FNR, FOR, NPV) rates = format(rates, scientific = FALSE) rownames(rates) = c('true positive', 'false positive', 'true negative', 'false negative', 'Total', 'TPR', 'SPC', 'F1', 'AUC', 'AUC 20%' , 'FDR', 'Accuracy', 'Precision', 'FPR', 'FNR', 'FOR', 'NPV') result = list(loess.roc = loess.roc, chromoR.roc = chromoR.roc, ice.roc = ice.roc, kr.roc = kr.roc, scn.roc = scn.roc, ma.roc = ma.roc, rates = rates, loess.F1 = loess.F1, chromoR.F1 = chromoR.F1, ice.F1 = ice.F1, kr.F1 = kr.F1, ma.F1 = ma.F1) return(result) } ``` # Introduction To evaluate the performance of different normalization methods on the detection of chromatin interaction differences, controlled changes were used. To better control for existing differences in the real Hi-C data, simulated Hi-C datasets were used. The data was simulated using the `hic_simulate` function. For the simulated matrices, the default values were used (see `?hic_simulate`) except for changing the size to be a 250 x 250 matrix. The effect of varying fold changes (1.5 by default) and varying number of controlled changes (500 by default) is investigated. Fold changes are applied to one of the datasets by up-regulating the selected IF if the difference between the datasets is positive. If the difference between the datasets at that point is negative the IF is down-regulated by the specified fold change. This method of making changes ensures that the fold change specified is actually realized on the MD plot. ## The effect of fold change ### 1.5 Fold change ```{r, cache=F} chr1.roc1 = ROC_on_replicate(FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc1$rates), digits = 3), options = list(pageLength = 20)) # kable(signif(data.matrix(chr1.roc1$rates), digits = 3), format = 'latex') pander(signif(data.matrix(chr1.roc1$rates), digits = 3)) ``` ### 2.0 Fold change ```{r, cache=F} chr1.roc2 = ROC_on_replicate(FC = 2.0, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc2$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc2$rates), digits = 3)) ``` ### 4.0 Fold change ```{r, cache=F} chr1.roc3 = ROC_on_replicate(FC = 4.0, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc3$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc3$rates), digits = 3)) ``` ### 10.0 fold change ```{r, cache=F} chr1.roc4 = ROC_on_replicate(FC = 10.0, plot.roc = F, plot.MD = F, numChanges = 500, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc4$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc4$rates), digits = 3)) ``` ### Bar plots The bar plots show comparisons of the effect of different fold changes using fixed numbers of controlled changes on different performance metrics. ```{r} # plot code for barplots make_barplot = function(dat1, dat2, dat3, dat4, metric = 'AUC') { library(reshape2) tab1 = dat1$rates tab2 = dat2$rates tab3 = dat3$rates tab4 = dat4$rates v1 = tab1[metric, ] %>% as.numeric() v2 = tab2[metric, ] %>% as.numeric() v3 = tab3[metric, ] %>% as.numeric() v4 = tab4[metric, ] %>% as.numeric() new = data.frame(method = factor(rep(c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA'), 4), levels = c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA')), value = c(v1, v2, v3, v4), Fold_change = factor(c(rep('1.5', 6), rep('2', 6), rep('4', 6), rep('10', 6)), levels = c('1.5', '2', '4', '10'))) ggplot(new, aes(x = method, y = value, fill = Fold_change)) + geom_bar(stat='identity', position=position_dodge()) + ggtitle(metric) } ``` ```{r} make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'AUC') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'TPR') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'SPC') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'F1') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'FDR') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'Accuracy') make_barplot(chr1.roc1, chr1.roc2, chr1.roc3, chr1.roc4, metric = 'Precision') ``` ### Summary The tables show that the most true differences are detected after joint `loess` normalization compared to the other normalization techniques. `loess` also has the lowest number of false positives among the normalization techniques. With `loess`, differences at smaller fold changes (1.5 and 2) are able to be detected more reliably compared to the other methods and this superiority continues to the higher fold changes though the individual normalization methods tend to make up some ground.`MA` normalization performed almost as well as `loess` at lower fold changes and was about equivalent at the higher fold changes. `ChromoR` performed the worst of the normalization techniques tested while `KR` and `SCN` tended to perform better. ## The effect of introduced number of changes A specified number of interaction frequencies were increased or decreased to produce a 1.5-fold change. ### 1 change ```{r, cache=F} chr1.roc5 = ROC_on_replicate(FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 1, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc5$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc5$rates), digits = 3)) ``` ### 100 changes ```{r, cache=F} chr1.roc6 = ROC_on_replicate(FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 100, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc6$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc6$rates), digits = 3)) ``` ### 200 changes ```{r, cache=F} chr1.roc7 = ROC_on_replicate(FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 200, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc7$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc7$rates), digits = 3)) ``` ### 1000 changes ```{r, cache=F} chr1.roc8 = ROC_on_replicate(FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 1000, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc8$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc8$rates), digits = 3)) ``` ### 5000 changes ```{r, cache=F} chr1.roc9 = ROC_on_replicate(FC = 1.5, plot.roc = F, plot.MD = F, numChanges = 5000, alpha = 0.05) # DT::datatable(signif(data.matrix(chr1.roc9$rates), digits = 3), options = list(pageLength = 20)) pander(signif(data.matrix(chr1.roc9$rates), digits = 3)) ``` ```{r} # plot code for barplots make_barplot2 = function(dat1, dat2, dat3, dat4, dat5, metric = 'AUC') { library(reshape2) tab1 = dat1$rates tab2 = dat2$rates tab3 = dat3$rates tab4 = dat4$rates tab5 = dat5$rates v1 = tab1[metric, ] %>% as.numeric() v2 = tab2[metric, ] %>% as.numeric() v3 = tab3[metric, ] %>% as.numeric() v4 = tab4[metric, ] %>% as.numeric() v5 = tab5[metric, ] %>% as.numeric() new = data.frame(method = factor(rep(c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA'), 5), levels = c('loess', 'chromoR', 'ICE', 'KR', 'SCN', 'MA')), value = c(v1, v2, v3, v4, v5), number_changes = factor(c(rep('1', 6), rep('100', 6), rep('200', 6), rep('1000', 6), rep('5000', 6)), levels = c('1', '100', '200', '1000', '5000'))) ggplot(new, aes(x = method, y = value, fill = number_changes)) + geom_bar(stat='identity', position=position_dodge()) + ggtitle(metric) } ``` ### Bar plots Below are bar plots showing comparisons of the different normalization methods over the varying numbers of changes at a fixed fold change for selected metrics. ```{r} make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'AUC') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'TPR') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'SPC') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'F1') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'FDR') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'Accuracy') make_barplot2(chr1.roc5, chr1.roc6, chr1.roc7, chr1.roc8, chr1.roc9, metric = 'Precision') ``` ### Summary When only a single true difference was introduced at a 1.5 fold change all methods failed to detect it. At all other numbers of introduced differences checked, `loess` was able to detect the most while also maintaining the lowest number of false positives. `KR` and `SCN` again performed better than `ICE` and `ChromoR` at detecting the true differences.

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4