# get_code_dependency ----2 3
#' Get code dependency of an object4
#'5
#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.6
#'7
#' Given a character vector with code, this function will extract the part of the code responsible for creating8
#' the variables specified by `names`.9
#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,10
#' _i.e._ calls that create variables used in the final call and their parents, etc.11
#' Also included are calls that create side-effects like establishing connections.12
#'13
#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .14
#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.15
#'16
#' Side-effects are not detected automatically and must be marked in the code.17
#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required18
#' to reproduce a variable called `object`.19
#'20
#' @param code `character` with the code.21
#' @param names `character` vector of object names.22
#' @param check_code_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.23
#'24
#' @return Character vector, a subset of `code`.25
#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.26
#'27
#' @keywords internal28
get_code_dependency <- function(code, names, check_code_names = TRUE) {29 84x
checkmate::assert_list(code, "character")30 84x
checkmate::assert_character(names, any.missing = FALSE)31 32 84x
graph <- lapply(code, attr, "dependency")33 34 84x
if (check_code_names) {35 83x
symbols <- unlist(lapply(graph, function(call) {36 236x
ind <- match("<-", call, nomatch = length(call) + 1L)37 236x
call[seq_len(ind - 1L)]38
}))39 40 83x
if (!all(names %in% unique(symbols))) {41 8x
warning("Object(s) not found in code: ", toString(setdiff(names, symbols)), ".", call. = FALSE)42
}43
}44 45 84x
if (length(code) == 0) {46 1x
return(code)47
}48 49 83x
ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))50 51 83x
lib_ind <- detect_libraries(graph)52 53 83x
code_ids <- sort(unique(c(lib_ind, ind)))54 83x
code[code_ids]55
}56 57
#' Locate function call token58
#'59
#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.60
#'61
#' Useful for determining occurrence of `assign` or `data` functions in an input call.62
#'63
#' @param call_pd `data.frame` as returned by `extract_calls()`64
#' @param text `character(1)` to look for in `text` column of `call_pd`65
#'66
#' @return67
#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.68
#' 0 if not found.69
#'70
#' @keywords internal71
#' @noRd72
find_call <- function(call_pd, text) {73 767x
checkmate::check_data_frame(call_pd)74 767x
checkmate::check_names(call_pd, must.include = c("token", "text"))75 767x
checkmate::check_string(text)76 77 767x
ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)78 767x
if (length(ans)) {79 11x
ans80
} else {81 756x
0L82
}83
}84 85
#' Split the result of `utils::getParseData()` into separate calls86
#'87
#' @param pd (`data.frame`) A result of `utils::getParseData()`.88
#'89
#' @return90
#' A `list` of `data.frame`s.91
#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.92
#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.93
#'94
#' @keywords internal95
#' @noRd96
extract_calls <- function(pd) {97 1003x
calls <- lapply(98 1003x
pd[pd$parent == 0 & (pd$token != "COMMENT" | grepl("@linksto", pd$text, fixed = TRUE)), "id"],99 1003x
function(parent) {100 1136x
rbind(101 1136x
pd[pd$id == parent, ],102 1136x
get_children(pd = pd, parent = parent)103
)104
}105
)106 1003x
calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)107 1003x
calls <- Filter(Negate(is.null), calls)108 1003x
calls <- fix_shifted_comments(calls)109 1003x
calls <- remove_custom_assign(calls, c(":="))110 1003x
fix_arrows(calls)111
}112 113
#' @keywords internal114
#' @noRd115
get_children <- function(pd, parent) {116 11104x
idx_children <- abs(pd$parent) == parent117 11104x
children <- pd[idx_children, ]118 11104x
if (nrow(children) == 0) {119 6306x
return(NULL)120
}121 122 4798x
if (parent > 0) {123 4798x
do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))124
}125
}126 127
#' Fixes edge case of comments being shifted to the next call.128
#' @keywords internal129
#' @noRd130
fix_shifted_comments <- function(calls) {131
# If the first or the second token is a @linksto COMMENT,132
# then it belongs to the previous call.133 1003x
if (length(calls) >= 2) {134 94x
for (i in 2:length(calls)) {135 170x
comment_idx <- grep("@linksto", calls[[i]][, "text"])136 170x
if (isTRUE(comment_idx[1] <= 2)) {137 9x
calls[[i - 1]] <- rbind(138 9x
calls[[i - 1]],139 9x
calls[[i]][comment_idx[1], ]140
)141 9x
calls[[i]] <- calls[[i]][-comment_idx[1], ]142
}143
}144
}145 1003x
Filter(nrow, calls)146
}147 148
#' Fixes edge case of custom assignments operator being treated as assignment.149
#'150
#' @param exclude (`character`) custom assignment operators to be excluded151
#' @keywords internal152
#' @noRd153
remove_custom_assign <- function(calls, exclude = NULL) {154 1003x
checkmate::assert_list(calls)155 1003x
checkmate::assert_character(exclude, null.ok = TRUE)156 1003x
lapply(calls, function(call) {157 1129x
if (!is.null(exclude)) {158 1129x
call[!(call$token == "LEFT_ASSIGN" & call$text %in% exclude), ]159
} else {160 !
call161
}162
})163
}164 165
#' Fixes edge case of `<-` assignment operator being called as function,166
#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.167
#' @keywords internal168
#' @noRd169
fix_arrows <- function(calls) {170 1003x
checkmate::assert_list(calls)171 1003x
lapply(calls, function(call) {172 1129x
sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"173 1129x
call[sym_fun, ] <- sub_arrows(call[sym_fun, ])174 1129x
call175
})176
}177 178
#' Execution of assignment operator substitutions for a call.179
#' @keywords internal180
#' @noRd181
sub_arrows <- function(call) {182 1129x
checkmate::assert_data_frame(call)183 1129x
map <- data.frame(184 1129x
row.names = c("<-", "<<-", "="),185 1129x
token = rep("LEFT_ASSIGN", 3),186 1129x
text = rep("<-", 3)187
)188 1129x
sub_ids <- call$text %in% rownames(map)189 1129x
call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]190 1129x
call191
}192 193
# code_graph ----194 195
#' Extract object occurrence196
#'197
#' Extracts objects occurrence within calls passed by `pd`.198
#' Also detects which objects depend on which within a call.199
#'200
#' @param pd `data.frame`;201
#' one of the results of `utils::getParseData()` split into subsets representing individual calls;202
#' created by `extract_calls()` function203
#'204
#' @return205
#' A character vector listing names of objects that depend on this call206
#' and names of objects that this call depends on.207
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`208
#' depends on objects `b` and `c`.209
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.210
#'211
#' @keywords internal212
#' @noRd213
extract_occurrence <- function(pd) {214 385x
is_in_function <- function(x) {215
# If an object is a function parameter,216
# then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.217 374x
function_id <- x[x$token == "FUNCTION", "parent"]218 374x
if (length(function_id)) {219 20x
x$id %in% get_children(x, function_id[1])$id220
} else {221 354x
rep(FALSE, nrow(x))222
}223
}224 385x
in_parenthesis <- function(x) {225 284x
if (any(x$token %in% c("LBB", "'['"))) {226 7x
id_start <- min(x$id[x$token %in% c("LBB", "'['")])227 7x
id_end <- min(x$id[x$token == "']'"])228 7x
x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]229
}230
}231 232
# Handle data(object)/data("object")/data(object, envir = ) independently.233 385x
data_call <- find_call(pd, "data")234 385x
if (data_call) {235 3x
sym <- pd[data_call + 1, "text"]236 3x
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))237
}238
# Handle assign(x = ).239 382x
assign_call <- find_call(pd, "assign")240 382x
if (assign_call) {241
# Check if parameters were named.242
# "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.243
# "EQ_SUB" is for `=` appearing after the name of the named parameter.244 8x
if (any(pd$token == "SYMBOL_SUB")) {245 4x
params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]246
# Remove sequence of "=", ",".247 4x
if (length(params > 1)) {248 4x
remove <- integer(0)249 4x
for (i in 2:length(params)) {250 20x
if (params[i - 1] == "=" && params[i] == ",") {251 4x
remove <- c(remove, i - 1, i)252
}253
}254 3x
if (length(remove)) params <- params[-remove]255
}256 4x
pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))257 4x
if (!pos) {258 !
return(character(0L))259
}260
# pos is indicator of the place of 'x'261
# 1. All parameters are named, but none is 'x' - return(character(0L))262
# 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))263
# - check "x" in params being just a vector of named parameters.264
# 3. Some parameters are named, 'x' is not in named parameters265
# - check first appearance of "," (unnamed parameter) in vector parameters.266
} else {267
# Object is the first entry after 'assign'.268 4x
pos <- 1269
}270 8x
sym <- pd[assign_call + pos, "text"]271 8x
return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))272
}273 274
# What occurs in a function body is not tracked.275 374x
x <- pd[!is_in_function(pd), ]276 374x
sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))277 374x
sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL")278 279 374x
if (length(sym_cond) == 0) {280 17x
return(character(0L))281
}282
# Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.283
# For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.284 357x
dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]285 357x
if (length(dollar_ids)) {286 6x
object_ids <- x[sym_cond, "id"]287 6x
after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]288 6x
sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))289
}290 291 357x
assign_cond <- grep("ASSIGN", x$token)292 357x
if (!length(assign_cond)) {293 73x
return(c("<-", unique(x[sym_cond, "text"])))294
}295 296
# For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('.297 284x
sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)]298 299
# If there was an assignment operation detect direction of it.300 284x
if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c.301 1x
sym_cond <- rev(sym_cond)302
}303 304 284x
after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1305 284x
ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))306 284x
roll <- in_parenthesis(pd)307 284x
if (length(roll)) {308 3x
c(setdiff(ans, roll), roll)309
} else {310 281x
ans311
}312
}313 314
#' Extract side effects315
#'316
#' Extracts all object names from the code that are marked with `@linksto` tag.317
#'318
#' The code may contain functions calls that create side effects, e.g. modify the environment.319
#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.320
#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.321
#' With this tag a complete object dependency structure can be established.322
#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.323
#'324
#' @param pd `data.frame`;325
#' one of the results of `utils::getParseData()` split into subsets representing individual calls;326
#' created by `extract_calls()` function327
#'328
#' @return329
#' A character vector of names of objects330
#' depending a call tagged with `@linksto` in a corresponding element of `pd`.331
#'332
#' @keywords internal333
#' @noRd334
extract_side_effects <- function(pd) {335 372x
linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)336 372x
unlist(strsplit(sub("\\s*#.*@linksto\\s+", "", linksto), "\\s+"))337
}338 339
#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)340
#' @keywords internal341
#' @noRd342
extract_dependency <- function(parsed_code) {343 408x
full_pd <- normalize_pd(utils::getParseData(parsed_code))344 408x
reordered_full_pd <- extract_calls(full_pd)345 346
# Early return on empty code347 408x
if (length(reordered_full_pd) == 0L) {348 36x
return(NULL)349
}350 351 372x
if (length(parsed_code) == 0L) {352 1x
return(extract_side_effects(reordered_full_pd[[1]]))353
}354 371x
expr_ix <- lapply(parsed_code[[1]], class) == "{"355 356
# Build queue of expressions to parse individually357 371x
queue <- list()358 371x
parsed_code_list <- if (all(!expr_ix)) {359 363x
list(parsed_code)360
} else {361 8x
queue <- as.list(parsed_code[[1]][expr_ix])362 8x
new_list <- parsed_code[[1]]363 8x
new_list[expr_ix] <- NULL364 8x
list(parse(text = as.expression(new_list), keep.source = TRUE))365
}366 367 371x
while (length(queue) > 0) {368 22x
current <- queue[[1]]369 22x
queue <- queue[-1]370 22x
if (identical(current[[1L]], as.name("{"))) {371 8x
queue <- append(queue, as.list(current)[-1L])372
} else {373 14x
parsed_code_list[[length(parsed_code_list) + 1]] <- parse(text = as.expression(current), keep.source = TRUE)374
}375
}376 377 371x
parsed_occurences <- lapply(378 371x
parsed_code_list,379 371x
function(parsed_code) {380 385x
pd <- normalize_pd(utils::getParseData(parsed_code))381 385x
reordered_pd <- extract_calls(pd)382 385x
if (length(reordered_pd) > 0) {383
# extract_calls is needed to reorder the pd so that assignment operator comes before symbol names384
# extract_calls is needed also to substitute assignment operators into specific format with fix_arrows385
# extract_calls is needed to omit empty calls that contain only one token `"';'"`386
# This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different387
# than in original pd388 385x
extract_occurrence(reordered_pd[[1]])389
}390
}391
)392 393
# Merge results together394 371x
result <- Reduce(395 371x
function(u, v) {396 385x
ix <- if ("<-" %in% v) min(which(v == "<-")) else 0397 385x
u$left_side <- c(u$left_side, v[seq_len(max(0, ix - 1))])398 385x
u$right_side <- c(399 385x
u$right_side,400 385x
if (ix == length(v)) character(0L) else v[seq(ix + 1, max(ix + 1, length(v)))]401
)402 385x
u403
},404 371x
init = list(left_side = character(0L), right_side = character(0L)),405 371x
x = parsed_occurences406
)407 408 371x
c(extract_side_effects(reordered_full_pd[[1]]), result$left_side, "<-", result$right_side)409
}410 411
# graph_parser ----412 413
#' Return the indices of calls needed to reproduce an object414
#'415
#' @param x The name of the object to return code for.416
#' @param graph A result of `code_graph()`.417
#'418
#' @return419
#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.420
#'421
#' @keywords internal422
#' @noRd423
graph_parser <- function(x, graph) {424
# x occurrences (lhs)425 349x
occurrence <- vapply(426 349x
graph, function(call) {427 593x
ind <- match("<-", call, nomatch = length(call) + 1L)428 593x
x %in% call[seq_len(ind - 1L)]429
},430 349x
logical(1)431
)432 433
# x-dependent objects (rhs)434 349x
dependencies <- lapply(graph[occurrence], function(call) {435 155x
ind <- match("<-", call, nomatch = 0L)436 155x
call[(ind + 1L):length(call)]437
})438 349x
dependencies <- setdiff(unlist(dependencies), x)439 440 349x
dependency_occurrences <- lapply(dependencies, function(dependency) {441
# track down dependencies and where they occur on the lhs in previous calls442 251x
last_x_occurrence <- max(which(occurrence))443 251x
reduced_graph <- utils::head(graph[seq_len(last_x_occurrence)], -1)444 251x
c(graph_parser(dependency, reduced_graph), last_x_occurrence)445
})446 447 349x
sort(unique(c(which(occurrence), unlist(dependency_occurrences))))448
}449 450 451
# default_side_effects --------------------------------------------------------------------------------------------452 453
#' Detect library calls454
#'455
#' Detects `library()` and `require()` function calls.456
#'457
#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`458
#'459
#' @return460
#' Integer vector of indices that can be applied to `graph` to obtain all calls containing461
#' `library()` or `require()` calls that are always returned for reproducibility.462
#'463
#' @keywords internal464
#' @noRd465
detect_libraries <- function(graph) {466 83x
defaults <- c("library", "require")467 468 83x
which(469 83x
unlist(470 83x
lapply(471 83x
graph, function(x) {472 237x
any(grepl(pattern = paste(defaults, collapse = "|"), x = x))473
}474
)475
)476
)477
}478 479 480
# utils -----------------------------------------------------------------------------------------------------------481 482 483
#' Normalize parsed data removing backticks from symbols484
#'485
#' @param pd `data.frame` resulting from `utils::getParseData()` call.486
#'487
#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens.488
#'489
#' @keywords internal490
#' @noRd491
normalize_pd <- function(pd) {492
# Remove backticks from SYMBOL tokens493 1003x
symbol_index <- grepl("^SYMBOL.*$", pd$token)494 1003x
pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"])495 496 1003x
pd497
}498 499 500
# split_code ------------------------------------------------------------------------------------------------------501 502 503
#' Get line/column in the source where the calls end504
#'505
#'506
#' @param code `character(1)`507
#'508
#' @return `matrix` with `colnames = c("line", "col")`509
#'510
#' @keywords internal511
#' @noRd512
get_call_breaks <- function(code) {513 210x
parsed_code <- parse(text = code, keep.source = TRUE)514 210x
pd <- utils::getParseData(parsed_code)515 210x
pd <- normalize_pd(pd)516 210x
pd <- pd[pd$token != "';'", ]517 210x
call_breaks <- t(sapply(518 210x
extract_calls(pd),519 210x
function(x) {520 372x
matrix(c(max(x$line2), max(x$col2[x$line2 == max(x$line2)])))521
}522
))523 210x
call_breaks <- call_breaks[-nrow(call_breaks), , drop = FALSE] # breaks in between needed only524 210x
if (nrow(call_breaks) == 0L) {525 122x
call_breaks <- matrix(numeric(0), ncol = 2)526
}527 210x
colnames(call_breaks) <- c("line", "col")528 210x
call_breaks529
}530 531
#' Split code by calls532
#'533
#' @param code `character` with the code.534
#'535
#' @return list of `character`s of the length equal to the number of calls in `code`.536
#'537
#' @keywords internal538
#' @noRd539
split_code <- function(code) {540 210x
call_breaks <- get_call_breaks(code)541 210x
if (nrow(call_breaks) == 0) {542 122x
return(code)543
}544 88x
call_breaks <- call_breaks[order(call_breaks[, "line"], call_breaks[, "col"]), , drop = FALSE]545 88x
code_split <- strsplit(code, split = "\n", fixed = TRUE)[[1]]546 88x
char_count_lines <- c(0, cumsum(sapply(code_split, nchar, USE.NAMES = FALSE) + 1), -1)[seq_along(code_split)]547 548 88x
idx_start <- c(549 88x
0, # first call starts in the beginning of src550 88x
char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"] + 1551
)552 88x
idx_end <- c(553 88x
char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"],554 88x
nchar(code) # last call end in the end of src555
)556 88x
new_code <- substring(code, idx_start, idx_end)557 558
# line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line559
# we need to move remove leading and add \n instead when combining calls560 88x
c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1]))561
}
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4