A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://insightsengineering.github.io/teal.code/main/coverage-report/ below:

1
# get_code_dependency ----
2
 3 
#' Get code dependency of an object
4
#'
5
#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.
6
#'
7
#' Given a character vector with code, this function will extract the part of the code responsible for creating
8
#' the variables specified by `names`.
9
#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,
10
#' _i.e._ calls that create variables used in the final call and their parents, etc.
11
#' Also included are calls that create side-effects like establishing connections.
12
#'
13
#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .
14
#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.
15
#'
16
#' Side-effects are not detected automatically and must be marked in the code.
17
#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required
18
#' to reproduce a variable called `object`.
19
#'
20
#' @param code `character` with the code.
21
#' @param names `character` vector of object names.
22
#' @param check_code_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.
23
#'
24
#' @return Character vector, a subset of `code`.
25
#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.
26
#'
27
#' @keywords internal
28
get_code_dependency <- function(code, names, check_code_names = TRUE) {
29 84x
  checkmate::assert_list(code, "character")
30 84x
  checkmate::assert_character(names, any.missing = FALSE)
31
 32 84x
  graph <- lapply(code, attr, "dependency")
33
 34 84x
  if (check_code_names) {
35 83x
    symbols <- unlist(lapply(graph, function(call) {
36 236x
      ind <- match("<-", call, nomatch = length(call) + 1L)
37 236x
      call[seq_len(ind - 1L)]
38
    }))
39
 40 83x
    if (!all(names %in% unique(symbols))) {
41 8x
      warning("Object(s) not found in code: ", toString(setdiff(names, symbols)), ".", call. = FALSE)
42
    }
43
  }
44
 45 84x
  if (length(code) == 0) {
46 1x
    return(code)
47
  }
48
 49 83x
  ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))
50
 51 83x
  lib_ind <- detect_libraries(graph)
52
 53 83x
  code_ids <- sort(unique(c(lib_ind, ind)))
54 83x
  code[code_ids]
55
}
56
 57 
#' Locate function call token
58
#'
59
#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.
60
#'
61
#' Useful for determining occurrence of `assign` or `data` functions in an input call.
62
#'
63
#' @param call_pd `data.frame` as returned by `extract_calls()`
64
#' @param text `character(1)` to look for in `text` column of `call_pd`
65
#'
66
#' @return
67
#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.
68
#' 0 if not found.
69
#'
70
#' @keywords internal
71
#' @noRd
72
find_call <- function(call_pd, text) {
73 767x
  checkmate::check_data_frame(call_pd)
74 767x
  checkmate::check_names(call_pd, must.include = c("token", "text"))
75 767x
  checkmate::check_string(text)
76
 77 767x
  ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)
78 767x
  if (length(ans)) {
79 11x
    ans
80
  } else {
81 756x
    0L
82
  }
83
}
84
 85 
#' Split the result of `utils::getParseData()` into separate calls
86
#'
87
#' @param pd (`data.frame`) A result of `utils::getParseData()`.
88
#'
89
#' @return
90
#' A `list` of `data.frame`s.
91
#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.
92
#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.
93
#'
94
#' @keywords internal
95
#' @noRd
96
extract_calls <- function(pd) {
97 1003x
  calls <- lapply(
98 1003x
    pd[pd$parent == 0 & (pd$token != "COMMENT" | grepl("@linksto", pd$text, fixed = TRUE)), "id"],
99 1003x
    function(parent) {
100 1136x
      rbind(
101 1136x
        pd[pd$id == parent, ],
102 1136x
        get_children(pd = pd, parent = parent)
103
      )
104
    }
105
  )
106 1003x
  calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)
107 1003x
  calls <- Filter(Negate(is.null), calls)
108 1003x
  calls <- fix_shifted_comments(calls)
109 1003x
  calls <- remove_custom_assign(calls, c(":="))
110 1003x
  fix_arrows(calls)
111
}
112
 113 
#' @keywords internal
114
#' @noRd
115
get_children <- function(pd, parent) {
116 11104x
  idx_children <- abs(pd$parent) == parent
117 11104x
  children <- pd[idx_children, ]
118 11104x
  if (nrow(children) == 0) {
119 6306x
    return(NULL)
120
  }
121
 122 4798x
  if (parent > 0) {
123 4798x
    do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))
124
  }
125
}
126
 127 
#' Fixes edge case of comments being shifted to the next call.
128
#' @keywords internal
129
#' @noRd
130
fix_shifted_comments <- function(calls) {
131
  # If the first or the second token is a @linksto COMMENT,
132
  #  then it belongs to the previous call.
133 1003x
  if (length(calls) >= 2) {
134 94x
    for (i in 2:length(calls)) {
135 170x
      comment_idx <- grep("@linksto", calls[[i]][, "text"])
136 170x
      if (isTRUE(comment_idx[1] <= 2)) {
137 9x
        calls[[i - 1]] <- rbind(
138 9x
          calls[[i - 1]],
139 9x
          calls[[i]][comment_idx[1], ]
140
        )
141 9x
        calls[[i]] <- calls[[i]][-comment_idx[1], ]
142
      }
143
    }
144
  }
145 1003x
  Filter(nrow, calls)
146
}
147
 148 
#' Fixes edge case of custom assignments operator being treated as assignment.
149
#'
150
#' @param exclude (`character`) custom assignment operators to be excluded
151
#' @keywords internal
152
#' @noRd
153
remove_custom_assign <- function(calls, exclude = NULL) {
154 1003x
  checkmate::assert_list(calls)
155 1003x
  checkmate::assert_character(exclude, null.ok = TRUE)
156 1003x
  lapply(calls, function(call) {
157 1129x
    if (!is.null(exclude)) {
158 1129x
      call[!(call$token == "LEFT_ASSIGN" & call$text %in% exclude), ]
159
    } else {
160 !
      call
161
    }
162
  })
163
}
164
 165 
#' Fixes edge case of `<-` assignment operator being called as function,
166
#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.
167
#' @keywords internal
168
#' @noRd
169
fix_arrows <- function(calls) {
170 1003x
  checkmate::assert_list(calls)
171 1003x
  lapply(calls, function(call) {
172 1129x
    sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"
173 1129x
    call[sym_fun, ] <- sub_arrows(call[sym_fun, ])
174 1129x
    call
175
  })
176
}
177
 178 
#' Execution of assignment operator substitutions for a call.
179
#' @keywords internal
180
#' @noRd
181
sub_arrows <- function(call) {
182 1129x
  checkmate::assert_data_frame(call)
183 1129x
  map <- data.frame(
184 1129x
    row.names = c("<-", "<<-", "="),
185 1129x
    token = rep("LEFT_ASSIGN", 3),
186 1129x
    text = rep("<-", 3)
187
  )
188 1129x
  sub_ids <- call$text %in% rownames(map)
189 1129x
  call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]
190 1129x
  call
191
}
192
 193 
# code_graph ----
194
 195 
#' Extract object occurrence
196
#'
197
#' Extracts objects occurrence within calls passed by `pd`.
198
#' Also detects which objects depend on which within a call.
199
#'
200
#' @param pd `data.frame`;
201
#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
202
#'  created by `extract_calls()` function
203
#'
204
#' @return
205
#' A character vector listing names of objects that depend on this call
206
#' and names of objects that this call depends on.
207
#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`
208
#' depends on objects `b` and `c`.
209
#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.
210
#'
211
#' @keywords internal
212
#' @noRd
213
extract_occurrence <- function(pd) {
214 385x
  is_in_function <- function(x) {
215
    # If an object is a function parameter,
216
    # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.
217 374x
    function_id <- x[x$token == "FUNCTION", "parent"]
218 374x
    if (length(function_id)) {
219 20x
      x$id %in% get_children(x, function_id[1])$id
220
    } else {
221 354x
      rep(FALSE, nrow(x))
222
    }
223
  }
224 385x
  in_parenthesis <- function(x) {
225 284x
    if (any(x$token %in% c("LBB", "'['"))) {
226 7x
      id_start <- min(x$id[x$token %in% c("LBB", "'['")])
227 7x
      id_end <- min(x$id[x$token == "']'"])
228 7x
      x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]
229
    }
230
  }
231
 232 
  # Handle data(object)/data("object")/data(object, envir = ) independently.
233 385x
  data_call <- find_call(pd, "data")
234 385x
  if (data_call) {
235 3x
    sym <- pd[data_call + 1, "text"]
236 3x
    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
237
  }
238
  # Handle assign(x = ).
239 382x
  assign_call <- find_call(pd, "assign")
240 382x
  if (assign_call) {
241
    # Check if parameters were named.
242
    # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.
243
    # "EQ_SUB" is for `=` appearing after the name of the named parameter.
244 8x
    if (any(pd$token == "SYMBOL_SUB")) {
245 4x
      params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]
246
      # Remove sequence of "=", ",".
247 4x
      if (length(params > 1)) {
248 4x
        remove <- integer(0)
249 4x
        for (i in 2:length(params)) {
250 20x
          if (params[i - 1] == "=" && params[i] == ",") {
251 4x
            remove <- c(remove, i - 1, i)
252
          }
253
        }
254 3x
        if (length(remove)) params <- params[-remove]
255
      }
256 4x
      pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))
257 4x
      if (!pos) {
258 !
        return(character(0L))
259
      }
260
      # pos is indicator of the place of 'x'
261
      # 1. All parameters are named, but none is 'x' - return(character(0L))
262
      # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))
263
      # - check "x" in params being just a vector of named parameters.
264
      # 3. Some parameters are named, 'x' is not in named parameters
265
      # - check first appearance of "," (unnamed parameter) in vector parameters.
266
    } else {
267
      # Object is the first entry after 'assign'.
268 4x
      pos <- 1
269
    }
270 8x
    sym <- pd[assign_call + pos, "text"]
271 8x
    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))
272
  }
273
 274 
  # What occurs in a function body is not tracked.
275 374x
  x <- pd[!is_in_function(pd), ]
276 374x
  sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))
277 374x
  sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL")
278
 279 374x
  if (length(sym_cond) == 0) {
280 17x
    return(character(0L))
281
  }
282
  # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.
283
  # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.
284 357x
  dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]
285 357x
  if (length(dollar_ids)) {
286 6x
    object_ids <- x[sym_cond, "id"]
287 6x
    after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]
288 6x
    sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))
289
  }
290
 291 357x
  assign_cond <- grep("ASSIGN", x$token)
292 357x
  if (!length(assign_cond)) {
293 73x
    return(c("<-", unique(x[sym_cond, "text"])))
294
  }
295
 296 
  # For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('.
297 284x
  sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)]
298
 299 
  # If there was an assignment operation detect direction of it.
300 284x
  if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c.
301 1x
    sym_cond <- rev(sym_cond)
302
  }
303
 304 284x
  after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1
305 284x
  ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))
306 284x
  roll <- in_parenthesis(pd)
307 284x
  if (length(roll)) {
308 3x
    c(setdiff(ans, roll), roll)
309
  } else {
310 281x
    ans
311
  }
312
}
313
 314 
#' Extract side effects
315
#'
316
#' Extracts all object names from the code that are marked with `@linksto` tag.
317
#'
318
#' The code may contain functions calls that create side effects, e.g. modify the environment.
319
#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.
320
#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.
321
#' With this tag a complete object dependency structure can be established.
322
#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.
323
#'
324
#' @param pd `data.frame`;
325
#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;
326
#'  created by `extract_calls()` function
327
#'
328
#' @return
329
#' A character vector of names of objects
330
#' depending a call tagged with `@linksto` in a corresponding element of `pd`.
331
#'
332
#' @keywords internal
333
#' @noRd
334
extract_side_effects <- function(pd) {
335 372x
  linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)
336 372x
  unlist(strsplit(sub("\\s*#.*@linksto\\s+", "", linksto), "\\s+"))
337
}
338
 339 
#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)
340
#' @keywords internal
341
#' @noRd
342
extract_dependency <- function(parsed_code) {
343 408x
  full_pd <- normalize_pd(utils::getParseData(parsed_code))
344 408x
  reordered_full_pd <- extract_calls(full_pd)
345
 346 
  # Early return on empty code
347 408x
  if (length(reordered_full_pd) == 0L) {
348 36x
    return(NULL)
349
  }
350
 351 372x
  if (length(parsed_code) == 0L) {
352 1x
    return(extract_side_effects(reordered_full_pd[[1]]))
353
  }
354 371x
  expr_ix <- lapply(parsed_code[[1]], class) == "{"
355
 356 
  # Build queue of expressions to parse individually
357 371x
  queue <- list()
358 371x
  parsed_code_list <- if (all(!expr_ix)) {
359 363x
    list(parsed_code)
360
  } else {
361 8x
    queue <- as.list(parsed_code[[1]][expr_ix])
362 8x
    new_list <- parsed_code[[1]]
363 8x
    new_list[expr_ix] <- NULL
364 8x
    list(parse(text = as.expression(new_list), keep.source = TRUE))
365
  }
366
 367 371x
  while (length(queue) > 0) {
368 22x
    current <- queue[[1]]
369 22x
    queue <- queue[-1]
370 22x
    if (identical(current[[1L]], as.name("{"))) {
371 8x
      queue <- append(queue, as.list(current)[-1L])
372
    } else {
373 14x
      parsed_code_list[[length(parsed_code_list) + 1]] <- parse(text = as.expression(current), keep.source = TRUE)
374
    }
375
  }
376
 377 371x
  parsed_occurences <- lapply(
378 371x
    parsed_code_list,
379 371x
    function(parsed_code) {
380 385x
      pd <- normalize_pd(utils::getParseData(parsed_code))
381 385x
      reordered_pd <- extract_calls(pd)
382 385x
      if (length(reordered_pd) > 0) {
383
        # extract_calls is needed to reorder the pd so that assignment operator comes before symbol names
384
        # extract_calls is needed also to substitute assignment operators into specific format with fix_arrows
385
        # extract_calls is needed to omit empty calls that contain only one token `"';'"`
386
        # This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different
387
        # than in original pd
388 385x
        extract_occurrence(reordered_pd[[1]])
389
      }
390
    }
391
  )
392
 393 
  # Merge results together
394 371x
  result <- Reduce(
395 371x
    function(u, v) {
396 385x
      ix <- if ("<-" %in% v) min(which(v == "<-")) else 0
397 385x
      u$left_side <- c(u$left_side, v[seq_len(max(0, ix - 1))])
398 385x
      u$right_side <- c(
399 385x
        u$right_side,
400 385x
        if (ix == length(v)) character(0L) else v[seq(ix + 1, max(ix + 1, length(v)))]
401
      )
402 385x
      u
403
    },
404 371x
    init = list(left_side = character(0L), right_side = character(0L)),
405 371x
    x = parsed_occurences
406
  )
407
 408 371x
  c(extract_side_effects(reordered_full_pd[[1]]), result$left_side, "<-", result$right_side)
409
}
410
 411 
# graph_parser ----
412
 413 
#' Return the indices of calls needed to reproduce an object
414
#'
415
#' @param x The name of the object to return code for.
416
#' @param graph A result of `code_graph()`.
417
#'
418
#' @return
419
#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.
420
#'
421
#' @keywords internal
422
#' @noRd
423
graph_parser <- function(x, graph) {
424
  # x occurrences (lhs)
425 349x
  occurrence <- vapply(
426 349x
    graph, function(call) {
427 593x
      ind <- match("<-", call, nomatch = length(call) + 1L)
428 593x
      x %in% call[seq_len(ind - 1L)]
429
    },
430 349x
    logical(1)
431
  )
432
 433 
  # x-dependent objects (rhs)
434 349x
  dependencies <- lapply(graph[occurrence], function(call) {
435 155x
    ind <- match("<-", call, nomatch = 0L)
436 155x
    call[(ind + 1L):length(call)]
437
  })
438 349x
  dependencies <- setdiff(unlist(dependencies), x)
439
 440 349x
  dependency_occurrences <- lapply(dependencies, function(dependency) {
441
    # track down dependencies and where they occur on the lhs in previous calls
442 251x
    last_x_occurrence <- max(which(occurrence))
443 251x
    reduced_graph <- utils::head(graph[seq_len(last_x_occurrence)], -1)
444 251x
    c(graph_parser(dependency, reduced_graph), last_x_occurrence)
445
  })
446
 447 349x
  sort(unique(c(which(occurrence), unlist(dependency_occurrences))))
448
}
449
 450 
 451 
# default_side_effects --------------------------------------------------------------------------------------------
452
 453 
#' Detect library calls
454
#'
455
#' Detects `library()` and `require()` function calls.
456
#'
457
#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`
458
#'
459
#' @return
460
#' Integer vector of indices that can be applied to `graph` to obtain all calls containing
461
#' `library()` or `require()` calls that are always returned for reproducibility.
462
#'
463
#' @keywords internal
464
#' @noRd
465
detect_libraries <- function(graph) {
466 83x
  defaults <- c("library", "require")
467
 468 83x
  which(
469 83x
    unlist(
470 83x
      lapply(
471 83x
        graph, function(x) {
472 237x
          any(grepl(pattern = paste(defaults, collapse = "|"), x = x))
473
        }
474
      )
475
    )
476
  )
477
}
478
 479 
 480 
# utils -----------------------------------------------------------------------------------------------------------
481
 482 
 483 
#' Normalize parsed data removing backticks from symbols
484
#'
485
#' @param pd `data.frame` resulting from `utils::getParseData()` call.
486
#'
487
#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens.
488
#'
489
#' @keywords internal
490
#' @noRd
491
normalize_pd <- function(pd) {
492
  # Remove backticks from SYMBOL tokens
493 1003x
  symbol_index <- grepl("^SYMBOL.*$", pd$token)
494 1003x
  pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"])
495
 496 1003x
  pd
497
}
498
 499 
 500 
# split_code ------------------------------------------------------------------------------------------------------
501
 502 
 503 
#' Get line/column in the source where the calls end
504
#'
505
#'
506
#' @param code `character(1)`
507
#'
508
#' @return `matrix` with `colnames = c("line", "col")`
509
#'
510
#' @keywords internal
511
#' @noRd
512
get_call_breaks <- function(code) {
513 210x
  parsed_code <- parse(text = code, keep.source = TRUE)
514 210x
  pd <- utils::getParseData(parsed_code)
515 210x
  pd <- normalize_pd(pd)
516 210x
  pd <- pd[pd$token != "';'", ]
517 210x
  call_breaks <- t(sapply(
518 210x
    extract_calls(pd),
519 210x
    function(x) {
520 372x
      matrix(c(max(x$line2), max(x$col2[x$line2 == max(x$line2)])))
521
    }
522
  ))
523 210x
  call_breaks <- call_breaks[-nrow(call_breaks), , drop = FALSE] # breaks in between needed only
524 210x
  if (nrow(call_breaks) == 0L) {
525 122x
    call_breaks <- matrix(numeric(0), ncol = 2)
526
  }
527 210x
  colnames(call_breaks) <- c("line", "col")
528 210x
  call_breaks
529
}
530
 531 
#' Split code by calls
532
#'
533
#' @param code `character` with the code.
534
#'
535
#' @return list of `character`s of the length equal to the number of calls in `code`.
536
#'
537
#' @keywords internal
538
#' @noRd
539
split_code <- function(code) {
540 210x
  call_breaks <- get_call_breaks(code)
541 210x
  if (nrow(call_breaks) == 0) {
542 122x
    return(code)
543
  }
544 88x
  call_breaks <- call_breaks[order(call_breaks[, "line"], call_breaks[, "col"]), , drop = FALSE]
545 88x
  code_split <- strsplit(code, split = "\n", fixed = TRUE)[[1]]
546 88x
  char_count_lines <- c(0, cumsum(sapply(code_split, nchar, USE.NAMES = FALSE) + 1), -1)[seq_along(code_split)]
547
 548 88x
  idx_start <- c(
549 88x
    0, # first call starts in the beginning of src
550 88x
    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"] + 1
551
  )
552 88x
  idx_end <- c(
553 88x
    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"],
554 88x
    nchar(code) # last call end in the end of src
555
  )
556 88x
  new_code <- substring(code, idx_start, idx_end)
557
 558 
  # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line
559
  # we need to move remove leading and add \n instead when combining calls
560 88x
  c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1]))
561
}

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4