RetroSearch Browse

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Showing content from https://insightsengineering.github.io/teal.code/main/coverage-report/ below:

# get_code_dependency ----

#' Get code dependency of an object

#'

#' Extract subset of code required to reproduce specific object(s), including code producing side-effects.

#'

#' Given a character vector with code, this function will extract the part of the code responsible for creating

#' the variables specified by `names`.

#' This includes the final call that creates the variable(s) in question as well as all _parent calls_,

#' _i.e._ calls that create variables used in the final call and their parents, etc.

#' Also included are calls that create side-effects like establishing connections.

#'

#' It is assumed that object dependency is established by using three assignment operators: `<-`, `=`, and `->` .

#' Other assignment methods (`assign`, `<<-`) or non-standard-evaluation methods are not supported.

#'

#' Side-effects are not detected automatically and must be marked in the code.

#' Add `# @linksto object` at the end of a line where a side-effect occurs to specify that this line is required

#' to reproduce a variable called `object`.

#'

#' @param code `character` with the code.

#' @param names `character` vector of object names.

#' @param check_code_names `logical(1)` flag specifying if a warning for non-existing names should be displayed.

#'

#' @return Character vector, a subset of `code`.

#' Note that subsetting is actually done on the calls `code`, not necessarily on the elements of the vector.

#'

#' @keywords internal

get_code_dependency <- function(code, names, check_code_names = TRUE) {

29 84x

  checkmate::assert_list(code, "character")

30 84x

  checkmate::assert_character(names, any.missing = FALSE)

32 84x

  graph <- lapply(code, attr, "dependency")

34 84x

  if (check_code_names) {

35 83x

    symbols <- unlist(lapply(graph, function(call) {

36 236x

      ind <- match("<-", call, nomatch = length(call) + 1L)

37 236x

      call[seq_len(ind - 1L)]

}))

40 83x

    if (!all(names %in% unique(symbols))) {

41 8x

      warning("Object(s) not found in code: ", toString(setdiff(names, symbols)), ".", call. = FALSE)

45 84x

  if (length(code) == 0) {

46 1x

    return(code)

49 83x

  ind <- unlist(lapply(names, function(x) graph_parser(x, graph)))

51 83x

  lib_ind <- detect_libraries(graph)

53 83x

  code_ids <- sort(unique(c(lib_ind, ind)))

54 83x

  code[code_ids]

#' Locate function call token

#'

#' Determine which row of parsed data is specific `SYMBOL_FUNCTION_CALL` token.

#'

#' Useful for determining occurrence of `assign` or `data` functions in an input call.

#'

#' @param call_pd `data.frame` as returned by `extract_calls()`

#' @param text `character(1)` to look for in `text` column of `call_pd`

#'

#' @return

#' Single integer specifying row in `call_pd` where `token` is `SYMBOL_FUNCTION_CALL` and `text` is `text`.

#' 0 if not found.

#'

#' @keywords internal

#' @noRd

find_call <- function(call_pd, text) {

73 767x

  checkmate::check_data_frame(call_pd)

74 767x

  checkmate::check_names(call_pd, must.include = c("token", "text"))

75 767x

  checkmate::check_string(text)

77 767x

  ans <- which(call_pd$token == "SYMBOL_FUNCTION_CALL" & call_pd$text == text)

78 767x

  if (length(ans)) {

79 11x

ans

  } else {

81 756x

0L

#' Split the result of `utils::getParseData()` into separate calls

#'

#' @param pd (`data.frame`) A result of `utils::getParseData()`.

#'

#' @return

#' A `list` of `data.frame`s.

#' Each element is a subset of `pd` corresponding to one call in the original code from which `pd` was obtained.

#' Only four columns (`"token"`, `"text"`, `"id"`, `"parent"`) are kept, the rest is discarded.

#'

#' @keywords internal

#' @noRd

extract_calls <- function(pd) {

97 1003x

  calls <- lapply(

98 1003x

    pd[pd$parent == 0 & (pd$token != "COMMENT" | grepl("@linksto", pd$text, fixed = TRUE)), "id"],

99 1003x

    function(parent) {

100 1136x

      rbind(

101 1136x

        pd[pd$id == parent, ],

102 1136x

        get_children(pd = pd, parent = parent)

103

104

105

106 1003x

  calls <- Filter(function(call) !(nrow(call) == 1 && call$token == "';'"), calls)

107 1003x

  calls <- Filter(Negate(is.null), calls)

108 1003x

  calls <- fix_shifted_comments(calls)

109 1003x

  calls <- remove_custom_assign(calls, c(":="))

110 1003x

  fix_arrows(calls)

111

112

113

#' @keywords internal

114

#' @noRd

115

get_children <- function(pd, parent) {

116 11104x

  idx_children <- abs(pd$parent) == parent

117 11104x

  children <- pd[idx_children, ]

118 11104x

  if (nrow(children) == 0) {

119 6306x

    return(NULL)

120

121

122 4798x

  if (parent > 0) {

123 4798x

    do.call(rbind, c(list(children), lapply(children$id, get_children, pd = pd)))

124

125

126

127

#' Fixes edge case of comments being shifted to the next call.

128

#' @keywords internal

129

#' @noRd

130

fix_shifted_comments <- function(calls) {

131

  # If the first or the second token is a @linksto COMMENT,

132

  #  then it belongs to the previous call.

133 1003x

  if (length(calls) >= 2) {

134 94x

    for (i in 2:length(calls)) {

135 170x

      comment_idx <- grep("@linksto", calls[[i]][, "text"])

136 170x

      if (isTRUE(comment_idx[1] <= 2)) {

137 9x

        calls[[i - 1]] <- rbind(

138 9x

          calls[[i - 1]],

139 9x

          calls[[i]][comment_idx[1], ]

140

141 9x

        calls[[i]] <- calls[[i]][-comment_idx[1], ]

142

143

144

145 1003x

  Filter(nrow, calls)

146

147

148

#' Fixes edge case of custom assignments operator being treated as assignment.

149

#'

150

#' @param exclude (`character`) custom assignment operators to be excluded

151

#' @keywords internal

152

#' @noRd

153

remove_custom_assign <- function(calls, exclude = NULL) {

154 1003x

  checkmate::assert_list(calls)

155 1003x

  checkmate::assert_character(exclude, null.ok = TRUE)

156 1003x

  lapply(calls, function(call) {

157 1129x

    if (!is.null(exclude)) {

158 1129x

      call[!(call$token == "LEFT_ASSIGN" & call$text %in% exclude), ]

159

    } else {

160 !

      call

161

162

})

163

164

165

#' Fixes edge case of `<-` assignment operator being called as function,

166

#' which is \code{`<-`(y,x)} instead of traditional `y <- x`.

167

#' @keywords internal

168

#' @noRd

169

fix_arrows <- function(calls) {

170 1003x

  checkmate::assert_list(calls)

171 1003x

  lapply(calls, function(call) {

172 1129x

    sym_fun <- call$token == "SYMBOL_FUNCTION_CALL"

173 1129x

    call[sym_fun, ] <- sub_arrows(call[sym_fun, ])

174 1129x

    call

175

})

176

177

178

#' Execution of assignment operator substitutions for a call.

179

#' @keywords internal

180

#' @noRd

181

sub_arrows <- function(call) {

182 1129x

  checkmate::assert_data_frame(call)

183 1129x

  map <- data.frame(

184 1129x

    row.names = c("<-", "<<-", "="),

185 1129x

    token = rep("LEFT_ASSIGN", 3),

186 1129x

    text = rep("<-", 3)

187

188 1129x

  sub_ids <- call$text %in% rownames(map)

189 1129x

  call[sub_ids, c("token", "text")] <- map[call$text[sub_ids], ]

190 1129x

  call

191

192

193

# code_graph ----

194

195

#' Extract object occurrence

196

#'

197

#' Extracts objects occurrence within calls passed by `pd`.

198

#' Also detects which objects depend on which within a call.

199

#'

200

#' @param pd `data.frame`;

201

#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;

202

#'  created by `extract_calls()` function

203

#'

204

#' @return

205

#' A character vector listing names of objects that depend on this call

206

#' and names of objects that this call depends on.

207

#' Dependencies are listed after the `"<-"` string, e.g. `c("a", "<-", "b", "c")` means that in this call object `a`

208

#' depends on objects `b` and `c`.

209

#' If a call is tagged with `@linksto a`, then object `a` is understood to depend on that call.

210

#'

211

#' @keywords internal

212

#' @noRd

213

extract_occurrence <- function(pd) {

214 385x

  is_in_function <- function(x) {

215

    # If an object is a function parameter,

216

    # then in calls_pd there is a `SYMBOL_FORMALS` entry for that object.

217 374x

    function_id <- x[x$token == "FUNCTION", "parent"]

218 374x

    if (length(function_id)) {

219 20x

      x$id %in% get_children(x, function_id[1])$id

220

    } else {

221 354x

      rep(FALSE, nrow(x))

222

223

224 385x

  in_parenthesis <- function(x) {

225 284x

    if (any(x$token %in% c("LBB", "'['"))) {

226 7x

      id_start <- min(x$id[x$token %in% c("LBB", "'['")])

227 7x

      id_end <- min(x$id[x$token == "']'"])

228 7x

      x$text[x$token == "SYMBOL" & x$id > id_start & x$id < id_end]

229

230

231

232

  # Handle data(object)/data("object")/data(object, envir = ) independently.

233 385x

  data_call <- find_call(pd, "data")

234 385x

  if (data_call) {

235 3x

    sym <- pd[data_call + 1, "text"]

236 3x

    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))

237

238

  # Handle assign(x = ).

239 382x

  assign_call <- find_call(pd, "assign")

240 382x

  if (assign_call) {

241

    # Check if parameters were named.

242

    # "','" is for unnamed parameters, where "SYMBOL_SUB" is for named.

243

    # "EQ_SUB" is for `=` appearing after the name of the named parameter.

244 8x

    if (any(pd$token == "SYMBOL_SUB")) {

245 4x

      params <- pd[pd$token %in% c("SYMBOL_SUB", "','", "EQ_SUB"), "text"]

246

      # Remove sequence of "=", ",".

247 4x

      if (length(params > 1)) {

248 4x

        remove <- integer(0)

249 4x

        for (i in 2:length(params)) {

250 20x

          if (params[i - 1] == "=" && params[i] == ",") {

251 4x

            remove <- c(remove, i - 1, i)

252

253

254 3x

        if (length(remove)) params <- params[-remove]

255

256 4x

      pos <- match("x", setdiff(params, ","), nomatch = match(",", params, nomatch = 0))

257 4x

      if (!pos) {

258 !

        return(character(0L))

259

260

      # pos is indicator of the place of 'x'

261

      # 1. All parameters are named, but none is 'x' - return(character(0L))

262

      # 2. Some parameters are named, 'x' is in named parameters: match("x", setdiff(params, ","))

263

      # - check "x" in params being just a vector of named parameters.

264

      # 3. Some parameters are named, 'x' is not in named parameters

265

      # - check first appearance of "," (unnamed parameter) in vector parameters.

266

    } else {

267

      # Object is the first entry after 'assign'.

268 4x

      pos <- 1

269

270 8x

    sym <- pd[assign_call + pos, "text"]

271 8x

    return(c(gsub("^['\"]|['\"]$", "", sym), "<-"))

272

273

274

  # What occurs in a function body is not tracked.

275 374x

  x <- pd[!is_in_function(pd), ]

276 374x

  sym_cond <- which(x$token %in% c("SPECIAL", "SYMBOL", "SYMBOL_FUNCTION_CALL"))

277 374x

  sym_fc_cond <- which(x$token == "SYMBOL_FUNCTION_CALL")

278

279 374x

  if (length(sym_cond) == 0) {

280 17x

    return(character(0L))

281

282

  # Watch out for SYMBOLS after $ and @. For x$a x@a: x is object, a is not.

283

  # For x$a, a's ID is $'s ID-2 so we need to remove all IDs that have ID = $ID - 2.

284 357x

  dollar_ids <- x[x$token %in% c("'$'", "'@'"), "id"]

285 357x

  if (length(dollar_ids)) {

286 6x

    object_ids <- x[sym_cond, "id"]

287 6x

    after_dollar <- object_ids[(object_ids - 2) %in% dollar_ids]

288 6x

    sym_cond <- setdiff(sym_cond, which(x$id %in% after_dollar))

289

290

291 357x

  assign_cond <- grep("ASSIGN", x$token)

292 357x

  if (!length(assign_cond)) {

293 73x

    return(c("<-", unique(x[sym_cond, "text"])))

294

295

296

  # For cases like 'eval(expression(c <- b + 2))' removes 'eval(expression('.

297 284x

  sym_cond <- sym_cond[!(sym_cond < min(assign_cond) & sym_cond %in% sym_fc_cond)]

298

299

  # If there was an assignment operation detect direction of it.

300 284x

  if (unique(x$text[assign_cond]) == "->") { # What if there are 2 assignments: e.g. a <- b -> c.

301 1x

    sym_cond <- rev(sym_cond)

302

303

304 284x

  after <- match(min(x$id[assign_cond]), sort(x$id[c(min(assign_cond), sym_cond)])) - 1

305 284x

  ans <- append(x[sym_cond, "text"], "<-", after = max(1, after))

306 284x

  roll <- in_parenthesis(pd)

307 284x

  if (length(roll)) {

308 3x

    c(setdiff(ans, roll), roll)

309

  } else {

310 281x

ans

311

312

313

314

#' Extract side effects

315

#'

316

#' Extracts all object names from the code that are marked with `@linksto` tag.

317

#'

318

#' The code may contain functions calls that create side effects, e.g. modify the environment.

319

#' Static code analysis may be insufficient to determine which objects are created or modified by such a function call.

320

#' The `@linksto` comment tag is introduced to mark a call as having a (side) effect on one or more objects.

321

#' With this tag a complete object dependency structure can be established.

322

#' Read more about side effects and the usage of `@linksto` tag in [`get_code_dependencies()`] function.

323

#'

324

#' @param pd `data.frame`;

325

#'  one of the results of `utils::getParseData()` split into subsets representing individual calls;

326

#'  created by `extract_calls()` function

327

#'

328

#' @return

329

#' A character vector of names of objects

330

#' depending a call tagged with `@linksto` in a corresponding element of `pd`.

331

#'

332

#' @keywords internal

333

#' @noRd

334

extract_side_effects <- function(pd) {

335 372x

  linksto <- grep("@linksto", pd[pd$token == "COMMENT", "text"], value = TRUE)

336 372x

  unlist(strsplit(sub("\\s*#.*@linksto\\s+", "", linksto), "\\s+"))

337

338

339

#' @param parsed_code results of `parse(text = code, keep.source = TRUE` (parsed text)

340

#' @keywords internal

341

#' @noRd

342

extract_dependency <- function(parsed_code) {

343 408x

  full_pd <- normalize_pd(utils::getParseData(parsed_code))

344 408x

  reordered_full_pd <- extract_calls(full_pd)

345

346

  # Early return on empty code

347 408x

  if (length(reordered_full_pd) == 0L) {

348 36x

    return(NULL)

349

350

351 372x

  if (length(parsed_code) == 0L) {

352 1x

    return(extract_side_effects(reordered_full_pd[[1]]))

353

354 371x

  expr_ix <- lapply(parsed_code[[1]], class) == "{"

355

356

  # Build queue of expressions to parse individually

357 371x

  queue <- list()

358 371x

  parsed_code_list <- if (all(!expr_ix)) {

359 363x

    list(parsed_code)

360

  } else {

361 8x

    queue <- as.list(parsed_code[[1]][expr_ix])

362 8x

    new_list <- parsed_code[[1]]

363 8x

    new_list[expr_ix] <- NULL

364 8x

    list(parse(text = as.expression(new_list), keep.source = TRUE))

365

366

367 371x

  while (length(queue) > 0) {

368 22x

    current <- queue[[1]]

369 22x

    queue <- queue[-1]

370 22x

    if (identical(current[[1L]], as.name("{"))) {

371 8x

      queue <- append(queue, as.list(current)[-1L])

372

    } else {

373 14x

      parsed_code_list[[length(parsed_code_list) + 1]] <- parse(text = as.expression(current), keep.source = TRUE)

374

375

376

377 371x

  parsed_occurences <- lapply(

378 371x

    parsed_code_list,

379 371x

    function(parsed_code) {

380 385x

      pd <- normalize_pd(utils::getParseData(parsed_code))

381 385x

      reordered_pd <- extract_calls(pd)

382 385x

      if (length(reordered_pd) > 0) {

383

        # extract_calls is needed to reorder the pd so that assignment operator comes before symbol names

384

        # extract_calls is needed also to substitute assignment operators into specific format with fix_arrows

385

        # extract_calls is needed to omit empty calls that contain only one token `"';'"`

386

        # This cleaning is needed as extract_occurrence assumes arrows are fixed, and order is different

387

        # than in original pd

388 385x

        extract_occurrence(reordered_pd[[1]])

389

390

391

392

393

  # Merge results together

394 371x

  result <- Reduce(

395 371x

    function(u, v) {

396 385x

      ix <- if ("<-" %in% v) min(which(v == "<-")) else 0

397 385x

      u$left_side <- c(u$left_side, v[seq_len(max(0, ix - 1))])

398 385x

      u$right_side <- c(

399 385x

        u$right_side,

400 385x

        if (ix == length(v)) character(0L) else v[seq(ix + 1, max(ix + 1, length(v)))]

401

402 385x

403

},

404 371x

    init = list(left_side = character(0L), right_side = character(0L)),

405 371x

    x = parsed_occurences

406

407

408 371x

  c(extract_side_effects(reordered_full_pd[[1]]), result$left_side, "<-", result$right_side)

409

410

411

# graph_parser ----

412

413

#' Return the indices of calls needed to reproduce an object

414

#'

415

#' @param x The name of the object to return code for.

416

#' @param graph A result of `code_graph()`.

417

#'

418

#' @return

419

#' Integer vector of indices that can be applied to `graph` to obtain all calls required to reproduce object `x`.

420

#'

421

#' @keywords internal

422

#' @noRd

423

graph_parser <- function(x, graph) {

424

  # x occurrences (lhs)

425 349x

  occurrence <- vapply(

426 349x

    graph, function(call) {

427 593x

      ind <- match("<-", call, nomatch = length(call) + 1L)

428 593x

      x %in% call[seq_len(ind - 1L)]

429

},

430 349x

    logical(1)

431

432

433

  # x-dependent objects (rhs)

434 349x

  dependencies <- lapply(graph[occurrence], function(call) {

435 155x

    ind <- match("<-", call, nomatch = 0L)

436 155x

    call[(ind + 1L):length(call)]

437

})

438 349x

  dependencies <- setdiff(unlist(dependencies), x)

439

440 349x

  dependency_occurrences <- lapply(dependencies, function(dependency) {

441

    # track down dependencies and where they occur on the lhs in previous calls

442 251x

    last_x_occurrence <- max(which(occurrence))

443 251x

    reduced_graph <- utils::head(graph[seq_len(last_x_occurrence)], -1)

444 251x

    c(graph_parser(dependency, reduced_graph), last_x_occurrence)

445

})

446

447 349x

  sort(unique(c(which(occurrence), unlist(dependency_occurrences))))

448

449

450

451

# default_side_effects --------------------------------------------------------------------------------------------

452

453

#' Detect library calls

454

#'

455

#' Detects `library()` and `require()` function calls.

456

#'

457

#' @param `graph` the dependency graph, result of `lapply(code, attr, "dependency")`

458

#'

459

#' @return

460

#' Integer vector of indices that can be applied to `graph` to obtain all calls containing

461

#' `library()` or `require()` calls that are always returned for reproducibility.

462

#'

463

#' @keywords internal

464

#' @noRd

465

detect_libraries <- function(graph) {

466 83x

  defaults <- c("library", "require")

467

468 83x

  which(

469 83x

    unlist(

470 83x

      lapply(

471 83x

        graph, function(x) {

472 237x

          any(grepl(pattern = paste(defaults, collapse = "|"), x = x))

473

474

475

476

477

478

479

480

# utils -----------------------------------------------------------------------------------------------------------

481

482

483

#' Normalize parsed data removing backticks from symbols

484

#'

485

#' @param pd `data.frame` resulting from `utils::getParseData()` call.

486

#'

487

#' @return `data.frame` with backticks removed from `text` column for `SYMBOL` tokens.

488

#'

489

#' @keywords internal

490

#' @noRd

491

normalize_pd <- function(pd) {

492

  # Remove backticks from SYMBOL tokens

493 1003x

  symbol_index <- grepl("^SYMBOL.*$", pd$token)

494 1003x

  pd[symbol_index, "text"] <- gsub("^`(.*)`$", "\\1", pd[symbol_index, "text"])

495

496 1003x

pd

497

498

499

500

# split_code ------------------------------------------------------------------------------------------------------

501

502

503

#' Get line/column in the source where the calls end

504

#'

505

#'

506

#' @param code `character(1)`

507

#'

508

#' @return `matrix` with `colnames = c("line", "col")`

509

#'

510

#' @keywords internal

511

#' @noRd

512

get_call_breaks <- function(code) {

513 210x

  parsed_code <- parse(text = code, keep.source = TRUE)

514 210x

  pd <- utils::getParseData(parsed_code)

515 210x

  pd <- normalize_pd(pd)

516 210x

  pd <- pd[pd$token != "';'", ]

517 210x

  call_breaks <- t(sapply(

518 210x

    extract_calls(pd),

519 210x

    function(x) {

520 372x

      matrix(c(max(x$line2), max(x$col2[x$line2 == max(x$line2)])))

521

522

))

523 210x

  call_breaks <- call_breaks[-nrow(call_breaks), , drop = FALSE] # breaks in between needed only

524 210x

  if (nrow(call_breaks) == 0L) {

525 122x

    call_breaks <- matrix(numeric(0), ncol = 2)

526

527 210x

  colnames(call_breaks) <- c("line", "col")

528 210x

  call_breaks

529

530

531

#' Split code by calls

532

#'

533

#' @param code `character` with the code.

534

#'

535

#' @return list of `character`s of the length equal to the number of calls in `code`.

536

#'

537

#' @keywords internal

538

#' @noRd

539

split_code <- function(code) {

540 210x

  call_breaks <- get_call_breaks(code)

541 210x

  if (nrow(call_breaks) == 0) {

542 122x

    return(code)

543

544 88x

  call_breaks <- call_breaks[order(call_breaks[, "line"], call_breaks[, "col"]), , drop = FALSE]

545 88x

  code_split <- strsplit(code, split = "\n", fixed = TRUE)[[1]]

546 88x

  char_count_lines <- c(0, cumsum(sapply(code_split, nchar, USE.NAMES = FALSE) + 1), -1)[seq_along(code_split)]

547

548 88x

  idx_start <- c(

549 88x

    0, # first call starts in the beginning of src

550 88x

    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"] + 1

551

552 88x

  idx_end <- c(

553 88x

    char_count_lines[call_breaks[, "line"]] + call_breaks[, "col"],

554 88x

    nchar(code) # last call end in the end of src

555

556 88x

  new_code <- substring(code, idx_start, idx_end)

557

558

  # line split happens before call terminator (it could be `;` or `\n`) and the terminator goes to the next line

559

  # we need to move remove leading and add \n instead when combining calls

560 88x

  c(new_code[1], gsub("^[\t ]*(\n|;)", "", new_code[-1]))

561

RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4