Skip to content

Commit

Permalink
Merge pull request #40 from brendanf/slurm_monitor
Browse files Browse the repository at this point in the history
add job monitor for SLURM
  • Loading branch information
wlandau committed Apr 10, 2024
2 parents 9f429d3 + 8ad7f9d commit 3082d40
Show file tree
Hide file tree
Showing 10 changed files with 347 additions and 5 deletions.
10 changes: 9 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ Authors@R: c(
email = "mglev1n@gmail.com",
comment = c(ORCID = "0000-0002-9937-9932")
),
person(
given = "Brendan",
family = "Furneaux",
role = "aut",
email = "brendan.furneaux@gmail.com",
comment = c(ORCID = "0000-0003-3522-7363")
),
person(
family = "Eli Lilly and Company",
role = "cph"
Expand All @@ -49,7 +56,8 @@ Imports:
rlang,
utils,
vctrs,
xml2
xml2,
yaml
Suggests:
knitr (>= 1.30),
markdown (>= 1.1),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(crew_class_launcher_sge)
export(crew_class_launcher_slurm)
export(crew_class_monitor_cluster)
export(crew_class_monitor_sge)
export(crew_class_monitor_slurm)
export(crew_controller_lsf)
export(crew_controller_pbs)
export(crew_controller_sge)
Expand All @@ -18,6 +19,7 @@ export(crew_launcher_sge)
export(crew_launcher_slurm)
export(crew_monitor_cluster)
export(crew_monitor_sge)
export(crew_monitor_slurm)
importFrom(R6,R6Class)
importFrom(crew,crew_assert)
importFrom(crew,crew_class_launcher)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# crew.cluster 0.3.0.9000


* Add a "monitor" class for SLURM clusters.

# crew.cluster 0.3.0

Expand Down
120 changes: 120 additions & 0 deletions R/crew_monitor_slurm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#' @title `r lifecycle::badge("experimental")` Create a SLURM monitor object.
#' @export
#' @family slurm
#' @description Construct and validate an `R6` monitor object
#'   for SLURM cluster jobs.
#' @inheritParams crew_monitor_cluster
crew_monitor_slurm <- function(
  verbose = TRUE,
  command_list = as.character(Sys.which("squeue")),
  command_terminate = as.character(Sys.which("scancel"))
) {
  # Build the monitor first, then check its fields before handing it back.
  monitor <- crew_class_monitor_slurm$new(
    verbose = verbose,
    command_list = command_list,
    command_terminate = command_terminate
  )
  monitor$validate()
  monitor
}

#' @title `r lifecycle::badge("experimental")` SLURM monitor class
#' @export
#' @family slurm
#' @description SLURM monitor `R6` class
#' @details See [crew_monitor_slurm()].
crew_class_monitor_slurm <- R6::R6Class(
classname = "crew_class_monitor_slurm",
inherit = crew_class_monitor_cluster,
cloneable = FALSE,
public = list(
#' @description List SLURM jobs.
#'

Check warning on line 31 in R/crew_monitor_slurm.R

View workflow job for this annotation

GitHub Actions / lint

file=R/crew_monitor_slurm.R,line=31,col=7,[trailing_whitespace_linter] Trailing whitespace is superfluous.
#' This function loads the entire SLURM queue for all users, so it may take
#' several seconds to execute. It is intended for interactive use, and
#' should especially be avoided in scripts where it is called frequently.
#' It requires SLURM version 20.02 or higher, along with the YAML plugin.
#' @return A `tibble` with one row per SLURM job and columns with
#' specific details.
#' @param user Character of length 1, user name of the jobs to list.
jobs = function(user = ps::ps_username()) {
  # Cannot be tested with automated tests.
  # Tested in tests/slurm/monitor.R.
  # nocov start
  # `user` is mandatory: NULL does not pass the is.character() check.
  crew::crew_assert(
    user,
    is.character(.),
    length(.) == 1L,
    !anyNA(.),
    nzchar(.),
    message = "'user' must be a nonempty character vector of length 1"
  )
  # Run the list command with --yaml and capture its standard output
  # as a character vector of lines.
  text <- system2(
    private$.command_list,
    args = shQuote(c("--yaml")),
    stdout = TRUE,
    stderr = if_any(private$.verbose, "", FALSE),
    wait = TRUE
  )
  monitor_cols <- c(
    "job_id", "partition", "name", "user_name", "job_state",
    "start_time", "node_count", "state_reason"
  )
  yaml <- yaml::read_yaml(text = text)
  # Convert one parsed job record into a tibble row.
  # Fields are looked up by name one at a time so that a field missing
  # from the record becomes NA under the correct column name.
  # NOTE(review): unlist() flattens each field. If some SLURM version
  # reports a field as a nested mapping, that column would receive more
  # than one value -- confirm against the SLURM versions in use.
  job_row <- function(job) {
    fields <- lapply(
      monitor_cols,
      function(col) unlist(job[[col]]) %||% NA
    )
    names(fields) <- monitor_cols
    nodes <- unlist(job$job_resources$nodes)
    # paste(collapse = ) never returns NULL, so a job without nodes
    # must be mapped to NA explicitly.
    fields$nodes <- if (length(nodes) > 0L) {
      paste(nodes, collapse = ",")
    } else {
      NA_character_
    }
    tibble::new_tibble(fields)
  }
  rows <- lapply(yaml$jobs, job_row)
  out <- if (length(rows) > 0L) {
    do.call(vctrs::vec_rbind, rows)
  } else {
    # Empty queue: keep the expected columns by slicing zero rows from
    # an all-NA template row instead of returning a 0-column data frame.
    job_row(list())[0L, ]
  }
  # which() drops rows whose comparison is NA instead of propagating them.
  out <- out[which(out$user_name == user), ]
  out <- out[which(out$job_state != "CANCELLED"), ]
  out$job_id <- as.character(out$job_id)
  out$start_time <- as.POSIXct(out$start_time, origin = "1970-01-01")
  out
  # nocov end
},
#' @description Terminate one or more SLURM jobs.
#' @details If `jobs` is `NULL` or empty and `all` is `FALSE`,
#'   there is nothing to terminate and the terminate command
#'   is not run.
#' @return `NULL` (invisibly).
#' @param jobs Character vector of job names or job IDs to terminate.
#' Ignored if `all` is set to `TRUE`.
#' @param all Logical of length 1, whether to terminate all the jobs
#' under your user name. This terminates ALL your SLURM jobs,
#' regardless of whether `crew.cluster` launched them,
#' so use with caution!
terminate = function(jobs = NULL, all = FALSE) {
  # Cannot be tested with automated tests.
  # Tested in tests/slurm/monitor.R.
  # nocov start
  # A NULL `jobs` is replaced by a placeholder so the checks below pass.
  crew::crew_assert(
    jobs %||% "x",
    is.character(.),
    !anyNA(.),
    nzchar(.),
    message = paste(
      "'jobs' must be `NULL` or a character vector of",
      "valid job names or IDs."
    )
  )
  crew::crew_assert(
    all,
    isTRUE(.) || isFALSE(.),
    message = "'all' must be TRUE or FALSE."
  )
  # Nothing was requested: avoid invoking the terminate command
  # with an empty argument list.
  if (!all && length(jobs) < 1L) {
    return(invisible())
  }
  # With `all`, select jobs by the current user name; otherwise pass
  # the requested jobs as positional arguments.
  # NOTE(review): confirm that the configured terminate command accepts
  # job names (not only job IDs) as positional arguments.
  args <- shQuote(if_any(all, c("-u", ps::ps_username()), jobs))
  # Show the command's output only in verbose mode.
  stream <- if_any(private$.verbose, "", FALSE)
  system2(
    command = private$.command_terminate,
    args = args,
    stdout = stream,
    stderr = stream,
    wait = TRUE
  )
  invisible()
  # nocov end
}
)
)
4 changes: 3 additions & 1 deletion man/crew_class_launcher_slurm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 90 additions & 0 deletions man/crew_class_monitor_slurm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/crew_controller_slurm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/crew_launcher_slurm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 39 additions & 0 deletions man/crew_monitor_slurm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3082d40

Please sign in to comment.