Using didehpc to run orderly tasks

When using orderly, some tasks might be slow to run and would be better suited to running on a remote machine. You can use the cluster and orderly bundles to achieve this. If your orderly instance has an OrderlyWeb remote, then you should favour running tasks on the configured remote via orderly::orderly_run_remote (unless the task is too slow even for the OrderlyWeb remote). If your orderly instance is using a SharePoint remote or has no remote, then you can use the cluster to run your slow-running tasks.

Running a task on the cluster

To run a single task or report on the cluster, start by initialising your bundle on a network drive. For example, on Linux (here orderly_root is the path to the root of your orderly instance):

root <- "~/net/home/contexts"
path_bundles <- file.path(root, "bundles")

## Create bundle
bundle <- orderly::orderly_bundle_pack(path_bundles,
                                       "minimal",
                                       root = orderly_root)
#> [ name       ]  minimal
#> [ id         ]  20210917-170405-74c5ef39
#> [ start      ]  2021-09-17 17:04:05
#> [ data       ]  source => dat: 20 x 2
#> [ bundle pack ]  20210917-170405-74c5ef39

Then set up the cluster with the required packages; you will need orderly at a minimum. You can use the list of packages from the report's orderly.yml, e.g. for the report minimal:

orderly_packages <- yaml::read_yaml(
  file.path(orderly_root, "src/minimal/orderly.yml"))$packages
packages <- list(loaded = c("orderly", orderly_packages))
config <- didehpc::didehpc_config(workdir = root)
ctx <- context::context_save(root, packages = packages)
#> [ open:db   ]  rds
obj <- didehpc::queue_didehpc(ctx, config = config)
#> Loading context a2f7131b7f75f321d64919549cacb254
#> [ context   ]  a2f7131b7f75f321d64919549cacb254
#> [ library   ]
#> [ namespace ]  orderly
#> [ source    ]

The orderly task can then be run via orderly::orderly_bundle_run, being careful to make sure that the paths are relative to the workdir passed to didehpc_config:

path <- file.path("bundles", basename(bundle$path))
output_path <- "output"
t <- obj$enqueue(orderly::orderly_bundle_run(path, output_path))

When the job has completed, you can import the returned bundle. If you are on Windows then the path in the result should just work. If you are on Mac or Linux, the returned path uses backslash separators, so you will need to construct the path yourself from its final component:

output <- strsplit(t$wait(100)$path, "\\\\")[[1]]
#> (-) waiting for 1b51e7c...d7d, giving up in 99.5 s (\) waiting for
#> 1b51e7c...d7d, giving up in 99.0 s (|) waiting for 1b51e7c...d7d, giving up in
#> 98.4 s
output_filename <- output[length(output)]
orderly::orderly_bundle_import(file.path(root, output_path, output_filename),
                               root = orderly_root)
#> [ import     ]  minimal:20210917-170405-74c5ef39

And you can see that the report has been run and imported into the orderly archive

orderly::orderly_list_archive(root = orderly_root)
#>      name                       id
#> 1 minimal 20210917-170405-74c5ef39

orderly::orderly_bundle_pack can pack reports for running on the cluster, and takes parameters, instance and remote args like orderly_run. There are also remote equivalents which can be used to pack a bundle on the remote instance; see orderly::orderly_bundle_pack_remote for details.

Bundle multiple reports or one report with multiple sets of parameters

A bundle can only contain one orderly task for running. If you want to run multiple reports on the cluster, or one report with multiple sets of parameters, you need to create multiple bundles. You can make this easier with a script, e.g.

params <- c(0.25, 0.5, 0.75)
bundles <- lapply(params, function(nmin) {
  orderly::orderly_bundle_pack(path_bundles, "other",
                               parameters = list(nmin = nmin),
                               root = orderly_root)
  })
#> [ name       ]  other
#> [ id         ]  20210917-170408-b262a15f
#> [ sources    ]  functions.R
#> [ parameter  ]  nmin: 0.25
#> [ start      ]  2021-09-17 17:04:08
#> [ data       ]  source => extract: 19 x 2
#> [ bundle pack ]  20210917-170408-b262a15f
#> [ name       ]  other
#> [ id         ]  20210917-170408-de8cb4a7
#> [ sources    ]  functions.R
#> [ parameter  ]  nmin: 0.5
#> [ start      ]  2021-09-17 17:04:08
#> [ data       ]  source => extract: 8 x 2
#> [ bundle pack ]  20210917-170408-de8cb4a7
#> [ name       ]  other
#> [ id         ]  20210917-170409-0862e40f
#> [ sources    ]  functions.R
#> [ parameter  ]  nmin: 0.75
#> [ start      ]  2021-09-17 17:04:09
#> [ data       ]  source => extract: 7 x 2
#> [ bundle pack ]  20210917-170409-0862e40f
paths <- vapply(bundles, function(bundle) {
    file.path("bundles", basename(bundle$path))
  }, character(1))

Then queue the tasks with lapply:

t <- obj$lapply(paths, orderly::orderly_bundle_run, output_path)
#> Creating bundle: 'balsamic_chimpanzee'
#> [ bulk      ]  Creating 3 tasks
#> submitting 3 tasks
#> submitting (-) [=========================>-------------] 67% | waited for 0s
#> submitting (\) [=======================================] 100% | waited for 1s

Then import the results:

for (output in t$wait(100)) {
  out <- strsplit(output$path, "\\\\")[[1]]
  output_filename <- out[length(out)]
  orderly::orderly_bundle_import(file.path(root, output_path, output_filename),
                               root = orderly_root)
}
#> (-) [==============================>---------------]  67% | giving up in  99 s
#> (\) [==============================>---------------]  67% | giving up in  99 s
#> (|) [==============================================] 100% | giving up in  98 s
#> [ import     ]  other:20210917-170408-b262a15f
#> [ import     ]  other:20210917-170408-de8cb4a7
#> [ import     ]  other:20210917-170409-0862e40f
orderly::orderly_list_archive(root = orderly_root)
#>      name                       id
#> 1 minimal 20210917-170405-74c5ef39
#> 2   other 20210917-170408-b262a15f
#> 3   other 20210917-170408-de8cb4a7
#> 4   other 20210917-170409-0862e40f