Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a Priority Controlled Binding scheduler #423

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions parsec/mca/sched/pcb/sched_pcb.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (c) 2022 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/

/**
* @file
*
* Priority Controlled Binding scheduler
*
* This scheduler uses some bits of the priority word attached to each
* task to define which set of threads can execute the task.
*
* The bits of the priority that are used are selected with the MCA
* parameter sched_pcb_group_mask, which should contain enough
* consecutive bits to express a number between 0 and N (inclusive), where
* N is the number of 'thread groups'.
*
* Each computing thread of a given process belongs to a thread group.
* If PaRSEC is compiled with HWLOC, the assignment of threads to groups
* is defined using the HWLOC tree hierarchy and the MCA parameter
* sched_pcb_sharing_level: a sched_pcb_sharing_level of L means that all
* threads bound to a core that is under the same node of depth L in the HWLOC
* tree belong to the same group. Setting sched_pcb_sharing_level to 0 means
* that all threads are in the same group (they are under the root of the tree),
* and setting it to parsec_hwloc_nb_levels()-1 means that each thread is
* in its own group, by itself. Intermediate values have different results
* depending on the machine hierarchy.
*
* If PaRSEC is compiled without HWLOC, the MCA parameter is not exposed,
* and there is a single behavior: each thread belongs to its own group,
* by itself.
*
* There is a 'special' group: the group 0 (other groups are named 1 to N).
* Tasks that are bound to that group are in fact shared between all threads
* (as is usual for other schedulers).
*
* So, tasks with a priority 0 are always scheduled opportunistically on
* any thread. Because startup tasks also initialize their priority to -1,
* tasks with priority -1 are also handled specially by allowing any thread
* to execute them (they are assigned the group 0 despite their priority).
*
* Last, the scheduler uses the priority value to order all the tasks that
* are bound to a given group.
*
* At task selection time, a thread compares the priority of the highest
* priority task of the group 0 and the highest priority task of its own
* group, and selects the task with the highest priority.
*
* Access to the task lists is protected with locks.
*/

#ifndef MCA_SCHED_PCB_H
#define MCA_SCHED_PCB_H

#include "parsec/parsec_config.h"
#include "parsec/mca/mca.h"
#include "parsec/mca/sched/sched.h"


BEGIN_C_DECLS

/**
* Globally exported variables: the component descriptor and the scheduler
* module of the Priority Controlled Binding (PCB) scheduler.
*/
PARSEC_DECLSPEC extern const parsec_sched_base_component_t parsec_sched_pcb_component;
PARSEC_DECLSPEC extern const parsec_sched_module_t parsec_sched_pcb_module;
/**
* Static accessor: returns the address of parsec_sched_pcb_component
* as a generic mca_base_component_t pointer.
*/
mca_base_component_t *sched_pcb_static_component(void);
/**
* Value of the MCA parameter sched_pcb_sharing_level. The parameter is only
* registered when PaRSEC is compiled with HWLOC.
*/
extern int sched_pcb_sharing_level;
/**
* Value of the MCA parameter sched_pcb_group_mask: the bits of the task
* priority that designate the thread group.
*/
extern int sched_pcb_group_mask;
/**
* Index of the lowest bit set in sched_pcb_group_mask, computed when the
* component registers its parameters.
*/
extern int sched_pcb_group_shift;

END_C_DECLS
#endif /* MCA_SCHED_PCB_H */
119 changes: 119 additions & 0 deletions parsec/mca/sched/pcb/sched_pcb_component.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (c) 2022 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/

#include "parsec/parsec_config.h"
#include "parsec/runtime.h"

#include "parsec/mca/sched/sched.h"
#include "parsec/mca/sched/pcb/sched_pcb.h"
#include "parsec/papi_sde.h"
#include "parsec/utils/debug.h"
#include "parsec/utils/mca_param.h"

#if defined(PARSEC_HAVE_HWLOC)
#include "parsec/parsec_hwloc.h"
#endif

/*
* Local functions: the query and register hooks referenced by the
* component structure below.
*/
static int sched_pcb_component_query(mca_base_module_t **module, int *priority);
static int sched_pcb_component_register(void);

/*
* Tunables of the scheduler. The values below are the compile-time defaults;
* sched_pcb_component_register() overwrites them from the MCA parameters.
*/
/* Sharing level; the MCA parameter is only registered when HWLOC is available */
int sched_pcb_sharing_level = 1;
/* Bits of the priority word that designate the thread group (bits 24 to 30 by default) */
int sched_pcb_group_mask = 0x7f000000;
/* Position of the lowest bit set in sched_pcb_group_mask (24 for the default mask) */
int sched_pcb_group_shift = 24;
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it.
* The initializer is positional: the order of the entries below must match
* the declaration of the component structures.
*/
const parsec_sched_base_component_t parsec_sched_pcb_component = {

/* First, the mca_component_t struct containing meta information
about the component itself */

{
PARSEC_SCHED_BASE_VERSION_2_0_0,

/* Component name and version */
"pcb",
"", /* options */
PARSEC_VERSION_MAJOR,
PARSEC_VERSION_MINOR,

/* Component open and close functions */
NULL, /*< No open: sched_pcb is always available, no need to check at runtime */
NULL, /*< No close: open did not allocate any resource, no need to release them */
sched_pcb_component_query,
/*< Specific query: returns the PCB module and the priority of this component */
sched_pcb_component_register, /*< Describes the SDE counters and registers the MCA parameters */
"", /*< no reserve */
},
{
/* The component has no metadata */
MCA_BASE_METADATA_PARAM_NONE,
"", /*< no reserve */
}
};
mca_base_component_t *sched_pcb_static_component(void)
{
return (mca_base_component_t *)&parsec_sched_pcb_component;
}

/**
 * Query hook of the PCB component.
 *
 * @param[out] module   receives the address of parsec_sched_pcb_module
 * @param[out] priority receives the priority of this component (2)
 * @return MCA_SUCCESS, unconditionally
 */
static int sched_pcb_component_query(mca_base_module_t **module, int *priority)
{
    /* The module object is const, but the hook signature takes a non-const
     * pointer (it should really be const mca_base_module_t **): go through
     * a void * to hand the address over. */
    void *pcb_module = (void *)&parsec_sched_pcb_module;

    *module   = (mca_base_module_t *)pcb_module;
    *priority = 2;

    return MCA_SUCCESS;
}

/**
 * Register hook of the PCB component.
 *
 * Describes the PAPI-SDE counters of the scheduler, registers the MCA
 * parameters sched_pcb_sharing_level (only when PaRSEC is compiled with
 * HWLOC) and sched_pcb_group_mask, and derives sched_pcb_group_shift from
 * the resulting mask.
 *
 * On return:
 *  - sched_pcb_sharing_level is 1 without HWLOC; with HWLOC it is the
 *    parameter value, raised to 1 if it is not positive and then capped at
 *    parsec_hwloc_nb_levels()-1;
 *  - sched_pcb_group_shift is the index of the lowest bit set in
 *    sched_pcb_group_mask, or 0 if the mask is empty (a warning is issued
 *    in that case).
 *
 * @return MCA_SUCCESS, unconditionally
 */
static int sched_pcb_component_register(void)
{
    PARSEC_PAPI_SDE_DESCRIBE_COUNTER("SCHEDULER::PENDING_TASKS::SCHED=PCB",
                                     "the number of pending tasks for the PCB scheduler");
    PARSEC_PAPI_SDE_DESCRIBE_COUNTER("SCHEDULER::PENDING_TASKS::QUEUE=<VPID>::SCHED=PCB",
                                     "the number of pending tasks that end up in the virtual process <VPID> for the PCB scheduler");

#if defined(PARSEC_HAVE_HWLOC)
    {
        /* Deepest level of the HWLOC hierarchy: default and upper bound of the parameter */
        const int deepest_level = parsec_hwloc_nb_levels() - 1;

        sched_pcb_sharing_level = deepest_level;
        /* NOTE(review): the help text below numbers the levels from the threads up
         * ("level 1 means each thread has its own task list"), while the documentation
         * in sched_pcb.h numbers them from the root of the HWLOC tree down (0 means a
         * single group). Confirm against the module which one is implemented. */
        parsec_mca_param_reg_int_name("sched_pcb", "sharing_level",
                                      "Defines at what level threads share the same task list for the Priority Controlled Binding scheduler. "
                                      "Level 1 means each thread has its own task list, level 2 looks one level above in the HWLOC hierarchy, etc...",
                                      false, false, deepest_level, &sched_pcb_sharing_level);
        /* Same order as before: first raise to 1, then cap at the deepest level */
        if( sched_pcb_sharing_level <= 0 ) {
            sched_pcb_sharing_level = 1;
        }
        if( sched_pcb_sharing_level > deepest_level ) {
            sched_pcb_sharing_level = deepest_level;
        }
    }
#else
    /* Without HWLOC the parameter is not exposed */
    sched_pcb_sharing_level = 1;
#endif

    parsec_mca_param_reg_int_name("sched_pcb", "group_mask",
                                  "Defines what bits of the priority are used to designate a process group. Other bits of the priority value "
                                  "are used to define the priority of the task within that group.",
                                  false, false, 0x7f000000, &sched_pcb_group_mask);

    /* Always derive the shift from the mask, so that the two can never be out of
     * sync (the default mask 0x7f000000 yields 24). The scan is done on an unsigned
     * copy: right-shifting a negative int is implementation-defined. */
    {
        const unsigned int group_mask = (unsigned int)sched_pcb_group_mask;

        if( 0 == group_mask ) {
            parsec_warning("Priority Controlled Binding Scheduler (sched_pcb): sched_pcb_group_mask is set to 0. Scheduler might not work as intended.");
            /* No bit selects a group: keep a shift that is valid for an int
             * (shifting by the width of the type is undefined behavior) */
            sched_pcb_group_shift = 0;
        } else {
            int lowest_bit = 0;
            /* Terminates: group_mask has at least one bit set */
            while( 0 == ((group_mask >> lowest_bit) & 1u) ) {
                lowest_bit++;
            }
            sched_pcb_group_shift = lowest_bit;
        }
    }

    return MCA_SUCCESS;
}
Loading