FOSSology  4.7.1
Open Source License Compliance by Open Source Software
scheduler.c File Reference

Scheduler operations. More...

#include <libfossrepo.h>
#include <agent.h>
#include <database.h>
#include <event.h>
#include <host.h>
#include <interface.h>
#include <scheduler.h>
#include <fossconfig.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <pwd.h>
#include <grp.h>
#include <glib.h>
#include <gio/gio.h>
Include dependency graph for scheduler.c:

Go to the source code of this file.

Classes

struct  stale_ctx
 Context for collect_stale_jobs traversal: carries current time so each node can apply a per-job grace period without an extra pass. More...
 
struct  version_refresh_ctx
 Context structure for the version-refresh tree traversal. More...
 

Macros

#define TEST_ERROR(error, ...)
 
#define SELECT_DECLS(type, name, l_op, w_op, val)   type CONF_##name = val;
 
#define MASK_SIGCHLD   (1 << 0)
 
#define MASK_SIGALRM   (1 << 1)
 
#define MASK_SIGTERM   (1 << 2)
 
#define MASK_SIGQUIT   (1 << 3)
 
#define MASK_SIGHUP   (1 << 4)
 
#define GU_HEADER   "DIRECTORIES"
 
#define GU_GROUP   "PROJECTGROUP"
 
#define GU_USER   "PROJECTUSER"
 
#define SELECT_CONF_INIT(type, name, l_op, w_op, val)
 

Functions

void scheduler_sig_handle (int signo)
 Handles any signals sent to the scheduler that are not SIGCHLD. More...
 
void scheduler_signal (scheduler_t *scheduler)
 Function that handles certain signals being delivered to the scheduler. More...
 
scheduler_t * scheduler_init (gchar *sysconfigdir, log_t *log)
 Create a new scheduler object. More...
 
void scheduler_destroy (scheduler_t *scheduler)
 Free any memory associated with a scheduler_t. More...
 
static gboolean isMaxLimitReached (meta_agent_t *agent)
 Check if the current agent's max limit is respected. More...
 
static gboolean collect_stale_jobs (gpointer key, gpointer val, gpointer data)
 GTraverseFunc: collect CHECKEDOUT jobs with no agents that have been waiting longer than CONF_agent_update_interval seconds. More...
 
static void reap_stale_jobs (scheduler_t *scheduler)
 Reap CHECKEDOUT jobs that never had an agent spawned. More...
 
void scheduler_update (scheduler_t *scheduler)
 Update function called after every event. More...
 
void set_usr_grp (gchar *process_name, fo_conf *config)
 
int kill_scheduler (int force)
 Kills all other running schedulers. More...
 
void scheduler_clear_config (scheduler_t *scheduler)
 Clears any information that is loaded when loading the configuration. More...
 
static gboolean g_tree_collect (gpointer key, gpointer value, gpointer data)
 GTraverseFunc used by g_tree_clear to collect all the keys in a tree. More...
 
void g_tree_clear (GTree *tree)
 Clears the contents of a GTree. More...
 
void scheduler_agent_config (scheduler_t *scheduler)
 Loads a particular agent's configuration file. More...
 
void scheduler_foss_config (scheduler_t *scheduler)
 Loads the configuration data from fossology.conf. More...
 
int scheduler_daemonize (scheduler_t *scheduler)
 Daemonizes the scheduler. More...
 
void scheduler_config_event (scheduler_t *scheduler, void *unused)
 Load both the fossology configuration and all the agent configurations. More...
 
void scheduler_close_event (scheduler_t *scheduler, void *killed)
 Sets the closing flag and possibly kills all currently running agents. More...
 
void scheduler_test_agents (scheduler_t *scheduler, void *unused)
 Event used when the scheduler tests the agents. More...
 
static gboolean version_refresh_kill_agent (int *pid_ptr, agent_t *agent, version_refresh_ctx *ctx)
 GTraverseFunc: respawn a not-yet-working agent on the new binary. More...
 
void scheduler_version_refresh (scheduler_t *scheduler, void *unused)
 Event run when the scheduler's own version changed. More...
 
gint string_is_num (gchar *str)
 Checks if a string is entirely composed of numeric characters. More...
 
gint string_compare (gconstpointer a, gconstpointer b, gpointer user_data)
 
gint int_compare (gconstpointer a, gconstpointer b, gpointer user_data)
 

Variables

int verbose = 0
 The verbose level. More...
 
int closing = 0
 Set if scheduler is shutting down.
 
GThread * main_thread
 Pointer to the main thread.
 
int sigmask = 0
 

Detailed Description

Scheduler operations.

Definition in file scheduler.c.

Macro Definition Documentation

◆ SELECT_CONF_INIT

#define SELECT_CONF_INIT (   type,
  name,
  l_op,
  w_op,
  val 
)
Value:
if(fo_config_has_key(scheduler->sysconfig, "SCHEDULER", #name)) \
CONF_##name = l_op(fo_config_get(scheduler->sysconfig, "SCHEDULER", #name, NULL)); \
V_SPECIAL("CONFIG: %s == " MK_STRING_LIT(w_op) "\n", #name, CONF_##name );
char * fo_config_get(fo_conf *conf, const char *group, const char *key, GError **error)
Gets an element based on its group name and key name. If the group or key is not found,...
Definition: fossconfig.c:336
int fo_config_has_key(fo_conf *conf, char *group, char *key)
Checks if a specific group in the currently parsed configuration file has a specific key.
Definition: fossconfig.c:668
#define MK_STRING_LIT(passed)
Definition: scheduler.h:261

◆ TEST_ERROR

#define TEST_ERROR (   error,
  ... 
)
Value:
if(error) \
{ \
log_printf("ERROR %s.%d: %s\n", \
__FILE__, __LINE__, error->message); \
log_printf("ERROR %s.%d: ", __FILE__, __LINE__); \
log_printf(__VA_ARGS__); \
log_printf("\n"); \
g_clear_error(&error); \
continue; \
}

Test if error is not NULL then print it to the log.

Definition at line 44 of file scheduler.c.

Function Documentation

◆ collect_stale_jobs()

static gboolean collect_stale_jobs ( gpointer  key,
gpointer  val,
gpointer  data 
)
static

GTraverseFunc: collect CHECKEDOUT jobs with no agents that have been waiting longer than CONF_agent_update_interval seconds.

The grace period avoids flagging a job that was just loaded from the DB and is still waiting for the scheduling loop to start its agent.

Parameters
key: unused
val: job_t*
data: stale_ctx*
Returns
FALSE to continue traversal

Definition at line 444 of file scheduler.c.

◆ g_tree_clear()

void g_tree_clear ( GTree *  tree)

Clears the contents of a GTree.

Parameters
tree: the tree to remove all elements from

Definition at line 899 of file scheduler.c.

◆ g_tree_collect()

static gboolean g_tree_collect ( gpointer  key,
gpointer  value,
gpointer  data 
)
static

GTraverseFunc used by g_tree_clear to collect all the keys in a tree.

Parameters
key: The current key
value: The value mapped to the current key
data: A GList** that the key will be appended to
Returns
Always returns 0

Definition at line 885 of file scheduler.c.

◆ int_compare()

gint int_compare ( gconstpointer  a,
gconstpointer  b,
gpointer  user_data 
)

Utility function that enables the agents to be stored in a GTree using the PID of the associated process.

Parameters
a: The pid of the first process
b: The pid of the second process
user_data: unused in this function
Returns
integral value indicating the relationship between the two pids

Definition at line 1361 of file scheduler.c.

◆ isMaxLimitReached()

static gboolean isMaxLimitReached ( meta_agent_t *  agent)
static

Check if the current agent's max limit is respected.

Compare the number of running agents and run limit of the agent.

Parameters
agent: Agent which has to be scheduled.
Returns
True if the agent can be scheduled (no. of running agents < max run limit of the agent), false otherwise.

Definition at line 409 of file scheduler.c.

◆ kill_scheduler()

int kill_scheduler ( int  force)

Kills all other running schedulers.

Parameters
force: if the scheduler should shut down gracefully
Returns
0 for success (i.e. a scheduler was killed), -1 for failure.

This uses the /proc file system to find all processes that have fo_scheduler in the name and sends a kill signal to them.

Definition at line 795 of file scheduler.c.

◆ reap_stale_jobs()

static void reap_stale_jobs ( scheduler_t *  scheduler)
static

Reap CHECKEDOUT jobs that never had an agent spawned.

Skipped while shutting down: at close, CHECKEDOUT jobs with no agents are still valid in-flight work and should be left in the DB for the next start, not failed here.

Parameters
scheduler: the scheduler

Definition at line 482 of file scheduler.c.

◆ scheduler_agent_config()

void scheduler_agent_config ( scheduler_t *  scheduler)

Loads a particular agent's configuration file.

This loads and saves the results as a new meta_agent. This assumes that the configuration file for the agent includes the following key/value pairs:

  1. command: The command that will be used to start the agent
  2. max: The maximum number of this agent that can run at once
  3. special: Anything that is special about the agent

Definition at line 921 of file scheduler.c.

◆ scheduler_clear_config()

void scheduler_clear_config ( scheduler_t *  scheduler)

Clears any information that is loaded when loading the configuration.

Parameters
scheduler: the scheduler to reset the information on

Definition at line 846 of file scheduler.c.

◆ scheduler_close_event()

void scheduler_close_event ( scheduler_t *  scheduler,
void *  killed 
)

Sets the closing flag and possibly kills all currently running agents.

This function will cause the scheduler to slowly shutdown. If killed is true this is a quick, ungraceful shutdown.

Parameters
scheduler: the scheduler
killed: should the scheduler kill all currently executing agents before exiting the event loop, or should it wait for them to finish first.

Definition at line 1220 of file scheduler.c.

◆ scheduler_config_event()

void scheduler_config_event ( scheduler_t *  scheduler,
void *  unused 
)

Load both the fossology configuration and all the agent configurations.

Parameters
scheduler: the scheduler to load the configuration for
unused: this can be called as an event

Definition at line 1197 of file scheduler.c.

◆ scheduler_daemonize()

int scheduler_daemonize ( scheduler_t *  scheduler)

Daemonizes the scheduler.

This will make sure that the pid that is maintained in the scheduler struct is correct during the daemonizing process.

Parameters
scheduler: the scheduler_t struct
Returns
if the daemonizing was successful.

Definition at line 1179 of file scheduler.c.

◆ scheduler_destroy()

void scheduler_destroy ( scheduler_t *  scheduler)

Free any memory associated with a scheduler_t.

This will stop the interface if it is currently running, and free all the memory associated with the different regular expression and similar structures.

Parameters
scheduler
Todo:

Interface close

Repo close

Definition at line 364 of file scheduler.c.

◆ scheduler_foss_config()

void scheduler_foss_config ( scheduler_t *  scheduler)

Loads the configuration data from fossology.conf.

This assumes that fossology.conf contains the following key/value pairs:

  1. port: the port that the scheduler will listen on
  2. LOG_DIR: the directory that the log should be in

There should be a group named HOSTS with all of the hosts listed as key/value pairs under this category. For each of these hosts, the scheduler will create a new host as an internal representation.

Definition at line 1031 of file scheduler.c.

◆ scheduler_init()

scheduler_t* scheduler_init ( gchar *  sysconfigdir,
log_t *  log 
)

Create a new scheduler object.

This will initialize everything to a point where it can be used. All regular expressions, GTree's and the job_queue will be correctly created.

Parameters
sysconfigdir: Directory containing the fossology.conf
log: Log file to log messages to
Returns
A new scheduler_t* that can be further populated

Definition at line 249 of file scheduler.c.

◆ scheduler_sig_handle()

void scheduler_sig_handle ( int  signo)

Handles any signals sent to the scheduler that are not SIGCHLD.

Currently Handles:

Signal Effect
SIGCHLD Scheduler will handle the death of the child process or agent
SIGALRM Scheduler will run agent updates and database updates
SIGTERM Scheduler will gracefully shut down
SIGQUIT Scheduler will forcefully shut down
SIGHUP Scheduler will reload configuration data
Parameters
signo: the number of the signal that was sent

Definition at line 93 of file scheduler.c.

◆ scheduler_signal()

void scheduler_signal ( scheduler_t *  scheduler)

Function that handles certain signals being delivered to the scheduler.

This function is called every time the event loop attempts to take something from the event queue. It will also get called once a second regardless of if a new event has been queued.

This function checks the sigmask variable to check what signals have been received since the last time it was called. The sigmask variable should always be accessed atomically since it is accessed by the event loop thread as well as the signal handlers.

Parameters
scheduler: Scheduler to send signal to

Definition at line 142 of file scheduler.c.

◆ scheduler_test_agents()

void scheduler_test_agents ( scheduler_t *  scheduler,
void *  unused 
)

Event used when the scheduler tests the agents.

Parameters
scheduler: the scheduler struct
unused

Definition at line 1235 of file scheduler.c.

◆ scheduler_update()

void scheduler_update ( scheduler_t *  scheduler)

Update function called after every event.

The heart of the scheduler, the actual scheduling algorithm. This will be passed to the event loop as a call back and will be called every time an event is executed. Therefore the code should be light weight since it will be run very frequently.

Todo:
Currently this will only grab a job and create a single agent to execute the job.
Todo:

Allow for runonpfile jobs to have multiple agents based on size

Allow for job preemption. The scheduler can pause jobs, allow it

Allow for specific hosts to be chosen.

Definition at line 564 of file scheduler.c.

◆ scheduler_version_refresh()

void scheduler_version_refresh ( scheduler_t *  scheduler,
void *  unused 
)

Event run when the scheduler's own version changed.

Kills the agents that can be respawned so they pick up the new binary on the next scheduler_update(). Triggered by scheduler_foss_config() when COMMIT_HASH changes between two config loads (e.g. SIGHUP after a rebuild).

Parameters
scheduler: the scheduler
unused: ignored (required by event signature)

Definition at line 1306 of file scheduler.c.

◆ set_usr_grp()

void set_usr_grp ( gchar *  process_name,
fo_conf *  config 
)

Correctly set the project user and group. The fossology scheduler must run as the user specified by PROJECT_USER and PROJECT_GROUP since the agents must be able to connect to the database. This ensures that that happens correctly.

Parameters
process_name
config

Definition at line 732 of file scheduler.c.

◆ string_compare()

gint string_compare ( gconstpointer  a,
gconstpointer  b,
gpointer  user_data 
)

Utility function that enables the use of the strcmp function with a GTree.

Parameters
a: The first string
b: The second string
user_data: unused in this function
Returns
Integral value indicating the relationship between the two strings

Definition at line 1347 of file scheduler.c.

◆ string_is_num()

gint string_is_num ( gchar *  str)

Checks if a string is entirely composed of numeric characters.

Parameters
str: the string to test
Returns
TRUE if the string is entirely numeric, FALSE otherwise

Definition at line 1328 of file scheduler.c.

◆ version_refresh_kill_agent()

static gboolean version_refresh_kill_agent ( int *  pid_ptr,
agent_t *  agent,
version_refresh_ctx *  ctx 
)
static

GTraverseFunc: respawn a not-yet-working agent on the new binary.

Resets every meta agent's cached version so the first respawned agent of each type sets the new one.

Only AG_SPAWNED agents are killed (job still JB_CHECKEDOUT, no data sent): the death event re-queues those for a fresh dispatch. AG_RUNNING agents are left to finish on the binary they started with, otherwise their half-done job would be marked complete.

Parameters
pid_ptr: Key in the agents GTree (pid)
agent: The running agent
ctx: version_refresh_ctx*
Returns
0 to continue traversal

Definition at line 1270 of file scheduler.c.

Variable Documentation

◆ verbose

int verbose = 0

The verbose level.

The verbose flag for the cli.

Definition at line 57 of file scheduler.c.