Skip to content

Commit 1ccfa40

Browse files
authored
Log the reason for failures in qrmi_resource_new(), qrmi_resource_is_accessible(), etc. (#149)
* Log the resource_new() error reason
* Add the QRMI_PLUGIN_ERROR environment variable
* keep_if_exists
* Remove the function name from the log message
* Call qrmi_get_last_error()
* Use the actual qrmi_config.json path
1 parent 48fc674 commit 1ccfa40

File tree

1 file changed

+45
-30
lines changed

1 file changed

+45
-30
lines changed

plugins/spank_qrmi/spank_qrmi.c

Lines changed: 45 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ static List g_acquired_resources = NULL;
5151
static qpu_resource_t *_acquired_resource_create(char *name, QrmiResourceType type,
5252
const char *token);
5353
static void acquired_resource_destroy(void *object);
54-
static qpu_resource_t *_acquire_qpu(char *name, QrmiResourceType type);
54+
static qpu_resource_t *_acquire_qpu(spank_t spank_ctxt, char *name, QrmiResourceType type);
5555
static void _release_qpu(qpu_resource_t *res);
5656

5757
/*
@@ -207,8 +207,8 @@ int slurm_spank_init_post_opt(spank_t spank_ctxt, int argc, char **argv) {
207207
while ((token = strtok_r(rest, ",", &rest))) {
208208
QrmiResourceDef *res = qrmi_config_resource_def_get(cnf, token);
209209
if (res != NULL) {
210-
slurm_debug("%s: name(%s), type(%d) found in qrmi_config",
211-
plugin_name, res->name, res->type);
210+
slurm_debug("%s: name(%s), type(%d) found in %s",
211+
plugin_name, res->name, res->type, argv[0]);
212212
/*
213213
* If user specifies access details in environment variables,
214214
* these are available as job environment variables. Reads through
@@ -261,7 +261,7 @@ int slurm_spank_init_post_opt(spank_t spank_ctxt, int argc, char **argv) {
261261
/*
262262
* Acquire QPU resource.
263263
*/
264-
qpu_resource_t *acquired = _acquire_qpu(res->name, res->type);
264+
qpu_resource_t *acquired = _acquire_qpu(spank_ctxt, res->name, res->type);
265265
if (acquired != NULL) {
266266
slurm_list_append(g_acquired_resources, acquired);
267267
qrmi_buf_envvarname_for_res_create(&keybuf, res->name,
@@ -278,7 +278,7 @@ int slurm_spank_init_post_opt(spank_t spank_ctxt, int argc, char **argv) {
278278
}
279279
qrmi_config_resource_def_free(res);
280280
} else {
281-
slurm_debug("resource %s not found.", token);
281+
slurm_error("resource %s not found in %s", token, argv[0]);
282282
}
283283
}
284284
free(bufp);
@@ -544,35 +544,42 @@ static void acquired_resource_destroy(void *object) {
544544
* Acquire QPU resource specified by `name` and `type`. Returns
545545
* qpu_resource_t object if succeeded.
546546
*/
547-
static qpu_resource_t *_acquire_qpu(char *name, QrmiResourceType type) {
548-
qpu_resource_t *record = NULL;
547+
static qpu_resource_t *_acquire_qpu(spank_t spank_ctxt, char *name, QrmiResourceType type) {
549548
char *acquisition_token = NULL;
550549
bool is_accessible = false;
551550
QrmiReturnCode rc;
551+
const char* last_error = NULL;
552552

553553
void *qrmi = qrmi_resource_new(name, type);
554-
if (qrmi != NULL) {
555-
slurm_debug("%s, qrmi: %p", plugin_name, qrmi);
556-
rc = qrmi_resource_is_accessible(qrmi, &is_accessible);
557-
if ((rc != QRMI_RETURN_CODE_SUCCESS) || (is_accessible == false)) {
558-
slurm_error("%s, %s is not accessible", plugin_name, name);
559-
//functbl->free(qrmi);
560-
qrmi_resource_free(qrmi);
561-
return NULL;
562-
}
563-
rc = qrmi_resource_acquire(qrmi, &acquisition_token);
564-
if ((rc == QRMI_RETURN_CODE_SUCCESS) && (acquisition_token != NULL)) {
565-
slurm_debug("%s, acquisition_token: %s", plugin_name,
566-
acquisition_token);
567-
record = _acquired_resource_create(name, type, acquisition_token);
568-
}
554+
if (qrmi == NULL) {
555+
last_error = qrmi_get_last_error();
556+
slurm_error("%s, %s", plugin_name, last_error);
557+
spank_setenv(spank_ctxt, "QRMI_PLUGIN_ERROR", last_error, KEEP_IF_EXISTS);
558+
qrmi_string_free((char*)last_error);
559+
return NULL;
560+
}
561+
562+
slurm_debug("%s, qrmi: %p", plugin_name, qrmi);
563+
rc = qrmi_resource_is_accessible(qrmi, &is_accessible);
564+
if ((rc != QRMI_RETURN_CODE_SUCCESS) || (is_accessible == false)) {
565+
last_error = qrmi_get_last_error();
566+
slurm_error("%s, %s is not accessible. %s", plugin_name, name, last_error);
567+
qrmi_string_free((char*)last_error);
569568
qrmi_resource_free(qrmi);
570-
} else {
571-
slurm_error("%s/%s: Unsupported resource type: %d", plugin_name,
572-
__func__, type);
569+
return NULL;
570+
}
571+
rc = qrmi_resource_acquire(qrmi, &acquisition_token);
572+
qrmi_resource_free(qrmi);
573+
if ((rc != QRMI_RETURN_CODE_SUCCESS) || (acquisition_token == NULL)) {
574+
last_error = qrmi_get_last_error();
575+
slurm_error("%s, resource acquisition failed: %s. %s", plugin_name, name, last_error);
576+
spank_setenv(spank_ctxt, "QRMI_PLUGIN_ERROR", last_error, KEEP_IF_EXISTS);
577+
qrmi_string_free((char*)last_error);
573578
}
574579

575-
return record;
580+
slurm_debug("%s, acquisition_token: %s", plugin_name,
581+
acquisition_token);
582+
return _acquired_resource_create(name, type, acquisition_token);
576583
}
577584

578585
/*
@@ -589,21 +596,29 @@ static void _release_qpu(qpu_resource_t *res) {
589596
slurm_debug("%s: releasing name(%s), type(%d), token(%s)", plugin_name,
590597
res->name, res->type, res->acquisition_token);
591598
void *qrmi = qrmi_resource_new(res->name, res->type);
599+
if (qrmi == NULL) {
600+
const char* last_error = qrmi_get_last_error();
601+
slurm_error("%s, %s", plugin_name, last_error);
602+
qrmi_string_free((char*)last_error);
603+
return;
604+
}
592605
rc = qrmi_resource_release(qrmi, res->acquisition_token);
593606
if (rc != QRMI_RETURN_CODE_SUCCESS) {
594-
slurm_error("%s: Failed to release acquired resource: name(%s), type(%d), token(%s)",
607+
const char* last_error = qrmi_get_last_error();
608+
slurm_error("%s, Failed to release acquired resource: name(%s), type(%d), token(%s), %s",
595609
plugin_name,
596-
res->name, res->type, res->acquisition_token);
610+
res->name, res->type, res->acquisition_token, last_error);
611+
qrmi_string_free((char*)last_error);
597612
}
598613
rc = qrmi_string_free(res->acquisition_token);
599614
if (rc != QRMI_RETURN_CODE_SUCCESS) {
600-
slurm_error("%s: Failed to free acquisition token string: (%s)",
615+
slurm_error("%s, Failed to free acquisition token string: (%s)",
601616
plugin_name,
602617
res->acquisition_token);
603618
}
604619
rc = qrmi_resource_free(qrmi);
605620
if (rc != QRMI_RETURN_CODE_SUCCESS) {
606-
slurm_error("%s: Failed to free QrmiQuantumResource handle: (%p)",
621+
slurm_error("%s, Failed to free QrmiQuantumResource handle: (%p)",
607622
plugin_name,
608623
qrmi);
609624
}

0 commit comments

Comments
 (0)