summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorÍñigo Huguet <ihuguet@redhat.com>2024-01-26 17:42:04 +0100
committerÍñigo Huguet <ihuguet@redhat.com>2024-02-21 11:27:29 +0100
commitdd7810c473c9d5c0dff94f0fa76e7843c3a5a21d (patch)
treebe10415f3072f5eb06a645a0ece1c9b4857ca736
parent1ba2b774023c7ca2f8d5094ad6278b9eefa59671 (diff)
platform: destroy VFs before changing the eswitch mode
It is not safe to change the eswitch mode when there are VFs already created: it often fails, or even worse, doesn't fail immediatelly but there are later problems with the VFs. What is supposed to be well tested in all drivers is to change the eswitch mode with no VFs created, and then create the VFs, so let's set num_vfs=0 before changing the eswitch mode. As we want to change num_vfs asynchronously in a separate thread, we need to do a multi-step process with callbacks each time that a step finish (before it was just set num_vfs asynchronously and invoke the callback when it's done). This makes link_set_sriov_params_async to become even larger and more complex than it already was. Refactor it to make it cleaner and easier to follow, and hopefully less error prone, and implement that multi-step process. (cherry picked from commit 770340627ba78496843d731f971562c396fe2d04)
-rw-r--r--src/libnm-platform/nm-linux-platform.c413
1 files changed, 305 insertions, 108 deletions
diff --git a/src/libnm-platform/nm-linux-platform.c b/src/libnm-platform/nm-linux-platform.c
index 28eafc837e..1e92f60309 100644
--- a/src/libnm-platform/nm-linux-platform.c
+++ b/src/libnm-platform/nm-linux-platform.c
@@ -8882,21 +8882,241 @@ nla_put_failure:
g_return_val_if_reached(FALSE);
}
+static gint64
+sriov_read_sysctl_uint(NMPlatform *platform,
+ int dirfd,
+ const char *ifname,
+ const char *dev_file,
+ GError **error)
+{
+ const char *path;
+ gint64 val;
+
+ nm_assert(NM_STRLEN("device/%s") + strlen(dev_file));
+
+ path = nm_sprintf_bufa(256, "device/%s", dev_file);
+ val = nm_platform_sysctl_get_int_checked(platform,
+ NMP_SYSCTL_PATHID_NETDIR_UNSAFE_A(dirfd, ifname, path),
+ 10,
+ 0,
+ G_MAXUINT,
+ -1);
+
+ if (val < 0) {
+ g_set_error(error,
+ NM_UTILS_ERROR,
+ NM_UTILS_ERROR_UNKNOWN,
+ "couldn't read %s: %s",
+ dev_file,
+ nm_strerror_native(errno));
+ return -errno;
+ }
+
+ return val;
+}
+
+static gboolean
+sriov_set_autoprobe(NMPlatform *platform,
+ int dirfd,
+ const char *ifname,
+ NMOptionBool autoprobe,
+ GError **error)
+{
+ int current_autoprobe =
+ (int) sriov_read_sysctl_uint(platform, dirfd, ifname, "sriov_drivers_autoprobe", error);
+
+ if (current_autoprobe == -ENOENT) {
+ /* older kernel versions don't have this sysctl. Assume the value is "1". */
+ current_autoprobe = 1;
+ g_clear_error(error);
+ }
+
+ if (current_autoprobe < 0)
+ return FALSE;
+
+ if (autoprobe != NM_OPTION_BOOL_DEFAULT && current_autoprobe != autoprobe) {
+ if (!nm_platform_sysctl_set(
+ platform,
+ NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_drivers_autoprobe"),
+ autoprobe == 1 ? "1" : "0")) {
+ g_set_error(error,
+ NM_UTILS_ERROR,
+ NM_UTILS_ERROR_UNKNOWN,
+ "couldn't set SR-IOV drivers-autoprobe to %d: %s",
+ (int) autoprobe,
+ nm_strerror_native(errno));
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+#define _SRIOV_ASYNC_MAX_STEPS 4
+
+typedef struct _SriovAsyncState {
+ NMPlatform *platform;
+ int ifindex;
+ guint num_vfs;
+ _NMSriovEswitchMode eswitch_mode;
+ void (*steps[_SRIOV_ASYNC_MAX_STEPS])(struct _SriovAsyncState *);
+ int current_step;
+ NMPlatformAsyncCallback callback;
+ gpointer data;
+ GCancellable *cancellable;
+} SriovAsyncState;
+
static void
-sriov_idle_cb(gpointer user_data, GCancellable *cancellable)
+sriov_async_invoke_callback(gpointer user_data, GCancellable *cancellable)
{
- gs_unref_object NMPlatform *platform = NULL;
- gs_free_error GError *cancelled_error = NULL;
- gs_free_error GError *error = NULL;
- NMPlatformAsyncCallback callback;
- gpointer callback_data;
+ gs_free_error GError *cancelled_error = NULL;
+ gs_free_error GError *error = NULL;
+ NMPlatformAsyncCallback callback;
+ gpointer callback_data;
g_cancellable_set_error_if_cancelled(cancellable, &cancelled_error);
- nm_utils_user_data_unpack(user_data, &platform, &error, &callback, &callback_data);
+ nm_utils_user_data_unpack(user_data, &error, &callback, &callback_data);
callback(cancelled_error ?: error, callback_data);
}
static void
+sriov_async_finish_err(SriovAsyncState *async_state, GError *error)
+{
+ NMPlatform *platform = async_state->platform;
+
+ _LOGD("finished configuring SR-IOV, error: %s", error ? error->message : "none");
+
+ if (async_state->callback) {
+ /* nm_platform_link_set_sriov_params() promises to always call the callback,
+ * and always asynchronously. We might have reached here without doing
+ * any asynchronous task, so invoke the user's callback in the idle task
+ * to make it asynchronous. Actually, let's make it simple and do it
+ * always in this way, even if asynchronous tasks were made.
+ */
+ gpointer packed = nm_utils_user_data_pack(g_steal_pointer(&error),
+ async_state->callback,
+ async_state->data);
+ nm_utils_invoke_on_idle(async_state->cancellable, sriov_async_invoke_callback, packed);
+ }
+
+ g_object_unref(async_state->platform);
+ g_object_unref(async_state->cancellable);
+ g_free(async_state);
+ g_free(error);
+}
+
+static void
+sriov_async_call_next_step(SriovAsyncState *async_state)
+{
+ if (g_cancellable_is_cancelled(async_state->cancellable)) {
+ sriov_async_finish_err(async_state, NULL); /* The error will be set later */
+ return;
+ }
+
+ async_state->current_step++;
+
+ nm_assert(async_state->current_step >= 0);
+ nm_assert(async_state->current_step < _SRIOV_ASYNC_MAX_STEPS);
+ nm_assert(async_state->steps[async_state->current_step] != NULL);
+
+ async_state->steps[async_state->current_step](async_state);
+}
+
+static void
+sriov_async_sysctl_done_cb(GError *error, gpointer data)
+{
+ SriovAsyncState *async_state = data;
+
+ if (error)
+ sriov_async_finish_err(async_state, g_error_copy(error));
+ else
+ sriov_async_call_next_step(async_state);
+}
+
+static void
+sriov_async_set_num_vfs(SriovAsyncState *async_state, const char *val)
+{
+ NMPlatform *platform = async_state->platform;
+ const char *values[] = {val, NULL};
+ nm_auto_close int dirfd = -1;
+ char ifname[IFNAMSIZ];
+ gs_free_error GError *error = NULL;
+
+ dirfd = nm_platform_sysctl_open_netdir(platform, async_state->ifindex, ifname);
+ if (!dirfd) {
+ g_set_error(&error,
+ NM_UTILS_ERROR,
+ NM_UTILS_ERROR_UNKNOWN,
+ "couldn't open netdir for device with ifindex %d",
+ async_state->ifindex);
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
+ }
+
+ sysctl_set_async(platform,
+ NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_numvfs"),
+ values,
+ sriov_async_sysctl_done_cb,
+ async_state,
+ async_state->cancellable);
+}
+
+static void
+sriov_async_step1_destroy_vfs(SriovAsyncState *async_state)
+{
+ NMPlatform *platform = async_state->platform;
+
+ _LOGD("destroying VFs before configuring SR-IOV");
+
+ sriov_async_set_num_vfs(async_state, "0");
+}
+
+static void
+sriov_async_step2_set_eswitch_mode(SriovAsyncState *async_state)
+{
+ NMPlatform *platform = async_state->platform;
+ NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform);
+ enum devlink_eswitch_mode eswitch_mode = (enum devlink_eswitch_mode) async_state->eswitch_mode;
+ gs_free NMDevlink *devlink = NULL;
+ gs_free_error GError *error = NULL;
+
+ _LOGD("setting eswitch-mode to '%d'", (int) eswitch_mode);
+
+ /* We set eswitch mode as a sriov_async step because it's in the middle of
+ * other steps that are async. However, this step itself is synchronous. */
+ devlink = nm_devlink_new(platform, priv->sk_genl_sync, async_state->ifindex);
+ if (!nm_devlink_set_eswitch_mode(devlink, eswitch_mode, &error)) {
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
+ }
+
+ sriov_async_call_next_step(async_state);
+}
+
+static void
+sriov_async_step3_create_vfs(SriovAsyncState *async_state)
+{
+ NMPlatform *platform = async_state->platform;
+ const char *val = nm_sprintf_bufa(32, "%u", async_state->num_vfs);
+
+ _LOGD("setting sriov_numvfs to %u", async_state->num_vfs);
+
+ sriov_async_set_num_vfs(async_state, val);
+}
+
+static void
+sriov_async_step_finish_ok(SriovAsyncState *async_state)
+{
+ sriov_async_finish_err(async_state, NULL);
+}
+
+/*
+ * Take special care when setting new values:
+ * - don't touch anything if the right values are already set
+ * - to change the number of VFs, eswitch mode or autoprobe we need to destroy existing VFs
+ * - the autoprobe setting is irrelevant when numvfs is zero
+ */
+static void
link_set_sriov_params_async(NMPlatform *platform,
int ifindex,
guint num_vfs,
@@ -8906,140 +9126,117 @@ link_set_sriov_params_async(NMPlatform *platform,
gpointer data,
GCancellable *cancellable)
{
+ SriovAsyncState *async_state;
NMLinuxPlatformPrivate *priv = NM_LINUX_PLATFORM_GET_PRIVATE(platform);
nm_auto_pop_netns NMPNetns *netns = NULL;
gs_free_error GError *error = NULL;
nm_auto_close int dirfd = -1;
- int current_autoprobe;
- guint i, total;
- gint64 current_num;
char ifname[IFNAMSIZ];
- gpointer packed;
- const char *values[3];
- char buf[64];
- gs_free NMDevlink *devlink = NULL;
- int current_eswitch_mode;
+ int max_vfs;
+ int current_num_vfs;
+ int current_eswitch_mode = _NM_SRIOV_ESWITCH_MODE_PRESERVE;
+ gboolean need_change_eswitch_mode;
+ gboolean need_change_vfs;
+ gboolean need_destroy_vfs;
+ gboolean need_create_vfs;
+ int i;
g_return_if_fail(callback || !data);
g_return_if_fail(cancellable);
+ async_state = g_new0(SriovAsyncState, 1);
+ async_state->platform = g_object_ref(platform);
+ async_state->ifindex = ifindex;
+ async_state->eswitch_mode = eswitch_mode;
+ async_state->current_step = -1;
+ async_state->callback = callback;
+ async_state->data = data;
+ async_state->cancellable = g_object_ref(cancellable);
+
if (!nm_platform_netns_push(platform, &netns)) {
g_set_error_literal(&error,
NM_UTILS_ERROR,
NM_UTILS_ERROR_UNKNOWN,
- "couldn't change namespace");
- goto out_idle;
+ "couldn't change network namespace");
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
}
- devlink = nm_devlink_new(platform, priv->sk_genl_sync, ifindex);
-
dirfd = nm_platform_sysctl_open_netdir(platform, ifindex, ifname);
if (!dirfd) {
- g_set_error_literal(&error, NM_UTILS_ERROR, NM_UTILS_ERROR_UNKNOWN, "couldn't open netdir");
- goto out_idle;
+ g_set_error(&error,
+ NM_UTILS_ERROR,
+ NM_UTILS_ERROR_UNKNOWN,
+ "couldn't open netdir for device with ifindex %d",
+ ifindex);
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
}
- total = nm_platform_sysctl_get_int_checked(
- platform,
- NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_totalvfs"),
- 10,
- 0,
- G_MAXUINT,
- 0);
- if (!errno && num_vfs > total) {
- _LOGW("link: %d only supports %u VFs (requested %u)", ifindex, total, num_vfs);
- num_vfs = total;
+ current_num_vfs = sriov_read_sysctl_uint(platform, dirfd, ifname, "sriov_numvfs", &error);
+ if (current_num_vfs < 0) {
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
}
- /*
- * Take special care when setting new values:
- * - don't touch anything if the right values are already set
- * - to change the number of VFs or autoprobe we need to destroy existing VFs
- * - the autoprobe setting is irrelevant when numvfs is zero
- */
- current_num = nm_platform_sysctl_get_int_checked(
- platform,
- NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_numvfs"),
- 10,
- 0,
- G_MAXUINT,
- -1);
-
- current_autoprobe = nm_platform_sysctl_get_int_checked(
- platform,
- NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_drivers_autoprobe"),
- 10,
- 0,
- 1,
- -1);
- if (current_autoprobe == -1 && errno == ENOENT) {
- /* older kernel versions don't have this sysctl. Assume the value is
- * "1". */
- current_autoprobe = 1;
+ max_vfs = sriov_read_sysctl_uint(platform, dirfd, ifname, "sriov_totalvfs", &error);
+ if (max_vfs < 0) {
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
}
- current_eswitch_mode = nm_devlink_get_eswitch_mode(devlink, &error);
- if (current_eswitch_mode < 0) {
- /* We can proceed if eswith-mode is "preserve", otherwise propagate the error */
- if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE)
- goto out_idle;
-
- _LOGD("%s", error->message);
- g_clear_error(&error);
+ if (num_vfs > max_vfs) {
+ _LOGW("link: device %d only supports %u VFs (requested %u)", ifindex, max_vfs, num_vfs);
+ _LOGW("link: reducing num_vfs to %u for device %d", max_vfs, ifindex);
+ num_vfs = max_vfs;
}
- if (current_num == num_vfs
- && (autoprobe == NM_OPTION_BOOL_DEFAULT || current_autoprobe == autoprobe)
- && (eswitch_mode == _NM_SRIOV_ESWITCH_MODE_PRESERVE
- || current_eswitch_mode == eswitch_mode))
- goto out_idle;
+ async_state->num_vfs = num_vfs;
- if (NM_IN_SET(autoprobe, NM_OPTION_BOOL_TRUE, NM_OPTION_BOOL_FALSE)
- && current_autoprobe != autoprobe
- && !nm_platform_sysctl_set(
- platform,
- NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_drivers_autoprobe"),
- nm_sprintf_buf(buf, "%d", (int) autoprobe))) {
- g_set_error(&error,
- NM_UTILS_ERROR,
- NM_UTILS_ERROR_UNKNOWN,
- "couldn't set SR-IOV drivers-autoprobe to %d: %s",
- (int) autoprobe,
- nm_strerror_native(errno));
- goto out_idle;
+ /* Setting autoprobe goes first, we can do it synchronously */
+ if (num_vfs > 0 && !sriov_set_autoprobe(platform, dirfd, ifname, autoprobe, &error)) {
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
}
- if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE && current_eswitch_mode != eswitch_mode) {
- if (!nm_devlink_set_eswitch_mode(devlink, (enum devlink_eswitch_mode) eswitch_mode, &error))
- goto out_idle;
+ if (eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE) {
+ gs_free NMDevlink *devlink = nm_devlink_new(platform, priv->sk_genl_sync, ifindex);
+
+ current_eswitch_mode = nm_devlink_get_eswitch_mode(devlink, &error);
+ if (current_eswitch_mode < 0) {
+ sriov_async_finish_err(async_state, g_steal_pointer(&error));
+ return;
+ }
}
- if (current_num == 0 && num_vfs == 0)
- goto out_idle;
+ /* Decide what actions we must do. Note that we might need to destroy the VFs even
+ * if num_vfs == current_num_vfs, for example to change the eswitch mode. Because of
+ * that, we might need to create VFs even if num_vfs == current_num_vfs.
+ * Steps in order (unnecessary steps are skipped):
+ * 1. Destroy VFs
+ * 2. Set eswitch mode
+ * 3. Create VFs
+ * 4. Invoke caller's callback
+ */
+ need_change_eswitch_mode =
+ eswitch_mode != _NM_SRIOV_ESWITCH_MODE_PRESERVE && current_eswitch_mode != eswitch_mode;
+ need_change_vfs = num_vfs != current_num_vfs;
+ need_destroy_vfs = current_num_vfs > 0 && (need_change_eswitch_mode || need_change_vfs);
+ need_create_vfs = (current_num_vfs == 0 || need_destroy_vfs) && num_vfs > 0;
i = 0;
- if (current_num != 0)
- values[i++] = "0";
- if (num_vfs != 0)
- values[i++] = nm_sprintf_bufa(32, "%u", num_vfs);
- values[i++] = NULL;
+ if (need_destroy_vfs)
+ async_state->steps[i++] = sriov_async_step1_destroy_vfs;
+ if (need_change_eswitch_mode)
+ async_state->steps[i++] = sriov_async_step2_set_eswitch_mode;
+ if (need_create_vfs)
+ async_state->steps[i++] = sriov_async_step3_create_vfs;
- sysctl_set_async(platform,
- NMP_SYSCTL_PATHID_NETDIR_A(dirfd, ifname, "device/sriov_numvfs"),
- values,
- callback,
- data,
- cancellable);
- return;
-
-out_idle:
- if (callback) {
- packed = nm_utils_user_data_pack(g_object_ref(platform),
- g_steal_pointer(&error),
- callback,
- data);
- nm_utils_invoke_on_idle(cancellable, sriov_idle_cb, packed);
- }
+ nm_assert(i < _SRIOV_ASYNC_MAX_STEPS);
+
+ async_state->steps[i] = sriov_async_step_finish_ok;
+
+ sriov_async_call_next_step(async_state);
}
static gboolean