Skip to content

Commit

Permalink
net/mlx5: Lag, add debugfs to query hardware lag state
Browse files Browse the repository at this point in the history
Lag state has become very complicated with many modes, flags, types and
port selection methods, and future work will add additional features.

Add a debugfs to query the current lag state. A new directory named "lag"
will be created under the mlx5 debugfs directory. As the driver has
debugfs per pci function the location will be: <debugfs>/mlx5/<BDF>/lag

For example:
/sys/kernel/debug/mlx5/0000:08:00.0/lag

The following files are exposed:

- state: Returns "active" or "disabled". If "active" it means hardware
         lag is active.

- members: Returns the BDFs of all the members of lag object.

- type: Returns the type of the lag currently configured. Valid only
	if hardware lag is active.
	* "roce" - Members are bare metal PFs.
	* "switchdev" - Members are in switchdev mode.
	* "multipath" - ECMP offloads.

- port_sel_mode: Returns the egress port selection method, valid
		 only if hardware lag is active.
		 * "queue_affinity" - Egress port is selected by
		   the QP/SQ affinity.
		 * "hash" - Egress port is selected by hash done on
		   each packet. Controlled by: xmit_hash_policy of the
		   bond device.
- flags: Returns flags that are specific per lag @type. Valid only if
	 hardware lag is active.
	 * "shared_fdb" - "on" or "off", if "on" single FDB is used.

- mapping: Returns the mapping which is used to select egress port.
	   Valid only if hardware lag is active.
	   If @port_sel_mode is "hash" returns the active egress ports.
	   The hash result will select only active ports.
	   If @port_sel_mode is "queue_affinity" returns the mapping
	   between the configured port affinity of the QP/SQ and actual
	   egress port. For example:
	   * 1:1 - Mapping means if the configured affinity is port 1
	           traffic will egress via port 1.
	   * 1:2 - Mapping means if the configured affinity is port 1
		   traffic will egress via port 2. This can happen
		   if port 1 is down or in active/backup mode and port 1
		   is backup.

Signed-off-by: Mark Bloch <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
  • Loading branch information
mark-bloch authored and Saeed Mahameed committed May 10, 2022
1 parent 352899f commit 7f46a0b
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 4 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o lib/tout.o
Expand Down
173 changes: 173 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "lag.h"

/*
 * Translate the lag type flag set in ldev->flags into its debugfs name.
 *
 * The flags are tested in a fixed priority order (roce, switchdev,
 * multipath); the first one found set wins. Returns NULL when none of
 * them is set.
 */
static char *get_str_mode_type(struct mlx5_lag *ldev)
{
	char *type = NULL;

	if (ldev->flags & MLX5_LAG_FLAG_ROCE)
		type = "roce";
	else if (ldev->flags & MLX5_LAG_FLAG_SRIOV)
		type = "switchdev";
	else if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH)
		type = "multipath";

	return type;
}

/*
 * debugfs "type" show handler.
 *
 * Prints the lag type name on its own line. Returns 0 on success, or
 * -EINVAL when lag is not active or no type flag is set.
 */
static int type_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	char *type_str;

	/* Sample the lag state under ldev->lock; print after dropping it. */
	mutex_lock(&ldev->lock);
	type_str = __mlx5_lag_is_active(ldev) ? get_str_mode_type(ldev) : NULL;
	mutex_unlock(&ldev->lock);

	if (type_str) {
		seq_printf(file, "%s\n", type_str);
		return 0;
	}

	return -EINVAL;
}

static int port_sel_mode_show(struct seq_file *file, void *priv)
{
struct mlx5_core_dev *dev = file->private;
struct mlx5_lag *ldev;
int ret = 0;
char *mode;

ldev = dev->priv.lag;
mutex_lock(&ldev->lock);
if (__mlx5_lag_is_active(ldev))
mode = get_str_port_sel_mode(ldev->flags);
else
ret = -EINVAL;
mutex_unlock(&ldev->lock);
if (ret || !mode)
return ret;

seq_printf(file, "%s\n", mode);
return 0;
}

/*
 * debugfs "state" show handler.
 *
 * Prints "active" or "disabled" depending on whether lag is currently
 * active. Always returns 0.
 */
static int state_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	const char *state;

	/* Sample the lag state under ldev->lock; print after dropping it. */
	mutex_lock(&ldev->lock);
	state = __mlx5_lag_is_active(ldev) ? "active" : "disabled";
	mutex_unlock(&ldev->lock);

	seq_printf(file, "%s\n", state);
	return 0;
}

/*
 * debugfs "flags" show handler.
 *
 * Prints "shared_fdb:on" or "shared_fdb:off" according to
 * ldev->shared_fdb. Returns 0 on success, or -EINVAL when lag is not
 * active.
 */
static int flags_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	bool fdb_shared;

	mutex_lock(&ldev->lock);
	if (!__mlx5_lag_is_active(ldev)) {
		mutex_unlock(&ldev->lock);
		return -EINVAL;
	}
	/* Copy the flag while the lock is held; print after dropping it. */
	fdb_shared = ldev->shared_fdb;
	mutex_unlock(&ldev->lock);

	seq_printf(file, "%s:%s\n", "shared_fdb", fdb_shared ? "on" : "off");
	return 0;
}

/*
 * debugfs "mapping" show handler.
 *
 * When MLX5_LAG_FLAG_HASH_BASED is set, prints one line per port reported
 * by mlx5_infer_tx_enabled(), each value incremented by one. Otherwise
 * prints one "<index + 1>:<v2p_map entry>" line for each of the
 * ldev->ports entries of ldev->v2p_map.
 *
 * Returns 0 on success, or -EINVAL when lag is not active.
 */
static int mapping_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	u8 ports[MLX5_MAX_PORTS] = {};
	bool hash_mode;
	int count;
	int idx;

	mutex_lock(&ldev->lock);
	if (!__mlx5_lag_is_active(ldev)) {
		mutex_unlock(&ldev->lock);
		return -EINVAL;
	}

	/* Snapshot the mapping under the lock; print after dropping it. */
	hash_mode = !!(ldev->flags & MLX5_LAG_FLAG_HASH_BASED);
	if (hash_mode) {
		mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
				      &count);
	} else {
		count = ldev->ports;
		for (idx = 0; idx < count; idx++)
			ports[idx] = ldev->v2p_map[idx];
	}
	mutex_unlock(&ldev->lock);

	for (idx = 0; idx < count; idx++) {
		if (hash_mode)
			seq_printf(file, "%d\n", ports[idx] + 1);
		else
			seq_printf(file, "%d:%d\n", idx + 1, ports[idx]);
	}

	return 0;
}

/*
 * debugfs "members" show handler.
 *
 * Prints the device name of every populated entry in ldev->pf[], one per
 * line; empty slots are skipped. Always returns 0.
 */
static int members_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	int idx;

	/* The pf[] table is walked with ldev->lock held. */
	mutex_lock(&ldev->lock);
	for (idx = 0; idx < ldev->ports; idx++) {
		if (ldev->pf[idx].dev)
			seq_printf(file, "%s\n",
				   dev_name(ldev->pf[idx].dev->device));
	}
	mutex_unlock(&ldev->lock);

	return 0;
}

/*
 * Generate the <name>_fops objects for each <name>_show() handler above;
 * these are the fops passed to debugfs_create_file() in
 * mlx5_ldev_add_debugfs().
 */
DEFINE_SHOW_ATTRIBUTE(type);
DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
DEFINE_SHOW_ATTRIBUTE(state);
DEFINE_SHOW_ATTRIBUTE(flags);
DEFINE_SHOW_ATTRIBUTE(mapping);
DEFINE_SHOW_ATTRIBUTE(members);

/*
 * Create the "lag" debugfs directory under the device's debugfs root and
 * populate it with the read-only lag query files.
 *
 * The directory dentry is stored in dev->priv.dbg.lag_debugfs; @dev is
 * passed as the private data of every file.
 */
void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
{
	struct dentry *parent = mlx5_debugfs_get_dev_root(dev);
	struct dentry *lag_dir;

	lag_dir = debugfs_create_dir("lag", parent);
	dev->priv.dbg.lag_debugfs = lag_dir;

	debugfs_create_file("type", 0444, lag_dir, dev, &type_fops);
	debugfs_create_file("port_sel_mode", 0444, lag_dir, dev,
			    &port_sel_mode_fops);
	debugfs_create_file("state", 0444, lag_dir, dev, &state_fops);
	debugfs_create_file("flags", 0444, lag_dir, dev, &flags_fops);
	debugfs_create_file("mapping", 0444, lag_dir, dev, &mapping_fops);
	debugfs_create_file("members", 0444, lag_dir, dev, &members_fops);
}

/*
 * Remove the lag debugfs directory @dbg together with everything
 * beneath it.
 */
void mlx5_ldev_remove_debugfs(struct dentry *dbg)
{
debugfs_remove_recursive(dbg);
}
11 changes: 8 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
}
}

static void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled)
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled)
{
int i;

Expand Down Expand Up @@ -454,7 +454,7 @@ static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
}

static char *get_str_port_sel_mode(u8 flags)
char *get_str_port_sel_mode(u8 flags)
{
if (flags & MLX5_LAG_FLAG_HASH_BASED)
return "hash";
Expand Down Expand Up @@ -1106,6 +1106,10 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev)
return;

/* mdev is being removed, might as well remove debugfs
* as early as possible.
*/
mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) {
Expand Down Expand Up @@ -1137,6 +1141,7 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
msleep(100);
goto recheck;
}
mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
Expand Down
9 changes: 9 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#ifndef __MLX5_LAG_H__
#define __MLX5_LAG_H__

#include <linux/debugfs.h>

#define MLX5_LAG_MAX_HASH_BUCKETS 16
#include "mlx5_core.h"
#include "mp.h"
Expand Down Expand Up @@ -90,4 +92,11 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev);

char *get_str_port_sel_mode(u8 flags);
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled);

void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
void mlx5_ldev_remove_debugfs(struct dentry *dbg);

#endif /* __MLX5_LAG_H__ */
1 change: 1 addition & 0 deletions include/linux/mlx5/driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,7 @@ struct mlx5_debugfs_entries {
struct dentry *cq_debugfs;
struct dentry *cmdif_debugfs;
struct dentry *pages_debugfs;
struct dentry *lag_debugfs;
};

struct mlx5_ft_pool;
Expand Down

0 comments on commit 7f46a0b

Please sign in to comment.