Skip to content

Commit

Permalink
Merge branch 'mlxsw-Enable-minimum-shaper-on-MC-TCs'
Browse files Browse the repository at this point in the history
Ido Schimmel says:

====================
mlxsw: Enable minimum shaper on MC TCs

Petr says:

An MC-aware mode was introduced in commit 7b81953 ("mlxsw:
spectrum: Configure MC-aware mode on mlxsw ports"). In MC-aware mode,
BUM traffic gets a special treatment by being assigned to a separate set
of traffic classes 8..15. Pairs of TCs 0 and 8, 1 and 9, etc., are then
configured to strictly prioritize the lower-numbered ones. The intention
is to prevent BUM traffic from flooding the switch and pushing out all UC
traffic, which would otherwise happen, and instead give UC traffic
precedence.

However, strictly prioritizing UC traffic has the effect that UC overload
pushes out all BUM traffic, such as legitimate ARP queries. These
packets are kept in queues for a while, but under sustained UC overload,
their lifetime eventually expires and these packets are dropped. That is
detrimental to network performance as well.

In this patchset, MC TCs (8..15) are configured with a minimum shaper of
200Mbps (the minimum permitted value) to allow a trickle of necessary
control traffic to get through.

First, in patch #1, the QEEC register is extended with fields necessary
to configure the minimum shaper.

In patch #2, minimum shaper is enabled on TCs 8..15.

In patches #3 and #4, first the MC-awareness test is tweaked to support
the minimum shaper, and then a new test is introduced to test that MC
traffic behaves well under UC overload.
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Oct 31, 2018
2 parents c4d63c7 + a5ee171 commit e2acddd
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 25 deletions.
22 changes: 21 additions & 1 deletion drivers/net/ethernet/mellanox/mlxsw/reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
* Configures the ETS elements.
*/
#define MLXSW_REG_QEEC_ID 0x400D
#define MLXSW_REG_QEEC_LEN 0x1C
#define MLXSW_REG_QEEC_LEN 0x20

MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);

Expand Down Expand Up @@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
*/
MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);

/* reg_qeec_mise
* Min shaper configuration enable. Enables configuration of the min
* shaper on this ETS element.
* 0 - Disable
* 1 - Enable
* Access: RW
*/
MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);

enum {
MLXSW_REG_QEEC_BYTES_MODE,
MLXSW_REG_QEEC_PACKETS_MODE,
Expand All @@ -3342,6 +3351,17 @@ enum {
*/
MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);

/* The smallest permitted min shaper rate. */
#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */

/* reg_qeec_min_shaper_rate
* Min shaper information rate.
* For CPU port, can only be configured for port hierarchy.
* When in bytes mode, value is specified in units of 1000bps.
* Access: RW
*/
MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);

/* reg_qeec_mase
* Max shaper configuration enable. Enables configuration of the max
* shaper on this ETS element.
Expand Down
25 changes: 25 additions & 0 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum.c
Original file line number Diff line number Diff line change
Expand Up @@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
}

/* Program the min shaper of a single ETS element on a port.
 *
 * A QEEC payload is packed from the port's local_port together with @hr,
 * @index and @next_index, the min-shaper-enable bit (mise) is set, and
 * @minrate is stored in the min_shaper_rate field. The payload is then
 * written to the QEEC register.
 *
 * Return: the result of mlxsw_reg_write().
 */
static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
				    enum mlxsw_reg_qeec_hr hr, u8 index,
				    u8 next_index, u32 minrate)
{
	char payload[MLXSW_REG_QEEC_LEN];

	mlxsw_reg_qeec_pack(payload, mlxsw_sp_port->local_port, hr, index,
			    next_index);

	/* mise and min_shaper_rate are separate fields of the payload, so the
	 * order in which they are filled in does not matter.
	 */
	mlxsw_reg_qeec_min_shaper_rate_set(payload, minrate);
	mlxsw_reg_qeec_mise_set(payload, true);

	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(qeec),
			       payload);
}

int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
u8 switch_prio, u8 tclass)
{
Expand Down Expand Up @@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}

/* Configure the min shaper for multicast TCs. */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_TC,
i + 8, i,
MLXSW_REG_QEEC_MIS_MIN);
if (err)
return err;
}

/* Map all priorities to traffic class 0. */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);
Expand Down
95 changes: 71 additions & 24 deletions tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,24 @@
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +-----------------------+ +----------------------------------+
# | H1 | | H2 |
# | | | unicast --> + $h2.111 |
# | | | traffic | 192.0.2.129/28 |
# | multicast | | | e-qos-map 0:1 |
# | traffic | | | |
# | $h1 + <----- | | + $h2 |
# +-----|-----------------+ +--------------|-------------------+
# | |
# +-----|-------------------------------------------------|-------------------+
# | + $swp1 + $swp2 |
# | | >1Gbps | >1Gbps |
# | +---|----------------+ +----------|----------------+ |
# | | + $swp1.1 | | + $swp2.111 | |
# +---------------------------+ +----------------------------------+
# | H1 | | H2 |
# | | | unicast --> + $h2.111 |
# | multicast | | traffic | 192.0.2.129/28 |
# | traffic | | | e-qos-map 0:1 |
# | $h1 + <----- | | | |
# | 192.0.2.65/28 | | | + $h2 |
# +---------------|-----------+ +--------------|-------------------+
# | |
# +---------------|---------------------------------------|-------------------+
# | $swp1 + + $swp2 |
# | >1Gbps | | >1Gbps |
# | +-------------|------+ +----------|----------------+ |
# | | $swp1.1 + | | + $swp2.111 | |
# | | BR1 | SW | BR111 | |
# | | + $swp3.1 | | + $swp3.111 | |
# | +---|----------------+ +----------|----------------+ |
# | \_________________________________________________/ |
# | | $swp3.1 + | | + $swp3.111 | |
# | +-------------|------+ +----------|----------------+ |
# | \_______________________________________/ |
# | | |
# | + $swp3 |
# | | 1Gbps bottleneck |
Expand All @@ -51,6 +51,7 @@
# |
# +--|-----------------+
# | + $h3 H3 |
# | | 192.0.2.66/28 |
# | | |
# | + $h3.111 |
# | 192.0.2.130/28 |
Expand All @@ -59,6 +60,7 @@
ALL_TESTS="
ping_ipv4
test_mc_aware
test_uc_aware
"

lib_dir=$(dirname $0)/../../../net/forwarding
Expand All @@ -68,14 +70,14 @@ source $lib_dir/lib.sh

h1_create()
{
simple_if_init $h1
simple_if_init $h1 192.0.2.65/28
mtu_set $h1 10000
}

h1_destroy()
{
mtu_restore $h1
simple_if_fini $h1
simple_if_fini $h1 192.0.2.65/28
}

h2_create()
Expand All @@ -97,7 +99,7 @@ h2_destroy()

h3_create()
{
simple_if_init $h3
simple_if_init $h3 192.0.2.66/28
mtu_set $h3 10000

vlan_create $h3 111 v$h3 192.0.2.130/28
Expand All @@ -108,7 +110,7 @@ h3_destroy()
vlan_destroy $h3 111

mtu_restore $h3
simple_if_fini $h3
simple_if_fini $h3 192.0.2.66/28
}

switch_create()
Expand Down Expand Up @@ -251,7 +253,7 @@ measure_uc_rate()
# average ingress rate to somewhat mitigate this.
local min_ingress=2147483648

mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
-a own -b $h3mac -t udp -q &
sleep 1

Expand Down Expand Up @@ -291,7 +293,7 @@ test_mc_aware()
check_err $? "Could not get high enough UC-only ingress rate"
local ucth1=${uc_rate[1]}

mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &

local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
Expand All @@ -311,7 +313,7 @@ test_mc_aware()
ret = 100 * ($ucth1 - $ucth2) / $ucth1
if (ret > 0) { ret } else { 0 }
")
check_err $(bc <<< "$deg > 10")
check_err $(bc <<< "$deg > 25")

local interval=$((d1 - d0))
local mc_ir=$(rate $u0 $u1 $interval)
Expand All @@ -335,6 +337,51 @@ test_mc_aware()
echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
echo " ingress MC throughput $(humanize $mc_ir)"
echo " egress MC throughput $(humanize $mc_er)"
echo
}

# Check that broadcast traffic still gets through while unicast traffic is
# being sent continuously from $h2.111 towards $h3.
#
# A background mausezahn stream generates the unicast load. While it runs,
# a series of single broadcast ARP queries for 192.0.2.66 is sent from $h1,
# and the test passes only if every one of them is answered. Unicast ingress
# ($swp2) and egress ($h3) rates are derived from the rx_octets_prio_1
# counters and reported for information only.
test_uc_aware()
{
	RET=0

	# Unicast load; runs until killed at the end of the test.
	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &

	# Baseline timestamp and counters for the rate computation below.
	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
	sleep 1

	local attempts=50
	local passes=0
	local i

	for ((i = 0; i < attempts; ++i)); do
		# The deadline is given as a whole number of seconds: newer
		# iputils arping does not accept a fractional -w value.
		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
			((passes++))
		fi

		sleep 0.1
	done

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

	local interval=$((d1 - d0))
	local uc_ir=$(rate $u0 $u1 $interval)
	local uc_er=$(rate $t0 $t1 $interval)

	# Every ARP query has to be answered for the test to pass.
	((attempts == passes))
	check_err $? "Not all broadcast ARP queries got a response"

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	log_test "MC performace under UC overload"
	echo "    ingress UC throughput $(humanize ${uc_ir})"
	echo "    egress UC throughput  $(humanize ${uc_er})"
	echo "    sent $attempts BC ARPs, got $passes responses"
}

trap cleanup EXIT
Expand Down

0 comments on commit e2acddd

Please sign in to comment.