git.dujemihanovic.xyz Git - linux.git/commitdiff
net: bridge: switchdev: allow the TX data plane forwarding to be offloaded
author Tobias Waldekranz <tobias@waldekranz.com>
Thu, 22 Jul 2021 15:55:38 +0000 (18:55 +0300)
committer David S. Miller <davem@davemloft.net>
Fri, 23 Jul 2021 15:32:37 +0000 (16:32 +0100)
Allow switchdevs to forward frames from the CPU in accordance with the
bridge configuration in the same way as is done between bridge
ports. This means that the bridge will only send a single skb towards
one of the ports under the switchdev's control, and expects the driver
to deliver the packet to all eligible ports in its domain.

Primarily this improves the performance of multicast flows with
multiple subscribers, as it allows the hardware to perform the frame
replication.

The basic flow between the driver and the bridge is as follows:

- When joining a bridge port, the switchdev driver calls
  switchdev_bridge_port_offload() with tx_fwd_offload = true.

- The bridge sends offloadable skbs to one of the ports under the
  switchdev's control using skb->offload_fwd_mark = true.

- The switchdev driver checks the skb->offload_fwd_mark field and lets
  its FDB lookup select the destination port mask for this packet.

v1->v2:
- convert br_input_skb_cb::fwd_hwdoms to a plain unsigned long
- introduce a static key "br_switchdev_fwd_offload_used" to minimize the
  impact of the newly introduced feature on all the setups which don't
  have hardware that can make use of it
- introduce a check for nbp->flags & BR_FWD_OFFLOAD to optimize cache
  line access
- reorder nbp_switchdev_frame_mark_accel() and br_handle_vlan() in
  __br_forward()
- do not strip VLAN on egress if forwarding offload on VLAN-aware bridge
  is being used
- propagate errors from .ndo_dfwd_add_station() if not EOPNOTSUPP

v2->v3:
- replace the solution based on .ndo_dfwd_add_station with a solution
  based on switchdev_bridge_port_offload
- rename BR_FWD_OFFLOAD to BR_TX_FWD_OFFLOAD
v3->v4: rebase
v4->v5:
- make sure the static key is decremented on bridge port unoffload
- more function and variable renaming and comments for them:
  br_switchdev_fwd_offload_used to br_switchdev_tx_fwd_offload
  br_switchdev_accels_skb to br_switchdev_frame_uses_tx_fwd_offload
  nbp_switchdev_frame_mark_tx_fwd to nbp_switchdev_frame_mark_tx_fwd_to_hwdom
  nbp_switchdev_frame_mark_accel to nbp_switchdev_frame_mark_tx_fwd_offload
  fwd_accel to tx_fwd_offload

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
14 files changed:
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c
drivers/net/ethernet/mscc/ocelot_net.c
drivers/net/ethernet/rocker/rocker_ofdpa.c
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/ethernet/ti/cpsw_new.c
include/linux/if_bridge.h
net/bridge/br_forward.c
net/bridge/br_private.h
net/bridge/br_switchdev.c
net/bridge/br_vlan.c
net/dsa/port.c

index 3d021edb78e675518e3ff2a04bc660c8e4d2f46b..c233e8786e19ebf92ae43ad8c71be4dd5f41ae67 100644 (file)
@@ -1936,7 +1936,7 @@ static int dpaa2_switch_port_bridge_join(struct net_device *netdev,
        err = switchdev_bridge_port_offload(netdev, netdev, NULL,
                                            &dpaa2_switch_port_switchdev_nb,
                                            &dpaa2_switch_port_switchdev_blocking_nb,
-                                           extack);
+                                           false, extack);
        if (err)
                goto err_switchdev_offload;
 
index 7fe1287228e5224129851a4b7a8dd2dbabc996bb..be01ec8284e6831fd571bc48da99524bfb6d180b 100644 (file)
@@ -502,7 +502,7 @@ int prestera_bridge_port_join(struct net_device *br_dev,
        }
 
        err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
-                                           NULL, NULL, extack);
+                                           NULL, NULL, false, extack);
        if (err)
                goto err_switchdev_offload;
 
index 0a53f1d8e7e1c95c26ca1543fe4e68a960d72d92..f5d0d392efbfe3e14000cfad34c95310c1185937 100644 (file)
@@ -362,7 +362,7 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
        bridge_port->ref_count = 1;
 
        err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev,
-                                           NULL, NULL, NULL, extack);
+                                           NULL, NULL, NULL, false, extack);
        if (err)
                goto err_switchdev_offload;
 
index 807dc45cfae46a1179cc2626d30ec4f6bce0dacf..649ca609884a3f893868a4dc0475ebc8820d54b2 100644 (file)
@@ -113,7 +113,7 @@ static int sparx5_port_bridge_join(struct sparx5_port *port,
        set_bit(port->portno, sparx5->bridge_mask);
 
        err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
-                                           extack);
+                                           false, extack);
        if (err)
                goto err_switchdev_offload;
 
index 3558ee8d92123d4816ca3616e3df110d7f8e0b4c..c52f175df389c396de7b88458707ee6a331a3d33 100644 (file)
@@ -1200,7 +1200,7 @@ static int ocelot_netdevice_bridge_join(struct net_device *dev,
        err = switchdev_bridge_port_offload(brport_dev, dev, priv,
                                            &ocelot_netdevice_nb,
                                            &ocelot_switchdev_blocking_nb,
-                                           extack);
+                                           false, extack);
        if (err)
                goto err_switchdev_offload;
 
index 03df6a24d0bac354f4945771ba9aea4c1e2bda87..b82e169b78365213e7bfe2c716940e4b44c96cc1 100644 (file)
@@ -2599,7 +2599,7 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port,
                return err;
 
        return switchdev_bridge_port_offload(dev, dev, NULL, NULL, NULL,
-                                            extack);
+                                            false, extack);
 }
 
 static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port)
index b285606f963d9c719b626f1c2c896e332acbd166..229e2f09d6059442d13621762b06888f01cf9f3d 100644 (file)
@@ -2097,7 +2097,7 @@ static int am65_cpsw_netdevice_port_link(struct net_device *ndev,
        }
 
        err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
-                                           extack);
+                                           false, extack);
        if (err)
                return err;
 
index 31030f73840d4b01585ee3bdd178418e645daef3..4448a91cce54f3a08d1861c30b681f3207c7a5e5 100644 (file)
@@ -1518,7 +1518,7 @@ static int cpsw_netdevice_port_link(struct net_device *ndev,
        }
 
        err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
-                                           extack);
+                                           false, extack);
        if (err)
                return err;
 
index bbf680093823a7c1f2172eafe33351ec85581819..f0b4ffbd858285ef65cc831ef99bc6e0074fd9f0 100644 (file)
@@ -57,6 +57,7 @@ struct br_ip_list {
 #define BR_MRP_AWARE           BIT(17)
 #define BR_MRP_LOST_CONT       BIT(18)
 #define BR_MRP_LOST_IN_CONT    BIT(19)
+#define BR_TX_FWD_OFFLOAD      BIT(20)
 
 #define BR_DEFAULT_AGEING_TIME (300 * HZ)
 
@@ -182,6 +183,7 @@ int switchdev_bridge_port_offload(struct net_device *brport_dev,
                                  struct net_device *dev, const void *ctx,
                                  struct notifier_block *atomic_nb,
                                  struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
                                  struct netlink_ext_ack *extack);
 void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
                                     const void *ctx,
@@ -195,6 +197,7 @@ switchdev_bridge_port_offload(struct net_device *brport_dev,
                              struct net_device *dev, const void *ctx,
                              struct notifier_block *atomic_nb,
                              struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
                              struct netlink_ext_ack *extack)
 {
        return -EINVAL;
index bfdbaf3015b93bc72be3b41b00963fb7eec1dc2e..bc14b1b384e9b8056edbd72553fc46cc0839c431 100644 (file)
@@ -48,6 +48,8 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
                skb_set_network_header(skb, depth);
        }
 
+       skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
+
        dev_queue_xmit(skb);
 
        return 0;
@@ -76,6 +78,11 @@ static void __br_forward(const struct net_bridge_port *to,
        struct net *net;
        int br_hook;
 
+       /* Mark the skb for forwarding offload early so that br_handle_vlan()
+        * can know whether to pop the VLAN header on egress or keep it.
+        */
+       nbp_switchdev_frame_mark_tx_fwd_offload(to, skb);
+
        vg = nbp_vlan_group_rcu(to);
        skb = br_handle_vlan(to->br, to, vg, skb);
        if (!skb)
@@ -174,6 +181,8 @@ static struct net_bridge_port *maybe_deliver(
        if (!should_deliver(p, skb))
                return prev;
 
+       nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb);
+
        if (!prev)
                goto out;
 
index 2f32d330b648f16f7a95f718f04c7fb621ac7a0e..86ca617fec7ab0203b3b608f87bf5751136bbd6d 100644 (file)
@@ -552,12 +552,20 @@ struct br_input_skb_cb {
 #endif
 
 #ifdef CONFIG_NET_SWITCHDEV
+       /* Set if TX data plane offloading is used towards at least one
+        * hardware domain.
+        */
+       u8 tx_fwd_offload:1;
        /* The switchdev hardware domain from which this packet was received.
         * If skb->offload_fwd_mark was set, then this packet was already
         * forwarded by hardware to the other ports in the source hardware
         * domain, otherwise it wasn't.
         */
        int src_hwdom;
+       /* Bit mask of hardware domains towards which this packet has already
+        * been transmitted using the TX data plane offload.
+        */
+       unsigned long fwd_hwdoms;
 #endif
 };
 
@@ -1871,6 +1879,12 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; }
 
 /* br_switchdev.c */
 #ifdef CONFIG_NET_SWITCHDEV
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
+
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                            struct sk_buff *skb);
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                             struct sk_buff *skb);
 void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                              struct sk_buff *skb);
 bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
@@ -1891,6 +1905,23 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
        skb->offload_fwd_mark = 0;
 }
 #else
+static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
+{
+       return false;
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                       struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                        struct sk_buff *skb)
+{
+}
+
 static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                                            struct sk_buff *skb)
 {
index 6bfff28ede235ea5d543566c3dcefe0a1ac8d2f3..96ce069d0c8c964a0eb8a0a66414379ada00da97 100644 (file)
@@ -8,6 +8,46 @@
 
 #include "br_private.h"
 
+static struct static_key_false br_switchdev_tx_fwd_offload;
+
+static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
+                                            const struct sk_buff *skb)
+{
+       if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+               return false;
+
+       return (p->flags & BR_TX_FWD_OFFLOAD) &&
+              (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
+}
+
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
+{
+       if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+               return false;
+
+       return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
+}
+
+/* Mark the frame for TX forwarding offload if this egress port supports it */
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                            struct sk_buff *skb)
+{
+       if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+               BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
+}
+
+/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
+ * that the skb has been already forwarded to, to avoid further cloning to
+ * other ports in the same hwdom by making nbp_switchdev_allowed_egress()
+ * return false.
+ */
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                             struct sk_buff *skb)
+{
+       if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+               set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
+}
+
 void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                              struct sk_buff *skb)
 {
@@ -18,8 +58,10 @@ void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
 bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
                                  const struct sk_buff *skb)
 {
-       return !skb->offload_fwd_mark ||
-              BR_INPUT_SKB_CB(skb)->src_hwdom != p->hwdom;
+       struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);
+
+       return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
+               (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
 }
 
 /* Flags that can be offloaded to hardware */
@@ -164,8 +206,11 @@ static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
 
 static int nbp_switchdev_add(struct net_bridge_port *p,
                             struct netdev_phys_item_id ppid,
+                            bool tx_fwd_offload,
                             struct netlink_ext_ack *extack)
 {
+       int err;
+
        if (p->offload_count) {
                /* Prevent unsupported configurations such as a bridge port
                 * which is a bonding interface, and the member ports are from
@@ -189,7 +234,16 @@ static int nbp_switchdev_add(struct net_bridge_port *p,
        p->ppid = ppid;
        p->offload_count = 1;
 
-       return nbp_switchdev_hwdom_set(p);
+       err = nbp_switchdev_hwdom_set(p);
+       if (err)
+               return err;
+
+       if (tx_fwd_offload) {
+               p->flags |= BR_TX_FWD_OFFLOAD;
+               static_branch_inc(&br_switchdev_tx_fwd_offload);
+       }
+
+       return 0;
 }
 
 static void nbp_switchdev_del(struct net_bridge_port *p)
@@ -204,6 +258,11 @@ static void nbp_switchdev_del(struct net_bridge_port *p)
 
        if (p->hwdom)
                nbp_switchdev_hwdom_put(p);
+
+       if (p->flags & BR_TX_FWD_OFFLOAD) {
+               p->flags &= ~BR_TX_FWD_OFFLOAD;
+               static_branch_dec(&br_switchdev_tx_fwd_offload);
+       }
 }
 
 static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
@@ -262,6 +321,7 @@ int switchdev_bridge_port_offload(struct net_device *brport_dev,
                                  struct net_device *dev, const void *ctx,
                                  struct notifier_block *atomic_nb,
                                  struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
                                  struct netlink_ext_ack *extack)
 {
        struct netdev_phys_item_id ppid;
@@ -278,7 +338,7 @@ int switchdev_bridge_port_offload(struct net_device *brport_dev,
        if (err)
                return err;
 
-       err = nbp_switchdev_add(p, ppid, extack);
+       err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
        if (err)
                return err;
 
index 382ab992badf26261abf464d00436e76161381c4..325600361487eeabb54f6717fec5330d432af36c 100644 (file)
@@ -465,7 +465,15 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
                u64_stats_update_end(&stats->syncp);
        }
 
-       if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
+       /* If the skb will be sent using forwarding offload, the assumption is
+        * that the switchdev will inject the packet into hardware together
+        * with the bridge VLAN, so that it can be forwarded according to that
+        * VLAN. The switchdev should deal with popping the VLAN header in
+        * hardware on each egress port as appropriate. So only strip the VLAN
+        * header if forwarding offload is not being used.
+        */
+       if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED &&
+           !br_switchdev_frame_uses_tx_fwd_offload(skb))
                __vlan_hwaccel_clear_tag(skb);
 
        if (p && (p->flags & BR_VLAN_TUNNEL) &&
index d81c283b7358fa9417cd116e9e9133f16a825b68..f2704f101ccfc467192a59cfe29e143466d5ad23 100644 (file)
@@ -257,7 +257,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
        err = switchdev_bridge_port_offload(brport_dev, dev, dp,
                                            &dsa_slave_switchdev_notifier,
                                            &dsa_slave_switchdev_blocking_notifier,
-                                           extack);
+                                           false, extack);
        if (err)
                goto out_rollback_unbridge;