This reverts commit
a580ea994fd37f4105028f5a85c38ff6508a2b25.
This revert is to resolve Dragos's report of page_pool leak here:
https://lore.kernel.org/lkml/
20240424165646.
1625690-2-dtatulea@nvidia.com/
The reverted patch interacts very badly with commit
2cc3aeb5eccc ("skbuff:
Fix a potential race while recycling page_pool packets"). The reverted
commit hopes that the pp_recycle + is_pp_page variables do not change
between the skb_frag_ref and skb_frag_unref operation. If such a change
occurs, the skb_frag_ref/unref will not operate on the same reference type.
In the case of Dragos's report, the grabbed ref was a pp ref, but the unref
was a page ref, because the pp_recycle setting on the skb was changed.
Attempting to fix this issue on the fly is risky. Lets revert and I hope
to reland this with better understanding and testing to ensure we don't
regress some edge case while streamlining skb reffing.
Fixes: a580ea994fd3 ("net: mirror skb frag ref/unref helpers")
Reported-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Link: https://lore.kernel.org/r/20240502175423.2456544-1-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
for (i = 0; i < record->num_frags; i++) {
skb_shinfo(nskb)->frags[i] = record->frags[i];
/* increase the frag ref count */
- __skb_frag_ref(&skb_shinfo(nskb)->frags[i], nskb->pp_recycle);
+ __skb_frag_ref(&skb_shinfo(nskb)->frags[i]);
}
skb_shinfo(nskb)->nr_frags = record->num_frags;
skb->len += hlen - swivel;
skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel);
- __skb_frag_ref(frag, skb->pp_recycle);
+ __skb_frag_ref(frag);
/* any more data? */
if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) {
frag++;
skb_frag_fill_page_desc(frag, page->buffer, 0, hlen);
- __skb_frag_ref(frag, skb->pp_recycle);
+ __skb_frag_ref(frag);
RX_USED_ADD(page, hlen + cp->crc_size);
}
return;
for (i = 0; i < sinfo->nr_frags; i++)
- __skb_frag_ref(&sinfo->frags[i], false);
+ __skb_frag_ref(&sinfo->frags[i]);
}
static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
#define _LINUX_SKBUFF_REF_H
#include <linux/skbuff.h>
-#include <net/page_pool/helpers.h>
-
-#ifdef CONFIG_PAGE_POOL
-static inline bool is_pp_page(struct page *page)
-{
- return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
-}
-
-static inline bool napi_pp_get_page(struct page *page)
-{
- page = compound_head(page);
-
- if (!is_pp_page(page))
- return false;
-
- page_pool_ref_page(page);
- return true;
-}
-#endif
-
-static inline void skb_page_ref(struct page *page, bool recycle)
-{
-#ifdef CONFIG_PAGE_POOL
- if (recycle && napi_pp_get_page(page))
- return;
-#endif
- get_page(page);
-}
/**
* __skb_frag_ref - take an addition reference on a paged fragment.
* @frag: the paged fragment
- * @recycle: skb->pp_recycle param of the parent skb. False if no parent skb.
*
- * Takes an additional reference on the paged fragment @frag. Obtains the
- * correct reference count depending on whether skb->pp_recycle is set and
- * whether the frag is a page pool frag.
+ * Takes an additional reference on the paged fragment @frag.
*/
-static inline void __skb_frag_ref(skb_frag_t *frag, bool recycle)
+static inline void __skb_frag_ref(skb_frag_t *frag)
{
- skb_page_ref(skb_frag_page(frag), recycle);
+ get_page(skb_frag_page(frag));
}
/**
*/
static inline void skb_frag_ref(struct sk_buff *skb, int f)
{
- __skb_frag_ref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
+ __skb_frag_ref(&skb_shinfo(skb)->frags[f]);
}
bool napi_pp_put_page(struct page *page);
skb_get(list);
}
+static bool is_pp_page(struct page *page)
+{
+ return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+}
+
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom)
{
return napi_pp_put_page(virt_to_page(data));
}
+/**
+ * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
+ * @skb: page pool aware skb
+ *
+ * Increase the fragment reference count (pp_ref_count) of a skb. This is
+ * intended to gain fragment references only for page pool aware skbs,
+ * i.e. when skb->pp_recycle is true, and not for fragments in a
+ * non-pp-recycling skb. It has a fallback to increase references on normal
+ * pages, as page pool aware skbs may also have normal page fragments.
+ */
+static int skb_pp_frag_ref(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo;
+ struct page *head_page;
+ int i;
+
+ if (!skb->pp_recycle)
+ return -EINVAL;
+
+ shinfo = skb_shinfo(skb);
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
+ if (likely(is_pp_page(head_page)))
+ page_pool_ref_page(head_page);
+ else
+ page_ref_inc(head_page);
+ }
+ return 0;
+}
+
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
to++;
} else {
- __skb_frag_ref(fragfrom, skb->pp_recycle);
+ __skb_frag_ref(fragfrom);
skb_frag_page_copy(fragto, fragfrom);
skb_frag_off_copy(fragto, fragfrom);
skb_frag_size_set(fragto, todo);
}
*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
- __skb_frag_ref(nskb_frag, nskb->pp_recycle);
+ __skb_frag_ref(nskb_frag);
size = skb_frag_size(nskb_frag);
if (pos < offset) {
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < from_shinfo->nr_frags; i++)
- __skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
+ if (skb_pp_frag_ref(from)) {
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
+ }
to->truesize += delta;
to->len += len;
for (i = 0; remaining > 0; i++) {
skb_frag_t *frag = &record->frags[i];
- __skb_frag_ref(frag, false);
+ __skb_frag_ref(frag);
sg_set_page(sg_in + i, skb_frag_page(frag),
skb_frag_size(frag), skb_frag_off(frag));