/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align it to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
#define LIBETH_RX_PAGE_LEN(hr)					\
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),	\
		   LIBETH_RX_BUF_STRIDE)
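
/* A worked example, not part of the API: on a typical x86_64 build with
 * 4-KiB pages, 64-byte cachelines, NET_SKB_PAD = 64 and NET_IP_ALIGN = 0
 * (figures are approximate and config-dependent):
 *
 *	hr  = LIBETH_SKB_HEADROOM;				// 64
 *	ovr = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));	// 320
 *	max = SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER);	// 4096 - 64 - 320 = 3712
 *	len = ALIGN_DOWN(max, LIBETH_RX_BUF_STRIDE);	// 3712, already 128-aligned
 *
 * I.e. one order-0 page hosts a single ~3.6-KiB HW-writeable region once
 * the headroom and the skb_shared_info tailroom are carved out.
 */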

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @page: page holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, the API switches between the one-page-per-frame and
 * the shared page model (to conserve memory on bigger-page platforms). In
 * case of the former, @offset is always 0 and @truesize is always
 * ```PAGE_SIZE```.
 */
struct libeth_fqe {
	struct page		*page;
	u32			offset;
	u32			truesize;
} __aligned_largest;

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/ overhead
 * @count: number of descriptors/buffers the queue has
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool	*pp;
		struct libeth_fqe	*fqes;

		u32			truesize;
		u32			count;
	);

	/* Cold fields */
	u32			buf_len;
	int			nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);
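
/* A minimal setup sketch. The caller fills in the cold fields and, on
 * success, takes over the &page_pool and the FQE array; libeth clamps
 * @buf_len to what actually fits in a page. The driver-side names (bufq,
 * rx_buf_size, q_vector) are hypothetical:
 *
 *	struct libeth_fq fq = {
 *		.count	 = bufq->desc_count,
 *		.buf_len = rx_buf_size,
 *		.nid	 = NUMA_NO_NODE,
 *	};
 *	int ret;
 *
 *	ret = libeth_rx_fq_create(&fq, &q_vector->napi);
 *	if (ret)
 *		return ret;
 *
 *	bufq->pp = fq.pp;
 *	bufq->fqes = fq.fqes;
 *	bufq->truesize = fq.truesize;
 *
 * libeth_rx_fq_destroy() later releases both the array and the pool.
 */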

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * ```DMA_MAPPING_ERROR``` otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
	struct libeth_fqe *buf = &fq->fqes[i];

	buf->truesize = fq->truesize;
	buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
	if (unlikely(!buf->page))
		return DMA_MAPPING_ERROR;

	return page_pool_get_dma_addr(buf->page) + buf->offset +
	       fq->pp->p.offset;
}
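
/* A refill-loop sketch. The returned address already includes the headroom
 * (@offset plus the PP offset), so HW writes the frame exactly where the
 * CPU expects the payload to start. The descriptor layout is hypothetical:
 *
 *	for (u32 i = 0; i < count; i++) {
 *		dma_addr_t addr;
 *
 *		addr = libeth_rx_alloc(&fq->fp, i);
 *		if (addr == DMA_MAPPING_ERROR)
 *			break;
 *
 *		rx_desc[i].buf_addr = cpu_to_le64(addr);
 *	}
 */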

void libeth_rx_recycle_slow(struct page *page);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but in case of no data it will be immediately
 * recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
					  u32 len)
{
	struct page *page = fqe->page;

	/* Very rare, but possible case. The most common reason:
	 * the last fragment contained FCS only, which was then
	 * stripped by the HW.
	 */
	if (unlikely(!len)) {
		libeth_rx_recycle_slow(page);
		return false;
	}

	page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);

	return true;
}
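
/* A completion-path sketch. @len would come from the Rx descriptor; note
 * that the offset passed to the skb helper must include the PP headroom,
 * the same way libeth_rx_alloc() folds it into the DMA address:
 *
 *	struct libeth_fqe *fqe = &fq->fqes[ntc];
 *	u32 hr = fqe->page->pp->p.offset;
 *
 *	if (!libeth_rx_sync_for_cpu(fqe, len))
 *		return;		// empty buffer was recycled to its PP
 *
 *	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, fqe->page,
 *			fqe->offset + hr, len, fqe->truesize);
 */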

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

enum {
	LIBETH_RX_PT_OUTER_L2		= 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

enum {
	LIBETH_RX_PT_NOT_FRAG		= 0U,
	LIBETH_RX_PT_FRAG,
};

enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE	= 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

enum {
	LIBETH_RX_PT_TUNNEL_END_NONE	= 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

enum {
	LIBETH_RX_PT_INNER_NONE		= 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

#define LIBETH_RX_PT_PAYLOAD_NONE	PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2		PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3		PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4		PKT_HASH_TYPE_L4

struct libeth_rx_pt {
	u32			outer_ip:2;
	u32			outer_frag:1;
	u32			tunnel_type:3;
	u32			tunnel_end_prot:2;
	u32			tunnel_end_frag:1;
	u32			inner_prot:3;
	enum pkt_hash_types	payload_layer:2;

	u32			pad:2;
	enum xdp_rss_hash_type	hash_type:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
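
/* A table-population sketch: a driver converts its HW packet type numbers
 * into &libeth_rx_pt entries once at init, so the hotpath does an O(1)
 * array lookup. libeth_rx_pt_gen_hash_type() precomputes @hash_type from
 * the other fields, so the XDP RSS hash type isn't derived per packet.
 * The lookup table name is hypothetical:
 *
 *	struct libeth_rx_pt pt = {
 *		.outer_ip	= LIBETH_RX_PT_OUTER_IPV4,
 *		.tunnel_type	= LIBETH_RX_PT_TUNNEL_IP_NONE,
 *		.inner_prot	= LIBETH_RX_PT_INNER_TCP,
 *		.payload_layer	= LIBETH_RX_PT_PAYLOAD_L4,
 *	};
 *
 *	libeth_rx_pt_gen_hash_type(&pt);
 *	rx_ptype_lkup[hw_ptype] = pt;
 */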

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	switch (pt.outer_ip) {
	case LIBETH_RX_PT_OUTER_IPV4:
		return LIBETH_RX_PT_OUTER_IPV4;
	default:
		return LIBETH_RX_PT_OUTER_L2;
	}
#else
	return pt.outer_ip;
#endif
}
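
/* Intended to be used in a switch, so that when CONFIG_IPV6 is off, the
 * IPv6 arm becomes unreachable and is discarded by the compiler (sketch;
 * the handlers are hypothetical):
 *
 *	switch (libeth_rx_pt_get_ip_ver(pt)) {
 *	case LIBETH_RX_PT_OUTER_IPV4:
 *		return handle_v4(skb);
 *	case LIBETH_RX_PT_OUTER_IPV6:
 *		return handle_v6(skb);
 *	default:
 *		return 0;
 *	}
 */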

/* libeth_rx_pt_has_*() can be used to quickly check whether the HW metadata
 * is available to avoid further expensive processing such as descriptor
 * reads. They already check for the corresponding netdev feature to be
 * enabled, thus can be used as drop-in replacements for the feature checks.
 */

static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
					     struct libeth_rx_pt pt)
{
	/* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
	 * it is enough to check only for the L4 type.
	 */
	return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
		      (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
					 struct libeth_rx_pt pt)
{
	return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
		      (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in skb hash value based on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}
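
/* A hotpath metadata-population sketch. Thanks to the feature checks built
 * into libeth_rx_pt_has_*(), no separate netif feature tests are needed
 * around these calls. The descriptor accessors are hypothetical:
 *
 *	struct libeth_rx_pt pt = rx_ptype_lkup[get_ptype(desc)];
 *
 *	if (libeth_rx_pt_has_hash(dev, pt))
 *		libeth_rx_pt_set_hash(skb, get_hash(desc), pt);
 *
 *	if (libeth_rx_pt_has_checksum(dev, pt) && !get_csum_err(desc))
 *		skb->ip_summed = CHECKSUM_UNNECESSARY;
 */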

#endif /* __LIBETH_RX_H */