
idpf: split &idpf_queue into 4 strictly-typed queue structures

Currently, sizeof(struct idpf_queue) is 32 KB.
This is due to the 12-bit hashtable declaration at the end of the queue.
This HT is needed only for Tx queues when the flow scheduling mode is
enabled, but &idpf_queue is unified for all of the queue types, causing
excessive memory usage.
The unified structure also makes the code less efficient due to
suboptimal field placement, which can't be avoided without turning
nearly every pair of fields into a union; even then, differing field
alignment etc. doesn't allow optimizing things to the limit.
Split &idpf_queue into 4 structures corresponding to the queue types:
RQ (Rx queue), SQ (Tx queue), FQ (buffer queue), and CQ (completion
queue). Place only the fields each type needs, plus shortcuts handy for
the hotpath. Allocate the above-mentioned hashtable dynamically and only
when needed, keeping &idpf_tx_queue relatively short (192 bytes, same as
Rx). This HT is used only for out-of-order (OOO) completions, which
aren't really hotpath anyway.
Note that this change must be done atomically, otherwise it's really
easy to get lost and miss something.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Authored by Alexander Lobakin on 2024-06-20 15:53:38 +02:00, committed by Tony Nguyen
parent 66c27e3b19
commit e4891e4687
7 changed files with 1026 additions and 736 deletions
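
Before the per-file diffs, a minimal C sketch of the idea described in the commit message. It is illustrative only, not the driver's actual field layout: the single catch-all structure becomes one lean, strictly-typed structure per queue role, and the 12-bit hashtable moves into a separately allocated stash used only by flow-scheduled Tx queues (DECLARE_HASHTABLE comes from linux/hashtable.h).

/* Before: every queue role shares one struct, ~32 KB each because of the
 * embedded 2^12-bucket hashtable that only flow-scheduled Tx queues use.
 */
struct idpf_queue {
	/* Rx, Tx, buffer-queue and completion-queue fields all mixed ... */
	DECLARE_HASHTABLE(sched_buf_hash, 12);	/* 4096 buckets, always present */
};

/* After: one structure per role, holding only what its hotpath needs;
 * the hashtable sits behind a pointer and is allocated on demand.
 */
struct idpf_txq_stash {
	DECLARE_HASHTABLE(sched_buf_hash, 12);
};

struct idpf_tx_queue {
	/* Tx hotpath fields ... */
	struct idpf_txq_stash *stash;	/* NULL unless flow scheduling is on */
};

struct idpf_rx_queue { /* Rx hotpath fields ... */ };
struct idpf_buf_queue { /* Rx buffer queue fields ... */ };
struct idpf_compl_queue { /* Tx completion queue fields ... */ };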


@ -17,7 +17,6 @@ struct idpf_vport_max_q;
#include <linux/sctp.h>
#include <linux/ethtool_netlink.h>
#include <net/gro.h>
#include <linux/dim.h>
#include "virtchnl2.h"
#include "idpf_txrx.h"
@ -301,7 +300,7 @@ struct idpf_vport {
u16 num_txq_grp;
struct idpf_txq_group *txq_grps;
u32 txq_model;
struct idpf_queue **txqs;
struct idpf_tx_queue **txqs;
bool crc_enable;
u16 num_rxq;


@ -437,22 +437,24 @@ struct idpf_stats {
.stat_offset = offsetof(_type, _stat) \
}
/* Helper macro for defining some statistics related to queues */
#define IDPF_QUEUE_STAT(_name, _stat) \
IDPF_STAT(struct idpf_queue, _name, _stat)
/* Helper macros for defining some statistics related to queues */
#define IDPF_RX_QUEUE_STAT(_name, _stat) \
IDPF_STAT(struct idpf_rx_queue, _name, _stat)
#define IDPF_TX_QUEUE_STAT(_name, _stat) \
IDPF_STAT(struct idpf_tx_queue, _name, _stat)
/* Stats associated with a Tx queue */
static const struct idpf_stats idpf_gstrings_tx_queue_stats[] = {
IDPF_QUEUE_STAT("pkts", q_stats.tx.packets),
IDPF_QUEUE_STAT("bytes", q_stats.tx.bytes),
IDPF_QUEUE_STAT("lso_pkts", q_stats.tx.lso_pkts),
IDPF_TX_QUEUE_STAT("pkts", q_stats.packets),
IDPF_TX_QUEUE_STAT("bytes", q_stats.bytes),
IDPF_TX_QUEUE_STAT("lso_pkts", q_stats.lso_pkts),
};
/* Stats associated with an Rx queue */
static const struct idpf_stats idpf_gstrings_rx_queue_stats[] = {
IDPF_QUEUE_STAT("pkts", q_stats.rx.packets),
IDPF_QUEUE_STAT("bytes", q_stats.rx.bytes),
IDPF_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rx.rsc_pkts),
IDPF_RX_QUEUE_STAT("pkts", q_stats.packets),
IDPF_RX_QUEUE_STAT("bytes", q_stats.bytes),
IDPF_RX_QUEUE_STAT("rx_gro_hw_pkts", q_stats.rsc_pkts),
};
#define IDPF_TX_QUEUE_STATS_LEN ARRAY_SIZE(idpf_gstrings_tx_queue_stats)
@ -633,7 +635,7 @@ static int idpf_get_sset_count(struct net_device *netdev, int sset)
* Copies the stat data defined by the pointer and stat structure pair into
* the memory supplied as data. If the pointer is null, data will be zero'd.
*/
static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
static void idpf_add_one_ethtool_stat(u64 *data, const void *pstat,
const struct idpf_stats *stat)
{
char *p;
@ -671,6 +673,7 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
* idpf_add_queue_stats - copy queue statistics into supplied buffer
* @data: ethtool stats buffer
* @q: the queue to copy
* @type: type of the queue
*
* Queue statistics must be copied while protected by u64_stats_fetch_begin,
* so we can't directly use idpf_add_ethtool_stats. Assumes that queue stats
@ -681,19 +684,23 @@ static void idpf_add_one_ethtool_stat(u64 *data, void *pstat,
*
* This function expects to be called while under rcu_read_lock().
*/
static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q)
static void idpf_add_queue_stats(u64 **data, const void *q,
enum virtchnl2_queue_type type)
{
const struct u64_stats_sync *stats_sync;
const struct idpf_stats *stats;
unsigned int start;
unsigned int size;
unsigned int i;
if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
if (type == VIRTCHNL2_QUEUE_TYPE_RX) {
size = IDPF_RX_QUEUE_STATS_LEN;
stats = idpf_gstrings_rx_queue_stats;
stats_sync = &((const struct idpf_rx_queue *)q)->stats_sync;
} else {
size = IDPF_TX_QUEUE_STATS_LEN;
stats = idpf_gstrings_tx_queue_stats;
stats_sync = &((const struct idpf_tx_queue *)q)->stats_sync;
}
/* To avoid invalid statistics values, ensure that we keep retrying
@ -701,10 +708,10 @@ static void idpf_add_queue_stats(u64 **data, struct idpf_queue *q)
* u64_stats_fetch_retry.
*/
do {
start = u64_stats_fetch_begin(&q->stats_sync);
start = u64_stats_fetch_begin(stats_sync);
for (i = 0; i < size; i++)
idpf_add_one_ethtool_stat(&(*data)[i], q, &stats[i]);
} while (u64_stats_fetch_retry(&q->stats_sync, start));
} while (u64_stats_fetch_retry(stats_sync, start));
/* Once we successfully copy the stats in, update the data pointer */
*data += size;
@ -793,7 +800,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
for (j = 0; j < num_rxq; j++) {
u64 hw_csum_err, hsplit, hsplit_hbo, bad_descs;
struct idpf_rx_queue_stats *stats;
struct idpf_queue *rxq;
struct idpf_rx_queue *rxq;
unsigned int start;
if (idpf_is_queue_model_split(vport->rxq_model))
@ -807,7 +814,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
do {
start = u64_stats_fetch_begin(&rxq->stats_sync);
stats = &rxq->q_stats.rx;
stats = &rxq->q_stats;
hw_csum_err = u64_stats_read(&stats->hw_csum_err);
hsplit = u64_stats_read(&stats->hsplit_pkts);
hsplit_hbo = u64_stats_read(&stats->hsplit_buf_ovf);
@ -828,7 +835,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
for (j = 0; j < txq_grp->num_txq; j++) {
u64 linearize, qbusy, skb_drops, dma_map_errs;
struct idpf_queue *txq = txq_grp->txqs[j];
struct idpf_tx_queue *txq = txq_grp->txqs[j];
struct idpf_tx_queue_stats *stats;
unsigned int start;
@ -838,7 +845,7 @@ static void idpf_collect_queue_stats(struct idpf_vport *vport)
do {
start = u64_stats_fetch_begin(&txq->stats_sync);
stats = &txq->q_stats.tx;
stats = &txq->q_stats;
linearize = u64_stats_read(&stats->linearize);
qbusy = u64_stats_read(&stats->q_busy);
skb_drops = u64_stats_read(&stats->skb_drops);
@ -896,12 +903,12 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
qtype = VIRTCHNL2_QUEUE_TYPE_TX;
for (j = 0; j < txq_grp->num_txq; j++, total++) {
struct idpf_queue *txq = txq_grp->txqs[j];
struct idpf_tx_queue *txq = txq_grp->txqs[j];
if (!txq)
idpf_add_empty_queue_stats(&data, qtype);
else
idpf_add_queue_stats(&data, txq);
idpf_add_queue_stats(&data, txq, qtype);
}
}
@ -929,7 +936,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
num_rxq = rxq_grp->singleq.num_rxq;
for (j = 0; j < num_rxq; j++, total++) {
struct idpf_queue *rxq;
struct idpf_rx_queue *rxq;
if (is_splitq)
rxq = &rxq_grp->splitq.rxq_sets[j]->rxq;
@ -938,7 +945,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
if (!rxq)
idpf_add_empty_queue_stats(&data, qtype);
else
idpf_add_queue_stats(&data, rxq);
idpf_add_queue_stats(&data, rxq, qtype);
/* In splitq mode, don't get page pool stats here since
* the pools are attached to the buffer queues
@ -953,7 +960,7 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
for (i = 0; i < vport->num_rxq_grp; i++) {
for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
struct idpf_queue *rxbufq =
struct idpf_buf_queue *rxbufq =
&vport->rxq_grps[i].splitq.bufq_sets[j].bufq;
page_pool_get_stats(rxbufq->pp, &pp_stats);
@ -971,60 +978,64 @@ static void idpf_get_ethtool_stats(struct net_device *netdev,
}
/**
* idpf_find_rxq - find rxq from q index
* idpf_find_rxq_vec - find rxq vector from q index
* @vport: virtual port associated to queue
* @q_num: q index used to find queue
*
* returns pointer to rx queue
* returns pointer to rx vector
*/
static struct idpf_queue *idpf_find_rxq(struct idpf_vport *vport, int q_num)
static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport,
int q_num)
{
int q_grp, q_idx;
if (!idpf_is_queue_model_split(vport->rxq_model))
return vport->rxq_grps->singleq.rxqs[q_num];
return vport->rxq_grps->singleq.rxqs[q_num]->q_vector;
q_grp = q_num / IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
q_idx = q_num % IDPF_DFLT_SPLITQ_RXQ_PER_GROUP;
return &vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq;
return vport->rxq_grps[q_grp].splitq.rxq_sets[q_idx]->rxq.q_vector;
}
/**
* idpf_find_txq - find txq from q index
* idpf_find_txq_vec - find txq vector from q index
* @vport: virtual port associated to queue
* @q_num: q index used to find queue
*
* returns pointer to tx queue
* returns pointer to tx vector
*/
static struct idpf_queue *idpf_find_txq(struct idpf_vport *vport, int q_num)
static struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport,
int q_num)
{
int q_grp;
if (!idpf_is_queue_model_split(vport->txq_model))
return vport->txqs[q_num];
return vport->txqs[q_num]->q_vector;
q_grp = q_num / IDPF_DFLT_SPLITQ_TXQ_PER_GROUP;
return vport->txq_grps[q_grp].complq;
return vport->txq_grps[q_grp].complq->q_vector;
}
/**
* __idpf_get_q_coalesce - get ITR values for specific queue
* @ec: ethtool structure to fill with driver's coalesce settings
* @q: quuee of Rx or Tx
* @q_vector: queue vector corresponding to this queue
* @type: queue type
*/
static void __idpf_get_q_coalesce(struct ethtool_coalesce *ec,
struct idpf_queue *q)
const struct idpf_q_vector *q_vector,
enum virtchnl2_queue_type type)
{
if (q->q_type == VIRTCHNL2_QUEUE_TYPE_RX) {
if (type == VIRTCHNL2_QUEUE_TYPE_RX) {
ec->use_adaptive_rx_coalesce =
IDPF_ITR_IS_DYNAMIC(q->q_vector->rx_intr_mode);
ec->rx_coalesce_usecs = q->q_vector->rx_itr_value;
IDPF_ITR_IS_DYNAMIC(q_vector->rx_intr_mode);
ec->rx_coalesce_usecs = q_vector->rx_itr_value;
} else {
ec->use_adaptive_tx_coalesce =
IDPF_ITR_IS_DYNAMIC(q->q_vector->tx_intr_mode);
ec->tx_coalesce_usecs = q->q_vector->tx_itr_value;
IDPF_ITR_IS_DYNAMIC(q_vector->tx_intr_mode);
ec->tx_coalesce_usecs = q_vector->tx_itr_value;
}
}
@ -1040,8 +1051,8 @@ static int idpf_get_q_coalesce(struct net_device *netdev,
struct ethtool_coalesce *ec,
u32 q_num)
{
struct idpf_netdev_priv *np = netdev_priv(netdev);
struct idpf_vport *vport;
const struct idpf_netdev_priv *np = netdev_priv(netdev);
const struct idpf_vport *vport;
int err = 0;
idpf_vport_ctrl_lock(netdev);
@ -1056,10 +1067,12 @@ static int idpf_get_q_coalesce(struct net_device *netdev,
}
if (q_num < vport->num_rxq)
__idpf_get_q_coalesce(ec, idpf_find_rxq(vport, q_num));
__idpf_get_q_coalesce(ec, idpf_find_rxq_vec(vport, q_num),
VIRTCHNL2_QUEUE_TYPE_RX);
if (q_num < vport->num_txq)
__idpf_get_q_coalesce(ec, idpf_find_txq(vport, q_num));
__idpf_get_q_coalesce(ec, idpf_find_txq_vec(vport, q_num),
VIRTCHNL2_QUEUE_TYPE_TX);
unlock_mutex:
idpf_vport_ctrl_unlock(netdev);
@ -1103,16 +1116,15 @@ static int idpf_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
/**
* __idpf_set_q_coalesce - set ITR values for specific queue
* @ec: ethtool structure from user to update ITR settings
* @q: queue for which itr values has to be set
* @qv: queue vector for which itr values has to be set
* @is_rxq: is queue type rx
*
* Returns 0 on success, negative otherwise.
*/
static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
struct idpf_queue *q, bool is_rxq)
static int __idpf_set_q_coalesce(const struct ethtool_coalesce *ec,
struct idpf_q_vector *qv, bool is_rxq)
{
u32 use_adaptive_coalesce, coalesce_usecs;
struct idpf_q_vector *qv = q->q_vector;
bool is_dim_ena = false;
u16 itr_val;
@ -1128,7 +1140,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
itr_val = qv->tx_itr_value;
}
if (coalesce_usecs != itr_val && use_adaptive_coalesce) {
netdev_err(q->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n");
netdev_err(qv->vport->netdev, "Cannot set coalesce usecs if adaptive enabled\n");
return -EINVAL;
}
@ -1137,7 +1149,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
return 0;
if (coalesce_usecs > IDPF_ITR_MAX) {
netdev_err(q->vport->netdev,
netdev_err(qv->vport->netdev,
"Invalid value, %d-usecs range is 0-%d\n",
coalesce_usecs, IDPF_ITR_MAX);
@ -1146,7 +1158,7 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
if (coalesce_usecs % 2) {
coalesce_usecs--;
netdev_info(q->vport->netdev,
netdev_info(qv->vport->netdev,
"HW only supports even ITR values, ITR rounded to %d\n",
coalesce_usecs);
}
@ -1185,15 +1197,16 @@ static int __idpf_set_q_coalesce(struct ethtool_coalesce *ec,
*
* Return 0 on success, and negative on failure
*/
static int idpf_set_q_coalesce(struct idpf_vport *vport,
struct ethtool_coalesce *ec,
static int idpf_set_q_coalesce(const struct idpf_vport *vport,
const struct ethtool_coalesce *ec,
int q_num, bool is_rxq)
{
struct idpf_queue *q;
struct idpf_q_vector *qv;
q = is_rxq ? idpf_find_rxq(vport, q_num) : idpf_find_txq(vport, q_num);
qv = is_rxq ? idpf_find_rxq_vec(vport, q_num) :
idpf_find_txq_vec(vport, q_num);
if (q && __idpf_set_q_coalesce(ec, q, is_rxq))
if (qv && __idpf_set_q_coalesce(ec, qv, is_rxq))
return -EINVAL;
return 0;


@ -1318,14 +1318,14 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport)
if (idpf_is_queue_model_split(vport->rxq_model)) {
for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
struct idpf_queue *q =
const struct idpf_buf_queue *q =
&grp->splitq.bufq_sets[j].bufq;
writel(q->next_to_alloc, q->tail);
}
} else {
for (j = 0; j < grp->singleq.num_rxq; j++) {
struct idpf_queue *q =
const struct idpf_rx_queue *q =
grp->singleq.rxqs[j];
writel(q->next_to_alloc, q->tail);
@ -1855,7 +1855,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
enum idpf_vport_state current_state = np->state;
struct idpf_adapter *adapter = vport->adapter;
struct idpf_vport *new_vport;
int err, i;
int err;
/* If the system is low on memory, we can end up in bad state if we
* free all the memory for queue resources and try to allocate them
@ -1929,46 +1929,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
*/
memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps));
/* Since idpf_vport_queues_alloc was called with new_port, the queue
* back pointers are currently pointing to the local new_vport. Reset
* the backpointers to the original vport here
*/
for (i = 0; i < vport->num_txq_grp; i++) {
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
int j;
tx_qgrp->vport = vport;
for (j = 0; j < tx_qgrp->num_txq; j++)
tx_qgrp->txqs[j]->vport = vport;
if (idpf_is_queue_model_split(vport->txq_model))
tx_qgrp->complq->vport = vport;
}
for (i = 0; i < vport->num_rxq_grp; i++) {
struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
struct idpf_queue *q;
u16 num_rxq;
int j;
rx_qgrp->vport = vport;
for (j = 0; j < vport->num_bufqs_per_qgrp; j++)
rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport;
if (idpf_is_queue_model_split(vport->rxq_model))
num_rxq = rx_qgrp->splitq.num_rxq_sets;
else
num_rxq = rx_qgrp->singleq.num_rxq;
for (j = 0; j < num_rxq; j++) {
if (idpf_is_queue_model_split(vport->rxq_model))
q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
else
q = rx_qgrp->singleq.rxqs[j];
q->vport = vport;
}
}
if (reset_cause == IDPF_SR_Q_CHANGE)
idpf_vport_alloc_vec_indexes(vport);


@ -186,7 +186,7 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb,
* and gets a physical address for each memory location and programs
* it and the length into the transmit base mode descriptor.
*/
static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q,
struct idpf_tx_buf *first,
struct idpf_tx_offload_params *offloads)
{
@ -210,7 +210,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE);
/* write each descriptor with CRC bit */
if (tx_q->vport->crc_enable)
if (idpf_queue_has(CRC_EN, tx_q))
td_cmd |= IDPF_TX_DESC_CMD_ICRC;
for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
@ -285,7 +285,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
/* set next_to_watch value indicating a packet is present */
first->next_to_watch = tx_desc;
nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
netdev_tx_sent_queue(nq, first->bytecount);
idpf_tx_buf_hw_update(tx_q, i, netdev_xmit_more());
@ -299,7 +299,7 @@ static void idpf_tx_singleq_map(struct idpf_queue *tx_q,
* ring entry to reflect that this index is a context descriptor
*/
static struct idpf_base_tx_ctx_desc *
idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
idpf_tx_singleq_get_ctx_desc(struct idpf_tx_queue *txq)
{
struct idpf_base_tx_ctx_desc *ctx_desc;
int ntu = txq->next_to_use;
@ -320,7 +320,7 @@ idpf_tx_singleq_get_ctx_desc(struct idpf_queue *txq)
* @txq: queue to send buffer on
* @offload: offload parameter structure
**/
static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
static void idpf_tx_singleq_build_ctx_desc(struct idpf_tx_queue *txq,
struct idpf_tx_offload_params *offload)
{
struct idpf_base_tx_ctx_desc *desc = idpf_tx_singleq_get_ctx_desc(txq);
@ -333,7 +333,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
qw1 |= FIELD_PREP(IDPF_TXD_CTX_QW1_MSS_M, offload->mss);
u64_stats_update_begin(&txq->stats_sync);
u64_stats_inc(&txq->q_stats.tx.lso_pkts);
u64_stats_inc(&txq->q_stats.lso_pkts);
u64_stats_update_end(&txq->stats_sync);
}
@ -352,7 +352,7 @@ static void idpf_tx_singleq_build_ctx_desc(struct idpf_queue *txq,
* Returns NETDEV_TX_OK if sent, else an error code
*/
static netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
struct idpf_queue *tx_q)
struct idpf_tx_queue *tx_q)
{
struct idpf_tx_offload_params offload = { };
struct idpf_tx_buf *first;
@ -419,7 +419,7 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
struct net_device *netdev)
{
struct idpf_vport *vport = idpf_netdev_to_vport(netdev);
struct idpf_queue *tx_q;
struct idpf_tx_queue *tx_q;
tx_q = vport->txqs[skb_get_queue_mapping(skb)];
@ -442,16 +442,15 @@ netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
* @cleaned: returns number of packets cleaned
*
*/
static bool idpf_tx_singleq_clean(struct idpf_queue *tx_q, int napi_budget,
static bool idpf_tx_singleq_clean(struct idpf_tx_queue *tx_q, int napi_budget,
int *cleaned)
{
unsigned int budget = tx_q->vport->compln_clean_budget;
unsigned int total_bytes = 0, total_pkts = 0;
struct idpf_base_tx_desc *tx_desc;
u32 budget = tx_q->clean_budget;
s16 ntc = tx_q->next_to_clean;
struct idpf_netdev_priv *np;
struct idpf_tx_buf *tx_buf;
struct idpf_vport *vport;
struct netdev_queue *nq;
bool dont_wake;
@ -550,16 +549,15 @@ fetch_next_txq_desc:
*cleaned += total_pkts;
u64_stats_update_begin(&tx_q->stats_sync);
u64_stats_add(&tx_q->q_stats.tx.packets, total_pkts);
u64_stats_add(&tx_q->q_stats.tx.bytes, total_bytes);
u64_stats_add(&tx_q->q_stats.packets, total_pkts);
u64_stats_add(&tx_q->q_stats.bytes, total_bytes);
u64_stats_update_end(&tx_q->stats_sync);
vport = tx_q->vport;
np = netdev_priv(vport->netdev);
nq = netdev_get_tx_queue(vport->netdev, tx_q->idx);
np = netdev_priv(tx_q->netdev);
nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
dont_wake = np->state != __IDPF_VPORT_UP ||
!netif_carrier_ok(vport->netdev);
!netif_carrier_ok(tx_q->netdev);
__netif_txq_completed_wake(nq, total_pkts, total_bytes,
IDPF_DESC_UNUSED(tx_q), IDPF_TX_WAKE_THRESH,
dont_wake);
@ -584,7 +582,7 @@ static bool idpf_tx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
budget_per_q = num_txq ? max(budget / num_txq, 1) : 0;
for (i = 0; i < num_txq; i++) {
struct idpf_queue *q;
struct idpf_tx_queue *q;
q = q_vec->tx[i];
clean_complete &= idpf_tx_singleq_clean(q, budget_per_q,
@ -614,14 +612,9 @@ static bool idpf_rx_singleq_test_staterr(const union virtchnl2_rx_desc *rx_desc,
/**
* idpf_rx_singleq_is_non_eop - process handling of non-EOP buffers
* @rxq: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
* @skb: Current socket buffer containing buffer in progress
* @ntc: next to clean
*/
static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
union virtchnl2_rx_desc *rx_desc,
struct sk_buff *skb, u16 ntc)
static bool idpf_rx_singleq_is_non_eop(const union virtchnl2_rx_desc *rx_desc)
{
/* if we are the last buffer then there is nothing else to do */
if (likely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_EOF_SINGLEQ)))
@ -639,7 +632,7 @@ static bool idpf_rx_singleq_is_non_eop(struct idpf_queue *rxq,
*
* skb->protocol must be set before this function is called
*/
static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb,
struct idpf_rx_csum_decoded *csum_bits,
u16 ptype)
{
@ -647,14 +640,14 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
bool ipv4, ipv6;
/* check if Rx checksum is enabled */
if (unlikely(!(rxq->vport->netdev->features & NETIF_F_RXCSUM)))
if (unlikely(!(rxq->netdev->features & NETIF_F_RXCSUM)))
return;
/* check if HW has decoded the packet and checksum */
if (unlikely(!(csum_bits->l3l4p)))
return;
decoded = rxq->vport->rx_ptype_lkup[ptype];
decoded = rxq->rx_ptype_lkup[ptype];
if (unlikely(!(decoded.known && decoded.outer_ip)))
return;
@ -707,7 +700,7 @@ static void idpf_rx_singleq_csum(struct idpf_queue *rxq, struct sk_buff *skb,
checksum_fail:
u64_stats_update_begin(&rxq->stats_sync);
u64_stats_inc(&rxq->q_stats.rx.hw_csum_err);
u64_stats_inc(&rxq->q_stats.hw_csum_err);
u64_stats_update_end(&rxq->stats_sync);
}
@ -721,9 +714,9 @@ checksum_fail:
* This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
* descriptor writeback format.
**/
static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
static void idpf_rx_singleq_base_csum(struct idpf_rx_queue *rx_q,
struct sk_buff *skb,
union virtchnl2_rx_desc *rx_desc,
const union virtchnl2_rx_desc *rx_desc,
u16 ptype)
{
struct idpf_rx_csum_decoded csum_bits;
@ -761,9 +754,9 @@ static void idpf_rx_singleq_base_csum(struct idpf_queue *rx_q,
* This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
* descriptor writeback format.
**/
static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
static void idpf_rx_singleq_flex_csum(struct idpf_rx_queue *rx_q,
struct sk_buff *skb,
union virtchnl2_rx_desc *rx_desc,
const union virtchnl2_rx_desc *rx_desc,
u16 ptype)
{
struct idpf_rx_csum_decoded csum_bits;
@ -801,14 +794,14 @@ static void idpf_rx_singleq_flex_csum(struct idpf_queue *rx_q,
* This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
* descriptor writeback format.
**/
static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q,
struct sk_buff *skb,
union virtchnl2_rx_desc *rx_desc,
const union virtchnl2_rx_desc *rx_desc,
struct idpf_rx_ptype_decoded *decoded)
{
u64 mask, qw1;
if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH)))
return;
mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M;
@ -831,12 +824,12 @@ static void idpf_rx_singleq_base_hash(struct idpf_queue *rx_q,
* This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
* descriptor writeback format.
**/
static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q,
struct sk_buff *skb,
union virtchnl2_rx_desc *rx_desc,
const union virtchnl2_rx_desc *rx_desc,
struct idpf_rx_ptype_decoded *decoded)
{
if (unlikely(!(rx_q->vport->netdev->features & NETIF_F_RXHASH)))
if (unlikely(!(rx_q->netdev->features & NETIF_F_RXHASH)))
return;
if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M,
@ -857,16 +850,16 @@ static void idpf_rx_singleq_flex_hash(struct idpf_queue *rx_q,
* order to populate the hash, checksum, VLAN, protocol, and
* other fields within the skb.
*/
static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
static void
idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q,
struct sk_buff *skb,
union virtchnl2_rx_desc *rx_desc,
const union virtchnl2_rx_desc *rx_desc,
u16 ptype)
{
struct idpf_rx_ptype_decoded decoded =
rx_q->vport->rx_ptype_lkup[ptype];
struct idpf_rx_ptype_decoded decoded = rx_q->rx_ptype_lkup[ptype];
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_q->vport->netdev);
skb->protocol = eth_type_trans(skb, rx_q->netdev);
/* Check if we're using base mode descriptor IDs */
if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) {
@ -878,6 +871,22 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
}
}
/**
* idpf_rx_buf_hw_update - Store the new tail and head values
* @rxq: queue to bump
* @val: new head index
*/
static void idpf_rx_buf_hw_update(struct idpf_rx_queue *rxq, u32 val)
{
rxq->next_to_use = val;
if (unlikely(!rxq->tail))
return;
/* writel has an implicit memory barrier */
writel(val, rxq->tail);
}
/**
* idpf_rx_singleq_buf_hw_alloc_all - Replace used receive buffers
* @rx_q: queue for which the hw buffers are allocated
@ -885,7 +894,7 @@ static void idpf_rx_singleq_process_skb_fields(struct idpf_queue *rx_q,
*
* Returns false if all allocations were successful, true if any fail
*/
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
u16 cleaned_count)
{
struct virtchnl2_singleq_rx_buf_desc *desc;
@ -896,7 +905,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
return false;
desc = &rx_q->single_buf[nta];
buf = &rx_q->rx_buf.buf[nta];
buf = &rx_q->rx_buf[nta];
do {
dma_addr_t addr;
@ -916,7 +925,7 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
nta++;
if (unlikely(nta == rx_q->desc_count)) {
desc = &rx_q->single_buf[0];
buf = rx_q->rx_buf.buf;
buf = rx_q->rx_buf;
nta = 0;
}
@ -933,7 +942,6 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
/**
* idpf_rx_singleq_extract_base_fields - Extract fields from the Rx descriptor
* @rx_q: Rx descriptor queue
* @rx_desc: the descriptor to process
* @fields: storage for extracted values
*
@ -943,8 +951,8 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rx_q,
* This function only operates on the VIRTCHNL2_RXDID_1_32B_BASE_M base 32byte
* descriptor writeback format.
*/
static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
union virtchnl2_rx_desc *rx_desc,
static void
idpf_rx_singleq_extract_base_fields(const union virtchnl2_rx_desc *rx_desc,
struct idpf_rx_extracted *fields)
{
u64 qword;
@ -957,7 +965,6 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
/**
* idpf_rx_singleq_extract_flex_fields - Extract fields from the Rx descriptor
* @rx_q: Rx descriptor queue
* @rx_desc: the descriptor to process
* @fields: storage for extracted values
*
@ -967,8 +974,8 @@ static void idpf_rx_singleq_extract_base_fields(struct idpf_queue *rx_q,
* This function only operates on the VIRTCHNL2_RXDID_2_FLEX_SQ_NIC flexible
* descriptor writeback format.
*/
static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
union virtchnl2_rx_desc *rx_desc,
static void
idpf_rx_singleq_extract_flex_fields(const union virtchnl2_rx_desc *rx_desc,
struct idpf_rx_extracted *fields)
{
fields->size = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M,
@ -984,14 +991,15 @@ static void idpf_rx_singleq_extract_flex_fields(struct idpf_queue *rx_q,
* @fields: storage for extracted values
*
*/
static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
union virtchnl2_rx_desc *rx_desc,
static void
idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q,
const union virtchnl2_rx_desc *rx_desc,
struct idpf_rx_extracted *fields)
{
if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M)
idpf_rx_singleq_extract_base_fields(rx_q, rx_desc, fields);
idpf_rx_singleq_extract_base_fields(rx_desc, fields);
else
idpf_rx_singleq_extract_flex_fields(rx_q, rx_desc, fields);
idpf_rx_singleq_extract_flex_fields(rx_desc, fields);
}
/**
@ -1001,7 +1009,7 @@ static void idpf_rx_singleq_extract_fields(struct idpf_queue *rx_q,
*
* Returns true if there's any budget left (e.g. the clean is finished)
*/
static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
struct sk_buff *skb = rx_q->skb;
@ -1036,7 +1044,7 @@ static int idpf_rx_singleq_clean(struct idpf_queue *rx_q, int budget)
idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);
rx_buf = &rx_q->rx_buf.buf[ntc];
rx_buf = &rx_q->rx_buf[ntc];
if (!fields.size) {
idpf_rx_put_page(rx_buf);
goto skip_data;
@ -1058,7 +1066,7 @@ skip_data:
cleaned_count++;
/* skip if it is non EOP desc */
if (idpf_rx_singleq_is_non_eop(rx_q, rx_desc, skb, ntc))
if (idpf_rx_singleq_is_non_eop(rx_desc))
continue;
#define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
@ -1084,7 +1092,7 @@ skip_data:
rx_desc, fields.rx_ptype);
/* send completed skb up the stack */
napi_gro_receive(&rx_q->q_vector->napi, skb);
napi_gro_receive(rx_q->pp->p.napi, skb);
skb = NULL;
/* update budget accounting */
@ -1099,8 +1107,8 @@ skip_data:
failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);
u64_stats_update_begin(&rx_q->stats_sync);
u64_stats_add(&rx_q->q_stats.rx.packets, total_rx_pkts);
u64_stats_add(&rx_q->q_stats.rx.bytes, total_rx_bytes);
u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts);
u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes);
u64_stats_update_end(&rx_q->stats_sync);
/* guarantee a trip back through this routine if there was a failure */
@ -1127,7 +1135,7 @@ static bool idpf_rx_singleq_clean_all(struct idpf_q_vector *q_vec, int budget,
*/
budget_per_q = num_rxq ? max(budget / num_rxq, 1) : 0;
for (i = 0; i < num_rxq; i++) {
struct idpf_queue *rxq = q_vec->rx[i];
struct idpf_rx_queue *rxq = q_vec->rx[i];
int pkts_cleaned_per_q;
pkts_cleaned_per_q = idpf_rx_singleq_clean(rxq, budget_per_q);

File diff suppressed because it is too large.


@ -4,6 +4,8 @@
#ifndef _IDPF_TXRX_H_
#define _IDPF_TXRX_H_
#include <linux/dim.h>
#include <net/page_pool/helpers.h>
#include <net/tcp.h>
#include <net/netdev_queues.h>
@ -84,7 +86,7 @@
do { \
if (unlikely(++(ntc) == (rxq)->desc_count)) { \
ntc = 0; \
change_bit(__IDPF_Q_GEN_CHK, (rxq)->flags); \
idpf_queue_change(GEN_CHK, rxq); \
} \
} while (0)
@ -111,10 +113,9 @@ do { \
*/
#define IDPF_TX_SPLITQ_RE_MIN_GAP 64
#define IDPF_RX_BI_BUFID_S 0
#define IDPF_RX_BI_BUFID_M GENMASK(14, 0)
#define IDPF_RX_BI_GEN_S 15
#define IDPF_RX_BI_GEN_M BIT(IDPF_RX_BI_GEN_S)
#define IDPF_RX_BI_GEN_M BIT(16)
#define IDPF_RX_BI_BUFID_M GENMASK(15, 0)
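
As a hedged aside, the reworked masks above pack a 16-bit buffer ID plus the software generation bit into one 32-bit refill-queue entry. The helpers below are hypothetical (the real consumers live in the large suppressed diff earlier on this page) and only illustrate the encoding; FIELD_PREP()/FIELD_GET() are the usual linux/bitfield.h macros.

/* Hypothetical helpers illustrating the refill entry layout. */
static inline u32 idpf_rx_bi_pack(u16 buf_id, bool gen)
{
	return FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) |
	       FIELD_PREP(IDPF_RX_BI_GEN_M, gen);
}

static inline u16 idpf_rx_bi_bufid(u32 entry)
{
	return FIELD_GET(IDPF_RX_BI_BUFID_M, entry);
}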
#define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M
#define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M
@ -122,7 +123,7 @@ do { \
((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \
(txq)->next_to_clean - (txq)->next_to_use - 1)
#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->buf_stack.top)
#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top)
#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \
(txq)->desc_count >> 2)
@ -433,23 +434,37 @@ struct idpf_rx_ptype_decoded {
* to 1 and knows that reading a gen bit of 1 in any
* descriptor on the initial pass of the ring indicates a
* writeback. It also flips on every ring wrap.
* @__IDPF_RFLQ_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW bit
* and RFLGQ_GEN is the SW bit.
* @__IDPF_Q_RFL_GEN_CHK: Refill queues are SW only, so Q_GEN acts as the HW
* bit and Q_RFL_GEN is the SW bit.
* @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
* @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
* @__IDPF_Q_POLL_MODE: Enable poll mode
* @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode
* @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq)
* @__IDPF_Q_FLAGS_NBITS: Must be last
*/
enum idpf_queue_flags_t {
__IDPF_Q_GEN_CHK,
__IDPF_RFLQ_GEN_CHK,
__IDPF_Q_RFL_GEN_CHK,
__IDPF_Q_FLOW_SCH_EN,
__IDPF_Q_SW_MARKER,
__IDPF_Q_POLL_MODE,
__IDPF_Q_CRC_EN,
__IDPF_Q_HSPLIT_EN,
__IDPF_Q_FLAGS_NBITS,
};
#define idpf_queue_set(f, q) __set_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_clear(f, q) __clear_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_change(f, q) __change_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_has(f, q) test_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_has_clear(f, q) \
__test_and_clear_bit(__IDPF_Q_##f, (q)->flags)
#define idpf_queue_assign(f, q, v) \
__assign_bit(__IDPF_Q_##f, (q)->flags, v)
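
The helpers above token-paste the flag name onto the __IDPF_Q_ prefix, so callers spell out only the short flag name. A hedged usage illustration follows: the CRC_EN test mirrors idpf_tx_singleq_map() in this patch, while the other two calls (and the hsplit_on local) are merely representative.

/* idpf_queue_has(CRC_EN, tx_q) expands to
 * test_bit(__IDPF_Q_CRC_EN, (tx_q)->flags)
 */
if (idpf_queue_has(CRC_EN, tx_q))
	td_cmd |= IDPF_TX_DESC_CMD_ICRC;

idpf_queue_set(SW_MARKER, txq);			/* __set_bit() */
idpf_queue_assign(HSPLIT_EN, rxq, hsplit_on);	/* __assign_bit() with a bool */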
/**
* struct idpf_vec_regs
* @dyn_ctl_reg: Dynamic control interrupt register offset
@ -495,7 +510,9 @@ struct idpf_intr_reg {
* @v_idx: Vector index
* @intr_reg: See struct idpf_intr_reg
* @num_txq: Number of TX queues
* @num_complq: number of completion queues
* @tx: Array of TX queues to service
* @complq: array of completion queues
* @tx_dim: Data for TX net_dim algorithm
* @tx_itr_value: TX interrupt throttling rate
* @tx_intr_mode: Dynamic ITR or not
@ -519,21 +536,24 @@ struct idpf_q_vector {
struct idpf_intr_reg intr_reg;
u16 num_txq;
struct idpf_queue **tx;
u16 num_complq;
struct idpf_tx_queue **tx;
struct idpf_compl_queue **complq;
struct dim tx_dim;
u16 tx_itr_value;
bool tx_intr_mode;
u32 tx_itr_idx;
u16 num_rxq;
struct idpf_queue **rx;
struct idpf_rx_queue **rx;
struct dim rx_dim;
u16 rx_itr_value;
bool rx_intr_mode;
u32 rx_itr_idx;
u16 num_bufq;
struct idpf_queue **bufq;
struct idpf_buf_queue **bufq;
u16 total_events;
char *name;
@ -564,11 +584,6 @@ struct idpf_cleaned_stats {
u32 bytes;
};
union idpf_queue_stats {
struct idpf_rx_queue_stats rx;
struct idpf_tx_queue_stats tx;
};
#define IDPF_ITR_DYNAMIC 1
#define IDPF_ITR_MAX 0x1FE0
#define IDPF_ITR_20K 0x0032
@ -584,39 +599,114 @@ union idpf_queue_stats {
#define IDPF_DIM_DEFAULT_PROFILE_IX 1
/**
* struct idpf_queue
* @dev: Device back pointer for DMA mapping
* @vport: Back pointer to associated vport
* @txq_grp: See struct idpf_txq_group
* @rxq_grp: See struct idpf_rxq_group
* @idx: For buffer queue, it is used as group id, either 0 or 1. On clean,
* buffer queue uses this index to determine which group of refill queues
* to clean.
* For TX queue, it is used as index to map between TX queue group and
* hot path TX pointers stored in vport. Used in both singleq/splitq.
* For RX queue, it is used to index to total RX queue across groups and
* used for skb reporting.
* @tail: Tail offset. Used for both queue models single and split. In splitq
* model relevant only for TX queue and RX queue.
* @tx_buf: See struct idpf_tx_buf
* @rx_buf: Struct with RX buffer related members
* @rx_buf.buf: See struct idpf_rx_buf
* @rx_buf.hdr_buf_pa: DMA handle
* @rx_buf.hdr_buf_va: Virtual address
* @pp: Page pool pointer
* @skb: Pointer to the skb
* @q_type: Queue type (TX, RX, TX completion, RX buffer)
* @q_id: Queue id
* @desc_count: Number of descriptors
* @next_to_use: Next descriptor to use. Relevant in both split & single txq
* and bufq.
* @next_to_clean: Next descriptor to clean. In split queue model, only
* relevant to TX completion queue and RX queue.
* @next_to_alloc: RX buffer to allocate at. Used only for RX. In splitq model
* only relevant to RX queue.
* struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode
* @buf_stack: Stack of empty buffers to store buffer info for out of order
* buffer completions. See struct idpf_buf_lifo
* @sched_buf_hash: Hash table to store buffers
*/
struct idpf_txq_stash {
struct idpf_buf_lifo buf_stack;
DECLARE_HASHTABLE(sched_buf_hash, 12);
} ____cacheline_aligned;
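
A hedged sketch of how the stash might be wired up at queue-group allocation time, consistent with the @stashes array documented in struct idpf_txq_group further down. The actual allocation code lives in the file whose diff is suppressed on this page, so treat the shape below as illustrative only; flow_sch_en and num_txq are stand-in locals, and hash_init() is the standard linux/hashtable.h helper.

/* Allocate one stash per Tx queue only when flow scheduling is enabled,
 * instead of embedding the 32 KB hashtable in every queue.
 */
if (flow_sch_en) {
	tx_qgrp->stashes = kcalloc(num_txq, sizeof(*tx_qgrp->stashes),
				   GFP_KERNEL);
	if (!tx_qgrp->stashes)
		return -ENOMEM;
}

for (j = 0; j < num_txq; j++) {
	struct idpf_tx_queue *q = tx_qgrp->txqs[j];

	if (!flow_sch_en)
		continue;

	q->stash = &tx_qgrp->stashes[j];
	hash_init(q->stash->sched_buf_hash);
	idpf_queue_set(FLOW_SCH_EN, q);
}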
/**
* struct idpf_rx_queue - software structure representing a receive queue
* @rx: universal receive descriptor array
* @single_buf: buffer descriptor array in singleq
* @desc_ring: virtual descriptor ring address
* @bufq_sets: Pointer to the array of buffer queues in splitq mode
* @napi: NAPI instance corresponding to this queue (splitq)
* @rx_buf: See struct idpf_rx_buf
* @pp: Page pool pointer in singleq mode
* @netdev: &net_device corresponding to this queue
* @tail: Tail offset. Used for both queue models single and split.
* @flags: See enum idpf_queue_flags_t
* @q_stats: See union idpf_queue_stats
* @idx: For RX queue, it is used to index to total RX queue across groups and
* used for skb reporting.
* @desc_count: Number of descriptors
* @next_to_use: Next descriptor to use
* @next_to_clean: Next descriptor to clean
* @next_to_alloc: RX buffer to allocate at
* @rxdids: Supported RX descriptor ids
* @rx_ptype_lkup: LUT of Rx ptypes
* @skb: Pointer to the skb
* @stats_sync: See struct u64_stats_sync
* @q_stats: See union idpf_rx_queue_stats
* @q_id: Queue id
* @size: Length of descriptor ring in bytes
* @dma: Physical address of ring
* @q_vector: Backreference to associated vector
* @rx_buffer_low_watermark: RX buffer low watermark
* @rx_hbuf_size: Header buffer size
* @rx_buf_size: Buffer size
* @rx_max_pkt_size: RX max packet size
*/
struct idpf_rx_queue {
union {
union virtchnl2_rx_desc *rx;
struct virtchnl2_singleq_rx_buf_desc *single_buf;
void *desc_ring;
};
union {
struct {
struct idpf_bufq_set *bufq_sets;
struct napi_struct *napi;
};
struct {
struct idpf_rx_buf *rx_buf;
struct page_pool *pp;
};
};
struct net_device *netdev;
void __iomem *tail;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u16 idx;
u16 desc_count;
u16 next_to_use;
u16 next_to_clean;
u16 next_to_alloc;
u32 rxdids;
const struct idpf_rx_ptype_decoded *rx_ptype_lkup;
struct sk_buff *skb;
struct u64_stats_sync stats_sync;
struct idpf_rx_queue_stats q_stats;
/* Slowpath */
u32 q_id;
u32 size;
dma_addr_t dma;
struct idpf_q_vector *q_vector;
u16 rx_buffer_low_watermark;
u16 rx_hbuf_size;
u16 rx_buf_size;
u16 rx_max_pkt_size;
} ____cacheline_aligned;
/**
* struct idpf_tx_queue - software structure representing a transmit queue
* @base_tx: base Tx descriptor array
* @base_ctx: base Tx context descriptor array
* @flex_tx: flex Tx descriptor array
* @flex_ctx: flex Tx context descriptor array
* @desc_ring: virtual descriptor ring address
* @tx_buf: See struct idpf_tx_buf
* @txq_grp: See struct idpf_txq_group
* @dev: Device back pointer for DMA mapping
* @tail: Tail offset. Used for both queue models single and split
* @flags: See enum idpf_queue_flags_t
* @idx: For TX queue, it is used as index to map between TX queue group and
* hot path TX pointers stored in vport. Used in both singleq/splitq.
* @desc_count: Number of descriptors
* @next_to_use: Next descriptor to use
* @next_to_clean: Next descriptor to clean
* @netdev: &net_device corresponding to this queue
* @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on
* the TX completion queue, it can be for any TXQ associated
* with that completion queue. This means we can clean up to
@ -625,34 +715,10 @@ union idpf_queue_stats {
* that single call to clean the completion queue. By doing so,
* we can update BQL with aggregate cleaned stats for each TXQ
* only once at the end of the cleaning routine.
* @clean_budget: singleq only, queue cleaning budget
* @cleaned_pkts: Number of packets cleaned for the above said case
* @rx_hsplit_en: RX headsplit enable
* @rx_hbuf_size: Header buffer size
* @rx_buf_size: Buffer size
* @rx_max_pkt_size: RX max packet size
* @rx_buf_stride: RX buffer stride
* @rx_buffer_low_watermark: RX buffer low watermark
* @rxdids: Supported RX descriptor ids
* @q_vector: Backreference to associated vector
* @size: Length of descriptor ring in bytes
* @dma: Physical address of ring
* @rx: universal receive descriptor array
* @single_buf: Rx buffer descriptor array in singleq
* @split_buf: Rx buffer descriptor array in splitq
* @base_tx: basic Tx descriptor array
* @base_ctx: basic Tx context descriptor array
* @flex_tx: flex Tx descriptor array
* @flex_ctx: flex Tx context descriptor array
* @comp: completion descriptor array
* @desc_ring: virtual descriptor ring address
* @tx_max_bufs: Max buffers that can be transmitted with scatter-gather
* @tx_min_pkt_len: Min supported packet length
* @num_completions: Only relevant for TX completion queue. It tracks the
* number of completions received to compare against the
* number of completions pending, as accumulated by the
* TX queues.
* @buf_stack: Stack of empty buffers to store buffer info for out of order
* buffer completions. See struct idpf_buf_lifo.
* @compl_tag_bufid_m: Completion tag buffer id mask
* @compl_tag_gen_s: Completion tag generation bit
* The format of the completion tag will change based on the TXQ
@ -676,74 +742,44 @@ union idpf_queue_stats {
* This gives us 8*8160 = 65280 possible unique values.
* @compl_tag_cur_gen: Used to keep track of current completion tag generation
* @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset
* @sched_buf_hash: Hash table to stores buffers
* @stash: Tx buffer stash for Flow-based scheduling mode
* @stats_sync: See struct u64_stats_sync
* @q_stats: See union idpf_tx_queue_stats
* @q_id: Queue id
* @size: Length of descriptor ring in bytes
* @dma: Physical address of ring
* @q_vector: Backreference to associated vector
*/
struct idpf_queue {
struct device *dev;
struct idpf_vport *vport;
struct idpf_tx_queue {
union {
struct idpf_txq_group *txq_grp;
struct idpf_rxq_group *rxq_grp;
};
u16 idx;
void __iomem *tail;
union {
struct idpf_tx_buf *tx_buf;
struct {
struct idpf_rx_buf *buf;
dma_addr_t hdr_buf_pa;
void *hdr_buf_va;
} rx_buf;
};
struct page_pool *pp;
struct sk_buff *skb;
u16 q_type;
u32 q_id;
u16 desc_count;
u16 next_to_use;
u16 next_to_clean;
u16 next_to_alloc;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
union idpf_queue_stats q_stats;
struct u64_stats_sync stats_sync;
u32 cleaned_bytes;
u16 cleaned_pkts;
bool rx_hsplit_en;
u16 rx_hbuf_size;
u16 rx_buf_size;
u16 rx_max_pkt_size;
u16 rx_buf_stride;
u8 rx_buffer_low_watermark;
u64 rxdids;
struct idpf_q_vector *q_vector;
unsigned int size;
dma_addr_t dma;
union {
union virtchnl2_rx_desc *rx;
struct virtchnl2_singleq_rx_buf_desc *single_buf;
struct virtchnl2_splitq_rx_buf_desc *split_buf;
struct idpf_base_tx_desc *base_tx;
struct idpf_base_tx_ctx_desc *base_ctx;
union idpf_tx_flex_desc *flex_tx;
struct idpf_flex_tx_ctx_desc *flex_ctx;
struct idpf_splitq_tx_compl_desc *comp;
void *desc_ring;
};
struct idpf_tx_buf *tx_buf;
struct idpf_txq_group *txq_grp;
struct device *dev;
void __iomem *tail;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u16 idx;
u16 desc_count;
u16 next_to_use;
u16 next_to_clean;
struct net_device *netdev;
union {
u32 cleaned_bytes;
u32 clean_budget;
};
u16 cleaned_pkts;
u16 tx_max_bufs;
u8 tx_min_pkt_len;
u32 num_completions;
struct idpf_buf_lifo buf_stack;
u16 tx_min_pkt_len;
u16 compl_tag_bufid_m;
u16 compl_tag_gen_s;
@ -751,45 +787,143 @@ struct idpf_queue {
u16 compl_tag_cur_gen;
u16 compl_tag_gen_max;
DECLARE_HASHTABLE(sched_buf_hash, 12);
} ____cacheline_internodealigned_in_smp;
struct idpf_txq_stash *stash;
struct u64_stats_sync stats_sync;
struct idpf_tx_queue_stats q_stats;
/* Slowpath */
u32 q_id;
u32 size;
dma_addr_t dma;
struct idpf_q_vector *q_vector;
} ____cacheline_aligned;
/**
* struct idpf_buf_queue - software structure representing a buffer queue
* @split_buf: buffer descriptor array
* @rx_buf: Struct with RX buffer related members
* @rx_buf.buf: See struct idpf_rx_buf
* @rx_buf.hdr_buf_pa: DMA handle
* @rx_buf.hdr_buf_va: Virtual address
* @pp: Page pool pointer
* @tail: Tail offset
* @flags: See enum idpf_queue_flags_t
* @desc_count: Number of descriptors
* @next_to_use: Next descriptor to use
* @next_to_clean: Next descriptor to clean
* @next_to_alloc: RX buffer to allocate at
* @q_id: Queue id
* @size: Length of descriptor ring in bytes
* @dma: Physical address of ring
* @q_vector: Backreference to associated vector
* @rx_buffer_low_watermark: RX buffer low watermark
* @rx_hbuf_size: Header buffer size
* @rx_buf_size: Buffer size
*/
struct idpf_buf_queue {
struct virtchnl2_splitq_rx_buf_desc *split_buf;
struct {
struct idpf_rx_buf *buf;
dma_addr_t hdr_buf_pa;
void *hdr_buf_va;
} rx_buf;
struct page_pool *pp;
void __iomem *tail;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u16 desc_count;
u16 next_to_use;
u16 next_to_clean;
u16 next_to_alloc;
/* Slowpath */
u32 q_id;
u32 size;
dma_addr_t dma;
struct idpf_q_vector *q_vector;
u16 rx_buffer_low_watermark;
u16 rx_hbuf_size;
u16 rx_buf_size;
} ____cacheline_aligned;
/**
* struct idpf_compl_queue - software structure representing a completion queue
* @comp: completion descriptor array
* @txq_grp: See struct idpf_txq_group
* @flags: See enum idpf_queue_flags_t
* @desc_count: Number of descriptors
* @next_to_use: Next descriptor to use. Relevant in both split & single txq
* and bufq.
* @next_to_clean: Next descriptor to clean
* @netdev: &net_device corresponding to this queue
* @clean_budget: queue cleaning budget
* @num_completions: Only relevant for TX completion queue. It tracks the
* number of completions received to compare against the
* number of completions pending, as accumulated by the
* TX queues.
* @q_id: Queue id
* @size: Length of descriptor ring in bytes
* @dma: Physical address of ring
* @q_vector: Backreference to associated vector
*/
struct idpf_compl_queue {
struct idpf_splitq_tx_compl_desc *comp;
struct idpf_txq_group *txq_grp;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u16 desc_count;
u16 next_to_use;
u16 next_to_clean;
struct net_device *netdev;
u32 clean_budget;
u32 num_completions;
/* Slowpath */
u32 q_id;
u32 size;
dma_addr_t dma;
struct idpf_q_vector *q_vector;
} ____cacheline_aligned;
/**
* struct idpf_sw_queue
* @next_to_clean: Next descriptor to clean
* @next_to_alloc: Buffer to allocate at
* @flags: See enum idpf_queue_flags_t
* @ring: Pointer to the ring
* @flags: See enum idpf_queue_flags_t
* @desc_count: Descriptor count
* @dev: Device back pointer for DMA mapping
* @next_to_use: Buffer to allocate at
* @next_to_clean: Next descriptor to clean
*
* Software queues are used in splitq mode to manage buffers between rxq
* producer and the bufq consumer. These are required in order to maintain a
* lockless buffer management system and are strictly software only constructs.
*/
struct idpf_sw_queue {
u16 next_to_clean;
u16 next_to_alloc;
u32 *ring;
DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
u16 *ring;
u16 desc_count;
struct device *dev;
} ____cacheline_internodealigned_in_smp;
u16 next_to_use;
u16 next_to_clean;
} ____cacheline_aligned;
/**
* struct idpf_rxq_set
* @rxq: RX queue
* @refillq0: Pointer to refill queue 0
* @refillq1: Pointer to refill queue 1
* @refillq: pointers to refill queues
*
* Splitq only. idpf_rxq_set associates an rxq with at an array of refillqs.
* Each rxq needs a refillq to return used buffers back to the respective bufq.
* Bufqs then clean these refillqs for buffers to give to hardware.
*/
struct idpf_rxq_set {
struct idpf_queue rxq;
struct idpf_sw_queue *refillq0;
struct idpf_sw_queue *refillq1;
struct idpf_rx_queue rxq;
struct idpf_sw_queue *refillq[IDPF_MAX_BUFQS_PER_RXQ_GRP];
};
/**
@ -808,7 +942,7 @@ struct idpf_rxq_set {
* managed by at most two bufqs (depending on performance configuration).
*/
struct idpf_bufq_set {
struct idpf_queue bufq;
struct idpf_buf_queue bufq;
int num_refillqs;
struct idpf_sw_queue *refillqs;
};
@ -834,7 +968,7 @@ struct idpf_rxq_group {
union {
struct {
u16 num_rxq;
struct idpf_queue *rxqs[IDPF_LARGE_MAX_Q];
struct idpf_rx_queue *rxqs[IDPF_LARGE_MAX_Q];
} singleq;
struct {
u16 num_rxq_sets;
@ -849,6 +983,7 @@ struct idpf_rxq_group {
* @vport: Vport back pointer
* @num_txq: Number of TX queues associated
* @txqs: Array of TX queue pointers
* @stashes: array of OOO stashes for the queues
* @complq: Associated completion queue pointer, split queue only
* @num_completions_pending: Total number of completions pending for the
* completion queue, acculumated for all TX queues
@ -862,9 +997,10 @@ struct idpf_txq_group {
struct idpf_vport *vport;
u16 num_txq;
struct idpf_queue *txqs[IDPF_LARGE_MAX_Q];
struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q];
struct idpf_txq_stash *stashes;
struct idpf_queue *complq;
struct idpf_compl_queue *complq;
u32 num_completions_pending;
};
@ -1001,28 +1137,26 @@ void idpf_deinit_rss(struct idpf_vport *vport);
int idpf_rx_bufs_init_all(struct idpf_vport *vport);
void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int size);
struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq,
struct idpf_rx_buf *rx_buf,
unsigned int size);
bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf);
void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val);
void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
bool xmit_more);
unsigned int idpf_size_to_txd_count(unsigned int size);
netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb);
void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb);
void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb,
struct idpf_tx_buf *first, u16 ring_idx);
unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq,
struct sk_buff *skb);
bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
unsigned int count);
int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size);
int idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q, unsigned int size);
void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue);
netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
struct net_device *netdev);
netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
struct net_device *netdev);
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq,
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
u16 cleaned_count);
int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);


@ -750,7 +750,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport)
int i;
for (i = 0; i < vport->num_txq; i++)
set_bit(__IDPF_Q_SW_MARKER, vport->txqs[i]->flags);
idpf_queue_set(SW_MARKER, vport->txqs[i]);
event = wait_event_timeout(vport->sw_marker_wq,
test_and_clear_bit(IDPF_VPORT_SW_MARKER,
@ -758,7 +758,7 @@ static int idpf_wait_for_marker_event(struct idpf_vport *vport)
msecs_to_jiffies(500));
for (i = 0; i < vport->num_txq; i++)
clear_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
idpf_queue_clear(POLL_MODE, vport->txqs[i]);
if (event)
return 0;
@ -1092,7 +1092,6 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
int num_regs, u32 q_type)
{
struct idpf_adapter *adapter = vport->adapter;
struct idpf_queue *q;
int i, j, k = 0;
switch (q_type) {
@ -1111,6 +1110,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
u16 num_rxq = rx_qgrp->singleq.num_rxq;
for (j = 0; j < num_rxq && k < num_regs; j++, k++) {
struct idpf_rx_queue *q;
q = rx_qgrp->singleq.rxqs[j];
q->tail = idpf_get_reg_addr(adapter,
reg_vals[k]);
@ -1123,6 +1124,8 @@ static int __idpf_queue_reg_init(struct idpf_vport *vport, u32 *reg_vals,
u8 num_bufqs = vport->num_bufqs_per_qgrp;
for (j = 0; j < num_bufqs && k < num_regs; j++, k++) {
struct idpf_buf_queue *q;
q = &rx_qgrp->splitq.bufq_sets[j].bufq;
q->tail = idpf_get_reg_addr(adapter,
reg_vals[k]);
@ -1449,19 +1452,19 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
qi[k].model =
cpu_to_le16(vport->txq_model);
qi[k].type =
cpu_to_le32(tx_qgrp->txqs[j]->q_type);
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX);
qi[k].ring_len =
cpu_to_le16(tx_qgrp->txqs[j]->desc_count);
qi[k].dma_ring_addr =
cpu_to_le64(tx_qgrp->txqs[j]->dma);
if (idpf_is_queue_model_split(vport->txq_model)) {
struct idpf_queue *q = tx_qgrp->txqs[j];
struct idpf_tx_queue *q = tx_qgrp->txqs[j];
qi[k].tx_compl_queue_id =
cpu_to_le16(tx_qgrp->complq->q_id);
qi[k].relative_queue_id = cpu_to_le16(j);
if (test_bit(__IDPF_Q_FLOW_SCH_EN, q->flags))
if (idpf_queue_has(FLOW_SCH_EN, q))
qi[k].sched_mode =
cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW);
else
@ -1478,11 +1481,11 @@ static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport)
qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
qi[k].model = cpu_to_le16(vport->txq_model);
qi[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION);
qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count);
qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma);
if (test_bit(__IDPF_Q_FLOW_SCH_EN, tx_qgrp->complq->flags))
if (idpf_queue_has(FLOW_SCH_EN, tx_qgrp->complq))
sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
else
sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE;
@ -1567,17 +1570,18 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
goto setup_rxqs;
for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
struct idpf_queue *bufq =
struct idpf_buf_queue *bufq =
&rx_qgrp->splitq.bufq_sets[j].bufq;
qi[k].queue_id = cpu_to_le32(bufq->q_id);
qi[k].model = cpu_to_le16(vport->rxq_model);
qi[k].type = cpu_to_le32(bufq->q_type);
qi[k].type =
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER);
qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M);
qi[k].ring_len = cpu_to_le16(bufq->desc_count);
qi[k].dma_ring_addr = cpu_to_le64(bufq->dma);
qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size);
qi[k].buffer_notif_stride = bufq->rx_buf_stride;
qi[k].buffer_notif_stride = IDPF_RX_BUF_STRIDE;
qi[k].rx_buffer_low_watermark =
cpu_to_le16(bufq->rx_buffer_low_watermark);
if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW))
@ -1591,7 +1595,7 @@ setup_rxqs:
num_rxq = rx_qgrp->singleq.num_rxq;
for (j = 0; j < num_rxq; j++, k++) {
struct idpf_queue *rxq;
struct idpf_rx_queue *rxq;
if (!idpf_is_queue_model_split(vport->rxq_model)) {
rxq = rx_qgrp->singleq.rxqs[j];
@ -1599,11 +1603,11 @@ setup_rxqs:
}
rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
qi[k].rx_bufq1_id =
cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[0].bufq.q_id);
cpu_to_le16(rxq->bufq_sets[0].bufq.q_id);
if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) {
qi[k].bufq2_ena = IDPF_BUFQ2_ENA;
qi[k].rx_bufq2_id =
cpu_to_le16(rxq->rxq_grp->splitq.bufq_sets[1].bufq.q_id);
cpu_to_le16(rxq->bufq_sets[1].bufq.q_id);
}
qi[k].rx_buffer_low_watermark =
cpu_to_le16(rxq->rx_buffer_low_watermark);
@ -1611,7 +1615,7 @@ setup_rxqs:
qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC);
common_qi_fields:
if (rxq->rx_hsplit_en) {
if (idpf_queue_has(HSPLIT_EN, rxq)) {
qi[k].qflags |=
cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT);
qi[k].hdr_buffer_size =
@ -1619,7 +1623,7 @@ common_qi_fields:
}
qi[k].queue_id = cpu_to_le32(rxq->q_id);
qi[k].model = cpu_to_le16(vport->rxq_model);
qi[k].type = cpu_to_le32(rxq->q_type);
qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
qi[k].ring_len = cpu_to_le16(rxq->desc_count);
qi[k].dma_ring_addr = cpu_to_le64(rxq->dma);
qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size);
@ -1706,7 +1710,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
qc[k].type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX);
qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
}
@ -1720,7 +1724,7 @@ static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena)
for (i = 0; i < vport->num_txq_grp; i++, k++) {
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
qc[k].type = cpu_to_le32(tx_qgrp->complq->q_type);
qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION);
qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id);
qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
}
@ -1741,12 +1745,12 @@ setup_rx:
qc[k].start_queue_id =
cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id);
qc[k].type =
cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_type);
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
} else {
qc[k].start_queue_id =
cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id);
qc[k].type =
cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_type);
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
}
qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
}
@ -1761,10 +1765,11 @@ setup_rx:
struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) {
struct idpf_queue *q;
const struct idpf_buf_queue *q;
q = &rx_qgrp->splitq.bufq_sets[j].bufq;
qc[k].type = cpu_to_le32(q->q_type);
qc[k].type =
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER);
qc[k].start_queue_id = cpu_to_le32(q->q_id);
qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK);
}
@ -1849,7 +1854,8 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
for (j = 0; j < tx_qgrp->num_txq; j++, k++) {
vqv[k].queue_type = cpu_to_le32(tx_qgrp->txqs[j]->q_type);
vqv[k].queue_type =
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX);
vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id);
if (idpf_is_queue_model_split(vport->txq_model)) {
@ -1879,14 +1885,15 @@ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map)
num_rxq = rx_qgrp->singleq.num_rxq;
for (j = 0; j < num_rxq; j++, k++) {
struct idpf_queue *rxq;
struct idpf_rx_queue *rxq;
if (idpf_is_queue_model_split(vport->rxq_model))
rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
else
rxq = rx_qgrp->singleq.rxqs[j];
vqv[k].queue_type = cpu_to_le32(rxq->q_type);
vqv[k].queue_type =
cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX);
vqv[k].queue_id = cpu_to_le32(rxq->q_id);
vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx);
vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx);
@ -1975,7 +1982,7 @@ int idpf_send_disable_queues_msg(struct idpf_vport *vport)
* queues virtchnl message is sent
*/
for (i = 0; i < vport->num_txq; i++)
set_bit(__IDPF_Q_POLL_MODE, vport->txqs[i]->flags);
idpf_queue_set(POLL_MODE, vport->txqs[i]);
/* schedule the napi to receive all the marker packets */
local_bh_disable();
@ -3242,7 +3249,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
int num_qids,
u32 q_type)
{
struct idpf_queue *q;
int i, j, k = 0;
switch (q_type) {
@ -3250,11 +3256,8 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
for (i = 0; i < vport->num_txq_grp; i++) {
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++) {
for (j = 0; j < tx_qgrp->num_txq && k < num_qids; j++, k++)
tx_qgrp->txqs[j]->q_id = qids[k];
tx_qgrp->txqs[j]->q_type =
VIRTCHNL2_QUEUE_TYPE_TX;
}
}
break;
case VIRTCHNL2_QUEUE_TYPE_RX:
@ -3268,12 +3271,13 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
num_rxq = rx_qgrp->singleq.num_rxq;
for (j = 0; j < num_rxq && k < num_qids; j++, k++) {
struct idpf_rx_queue *q;
if (idpf_is_queue_model_split(vport->rxq_model))
q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
else
q = rx_qgrp->singleq.rxqs[j];
q->q_id = qids[k];
q->q_type = VIRTCHNL2_QUEUE_TYPE_RX;
}
}
break;
@ -3282,8 +3286,6 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
tx_qgrp->complq->q_id = qids[k];
tx_qgrp->complq->q_type =
VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
}
break;
case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
@ -3292,9 +3294,10 @@ static int __idpf_vport_queue_ids_init(struct idpf_vport *vport,
u8 num_bufqs = vport->num_bufqs_per_qgrp;
for (j = 0; j < num_bufqs && k < num_qids; j++, k++) {
struct idpf_buf_queue *q;
q = &rx_qgrp->splitq.bufq_sets[j].bufq;
q->q_id = qids[k];
q->q_type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
}
}
break;