1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00
linux/net/sched/sch_fifo.c
Quang Le 647cef20e6 pfifo_tail_enqueue: Drop new packet when sch->limit == 0
Expected behaviour:
In case we reach scheduler's limit, pfifo_tail_enqueue() will drop a
packet in scheduler's queue and decrease scheduler's qlen by one.
Then, pfifo_tail_enqueue() enqueue new packet and increase
scheduler's qlen by one. Finally, pfifo_tail_enqueue() return
`NET_XMIT_CN` status code.

Weird behaviour:
In case we set `sch->limit == 0` and trigger pfifo_tail_enqueue() on a
scheduler that has no packet, the 'drop a packet' step will do nothing.
This means the scheduler's qlen still has value equal 0.
Then, we continue to enqueue new packet and increase scheduler's qlen by
one. In summary, we can leverage pfifo_tail_enqueue() to increase qlen by
one and return `NET_XMIT_CN` status code.

The problem is:
Let's say we have two qdiscs: Qdisc_A and Qdisc_B.
 - Qdisc_A's type must have '->graft()' function to create parent/child relationship.
   Let's say Qdisc_A's type is `hfsc`. Enqueue packet to this qdisc will trigger `hfsc_enqueue`.
 - Qdisc_B's type is pfifo_head_drop. Enqueue packet to this qdisc will trigger `pfifo_tail_enqueue`.
 - Qdisc_B is configured to have `sch->limit == 0`.
 - Qdisc_A is configured to route the enqueued's packet to Qdisc_B.

Enqueue packet through Qdisc_A will lead to:
 - hfsc_enqueue(Qdisc_A) -> pfifo_tail_enqueue(Qdisc_B)
 - Qdisc_B->q.qlen += 1
 - pfifo_tail_enqueue() return `NET_XMIT_CN`
 - hfsc_enqueue() check for `NET_XMIT_SUCCESS` and see `NET_XMIT_CN` => hfsc_enqueue() don't increase qlen of Qdisc_A.

The whole process lead to a situation where Qdisc_A->q.qlen == 0 and Qdisc_B->q.qlen == 1.
Replace 'hfsc' with other type (for example: 'drr') still lead to the same problem.
This violate the design where parent's qlen should equal to the sum of its childrens'qlen.

Bug impact: This issue can be used for user->kernel privilege escalation when it is reachable.

Fixes: 57dbb2d83d ("sched: add head drop fifo queue")
Reported-by: Quang Le <quanglex97@gmail.com>
Signed-off-by: Quang Le <quanglex97@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Link: https://patch.msgid.link/20250204005841.223511-2-xiyou.wangcong@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-02-05 18:13:58 -08:00

276 lines
6.2 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/sched/sch_fifo.c The simplest FIFO queue.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
/* 1 band FIFO pseudo-"scheduler" */
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
}
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
}
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
unsigned int prev_backlog;
if (unlikely(READ_ONCE(sch->limit) == 0))
return qdisc_drop(skb, sch, to_free);
if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
return qdisc_enqueue_tail(skb, sch);
prev_backlog = sch->qstats.backlog;
/* queue full, remove one skb to fulfill the limit */
__qdisc_queue_drop_head(sch, &sch->q, to_free);
qdisc_qstats_drop(sch);
qdisc_enqueue_tail(skb, sch);
qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog);
return NET_XMIT_CN;
}
static void fifo_offload_init(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_fifo_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_FIFO_REPLACE;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt);
}
static void fifo_offload_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_fifo_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_FIFO_DESTROY;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt);
}
static int fifo_offload_dump(struct Qdisc *sch)
{
struct tc_fifo_qopt_offload qopt;
qopt.command = TC_FIFO_STATS;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
qopt.stats.bstats = &sch->bstats;
qopt.stats.qstats = &sch->qstats;
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_FIFO, &qopt);
}
static int __fifo_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
bool bypass;
bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
if (opt == NULL) {
u32 limit = qdisc_dev(sch)->tx_queue_len;
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
WRITE_ONCE(sch->limit, limit);
} else {
struct tc_fifo_qopt *ctl = nla_data(opt);
if (nla_len(opt) < sizeof(*ctl))
return -EINVAL;
WRITE_ONCE(sch->limit, ctl->limit);
}
if (is_bfifo)
bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
else
bypass = sch->limit >= 1;
if (bypass)
sch->flags |= TCQ_F_CAN_BYPASS;
else
sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
static int fifo_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
int err;
err = __fifo_init(sch, opt, extack);
if (err)
return err;
fifo_offload_init(sch);
return 0;
}
static int fifo_hd_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
return __fifo_init(sch, opt, extack);
}
static void fifo_destroy(struct Qdisc *sch)
{
fifo_offload_destroy(sch);
}
static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct tc_fifo_qopt opt = { .limit = READ_ONCE(sch->limit) };
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
return skb->len;
nla_put_failure:
return -1;
}
static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
int err;
err = fifo_offload_dump(sch);
if (err)
return err;
return __fifo_dump(sch, skb);
}
static int fifo_hd_dump(struct Qdisc *sch, struct sk_buff *skb)
{
return __fifo_dump(sch, skb);
}
struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
.id = "pfifo",
.priv_size = 0,
.enqueue = pfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
.init = fifo_init,
.destroy = fifo_destroy,
.reset = qdisc_reset_queue,
.change = fifo_init,
.dump = fifo_dump,
.owner = THIS_MODULE,
};
EXPORT_SYMBOL(pfifo_qdisc_ops);
struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
.id = "bfifo",
.priv_size = 0,
.enqueue = bfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
.init = fifo_init,
.destroy = fifo_destroy,
.reset = qdisc_reset_queue,
.change = fifo_init,
.dump = fifo_dump,
.owner = THIS_MODULE,
};
EXPORT_SYMBOL(bfifo_qdisc_ops);
struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
.id = "pfifo_head_drop",
.priv_size = 0,
.enqueue = pfifo_tail_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
.init = fifo_hd_init,
.reset = qdisc_reset_queue,
.change = fifo_hd_init,
.dump = fifo_hd_dump,
.owner = THIS_MODULE,
};
/* Pass size change message down to embedded FIFO */
int fifo_set_limit(struct Qdisc *q, unsigned int limit)
{
struct nlattr *nla;
int ret = -ENOMEM;
/* Hack to avoid sending change message to non-FIFO */
if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
return 0;
if (!q->ops->change)
return 0;
nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (nla) {
nla->nla_type = RTM_NEWQDISC;
nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
ret = q->ops->change(q, nla, NULL);
kfree(nla);
}
return ret;
}
EXPORT_SYMBOL(fifo_set_limit);
struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
unsigned int limit,
struct netlink_ext_ack *extack)
{
struct Qdisc *q;
int err = -ENOMEM;
q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1),
extack);
if (q) {
err = fifo_set_limit(q, limit);
if (err < 0) {
qdisc_put(q);
q = NULL;
}
}
return q ? : ERR_PTR(err);
}
EXPORT_SYMBOL(fifo_create_dflt);
MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler");