mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 00:20:52 -05:00
e3ae1f96ac
Recent changes that removed rtnl dependency from rules update path of tc
also made tcf_block_put() function sleeping. This function is called from
ops->destroy() of several Qdisc implementations, which in turn is called by
qdisc_put(). Some Qdiscs call qdisc_put() while holding the sch tree spinlock,
which results in a sleeping-while-atomic BUG.
Steps to reproduce for sfb:
tc qdisc add dev ens1f0 handle 1: root sfb
tc qdisc add dev ens1f0 parent 1:10 handle 50: sfq perturb 10
tc qdisc change dev ens1f0 root handle 1: sfb
Resulting dmesg:
[ 7265.938717] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:909
[ 7265.940152] in_atomic(): 1, irqs_disabled(): 0, pid: 28579, name: tc
[ 7265.941455] INFO: lockdep is turned off.
[ 7265.942744] CPU: 11 PID: 28579 Comm: tc Tainted: G W 5.3.0-rc8+ #721
[ 7265.944065] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[ 7265.945396] Call Trace:
[ 7265.946709] dump_stack+0x85/0xc0
[ 7265.947994] ___might_sleep.cold+0xac/0xbc
[ 7265.949282] __mutex_lock+0x5b/0x960
[ 7265.950543] ? tcf_chain0_head_change_cb_del.isra.0+0x1b/0xf0
[ 7265.951803] ? tcf_chain0_head_change_cb_del.isra.0+0x1b/0xf0
[ 7265.953022] tcf_chain0_head_change_cb_del.isra.0+0x1b/0xf0
[ 7265.954248] tcf_block_put_ext.part.0+0x21/0x50
[ 7265.955478] tcf_block_put+0x50/0x70
[ 7265.956694] sfq_destroy+0x15/0x50 [sch_sfq]
[ 7265.957898] qdisc_destroy+0x5f/0x160
[ 7265.959099] sfb_change+0x175/0x330 [sch_sfb]
[ 7265.960304] tc_modify_qdisc+0x324/0x840
[ 7265.961503] rtnetlink_rcv_msg+0x170/0x4b0
[ 7265.962692] ? netlink_deliver_tap+0x95/0x400
[ 7265.963876] ? rtnl_dellink+0x2d0/0x2d0
[ 7265.965064] netlink_rcv_skb+0x49/0x110
[ 7265.966251] netlink_unicast+0x171/0x200
[ 7265.967427] netlink_sendmsg+0x224/0x3f0
[ 7265.968595] sock_sendmsg+0x5e/0x60
[ 7265.969753] ___sys_sendmsg+0x2ae/0x330
[ 7265.970916] ? ___sys_recvmsg+0x159/0x1f0
[ 7265.972074] ? do_wp_page+0x9c/0x790
[ 7265.973233] ? __handle_mm_fault+0xcd3/0x19e0
[ 7265.974407] __sys_sendmsg+0x59/0xa0
[ 7265.975591] do_syscall_64+0x5c/0xb0
[ 7265.976753] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 7265.977938] RIP: 0033:0x7f229069f7b8
[ 7265.979117] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 89 54
[ 7265.981681] RSP: 002b:00007ffd7ed2d158 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 7265.983001] RAX: ffffffffffffffda RBX: 000000005d813ca1 RCX: 00007f229069f7b8
[ 7265.984336] RDX: 0000000000000000 RSI: 00007ffd7ed2d1c0 RDI: 0000000000000003
[ 7265.985682] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000165c9a0
[ 7265.987021] R10: 0000000000404eda R11: 0000000000000246 R12: 0000000000000001
[ 7265.988309] R13: 000000000047f640 R14: 0000000000000000 R15: 0000000000000000
In sfb_change() function use qdisc_purge_queue() instead of
qdisc_tree_flush_backlog() to properly reset old child Qdisc and save
pointer to it into local temporary variable. Put reference to Qdisc after
sch tree lock is released in order not to call potentially sleeping cls API
in atomic section. This is safe to do because Qdisc has already been reset
by qdisc_purge_queue() inside sch tree lock critical section.
Reported-by: syzbot+ac54455281db908c581e@syzkaller.appspotmail.com
Fixes: c266f64dbf ("net: sched: protect block state with mutex")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
730 lines
17 KiB
C
730 lines
17 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* net/sched/sch_sfb.c Stochastic Fair Blue
|
|
*
|
|
* Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr>
|
|
* Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
|
|
*
|
|
* W. Feng, D. Kandlur, D. Saha, K. Shin. Blue:
|
|
* A New Class of Active Queue Management Algorithms.
|
|
* U. Michigan CSE-TR-387-99, April 1999.
|
|
*
|
|
* http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/random.h>
|
|
#include <linux/jhash.h>
|
|
#include <net/ip.h>
|
|
#include <net/pkt_sched.h>
|
|
#include <net/pkt_cls.h>
|
|
#include <net/inet_ecn.h>
|
|
|
|
/*
 * SFB keeps two B[l][n] arrays of bins, each with L levels of N bins.
 * This implementation uses L = 8 and N = 16, so that one 32-bit hash
 * (provided per packet by rxhash or by an external classifier) can be cut
 * into 8 sub-hashes of 4 bits, one per level.
 */
#define SFB_BUCKET_SHIFT	4
#define SFB_NUMBUCKETS		(1 << SFB_BUCKET_SHIFT)	/* N bins per level */
#define SFB_BUCKET_MASK		(SFB_NUMBUCKETS - 1)
#define SFB_LEVELS		(32 / SFB_BUCKET_SHIFT)	/* L */
|
|
|
|
/* SFB algo uses a virtual queue, named "bin" */
|
|
struct sfb_bucket {
|
|
u16 qlen; /* length of virtual queue */
|
|
u16 p_mark; /* marking probability */
|
|
};
|
|
|
|
/* We use a double buffering right before hash change
|
|
* (Section 4.4 of SFB reference : moving hash functions)
|
|
*/
|
|
struct sfb_bins {
|
|
u32 perturbation; /* jhash perturbation */
|
|
struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
|
|
};
|
|
|
|
struct sfb_sched_data {
|
|
struct Qdisc *qdisc;
|
|
struct tcf_proto __rcu *filter_list;
|
|
struct tcf_block *block;
|
|
unsigned long rehash_interval;
|
|
unsigned long warmup_time; /* double buffering warmup time in jiffies */
|
|
u32 max;
|
|
u32 bin_size; /* maximum queue length per bin */
|
|
u32 increment; /* d1 */
|
|
u32 decrement; /* d2 */
|
|
u32 limit; /* HARD maximal queue length */
|
|
u32 penalty_rate;
|
|
u32 penalty_burst;
|
|
u32 tokens_avail;
|
|
unsigned long rehash_time;
|
|
unsigned long token_time;
|
|
|
|
u8 slot; /* current active bins (0 or 1) */
|
|
bool double_buffering;
|
|
struct sfb_bins bins[2];
|
|
|
|
struct {
|
|
u32 earlydrop;
|
|
u32 penaltydrop;
|
|
u32 bucketdrop;
|
|
u32 queuedrop;
|
|
u32 childdrop; /* drops in child qdisc */
|
|
u32 marked; /* ECN mark */
|
|
} stats;
|
|
};
|
|
|
|
/*
|
|
* Each queued skb might be hashed on one or two bins
|
|
* We store in skb_cb the two hash values.
|
|
* (A zero value means double buffering was not used)
|
|
*/
|
|
struct sfb_skb_cb {
|
|
u32 hashes[2];
|
|
};
|
|
|
|
static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
|
|
{
|
|
qdisc_cb_private_validate(skb, sizeof(struct sfb_skb_cb));
|
|
return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
|
|
}
|
|
|
|
/*
|
|
* If using 'internal' SFB flow classifier, hash comes from skb rxhash
|
|
* If using external classifier, hash comes from the classid.
|
|
*/
|
|
static u32 sfb_hash(const struct sk_buff *skb, u32 slot)
|
|
{
|
|
return sfb_skb_cb(skb)->hashes[slot];
|
|
}
|
|
|
|
/* Probabilities are coded as Q0.16 fixed-point values,
|
|
* with 0xFFFF representing 65535/65536 (almost 1.0)
|
|
* Addition and subtraction are saturating in [0, 65535]
|
|
*/
|
|
static u32 prob_plus(u32 p1, u32 p2)
|
|
{
|
|
u32 res = p1 + p2;
|
|
|
|
return min_t(u32, res, SFB_MAX_PROB);
|
|
}
|
|
|
|
/* Saturating subtraction: yields 0 instead of wrapping below zero. */
static u32 prob_minus(u32 p1, u32 p2)
{
	if (p1 <= p2)
		return 0;

	return p1 - p2;
}
|
|
|
|
/*
 * Account one more packet in bin set @slot: at every level, the bin picked
 * by the next SFB_BUCKET_SHIFT bits of @sfbhash gets its qlen bumped,
 * saturating at 0xFFFF.
 */
static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
{
	int level;

	for (level = 0; level < SFB_LEVELS; level++) {
		struct sfb_bucket *bin;

		bin = &q->bins[slot].bins[level][sfbhash & SFB_BUCKET_MASK];
		sfbhash >>= SFB_BUCKET_SHIFT;

		if (bin->qlen < 0xFFFF)
			bin->qlen++;
	}
}
|
|
|
|
static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
|
|
{
|
|
u32 sfbhash;
|
|
|
|
sfbhash = sfb_hash(skb, 0);
|
|
if (sfbhash)
|
|
increment_one_qlen(sfbhash, 0, q);
|
|
|
|
sfbhash = sfb_hash(skb, 1);
|
|
if (sfbhash)
|
|
increment_one_qlen(sfbhash, 1, q);
|
|
}
|
|
|
|
/*
 * Remove one packet from bin set @slot: at every level, the bin picked by
 * the next SFB_BUCKET_SHIFT bits of @sfbhash gets its qlen lowered, never
 * going below zero.
 */
static void decrement_one_qlen(u32 sfbhash, u32 slot,
			       struct sfb_sched_data *q)
{
	int level;

	for (level = 0; level < SFB_LEVELS; level++) {
		struct sfb_bucket *bin;

		bin = &q->bins[slot].bins[level][sfbhash & SFB_BUCKET_MASK];
		sfbhash >>= SFB_BUCKET_SHIFT;

		if (bin->qlen > 0)
			bin->qlen--;
	}
}
|
|
|
|
static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
|
|
{
|
|
u32 sfbhash;
|
|
|
|
sfbhash = sfb_hash(skb, 0);
|
|
if (sfbhash)
|
|
decrement_one_qlen(sfbhash, 0, q);
|
|
|
|
sfbhash = sfb_hash(skb, 1);
|
|
if (sfbhash)
|
|
decrement_one_qlen(sfbhash, 1, q);
|
|
}
|
|
|
|
static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
|
|
{
|
|
b->p_mark = prob_minus(b->p_mark, q->decrement);
|
|
}
|
|
|
|
static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
|
|
{
|
|
b->p_mark = prob_plus(b->p_mark, q->increment);
|
|
}
|
|
|
|
static void sfb_zero_all_buckets(struct sfb_sched_data *q)
|
|
{
|
|
memset(&q->bins, 0, sizeof(q->bins));
|
|
}
|
|
|
|
/*
|
|
* compute max qlen, max p_mark, and avg p_mark
|
|
*/
|
|
static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q)
|
|
{
|
|
int i;
|
|
u32 qlen = 0, prob = 0, totalpm = 0;
|
|
const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0];
|
|
|
|
for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) {
|
|
if (qlen < b->qlen)
|
|
qlen = b->qlen;
|
|
totalpm += b->p_mark;
|
|
if (prob < b->p_mark)
|
|
prob = b->p_mark;
|
|
b++;
|
|
}
|
|
*prob_r = prob;
|
|
*avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS);
|
|
return qlen;
|
|
}
|
|
|
|
|
|
/* Give bin set @slot a fresh random hash perturbation. */
static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
{
	struct sfb_bins *set = &q->bins[slot];

	set->perturbation = prandom_u32();
}
|
|
|
|
static void sfb_swap_slot(struct sfb_sched_data *q)
|
|
{
|
|
sfb_init_perturbation(q->slot, q);
|
|
q->slot ^= 1;
|
|
q->double_buffering = false;
|
|
}
|
|
|
|
/* Non elastic flows are allowed to use part of the bandwidth, expressed
|
|
* in "penalty_rate" packets per second, with "penalty_burst" burst
|
|
*/
|
|
static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
|
|
{
|
|
if (q->penalty_rate == 0 || q->penalty_burst == 0)
|
|
return true;
|
|
|
|
if (q->tokens_avail < 1) {
|
|
unsigned long age = min(10UL * HZ, jiffies - q->token_time);
|
|
|
|
q->tokens_avail = (age * q->penalty_rate) / HZ;
|
|
if (q->tokens_avail > q->penalty_burst)
|
|
q->tokens_avail = q->penalty_burst;
|
|
q->token_time = jiffies;
|
|
if (q->tokens_avail < 1)
|
|
return true;
|
|
}
|
|
|
|
q->tokens_avail--;
|
|
return false;
|
|
}
|
|
|
|
/*
 * Run @skb through the classifier chain @fl.
 *
 * Returns true and stores the minor part of the resulting classid in
 * @salt when the packet is to be queued.  Returns false when
 * classification fails or an action took the packet away; for stolen,
 * queued and trapped packets @qerr is set to report success to the caller
 * of the qdisc.
 */
static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
			 int *qerr, u32 *salt)
{
	struct tcf_result res;
	int verdict = tcf_classify(skb, fl, &res, false);

	if (verdict < 0)
		return false;

#ifdef CONFIG_NET_CLS_ACT
	if (verdict == TC_ACT_STOLEN || verdict == TC_ACT_QUEUED ||
	    verdict == TC_ACT_TRAP) {
		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
		return false;
	}
	if (verdict == TC_ACT_SHOT)
		return false;
#endif

	*salt = TC_H_MIN(res.classid);
	return true;
}
|
|
|
|
static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
|
|
struct sk_buff **to_free)
|
|
{
|
|
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
struct Qdisc *child = q->qdisc;
|
|
struct tcf_proto *fl;
|
|
int i;
|
|
u32 p_min = ~0;
|
|
u32 minqlen = ~0;
|
|
u32 r, sfbhash;
|
|
u32 slot = q->slot;
|
|
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
|
|
|
|
if (unlikely(sch->q.qlen >= q->limit)) {
|
|
qdisc_qstats_overlimit(sch);
|
|
q->stats.queuedrop++;
|
|
goto drop;
|
|
}
|
|
|
|
if (q->rehash_interval > 0) {
|
|
unsigned long limit = q->rehash_time + q->rehash_interval;
|
|
|
|
if (unlikely(time_after(jiffies, limit))) {
|
|
sfb_swap_slot(q);
|
|
q->rehash_time = jiffies;
|
|
} else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
|
|
time_after(jiffies, limit - q->warmup_time))) {
|
|
q->double_buffering = true;
|
|
}
|
|
}
|
|
|
|
fl = rcu_dereference_bh(q->filter_list);
|
|
if (fl) {
|
|
u32 salt;
|
|
|
|
/* If using external classifiers, get result and record it. */
|
|
if (!sfb_classify(skb, fl, &ret, &salt))
|
|
goto other_drop;
|
|
sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
|
|
} else {
|
|
sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
|
|
}
|
|
|
|
|
|
if (!sfbhash)
|
|
sfbhash = 1;
|
|
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
|
|
|
|
for (i = 0; i < SFB_LEVELS; i++) {
|
|
u32 hash = sfbhash & SFB_BUCKET_MASK;
|
|
struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
|
|
|
|
sfbhash >>= SFB_BUCKET_SHIFT;
|
|
if (b->qlen == 0)
|
|
decrement_prob(b, q);
|
|
else if (b->qlen >= q->bin_size)
|
|
increment_prob(b, q);
|
|
if (minqlen > b->qlen)
|
|
minqlen = b->qlen;
|
|
if (p_min > b->p_mark)
|
|
p_min = b->p_mark;
|
|
}
|
|
|
|
slot ^= 1;
|
|
sfb_skb_cb(skb)->hashes[slot] = 0;
|
|
|
|
if (unlikely(minqlen >= q->max)) {
|
|
qdisc_qstats_overlimit(sch);
|
|
q->stats.bucketdrop++;
|
|
goto drop;
|
|
}
|
|
|
|
if (unlikely(p_min >= SFB_MAX_PROB)) {
|
|
/* Inelastic flow */
|
|
if (q->double_buffering) {
|
|
sfbhash = skb_get_hash_perturb(skb,
|
|
q->bins[slot].perturbation);
|
|
if (!sfbhash)
|
|
sfbhash = 1;
|
|
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
|
|
|
|
for (i = 0; i < SFB_LEVELS; i++) {
|
|
u32 hash = sfbhash & SFB_BUCKET_MASK;
|
|
struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
|
|
|
|
sfbhash >>= SFB_BUCKET_SHIFT;
|
|
if (b->qlen == 0)
|
|
decrement_prob(b, q);
|
|
else if (b->qlen >= q->bin_size)
|
|
increment_prob(b, q);
|
|
}
|
|
}
|
|
if (sfb_rate_limit(skb, q)) {
|
|
qdisc_qstats_overlimit(sch);
|
|
q->stats.penaltydrop++;
|
|
goto drop;
|
|
}
|
|
goto enqueue;
|
|
}
|
|
|
|
r = prandom_u32() & SFB_MAX_PROB;
|
|
|
|
if (unlikely(r < p_min)) {
|
|
if (unlikely(p_min > SFB_MAX_PROB / 2)) {
|
|
/* If we're marking that many packets, then either
|
|
* this flow is unresponsive, or we're badly congested.
|
|
* In either case, we want to start dropping packets.
|
|
*/
|
|
if (r < (p_min - SFB_MAX_PROB / 2) * 2) {
|
|
q->stats.earlydrop++;
|
|
goto drop;
|
|
}
|
|
}
|
|
if (INET_ECN_set_ce(skb)) {
|
|
q->stats.marked++;
|
|
} else {
|
|
q->stats.earlydrop++;
|
|
goto drop;
|
|
}
|
|
}
|
|
|
|
enqueue:
|
|
ret = qdisc_enqueue(skb, child, to_free);
|
|
if (likely(ret == NET_XMIT_SUCCESS)) {
|
|
qdisc_qstats_backlog_inc(sch, skb);
|
|
sch->q.qlen++;
|
|
increment_qlen(skb, q);
|
|
} else if (net_xmit_drop_count(ret)) {
|
|
q->stats.childdrop++;
|
|
qdisc_qstats_drop(sch);
|
|
}
|
|
return ret;
|
|
|
|
drop:
|
|
qdisc_drop(skb, sch, to_free);
|
|
return NET_XMIT_CN;
|
|
other_drop:
|
|
if (ret & __NET_XMIT_BYPASS)
|
|
qdisc_qstats_drop(sch);
|
|
kfree_skb(skb);
|
|
return ret;
|
|
}
|
|
|
|
static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
struct Qdisc *child = q->qdisc;
|
|
struct sk_buff *skb;
|
|
|
|
skb = child->dequeue(q->qdisc);
|
|
|
|
if (skb) {
|
|
qdisc_bstats_update(sch, skb);
|
|
qdisc_qstats_backlog_dec(sch, skb);
|
|
sch->q.qlen--;
|
|
decrement_qlen(skb, q);
|
|
}
|
|
|
|
return skb;
|
|
}
|
|
|
|
static struct sk_buff *sfb_peek(struct Qdisc *sch)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
struct Qdisc *child = q->qdisc;
|
|
|
|
return child->ops->peek(child);
|
|
}
|
|
|
|
/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */
|
|
|
|
static void sfb_reset(struct Qdisc *sch)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
|
|
qdisc_reset(q->qdisc);
|
|
sch->qstats.backlog = 0;
|
|
sch->q.qlen = 0;
|
|
q->slot = 0;
|
|
q->double_buffering = false;
|
|
sfb_zero_all_buckets(q);
|
|
sfb_init_perturbation(0, q);
|
|
}
|
|
|
|
static void sfb_destroy(struct Qdisc *sch)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
|
|
tcf_block_put(q->block);
|
|
qdisc_put(q->qdisc);
|
|
}
|
|
|
|
static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
|
|
[TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) },
|
|
};
|
|
|
|
static const struct tc_sfb_qopt sfb_default_ops = {
|
|
.rehash_interval = 600 * MSEC_PER_SEC,
|
|
.warmup_time = 60 * MSEC_PER_SEC,
|
|
.limit = 0,
|
|
.max = 25,
|
|
.bin_size = 20,
|
|
.increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */
|
|
.decrement = (SFB_MAX_PROB + 3000) / 6000,
|
|
.penalty_rate = 10,
|
|
.penalty_burst = 20,
|
|
};
|
|
|
|
static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
struct Qdisc *child, *old;
|
|
struct nlattr *tb[TCA_SFB_MAX + 1];
|
|
const struct tc_sfb_qopt *ctl = &sfb_default_ops;
|
|
u32 limit;
|
|
int err;
|
|
|
|
if (opt) {
|
|
err = nla_parse_nested_deprecated(tb, TCA_SFB_MAX, opt,
|
|
sfb_policy, NULL);
|
|
if (err < 0)
|
|
return -EINVAL;
|
|
|
|
if (tb[TCA_SFB_PARMS] == NULL)
|
|
return -EINVAL;
|
|
|
|
ctl = nla_data(tb[TCA_SFB_PARMS]);
|
|
}
|
|
|
|
limit = ctl->limit;
|
|
if (limit == 0)
|
|
limit = qdisc_dev(sch)->tx_queue_len;
|
|
|
|
child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit, extack);
|
|
if (IS_ERR(child))
|
|
return PTR_ERR(child);
|
|
|
|
if (child != &noop_qdisc)
|
|
qdisc_hash_add(child, true);
|
|
sch_tree_lock(sch);
|
|
|
|
qdisc_purge_queue(q->qdisc);
|
|
old = q->qdisc;
|
|
q->qdisc = child;
|
|
|
|
q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
|
|
q->warmup_time = msecs_to_jiffies(ctl->warmup_time);
|
|
q->rehash_time = jiffies;
|
|
q->limit = limit;
|
|
q->increment = ctl->increment;
|
|
q->decrement = ctl->decrement;
|
|
q->max = ctl->max;
|
|
q->bin_size = ctl->bin_size;
|
|
q->penalty_rate = ctl->penalty_rate;
|
|
q->penalty_burst = ctl->penalty_burst;
|
|
q->tokens_avail = ctl->penalty_burst;
|
|
q->token_time = jiffies;
|
|
|
|
q->slot = 0;
|
|
q->double_buffering = false;
|
|
sfb_zero_all_buckets(q);
|
|
sfb_init_perturbation(0, q);
|
|
sfb_init_perturbation(1, q);
|
|
|
|
sch_tree_unlock(sch);
|
|
qdisc_put(old);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int sfb_init(struct Qdisc *sch, struct nlattr *opt,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
int err;
|
|
|
|
err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
|
|
if (err)
|
|
return err;
|
|
|
|
q->qdisc = &noop_qdisc;
|
|
return sfb_change(sch, opt, extack);
|
|
}
|
|
|
|
static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
struct nlattr *opts;
|
|
struct tc_sfb_qopt opt = {
|
|
.rehash_interval = jiffies_to_msecs(q->rehash_interval),
|
|
.warmup_time = jiffies_to_msecs(q->warmup_time),
|
|
.limit = q->limit,
|
|
.max = q->max,
|
|
.bin_size = q->bin_size,
|
|
.increment = q->increment,
|
|
.decrement = q->decrement,
|
|
.penalty_rate = q->penalty_rate,
|
|
.penalty_burst = q->penalty_burst,
|
|
};
|
|
|
|
sch->qstats.backlog = q->qdisc->qstats.backlog;
|
|
opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
|
|
if (opts == NULL)
|
|
goto nla_put_failure;
|
|
if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt))
|
|
goto nla_put_failure;
|
|
return nla_nest_end(skb, opts);
|
|
|
|
nla_put_failure:
|
|
nla_nest_cancel(skb, opts);
|
|
return -EMSGSIZE;
|
|
}
|
|
|
|
static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
struct tc_sfb_xstats st = {
|
|
.earlydrop = q->stats.earlydrop,
|
|
.penaltydrop = q->stats.penaltydrop,
|
|
.bucketdrop = q->stats.bucketdrop,
|
|
.queuedrop = q->stats.queuedrop,
|
|
.childdrop = q->stats.childdrop,
|
|
.marked = q->stats.marked,
|
|
};
|
|
|
|
st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q);
|
|
|
|
return gnet_stats_copy_app(d, &st, sizeof(st));
|
|
}
|
|
|
|
static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
|
|
struct sk_buff *skb, struct tcmsg *tcm)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
|
|
static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
|
|
struct Qdisc **old, struct netlink_ext_ack *extack)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
|
|
if (new == NULL)
|
|
new = &noop_qdisc;
|
|
|
|
*old = qdisc_replace(sch, new, &q->qdisc);
|
|
return 0;
|
|
}
|
|
|
|
static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
|
|
return q->qdisc;
|
|
}
|
|
|
|
/* Every classid resolves to the same class handle, 1. */
static unsigned long sfb_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}
|
|
|
|
/* Nothing to undo when a filter is unbound. */
static void sfb_unbind(struct Qdisc *sch, unsigned long arg)
{
}
|
|
|
|
/* Classes cannot be created or modified: always fails with -ENOSYS. */
static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg,
			    struct netlink_ext_ack *extack)
{
	return -ENOSYS;
}
|
|
|
|
static int sfb_delete(struct Qdisc *sch, unsigned long cl)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
|
|
static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
|
|
{
|
|
if (!walker->stop) {
|
|
if (walker->count >= walker->skip)
|
|
if (walker->fn(sch, 1, walker) < 0) {
|
|
walker->stop = 1;
|
|
return;
|
|
}
|
|
walker->count++;
|
|
}
|
|
}
|
|
|
|
static struct tcf_block *sfb_tcf_block(struct Qdisc *sch, unsigned long cl,
|
|
struct netlink_ext_ack *extack)
|
|
{
|
|
struct sfb_sched_data *q = qdisc_priv(sch);
|
|
|
|
if (cl)
|
|
return NULL;
|
|
return q->block;
|
|
}
|
|
|
|
static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
|
|
u32 classid)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
|
|
static const struct Qdisc_class_ops sfb_class_ops = {
|
|
.graft = sfb_graft,
|
|
.leaf = sfb_leaf,
|
|
.find = sfb_find,
|
|
.change = sfb_change_class,
|
|
.delete = sfb_delete,
|
|
.walk = sfb_walk,
|
|
.tcf_block = sfb_tcf_block,
|
|
.bind_tcf = sfb_bind,
|
|
.unbind_tcf = sfb_unbind,
|
|
.dump = sfb_dump_class,
|
|
};
|
|
|
|
/* Qdisc operations registered under the id "sfb". */
static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
	.id		= "sfb",
	.priv_size	= sizeof(struct sfb_sched_data),
	.cl_ops		= &sfb_class_ops,
	.enqueue	= sfb_enqueue,
	.dequeue	= sfb_dequeue,
	.peek		= sfb_peek,
	.init		= sfb_init,
	.reset		= sfb_reset,
	.destroy	= sfb_destroy,
	.change		= sfb_change,
	.dump		= sfb_dump,
	.dump_stats	= sfb_dump_stats,
	.owner		= THIS_MODULE,
};
|
|
|
|
/* Module load: register the "sfb" qdisc operations. */
static int __init sfb_module_init(void)
{
	return register_qdisc(&sfb_qdisc_ops);
}
|
|
|
|
/* Module unload: unregister the "sfb" qdisc operations. */
static void __exit sfb_module_exit(void)
{
	unregister_qdisc(&sfb_qdisc_ops);
}
|
|
|
|
module_init(sfb_module_init)
|
|
module_exit(sfb_module_exit)
|
|
|
|
MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline");
|
|
MODULE_AUTHOR("Juliusz Chroboczek");
|
|
MODULE_AUTHOR("Eric Dumazet");
|
|
MODULE_LICENSE("GPL");
|