kmod-sched-cake: drop out of tree package, use kernel version

CAKE made it to kernel 4.19 and since OpenWrt now at kernel 4.19 we can
drop the out of tree cake package in base repository.

Add kmod-sched-cake to netsupport so package dependencies are still met.
Similarly CAKE is retained as an optional qdisc module to avoid base
scheduler package size implications.

Backport upstream patches from k5.1 to address some small bugs and
support fwmark usage.

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
This commit is contained in:
Kevin Darbyshire-Bryant 2019-07-03 09:22:55 +01:00
parent ea4e1dac71
commit 5c094ff660
6 changed files with 503 additions and 42 deletions

View File

@ -1,42 +0,0 @@
#
# Copyright (C) 2016 LEDE
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
#
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
PKG_NAME:=sched-cake
PKG_RELEASE:=1
PKG_SOURCE_PROTO:=git
PKG_SOURCE_URL:=https://github.com/dtaht/sch_cake.git
PKG_SOURCE_DATE:=2019-03-12
PKG_SOURCE_VERSION:=057c738801e9dc64e8dd72a3fc4f50734214433c
PKG_MIRROR_HASH:=5bf06a804824db36ae393fc174aeec7b12633176e05a765c0931b39df5bd34df
PKG_MAINTAINER:=Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
include $(INCLUDE_DIR)/package.mk
define KernelPackage/sched-cake
SUBMENU:=Network Support
TITLE:=Cake fq_codel/blue derived shaper
URL:=https://github.com/dtaht/sch_cake
FILES:=$(PKG_BUILD_DIR)/sch_cake.ko
AUTOLOAD:=$(call AutoLoad,75,sch_cake)
DEPENDS:=+kmod-ipt-conntrack
endef
include $(INCLUDE_DIR)/kernel-defaults.mk
define KernelPackage/sched-cake/description
Common Applications Kept Enhanced fq_codel/blue derived shaper
endef
define Build/Compile
$(KERNEL_MAKE) SUBDIRS="$(PKG_BUILD_DIR)" modules
endef
$(eval $(call KernelPackage,sched-cake))

View File

@ -789,6 +789,22 @@ endef
$(eval $(call KernelPackage,sched-core))
define KernelPackage/sched-cake
SUBMENU:=$(NETWORK_SUPPORT_MENU)
TITLE:=Cake fq_codel/blue derived shaper
DEPENDS:=+kmod-sched-core
KCONFIG:=CONFIG_NET_SCH_CAKE
FILES:=$(LINUX_DIR)/net/sched/sch_cake.ko
AUTOLOAD:=$(call AutoProbe,sch_cake)
DEPENDS:=+kmod-ipt-conntrack
endef
define KernelPackage/sched-cake/description
Common Applications Kept Enhanced fq_codel/blue derived shaper
endef
$(eval $(call KernelPackage,sched-cake))
define KernelPackage/sched-flower
SUBMENU:=$(NETWORK_SUPPORT_MENU)
TITLE:=Flower traffic classifier

View File

@ -0,0 +1,50 @@
From 8c6c37fdc20ec9ffaa342f827a8e20afe736fb0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 9 Jan 2019 17:09:44 +0100
Subject: [PATCH] sch_cake: Correctly update parent qlen when splitting GSO
packets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
To ensure parent qdiscs have the same notion of the number of enqueued
packets even after splitting a GSO packet, update the qdisc tree with the
number of packets that was added due to the split.
Reported-by: Pete Heist <pete@heistp.net>
Tested-by: Pete Heist <pete@heistp.net>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
net/sched/sch_cake.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1666,7 +1666,7 @@ static s32 cake_enqueue(struct sk_buff *
if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
- unsigned int slen = 0;
+ unsigned int slen = 0, numsegs = 0;
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
if (IS_ERR_OR_NULL(segs))
@@ -1682,6 +1682,7 @@ static s32 cake_enqueue(struct sk_buff *
flow_queue_add(flow, segs);
sch->q.qlen++;
+ numsegs++;
slen += segs->len;
q->buffer_used += segs->truesize;
b->packets++;
@@ -1695,7 +1696,7 @@ static s32 cake_enqueue(struct sk_buff *
sch->qstats.backlog += slen;
q->avg_window_bytes += slen;
- qdisc_tree_reduce_backlog(sch, 1, len);
+ qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen);
consume_skb(skb);
} else {
/* not splitting */

View File

@ -0,0 +1,217 @@
From 712639929912c5eefb09facccb48d55b3f72c9f8 Mon Sep 17 00:00:00 2001
From: George Amanakis <gamanakis@gmail.com>
Date: Fri, 1 Mar 2019 16:04:05 +0100
Subject: [PATCH] sch_cake: Make the dual modes fairer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
CAKE host fairness does not work well with TCP flows in dual-srchost and
dual-dsthost setup. The reason is that ACKs generated by TCP flows are
classified as sparse flows, and affect flow isolation from other hosts. Fix
this by calculating host_load based only on the bulk flows a host
generates. In a hash collision the host_bulk_flow_count values must be
decremented on the old hosts and incremented on the new ones *if* the queue
is in the bulk set.
Reported-by: Pete Heist <peteheist@gmail.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
net/sched/sch_cake.c | 92 ++++++++++++++++++++++++++++++--------------
1 file changed, 63 insertions(+), 29 deletions(-)
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -138,8 +138,8 @@ struct cake_flow {
struct cake_host {
u32 srchost_tag;
u32 dsthost_tag;
- u16 srchost_refcnt;
- u16 dsthost_refcnt;
+ u16 srchost_bulk_flow_count;
+ u16 dsthost_bulk_flow_count;
};
struct cake_heap_entry {
@@ -746,8 +746,10 @@ skip_hash:
* queue, accept the collision, update the host tags.
*/
q->way_collisions++;
- q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
- q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+ if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+ q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+ q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+ }
allocate_src = cake_dsrc(flow_mode);
allocate_dst = cake_ddst(flow_mode);
found:
@@ -767,13 +769,14 @@ found:
}
for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) {
- if (!q->hosts[outer_hash + k].srchost_refcnt)
+ if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
break;
}
q->hosts[outer_hash + k].srchost_tag = srchost_hash;
found_src:
srchost_idx = outer_hash + k;
- q->hosts[srchost_idx].srchost_refcnt++;
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ q->hosts[srchost_idx].srchost_bulk_flow_count++;
q->flows[reduced_hash].srchost = srchost_idx;
}
@@ -789,13 +792,14 @@ found_src:
}
for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) {
- if (!q->hosts[outer_hash + k].dsthost_refcnt)
+ if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
break;
}
q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
found_dst:
dsthost_idx = outer_hash + k;
- q->hosts[dsthost_idx].dsthost_refcnt++;
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
q->flows[reduced_hash].dsthost = dsthost_idx;
}
}
@@ -1793,20 +1797,30 @@ static s32 cake_enqueue(struct sk_buff *
b->sparse_flow_count++;
if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_refcnt);
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_refcnt);
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
flow->deficit = (b->flow_quantum *
quantum_div[host_load]) >> 16;
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+ struct cake_host *srchost = &b->hosts[flow->srchost];
+ struct cake_host *dsthost = &b->hosts[flow->dsthost];
+
/* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation.
*/
flow->set = CAKE_SET_BULK;
b->sparse_flow_count--;
b->bulk_flow_count++;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count++;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count++;
+
}
if (q->buffer_used > q->buffer_max_used)
@@ -1974,23 +1988,8 @@ retry:
dsthost = &b->hosts[flow->dsthost];
host_load = 1;
- if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_refcnt);
-
- if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_refcnt);
-
- WARN_ON(host_load > CAKE_QUEUES);
-
/* flow isolation (DRR++) */
if (flow->deficit <= 0) {
- /* The shifted prandom_u32() is a way to apply dithering to
- * avoid accumulating roundoff errors
- */
- flow->deficit += (b->flow_quantum * quantum_div[host_load] +
- (prandom_u32() >> 16)) >> 16;
- list_move_tail(&flow->flowchain, &b->old_flows);
-
/* Keep all flows with deficits out of the sparse and decaying
* rotations. No non-empty flow can go into the decaying
* rotation, so they can't get deficits
@@ -1999,6 +1998,13 @@ retry:
if (flow->head) {
b->sparse_flow_count--;
b->bulk_flow_count++;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count++;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count++;
+
flow->set = CAKE_SET_BULK;
} else {
/* we've moved it to the bulk rotation for
@@ -2008,6 +2014,22 @@ retry:
flow->set = CAKE_SET_SPARSE_WAIT;
}
}
+
+ if (cake_dsrc(q->flow_mode))
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
+
+ if (cake_ddst(q->flow_mode))
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+
+ WARN_ON(host_load > CAKE_QUEUES);
+
+ /* The shifted prandom_u32() is a way to apply dithering to
+ * avoid accumulating roundoff errors
+ */
+ flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+ (prandom_u32() >> 16)) >> 16;
+ list_move_tail(&flow->flowchain, &b->old_flows);
+
goto retry;
}
@@ -2028,6 +2050,13 @@ retry:
&b->decaying_flows);
if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count--;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count--;
+
b->decaying_flow_count++;
} else if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT) {
@@ -2041,14 +2070,19 @@ retry:
if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT)
b->sparse_flow_count--;
- else if (flow->set == CAKE_SET_BULK)
+ else if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- else
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count--;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count--;
+
+ } else
b->decaying_flow_count--;
flow->set = CAKE_SET_NONE;
- srchost->srchost_refcnt--;
- dsthost->dsthost_refcnt--;
}
goto begin;
}

View File

@ -0,0 +1,118 @@
From 0b5c7efdfc6e389ec6840579fe90bdb6f42b08dc Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Fri, 1 Mar 2019 16:04:05 +0100
Subject: [PATCH] sch_cake: Permit use of connmarks as tin classifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add flag 'FWMARK' to enable use of firewall connmarks as tin selector.
The connmark (skbuff->mark) needs to be in the range 1->tin_cnt ie.
for diffserv3 the mark needs to be 1->3.
Background
Typically CAKE uses DSCP as the basis for tin selection. DSCP values
are relatively easily changed as part of the egress path, usually with
iptables & the mangle table, ingress is more challenging. CAKE is often
used on the WAN interface of a residential gateway where passthrough of
DSCP from the ISP is either missing or set to unhelpful values thus use
of ingress DSCP values for tin selection isn't helpful in that
environment.
An approach to solving the ingress tin selection problem is to use
CAKE's understanding of tc filters. Naive tc filters could match on
source/destination port numbers and force tin selection that way, but
multiple filters don't scale particularly well as each filter must be
traversed whether it matches or not. e.g. a simple example to map 3
firewall marks to tins:
MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
tc filter add dev $DEV parent $MAJOR protocol all handle 0x01 fw action skbedit priority ${MAJOR}1
tc filter add dev $DEV parent $MAJOR protocol all handle 0x02 fw action skbedit priority ${MAJOR}2
tc filter add dev $DEV parent $MAJOR protocol all handle 0x03 fw action skbedit priority ${MAJOR}3
Another option is to use eBPF cls_act with tc filters e.g.
MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
tc filter add dev $DEV parent $MAJOR bpf da obj my-bpf-fwmark-to-class.o
This has the disadvantages of a) needing someone to write & maintain
the bpf program, b) a bpf toolchain to compile it and c) needing to
hardcode the major number in the bpf program so it matches the cake
instance (or forcing the cake instance to a particular major number)
since the major number cannot be passed to the bpf program via tc
command line.
As already hinted at by the previous examples, it would be helpful
to associate tins with something that survives the Internet path and
ideally allows tin selection on both egress and ingress. Netfilter's
conntrack permits setting an identifying mark on a connection which
can also be restored to an ingress packet with tc action connmark e.g.
tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \
match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb1
Since tc's connmark action has restored any connmark into skb->mark,
any of the previous solutions are based upon it and in one form or
another copy that mark to the skb->priority field where again CAKE
picks this up.
This change cuts out at least one of the (less intuitive &
non-scalable) middlemen and permit direct access to skb->mark.
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
include/uapi/linux/pkt_sched.h | 1 +
net/sched/sch_cake.c | 34 +++++++++++++++++++++++++++-------
2 files changed, 28 insertions(+), 7 deletions(-)
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -991,6 +991,7 @@ enum {
TCA_CAKE_INGRESS,
TCA_CAKE_ACK_FILTER,
TCA_CAKE_SPLIT_GSO,
+ TCA_CAKE_FWMARK,
__TCA_CAKE_MAX
};
#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -258,7 +258,8 @@ enum {
CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
CAKE_FLAG_INGRESS = BIT(2),
CAKE_FLAG_WASH = BIT(3),
- CAKE_FLAG_SPLIT_GSO = BIT(4)
+ CAKE_FLAG_SPLIT_GSO = BIT(4),
+ CAKE_FLAG_FWMARK = BIT(5)
};
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
@@ -2623,6 +2624,13 @@ static int cake_change(struct Qdisc *sch
q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
}
+ if (tb[TCA_CAKE_FWMARK]) {
+ if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
+ q->rate_flags |= CAKE_FLAG_FWMARK;
+ else
+ q->rate_flags &= ~CAKE_FLAG_FWMARK;
+ }
+
if (q->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
@@ -2782,6 +2790,10 @@ static int cake_dump(struct Qdisc *sch,
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK,
+ !!(q->rate_flags & CAKE_FLAG_FWMARK)))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:

View File

@ -0,0 +1,102 @@
From eab2fc822af38f31fd5f4e731b5d10b94904d919 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 14 Mar 2019 23:08:22 +0100
Subject: [PATCH] sch_cake: Interpret fwmark parameter as a bitmask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We initially interpreted the fwmark parameter as a flag that simply turned
on the feature, using the whole skb->mark field as the index into the CAKE
tin_order array. However, it is quite common for different applications to
use different parts of the mask field for their own purposes, each using a
different mask.
Support this use of subsets of the mark by interpreting the TCA_CAKE_FWMARK
parameter as a bitmask to apply to the fwmark field when reading it. The
result will be right-shifted by the number of unset lower bits of the mask
before looking up the tin.
In the original commit message we also failed to credit Felix Resch with
originally suggesting the fwmark feature back in 2017; so the Suggested-By
in this commit covers the whole fwmark feature.
Fixes: 0b5c7efdfc6e ("sch_cake: Permit use of connmarks as tin classifiers")
Suggested-by: Felix Resch <fuller@beif.de>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
---
net/sched/sch_cake.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -211,6 +211,9 @@ struct cake_sched_data {
u8 ack_filter;
u8 atm_mode;
+ u32 fwmark_mask;
+ u16 fwmark_shft;
+
/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
u16 rate_shft;
ktime_t time_next_packet;
@@ -258,8 +261,7 @@ enum {
CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
CAKE_FLAG_INGRESS = BIT(2),
CAKE_FLAG_WASH = BIT(3),
- CAKE_FLAG_SPLIT_GSO = BIT(4),
- CAKE_FLAG_FWMARK = BIT(5)
+ CAKE_FLAG_SPLIT_GSO = BIT(4)
};
/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
@@ -1554,7 +1556,7 @@ static struct cake_tin_data *cake_select
struct sk_buff *skb)
{
struct cake_sched_data *q = qdisc_priv(sch);
- u32 tin;
+ u32 tin, mark;
u8 dscp;
/* Tin selection: Default to diffserv-based selection, allow overriding
@@ -1562,6 +1564,7 @@ static struct cake_tin_data *cake_select
*/
dscp = cake_handle_diffserv(skb,
q->rate_flags & CAKE_FLAG_WASH);
+ mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
tin = 0;
@@ -2178,6 +2181,7 @@ static const struct nla_policy cake_poli
[TCA_CAKE_MPU] = { .type = NLA_U32 },
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
+ [TCA_CAKE_FWMARK] = { .type = NLA_U32 },
};
static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
@@ -2625,10 +2629,8 @@ static int cake_change(struct Qdisc *sch
}
if (tb[TCA_CAKE_FWMARK]) {
- if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
- q->rate_flags |= CAKE_FLAG_FWMARK;
- else
- q->rate_flags &= ~CAKE_FLAG_FWMARK;
+ q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
+ q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
}
if (q->tins) {
@@ -2790,8 +2792,7 @@ static int cake_dump(struct Qdisc *sch,
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_CAKE_FWMARK,
- !!(q->rate_flags & CAKE_FLAG_FWMARK)))
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
goto nla_put_failure;
return nla_nest_end(skb, opts);