remove useless packages

CN_SZTL 2020-02-18 13:30:42 +08:00
parent fc06582d68
commit d2992749ac
No known key found for this signature in database
GPG Key ID: 6850B6345C862176
39 changed files with 0 additions and 14722 deletions


@@ -1,46 +0,0 @@
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk

PKG_NAME:=tcp-bbr-bbrplus
PKG_RELEASE:=1

include $(INCLUDE_DIR)/package.mk

define KernelPackage/$(PKG_NAME)
  SUBMENU:=Network Support
  TITLE:=Modified BBR TCP congestion control
  DEPENDS:=@LINUX_4_14
  FILES:=$(PKG_BUILD_DIR)/tcp_bbr_bbrplus.ko
  KCONFIG:=
endef

define KernelPackage/$(PKG_NAME)/description
  Kernel module of a modified BBR TCP congestion control.
endef

EXTRA_KCONFIG:=

EXTRA_CFLAGS:= \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=m,%,$(filter %=m,$(EXTRA_KCONFIG)))) \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=y,%,$(filter %=y,$(EXTRA_KCONFIG))))

MAKE_OPTS:= \
	ARCH="$(LINUX_KARCH)" \
	CROSS_COMPILE="$(TARGET_CROSS)" \
	SUBDIRS="$(PKG_BUILD_DIR)" \
	EXTRA_CFLAGS="$(EXTRA_CFLAGS)" \
	$(EXTRA_KCONFIG)

define Build/Prepare
	mkdir -p $(PKG_BUILD_DIR)
	$(CP) ./src/* $(PKG_BUILD_DIR)/
endef

define Build/Compile
	$(MAKE) -C "$(LINUX_DIR)" \
		$(MAKE_OPTS) \
		modules
endef

$(eval $(call KernelPackage,$(PKG_NAME)))


@@ -1 +0,0 @@
obj-m := tcp_bbr_bbrplus.o

File diff suppressed because it is too large


@@ -1,47 +0,0 @@
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk

PKG_NAME:=tcp-bbr-mod
PKG_RELEASE:=1

include $(INCLUDE_DIR)/package.mk

define KernelPackage/$(PKG_NAME)
  SUBMENU:=Network Support
  TITLE:=Modified BBR TCP congestion control
  DEPENDS:=@LINUX_4_14
  FILES:=$(PKG_BUILD_DIR)/tcp_bbr_mod.ko
  AUTOLOAD:=$(call AutoLoad,99,tcp-bbr-mod)
  KCONFIG:=
endef

define KernelPackage/$(PKG_NAME)/description
  Kernel module of a modified BBR TCP congestion control.
endef

EXTRA_KCONFIG:=

EXTRA_CFLAGS:= \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=m,%,$(filter %=m,$(EXTRA_KCONFIG)))) \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=y,%,$(filter %=y,$(EXTRA_KCONFIG))))

MAKE_OPTS:= \
	ARCH="$(LINUX_KARCH)" \
	CROSS_COMPILE="$(TARGET_CROSS)" \
	SUBDIRS="$(PKG_BUILD_DIR)" \
	EXTRA_CFLAGS="$(EXTRA_CFLAGS)" \
	$(EXTRA_KCONFIG)

define Build/Prepare
	mkdir -p $(PKG_BUILD_DIR)
	$(CP) ./src/* $(PKG_BUILD_DIR)/
endef

define Build/Compile
	$(MAKE) -C "$(LINUX_DIR)" \
		$(MAKE_OPTS) \
		modules
endef

$(eval $(call KernelPackage,$(PKG_NAME)))


@@ -1 +0,0 @@
obj-m := tcp_bbr_mod.o


@@ -1,962 +0,0 @@
/* Bottleneck Bandwidth and RTT (BBR) congestion control
*
* BBR congestion control computes the sending rate based on the delivery
* rate (throughput) estimated from ACKs. In a nutshell:
*
* On each ACK, update our model of the network path:
* bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
* min_rtt = windowed_min(rtt, 10 seconds)
* pacing_rate = pacing_gain * bottleneck_bandwidth
* cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
*
* The core algorithm does not react directly to packet losses or delays,
* although BBR may adjust the size of next send per ACK when loss is
* observed, or adjust the sending rate if it estimates there is a
* traffic policer, in order to keep the drop rate reasonable.
*
* Here is a state transition diagram for BBR:
*
* |
* V
* +---> STARTUP ----+
* | | |
* | V |
* | DRAIN ----+
* | | |
* | V |
* +---> PROBE_BW ----+
* | ^ | |
* | | | |
* | +----+ |
* | |
* +---- PROBE_RTT <--+
*
* A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
* When it estimates the pipe is full, it enters DRAIN to drain the queue.
* In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
* A long-lived BBR flow spends the vast majority of its time remaining
* (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
* in a fair manner, with a small, bounded queue. *If* a flow has been
* continuously sending for the entire min_rtt window, and hasn't seen an RTT
* sample that matches or decreases its min_rtt estimate for 10 seconds, then
* it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
* the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
* we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
* otherwise we enter STARTUP to try to fill the pipe.
*
* BBR is described in detail in:
* "BBR: Congestion-Based Congestion Control",
* Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
* Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
*
* There is a public e-mail list for discussing BBR development and testing:
* https://groups.google.com/forum/#!forum/bbr-dev
*
* NOTE: BBR may be used with the fq qdisc ("man tc-fq") with pacing enabled;
* otherwise the TCP stack falls back to internal pacing, using one
* high-resolution timer per TCP socket, which may use more resources.
*/
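
For orientation, a minimal userspace sketch of the four model equations in the comment above, using plain doubles in place of the kernel's fixed-point types and windowed min/max filters (an editor's illustration with invented sample values, not part of this module):

#include <stdio.h>

int main(void)
{
	double delivered_pkts = 100.0; /* packets ACKed over a sample interval */
	double elapsed_us = 10000.0;   /* length of that interval, in usec */
	double rtt_us = 10000.0;       /* windowed-min RTT sample */
	double pacing_gain = 2.885;    /* STARTUP-style gain */
	double cwnd_gain = 2.0;

	double btl_bw = delivered_pkts / elapsed_us; /* pkts per usec */
	double pacing_rate = pacing_gain * btl_bw;   /* pkts per usec */
	double cwnd = cwnd_gain * btl_bw * rtt_us;   /* gain * BDP, in packets */

	if (cwnd < 4.0) /* same floor of 4 packets as the module */
		cwnd = 4.0;
	printf("bw=%.4f pkt/us pacing=%.4f pkt/us cwnd=%.0f pkts\n",
	       btl_bw, pacing_rate, cwnd);
	return 0;
}
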
#include <linux/module.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
#include <linux/inet.h>
#include <linux/random.h>
#include <linux/win_minmax.h>
/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
* estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
* This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
* Since the minimum window is >=4 packets, the lower bound isn't
* an issue. The upper bound isn't an issue with existing technologies.
*/
#define BW_SCALE 24
#define BW_UNIT (1 << BW_SCALE)
#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */
#define BBR_UNIT (1 << BBR_SCALE)
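
A quick numeric check of these fixed-point units (an editor's sketch; the sample values are invented): 1000 packets delivered over 12000 usec is ~0.083 pkts/usec, roughly 1 Gbit/s at 1500-byte packets, and the scaled value still fits comfortably in a u32.

#include <stdint.h>
#include <stdio.h>

#define BW_SCALE 24
#define BW_UNIT (1 << BW_SCALE)
#define BBR_SCALE 8
#define BBR_UNIT (1 << BBR_SCALE)

int main(void)
{
	uint64_t delivered = 1000;    /* packets ACKed in the interval */
	uint64_t interval_us = 12000;

	/* Scale before dividing so the ratio (~0.083 pkts/usec) survives. */
	uint64_t bw = delivered * BW_UNIT / interval_us;

	/* Apply a 1.25x gain the way the module does: multiply, then shift. */
	uint64_t gained = (bw * (BBR_UNIT * 5 / 4)) >> BBR_SCALE;

	printf("bw=%llu gained=%llu (pkts/usec << 24)\n",
	       (unsigned long long)bw, (unsigned long long)gained);
	return 0;
}
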
/* BBR has the following modes for deciding how fast to send: */
enum bbr_mode {
BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */
BBR_DRAIN, /* drain any queue created during startup */
BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */
BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */
};
/* BBR congestion control block */
struct bbr {
u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */
u32 min_rtt_stamp; /* timestamp of min_rtt_us */
u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */
struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */
u32 rtt_cnt; /* count of packet-timed rounds elapsed */
u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
u64 cycle_mstamp; /* time of this cycle phase start */
u32 mode:3, /* current bbr_mode in state machine */
prev_ca_state:3, /* CA state on previous ACK */
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
unused:5,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */
u32 lt_last_delivered; /* LT intvl start: tp->delivered */
u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */
u32 lt_last_lost; /* LT intvl start: tp->lost */
u32 pacing_gain:10, /* current gain for setting pacing rate */
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_reached:1, /* reached full bw in Startup? */
full_bw_cnt:2, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
has_seen_rtt:1, /* have we seen an RTT sample yet? */
unused_b:5;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */
/* Window length of bw filter (in rounds): */
static const int bbr_bw_rtts = CYCLE_LEN + 2;
/* Window length of min_rtt filter (in sec): */
static const u32 bbr_min_rtt_win_sec = 10;
/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
static const u32 bbr_probe_rtt_mode_ms = 100;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;
/* Upstream BBR uses a high_gain of 2/ln(2) ~= 2.885, the smallest pacing gain
* that allows a smoothly increasing pacing rate to double each RTT and send
* the same number of packets per RTT that an un-paced, slow-starting Reno or
* CUBIC flow would. This mod rounds the STARTUP gain up to 3:
*/
static const int bbr_high_gain = BBR_UNIT * 3000 / 1000 + 1;
/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
* the queue created in BBR_STARTUP in a single round:
*/
static const int bbr_drain_gain = BBR_UNIT * 1000 / 3000;
/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
static const int bbr_cwnd_gain = BBR_UNIT * 2;
/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
static const int bbr_pacing_gain[] = {
BBR_UNIT * 6 / 4, /* probe for more available bw */
BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */
BBR_UNIT * 5 / 4, BBR_UNIT * 5 / 4, BBR_UNIT * 5 / 4, /* cruise at 1.25*bw (this mod; upstream cruises at 1.0*bw) */
BBR_UNIT * 6 / 4, BBR_UNIT * 6 / 4, BBR_UNIT * 6 / 4 /* then at 1.5*bw for the rest of the cycle */
};
/* Randomize the starting gain cycling phase over N phases: */
static const u32 bbr_cycle_rand = 7;
/* Try to keep at least this many packets in flight, if things go smoothly. For
* smooth functioning, a sliding window protocol ACKing every other packet
* needs at least 4 packets in flight:
*/
static const u32 bbr_cwnd_min_target = 4;
/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
/* If bw has increased significantly (1.25x), there may be more bw available: */
static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
static const u32 bbr_full_bw_cnt = 3;
/* "long-term" ("LT") bandwidth estimator parameters... */
/* The minimum number of rounds in an LT bw sampling interval: */
static const u32 bbr_lt_intvl_min_rtts = 4;
/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
static const u32 bbr_lt_loss_thresh = 50;
/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
static const u32 bbr_lt_bw_ratio = BBR_UNIT / 4;
/* If 2 intervals have a bw diff <= 8 Kbit/sec (1000 bytes/sec; upstream uses 4 Kbit/sec), their bw is "consistent": */
static const u32 bbr_lt_bw_diff = 4000 / 4;
/* If we estimate we're policed, use lt_bw for this many round trips: */
static const u32 bbr_lt_bw_max_rtts = 48;
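
One encoding above is easy to misread, so a short check (editor's note, derived from the test in bbr_lt_bw_sampling() below):

/* bbr_lt_loss_thresh is a fraction with BBR_SCALE bits of precision, not a
 * percentage: the sampling code tests
 *   (lost << BBR_SCALE) >= bbr_lt_loss_thresh * delivered
 * i.e. lost/delivered >= 50/256 ~= 19.5%, the "20%" quoted above. */
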
/* Do we estimate that STARTUP filled the pipe? */
static bool bbr_full_bw_reached(const struct sock *sk)
{
const struct bbr *bbr = inet_csk_ca(sk);
return bbr->full_bw_reached;
}
/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
static u32 bbr_max_bw(const struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return minmax_get(&bbr->bw);
}
/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
static u32 bbr_bw(const struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
}
/* Return rate in bytes per second, optionally with a gain.
* The order here is chosen carefully to avoid overflow of u64. This should
* work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
*/
static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
rate *= gain;
rate >>= BBR_SCALE;
rate *= USEC_PER_SEC;
return rate >> BW_SCALE;
}
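
To see why this ordering stays within a u64 at the stated limits, a rough worked check (editor's arithmetic, assuming ~1500-byte packets):

/* At 2.9 Tbit/s with ~1500-byte packets:
 *   rate ~= 242 pkts/usec << 24  ~= 4.1e9   (still fits the scaled u32 input)
 *   *= mtu (1500)                ~= 6.1e12
 *   *= gain (2.89 * 256 ~= 740)  ~= 4.5e15
 *   >>= BBR_SCALE                ~= 1.8e13
 *   *= USEC_PER_SEC              ~= 1.8e19  (just under 2^64 ~= 1.84e19)
 * Multiplying by USEC_PER_SEC before the BBR_SCALE shift would overflow. */
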
/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
{
u64 rate = bw;
rate = bbr_rate_bytes_per_sec(sk, rate, gain);
rate = min_t(u64, rate, sk->sk_max_pacing_rate);
return rate;
}
/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw;
u32 rtt_us;
if (tp->srtt_us) { /* any RTT sample yet? */
rtt_us = max(tp->srtt_us >> 3, 1U);
bbr->has_seen_rtt = 1;
} else { /* no RTT sample yet */
rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
}
bw = (u64)tp->snd_cwnd * BW_UNIT;
do_div(bw, rtt_us);
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
* estimated bandwidth. This is an important aspect of the design. In this
* implementation this slightly lower pacing rate is achieved implicitly by not
* including link-layer headers in the packet size used for the pacing rate.
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
bbr_init_pacing_rate_from_rtt(sk);
if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
/* Return count of segments we want in the skbs we send, or 0 for default. */
static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return bbr->tso_segs_goal;
}
static void bbr_set_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 min_segs;
min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
static void bbr_save_cwnd(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
}
static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
if (event == CA_EVENT_TX_START && tp->app_limited) {
bbr->idle_restart = 1;
/* Avoid pointless buffer overflows: pace at est. bw if we don't
* need more speed (we're restarting from idle and app-limited).
*/
if (bbr->mode == BBR_PROBE_BW)
bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
}
}
/* Find target cwnd. Right-size the cwnd based on min RTT and the
* estimated bottleneck bandwidth:
*
* cwnd = bw * min_rtt * gain = BDP * gain
*
* The key factor, gain, controls the amount of queue. While a small gain
* builds a smaller queue, it becomes more vulnerable to noise in RTT
* measurements (e.g., delayed ACKs or other ACK compression effects). This
* noise may cause BBR to under-estimate the rate.
*
* To achieve full performance in high-speed paths, we budget enough cwnd to
* fit full-sized skbs in-flight on both end hosts to fully utilize the path:
* - one skb in sending host Qdisc,
* - one skb in sending host TSO/GSO engine
* - one skb being received by receiver host LRO/GRO/delayed-ACK engine
* Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
* in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
* which allows 2 outstanding 2-packet sequences, to try to keep pipe
* full even with ACK-every-other-packet delayed ACKs.
*/
static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 cwnd;
u64 w;
/* If we've never had a valid RTT sample, cap cwnd at the initial
* default. This should only happen when the connection is not using TCP
* timestamps and has retransmitted all of the SYN/SYNACK/data packets
* ACKed so far. In this case, an RTO can cut cwnd to 1, in which
* case we need to slow-start up toward something safe: TCP_INIT_CWND.
*/
if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */
return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/
w = (u64)bw * bbr->min_rtt_us;
/* Apply a gain to the given value, then remove the BW_SCALE shift. */
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
cwnd += 3 * bbr->tso_segs_goal;
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
return cwnd;
}
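
A worked instance of this computation (editor's illustration; the inputs are invented): bw = 0.05 pkts/usec (~600 Mbit/s at 1500-byte packets), min_rtt = 20 ms, gain = 2.0, tso_segs_goal = 4.

/* bw (scaled)      = 0.05 << 24                      ~= 838861
 * w = bw * min_rtt = 838861 * 20000 usec             ~= 1.68e10
 * apply gain 2.0   : (w * 512) >> 8, round up by BW_UNIT -> 2000 packets
 * + 3 * tso_segs_goal                                -> 2012
 * round up to an even number                         -> 2012
 * i.e. cwnd = 2 * BDP (the BDP here is 1000 packets) plus headroom for
 * end-system buffering. */
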
/* An optimization in BBR to reduce losses: On the first round of recovery, we
* follow the packet conservation principle: send P packets per P packets acked.
* After that, we slow-start and send at most 2*P packets per P packets acked.
* After recovery finishes, or upon undo, we restore the cwnd we had when
* recovery started (capped by the target cwnd based on estimated BDP).
*
* TODO(ycheng/ncardwell): implement a rate-based approach.
*/
static bool bbr_set_cwnd_to_recover_or_restore(
struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
u32 cwnd = tp->snd_cwnd;
/* An ACK for P pkts should release at most 2*P packets. We do this
* in two steps. First, here we deduct the number of lost packets.
* Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
*/
if (rs->losses > 0)
cwnd = max_t(s32, cwnd - rs->losses, 1);
if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
/* Starting 1st round of Recovery, so do packet conservation. */
bbr->packet_conservation = 1;
bbr->next_rtt_delivered = tp->delivered; /* start round now */
/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
cwnd = tcp_packets_in_flight(tp) + acked;
} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
/* Exiting loss recovery; restore cwnd saved before recovery. */
bbr->restore_cwnd = 1;
bbr->packet_conservation = 0;
}
bbr->prev_ca_state = state;
if (bbr->restore_cwnd) {
/* Restore cwnd after exiting loss recovery or PROBE_RTT. */
cwnd = max(cwnd, bbr->prior_cwnd);
bbr->restore_cwnd = 0;
}
if (bbr->packet_conservation) {
*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
return true; /* yes, using packet conservation */
}
*new_cwnd = cwnd;
return false;
}
/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
* has drawn us down below target), or snap down to target if we're above it.
*/
static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
u32 acked, u32 bw, int gain)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 cwnd = 0, target_cwnd = 0;
if (!acked)
return;
if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
goto done;
/* If we're below target cwnd, slow start cwnd toward target cwnd. */
target_cwnd = bbr_target_cwnd(sk, bw, gain);
if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */
cwnd = min(cwnd + acked, target_cwnd);
else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
cwnd = cwnd + acked;
cwnd = max(cwnd, bbr_cwnd_min_target);
done:
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
}
/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
static bool bbr_is_next_cycle_phase(struct sock *sk,
const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool is_full_length =
tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
bbr->min_rtt_us;
u32 inflight, bw;
/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
* use the pipe without increasing the queue.
*/
if (bbr->pacing_gain == BBR_UNIT)
return is_full_length; /* just use wall clock time */
inflight = rs->prior_in_flight; /* what was in-flight before ACK? */
bw = bbr_max_bw(sk);
/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
* least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
* small (e.g. on a LAN). We do not persist if packets are lost, since
* a path with small buffers may not hold that much.
*/
if (bbr->pacing_gain > BBR_UNIT)
return is_full_length &&
(rs->losses || /* perhaps pacing_gain*BDP won't fit */
inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));
/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
* probing didn't find more bw. If inflight falls to match BDP then we
* estimate queue is drained; persisting would underutilize the pipe.
*/
return is_full_length ||
inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
}
static void bbr_advance_cycle_phase(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
bbr->cycle_mstamp = tp->delivered_mstamp;
bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
bbr_pacing_gain[bbr->cycle_idx];
}
/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
static void bbr_update_cycle_phase(struct sock *sk,
const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->mode == BBR_PROBE_BW && bbr_is_next_cycle_phase(sk, rs))
bbr_advance_cycle_phase(sk);
}
static void bbr_reset_startup_mode(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_STARTUP;
bbr->pacing_gain = bbr_high_gain;
bbr->cwnd_gain = bbr_high_gain;
}
static void bbr_reset_probe_bw_mode(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_PROBE_BW;
bbr->pacing_gain = BBR_UNIT;
bbr->cwnd_gain = bbr_cwnd_gain;
bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */
}
static void bbr_reset_mode(struct sock *sk)
{
if (!bbr_full_bw_reached(sk))
bbr_reset_startup_mode(sk);
else
bbr_reset_probe_bw_mode(sk);
}
/* Start a new long-term sampling interval. */
static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
bbr->lt_last_delivered = tp->delivered;
bbr->lt_last_lost = tp->lost;
bbr->lt_rtt_cnt = 0;
}
/* Completely reset long-term bandwidth sampling. */
static void bbr_reset_lt_bw_sampling(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->lt_bw = 0;
bbr->lt_use_bw = 0;
bbr->lt_is_sampling = false;
bbr_reset_lt_bw_sampling_interval(sk);
}
/* Long-term bw sampling interval is done. Estimate whether we're policed. */
static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 diff;
if (bbr->lt_bw) { /* do we have bw from a previous interval? */
/* Is new bw close to the lt_bw from the previous interval? */
diff = abs(bw - bbr->lt_bw);
if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
(bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
bbr_lt_bw_diff)) {
/* All criteria are met; estimate we're policed. */
bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */
bbr->lt_use_bw = 1;
bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */
bbr->lt_rtt_cnt = 0;
return;
}
}
bbr->lt_bw = bw;
bbr_reset_lt_bw_sampling_interval(sk);
}
/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
* Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
* explicitly models their policed rate, to reduce unnecessary losses. We
* estimate that we're policed if we see 2 consecutive sampling intervals with
* consistent throughput and high packet loss. If we think we're being policed,
* set lt_bw to the "long-term" average delivery rate from those 2 intervals.
*/
static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 lost, delivered;
u64 bw;
u32 t;
if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */
if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */
bbr_reset_probe_bw_mode(sk); /* restart gain cycling */
}
return;
}
/* Wait for the first loss before sampling, to let the policer exhaust
* its tokens and estimate the steady-state rate allowed by the policer.
* Starting samples earlier includes bursts that over-estimate the bw.
*/
if (!bbr->lt_is_sampling) {
if (!rs->losses)
return;
bbr_reset_lt_bw_sampling_interval(sk);
bbr->lt_is_sampling = true;
}
/* To avoid underestimates, reset sampling if we run out of data. */
if (rs->is_app_limited) {
bbr_reset_lt_bw_sampling(sk);
return;
}
if (bbr->round_start)
bbr->lt_rtt_cnt++; /* count round trips in this interval */
if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
return; /* sampling interval needs to be longer */
if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
bbr_reset_lt_bw_sampling(sk); /* interval is too long */
return;
}
/* End sampling interval when a packet is lost, so we estimate the
* policer tokens were exhausted. Stopping the sampling before the
* tokens are exhausted under-estimates the policed rate.
*/
if (!rs->losses)
return;
/* Calculate packets lost and delivered in sampling interval. */
lost = tp->lost - bbr->lt_last_lost;
delivered = tp->delivered - bbr->lt_last_delivered;
/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
return;
/* Find average delivery rate in this sampling interval. */
t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
if ((s32)t < 1)
return; /* interval is less than one ms, so wait */
/* Check if can multiply without overflow */
if (t >= ~0U / USEC_PER_MSEC) {
bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */
return;
}
t *= USEC_PER_MSEC;
bw = (u64)delivered * BW_UNIT;
do_div(bw, t);
bbr_lt_bw_interval_done(sk, bw);
}
/* Estimate the bandwidth based on how fast packets are delivered */
static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw;
bbr->round_start = 0;
if (rs->delivered < 0 || rs->interval_us <= 0)
return; /* Not a valid observation */
/* See if we've reached the next RTT */
if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
bbr->next_rtt_delivered = tp->delivered;
bbr->rtt_cnt++;
bbr->round_start = 1;
bbr->packet_conservation = 0;
}
bbr_lt_bw_sampling(sk, rs);
/* Divide delivered by the interval to find a (lower bound) bottleneck
* bandwidth sample. Delivered is in packets and interval_us in uS and
* ratio will be <<1 for most connections. So delivered is first scaled.
*/
bw = (u64)rs->delivered * BW_UNIT;
do_div(bw, rs->interval_us);
/* If this sample is application-limited, it is likely to have a very
* low delivered count that represents application behavior rather than
* the available network rate. Such a sample could drag down estimated
* bw, causing needless slow-down. Thus, to continue to send at the
* last measured network rate, we filter out app-limited samples unless
* they describe the path bw at least as well as our bw model.
*
* So the goal during app-limited phase is to proceed with the best
* network rate no matter how long. We automatically leave this
* phase when app writes faster than the network can deliver :)
*/
if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
/* Incorporate new sample into our max bw filter. */
minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
}
}
/* Estimate when the pipe is full, using the change in delivery rate: BBR
* estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
* at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
* rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
* higher rwin, 3: we get higher delivery rate samples. Or transient
* cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
* design goal, but uses delay and inter-ACK spacing instead of bandwidth.
*/
static void bbr_check_full_bw_reached(struct sock *sk,
const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw_thresh;
if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
return;
bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
if (bbr_max_bw(sk) >= bw_thresh) {
bbr->full_bw = bbr_max_bw(sk);
bbr->full_bw_cnt = 0;
return;
}
++bbr->full_bw_cnt;
bbr->full_bw_reached = bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
/* If pipe is probably full, drain the queue and then enter steady-state. */
static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */
}
/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
* periodically drain the bottleneck queue, to converge to measure the true
* min_rtt (unloaded propagation delay). This allows the flows to keep queues
* small (reducing queuing delay and packet loss) and achieve fairness among
* BBR flows.
*
* The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
* we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
* After at least bbr_probe_rtt_mode_ms=100ms (this mod halves upstream's
* 200ms) and at least one packet-timed round trip elapsed with that flight
* size <= 4, we leave PROBE_RTT mode and re-enter the previous mode. The
* interval approximately bounds the performance penalty of PROBE_RTT's
* cwnd capping to roughly 1% (100ms/10s).
*
* Note that flows need only pay this penalty if they are busy sending over the last 10
* seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
* natural silences or low-rate periods within 10 seconds where the rate is low
* enough for long enough to drain its queue in the bottleneck. We pick up
* these min RTT measurements opportunistically with our min_rtt filter. :-)
*/
static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool filter_expired;
/* Track min RTT seen in the min_rtt_win_sec filter window: */
filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
(rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
bbr->min_rtt_us = rs->rtt_us;
bbr->min_rtt_stamp = tcp_jiffies32;
}
if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
!bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */
bbr->pacing_gain = BBR_UNIT;
bbr->cwnd_gain = BBR_UNIT;
bbr_save_cwnd(sk); /* note cwnd so we can restore it */
bbr->probe_rtt_done_stamp = 0;
}
if (bbr->mode == BBR_PROBE_RTT) {
/* Ignore low rate samples during this mode. */
tp->app_limited =
(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
/* Maintain min packets in flight for max(200 ms, 1 round). */
if (!bbr->probe_rtt_done_stamp &&
tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
bbr->probe_rtt_done_stamp = tcp_jiffies32 +
msecs_to_jiffies(bbr_probe_rtt_mode_ms);
bbr->probe_rtt_round_done = 0;
bbr->next_rtt_delivered = tp->delivered;
} else if (bbr->probe_rtt_done_stamp) {
if (bbr->round_start)
bbr->probe_rtt_round_done = 1;
if (bbr->probe_rtt_round_done &&
after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
bbr->min_rtt_stamp = tcp_jiffies32;
bbr->restore_cwnd = 1; /* snap to prior_cwnd */
bbr_reset_mode(sk);
}
}
}
bbr->idle_restart = 0;
}
static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
{
bbr_update_bw(sk, rs);
bbr_update_cycle_phase(sk, rs);
bbr_check_full_bw_reached(sk, rs);
bbr_check_drain(sk, rs);
bbr_update_min_rtt(sk, rs);
}
static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
bbr_update_model(sk, rs);
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
bbr->probe_rtt_done_stamp = 0;
bbr->probe_rtt_round_done = 0;
bbr->min_rtt_us = tcp_min_rtt(tp);
bbr->min_rtt_stamp = tcp_jiffies32;
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
bbr->has_seen_rtt = 0;
bbr_init_pacing_rate_from_rtt(sk);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
bbr->full_bw_reached = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
bbr->cycle_mstamp = 0;
bbr->cycle_idx = 0;
bbr_reset_lt_bw_sampling(sk);
bbr_reset_startup_mode(sk);
cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}
static u32 bbr_sndbuf_expand(struct sock *sk)
{
/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
return 3;
}
/* In theory BBR does not need to undo the cwnd since it does not
* always reduce cwnd on losses (see bbr_main()). Keep it for now.
*/
static u32 bbr_undo_cwnd(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
bbr->full_bw_cnt = 0;
bbr_reset_lt_bw_sampling(sk);
return tcp_sk(sk)->snd_cwnd;
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw = bbr_bw(sk);
bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
memset(&info->bbr, 0, sizeof(info->bbr));
info->bbr.bbr_bw_lo = (u32)bw;
info->bbr.bbr_bw_hi = (u32)(bw >> 32);
info->bbr.bbr_min_rtt = bbr->min_rtt_us;
info->bbr.bbr_pacing_gain = bbr->pacing_gain;
info->bbr.bbr_cwnd_gain = bbr->cwnd_gain;
*attr = INET_DIAG_BBRINFO;
return sizeof(info->bbr);
}
return 0;
}
static void bbr_set_state(struct sock *sk, u8 new_state)
{
struct bbr *bbr = inet_csk_ca(sk);
if (new_state == TCP_CA_Loss) {
struct rate_sample rs = { .losses = 1 };
bbr->prev_ca_state = TCP_CA_Loss;
bbr->full_bw = 0;
bbr->round_start = 1; /* treat RTO like end of a round */
bbr_lt_bw_sampling(sk, &rs);
}
}
static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.flags = TCP_CONG_NON_RESTRICTED,
.name = "tcp_bbr_mod",
.owner = THIS_MODULE,
.init = bbr_init,
.cong_control = bbr_main,
.sndbuf_expand = bbr_sndbuf_expand,
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
.tso_segs_goal = bbr_tso_segs_goal,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
static int __init bbr_register(void)
{
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}
static void __exit bbr_unregister(void)
{
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
module_init(bbr_register);
module_exit(bbr_unregister);
MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
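
Once the module is loaded, the algorithm is selected by the .name registered above ("tcp_bbr_mod"), either system-wide through the net.ipv4.tcp_congestion_control sysctl or per socket with the standard TCP_CONGESTION option. A minimal per-socket sketch (editor's example):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

int main(void)
{
	const char *cc = "tcp_bbr_mod"; /* must match tcp_congestion_ops.name */
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, cc, strlen(cc)) < 0)
		perror("setsockopt(TCP_CONGESTION)"); /* e.g. module not loaded */
	else
		printf("congestion control set to %s\n", cc);
	return 0;
}
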


@@ -1,47 +0,0 @@
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk

PKG_NAME:=tcp-bbr-nanqinlang
PKG_RELEASE:=1

include $(INCLUDE_DIR)/package.mk

define KernelPackage/$(PKG_NAME)
  SUBMENU:=Network Support
  TITLE:=Modified BBR TCP congestion control
  DEPENDS:=@LINUX_4_14
  FILES:=$(PKG_BUILD_DIR)/tcp_bbr_nanqinlang.ko
  AUTOLOAD:=$(call AutoLoad,99,tcp-bbr-nanqinlang)
  KCONFIG:=
endef

define KernelPackage/$(PKG_NAME)/description
  Kernel module of a modified BBR TCP congestion control.
endef

EXTRA_KCONFIG:=

EXTRA_CFLAGS:= \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=m,%,$(filter %=m,$(EXTRA_KCONFIG)))) \
	$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=y,%,$(filter %=y,$(EXTRA_KCONFIG))))

MAKE_OPTS:= \
	ARCH="$(LINUX_KARCH)" \
	CROSS_COMPILE="$(TARGET_CROSS)" \
	SUBDIRS="$(PKG_BUILD_DIR)" \
	EXTRA_CFLAGS="$(EXTRA_CFLAGS)" \
	$(EXTRA_KCONFIG)

define Build/Prepare
	mkdir -p $(PKG_BUILD_DIR)
	$(CP) ./src/* $(PKG_BUILD_DIR)/
endef

define Build/Compile
	$(MAKE) -C "$(LINUX_DIR)" \
		$(MAKE_OPTS) \
		modules
endef

$(eval $(call KernelPackage,$(PKG_NAME)))


@@ -1 +0,0 @@
obj-m := tcp_bbr_nanqinlang.o


@@ -1,968 +0,0 @@
/* tcp_nanqinlang
* Debian
* kernel v4.14
* New BBR Congestion Control
* Modified by (C) 2017 nanqinlang
*******************************************************************************
* Bottleneck Bandwidth and RTT (BBR) congestion control
*
* BBR congestion control computes the sending rate based on the delivery
* rate (throughput) estimated from ACKs. In a nutshell:
*
* On each ACK, update our model of the network path:
* bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
* min_rtt = windowed_min(rtt, 10 seconds)
* pacing_rate = pacing_gain * bottleneck_bandwidth
* cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
*
* The core algorithm does not react directly to packet losses or delays,
* although BBR may adjust the size of next send per ACK when loss is
* observed, or adjust the sending rate if it estimates there is a
* traffic policer, in order to keep the drop rate reasonable.
*
* Here is a state transition diagram for BBR:
*
* |
* V
* +---> STARTUP ----+
* | | |
* | V |
* | DRAIN ----+
* | | |
* | V |
* +---> PROBE_BW ----+
* | ^ | |
* | | | |
* | +----+ |
* | |
* +---- PROBE_RTT <--+
*
* A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
* When it estimates the pipe is full, it enters DRAIN to drain the queue.
* In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
* A long-lived BBR flow spends the vast majority of its time remaining
* (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
* in a fair manner, with a small, bounded queue. *If* a flow has been
* continuously sending for the entire min_rtt window, and hasn't seen an RTT
* sample that matches or decreases its min_rtt estimate for 10 seconds, then
* it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
* the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
* we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
* otherwise we enter STARTUP to try to fill the pipe.
*
* BBR is described in detail in:
* "BBR: Congestion-Based Congestion Control",
* Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
* Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
*
* There is a public e-mail list for discussing BBR development and testing:
* https://groups.google.com/forum/#!forum/bbr-dev
*
* NOTE: BBR may be used with the fq qdisc ("man tc-fq") with pacing enabled;
* otherwise the TCP stack falls back to internal pacing, using one
* high-resolution timer per TCP socket, which may use more resources.
*/
#include <linux/module.h>
#include <linux/inet.h>
#include <linux/inet_diag.h>
#include <linux/random.h>
#include <linux/win_minmax.h>
#include <net/tcp.h>
/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
* estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
* This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
* Since the minimum window is >=4 packets, the lower bound isn't
* an issue. The upper bound isn't an issue with existing technologies.
*/
#define BW_SCALE 24
#define BW_UNIT (1 << BW_SCALE)
#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */
#define BBR_UNIT (1 << BBR_SCALE)
#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */
/* BBR has the following modes for deciding how fast to send: */
enum bbr_mode {
BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */
BBR_DRAIN, /* drain any queue created during startup */
BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */
BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */
};
/* BBR congestion control block */
struct bbr {
u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */
u32 min_rtt_stamp; /* timestamp of min_rtt_us */
u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */
struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */
u32 rtt_cnt; /* count of packet-timed rounds elapsed */
u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
u64 cycle_mstamp; /* time of this cycle phase start */
u32 mode:3, /* current bbr_mode in state machine */
prev_ca_state:3, /* CA state on previous ACK */
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
unused:5,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */
u32 lt_last_delivered; /* LT intvl start: tp->delivered */
u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */
u32 lt_last_lost; /* LT intvl start: tp->lost */
u32 pacing_gain:10, /* current gain for setting pacing rate */
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_cnt:3, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
has_seen_rtt:1, /* have we seen an RTT sample yet? */
unused_b:5;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
/* Window length of bw filter (in rounds): */
static const int bbr_bw_rtts = CYCLE_LEN + 2;
/* Window length of min_rtt filter (in sec): */
static const u32 bbr_min_rtt_win_sec = 10;
/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
static const u32 bbr_probe_rtt_mode_ms = 100;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;
/* Upstream BBR uses a high_gain of 2/ln(2) ~= 2.885, the smallest pacing gain
* that allows a smoothly increasing pacing rate to double each RTT and send
* the same number of packets per RTT that an un-paced, slow-starting Reno or
* CUBIC flow would. This mod rounds the STARTUP gain up to 3:
*/
static const int bbr_high_gain = BBR_UNIT * 3000 / 1000 + 1;
/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
* the queue created in BBR_STARTUP in a single round:
*/
static const int bbr_drain_gain = BBR_UNIT * 1000 / 3000;
/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
static const int bbr_cwnd_gain = BBR_UNIT * 2;
/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
static const int bbr_pacing_gain[] = {
BBR_UNIT * 6 / 4, /* probe for more available bw */
BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */
BBR_UNIT * 5 / 4, BBR_UNIT * 5 / 4, BBR_UNIT * 5 / 4, /* cruise at 1.25*bw (this mod; upstream cruises at 1.0*bw) */
BBR_UNIT * 6 / 4, BBR_UNIT * 6 / 4, BBR_UNIT * 6 / 4 /* then at 1.5*bw for the rest of the cycle */
};
/* Randomize the starting gain cycling phase over N phases: */
static const u32 bbr_cycle_rand = 7;
/* Try to keep at least this many packets in flight, if things go smoothly. For
* smooth functioning, a sliding window protocol ACKing every other packet
* needs at least 4 packets in flight:
*/
static const u32 bbr_cwnd_min_target = 4;
/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
/* If bw has increased significantly (1.25x), there may be more bw available: */
static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
static const u32 bbr_full_bw_cnt = 3;
/* "long-term" ("LT") bandwidth estimator parameters... */
/* The minimum number of rounds in an LT bw sampling interval: */
static const u32 bbr_lt_intvl_min_rtts = 4;
/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
static const u32 bbr_lt_loss_thresh = 50;
/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
static const u32 bbr_lt_bw_ratio = BBR_UNIT / 4;
/* If 2 intervals have a bw diff <= 8 Kbit/sec (1000 bytes/sec; upstream uses 4 Kbit/sec), their bw is "consistent": */
static const u32 bbr_lt_bw_diff = 4000 / 4;
/* If we estimate we're policed, use lt_bw for this many round trips: */
static const u32 bbr_lt_bw_max_rtts = 48;
/* Do we estimate that STARTUP filled the pipe? */
static bool bbr_full_bw_reached(const struct sock *sk)
{
const struct bbr *bbr = inet_csk_ca(sk);
return bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
static u32 bbr_max_bw(const struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return minmax_get(&bbr->bw);
}
/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
static u32 bbr_bw(const struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
}
/* Return rate in bytes per second, optionally with a gain.
* The order here is chosen carefully to avoid overflow of u64. This should
* work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
*/
static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
rate *= gain;
rate >>= BBR_SCALE;
rate *= USEC_PER_SEC;
return rate >> BW_SCALE;
}
/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
{
u64 rate = bw;
rate = bbr_rate_bytes_per_sec(sk, rate, gain);
rate = min_t(u64, rate, sk->sk_max_pacing_rate);
return rate;
}
/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw;
u32 rtt_us;
if (tp->srtt_us) { /* any RTT sample yet? */
rtt_us = max(tp->srtt_us >> 3, 1U);
bbr->has_seen_rtt = 1;
} else { /* no RTT sample yet */
rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
}
bw = (u64)tp->snd_cwnd * BW_UNIT;
do_div(bw, rtt_us);
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
* estimated bandwidth. This is an important aspect of the design. In this
* implementation this slightly lower pacing rate is achieved implicitly by not
* including link-layer headers in the packet size used for the pacing rate.
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
bbr_init_pacing_rate_from_rtt(sk);
if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
/* Return count of segments we want in the skbs we send, or 0 for default. */
static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return bbr->tso_segs_goal;
}
static void bbr_set_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 min_segs;
min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
static void bbr_save_cwnd(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
}
static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
if (event == CA_EVENT_TX_START && tp->app_limited) {
bbr->idle_restart = 1;
/* Avoid pointless buffer overflows: pace at est. bw if we don't
* need more speed (we're restarting from idle and app-limited).
*/
if (bbr->mode == BBR_PROBE_BW)
bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
}
}
/* Find target cwnd. Right-size the cwnd based on min RTT and the
* estimated bottleneck bandwidth:
*
* cwnd = bw * min_rtt * gain = BDP * gain
*
* The key factor, gain, controls the amount of queue. While a small gain
* builds a smaller queue, it becomes more vulnerable to noise in RTT
* measurements (e.g., delayed ACKs or other ACK compression effects). This
* noise may cause BBR to under-estimate the rate.
*
* To achieve full performance in high-speed paths, we budget enough cwnd to
* fit full-sized skbs in-flight on both end hosts to fully utilize the path:
* - one skb in sending host Qdisc,
* - one skb in sending host TSO/GSO engine
* - one skb being received by receiver host LRO/GRO/delayed-ACK engine
* Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
* in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
* which allows 2 outstanding 2-packet sequences, to try to keep pipe
* full even with ACK-every-other-packet delayed ACKs.
*/
static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 cwnd;
u64 w;
/* If we've never had a valid RTT sample, cap cwnd at the initial
* default. This should only happen when the connection is not using TCP
* timestamps and has retransmitted all of the SYN/SYNACK/data packets
* ACKed so far. In this case, an RTO can cut cwnd to 1, in which
* case we need to slow-start up toward something safe: TCP_INIT_CWND.
*/
if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */
return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/
w = (u64)bw * bbr->min_rtt_us;
/* Apply a gain to the given value, then remove the BW_SCALE shift. */
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
cwnd += 3 * bbr->tso_segs_goal;
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
return cwnd;
}
/* An optimization in BBR to reduce losses: On the first round of recovery, we
* follow the packet conservation principle: send P packets per P packets acked.
* After that, we slow-start and send at most 2*P packets per P packets acked.
* After recovery finishes, or upon undo, we restore the cwnd we had when
* recovery started (capped by the target cwnd based on estimated BDP).
*
* TODO(ycheng/ncardwell): implement a rate-based approach.
*/
static bool bbr_set_cwnd_to_recover_or_restore(
struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
u32 cwnd = tp->snd_cwnd;
/* An ACK for P pkts should release at most 2*P packets. We do this
* in two steps. First, here we deduct the number of lost packets.
* Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
*/
if (rs->losses > 0)
cwnd = max_t(s32, cwnd - rs->losses, 1);
if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
/* Starting 1st round of Recovery, so do packet conservation. */
bbr->packet_conservation = 1;
bbr->next_rtt_delivered = tp->delivered; /* start round now */
/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
cwnd = tcp_packets_in_flight(tp) + acked;
} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
/* Exiting loss recovery; restore cwnd saved before recovery. */
bbr->restore_cwnd = 1;
bbr->packet_conservation = 0;
}
bbr->prev_ca_state = state;
if (bbr->restore_cwnd) {
/* Restore cwnd after exiting loss recovery or PROBE_RTT. */
cwnd = max(cwnd, bbr->prior_cwnd);
bbr->restore_cwnd = 0;
}
if (bbr->packet_conservation) {
*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
return true; /* yes, using packet conservation */
}
*new_cwnd = cwnd;
return false;
}
/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
* has drawn us down below target), or snap down to target if we're above it.
*/
static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
u32 acked, u32 bw, int gain)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 cwnd = 0, target_cwnd = 0;
if (!acked)
return;
if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
goto done;
/* If we're below target cwnd, slow start cwnd toward target cwnd. */
target_cwnd = bbr_target_cwnd(sk, bw, gain);
if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */
cwnd = min(cwnd + acked, target_cwnd);
else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
cwnd = cwnd + acked;
cwnd = max(cwnd, bbr_cwnd_min_target);
done:
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
}
/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
static bool bbr_is_next_cycle_phase(struct sock *sk,
const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool is_full_length =
tcp_stamp_us_delta(tp->delivered_mstamp, bbr->cycle_mstamp) >
bbr->min_rtt_us;
u32 inflight, bw;
/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
* use the pipe without increasing the queue.
*/
if (bbr->pacing_gain == BBR_UNIT)
return is_full_length; /* just use wall clock time */
inflight = rs->prior_in_flight; /* what was in-flight before ACK? */
bw = bbr_max_bw(sk);
/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
* least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
* small (e.g. on a LAN). We do not persist if packets are lost, since
* a path with small buffers may not hold that much.
*/
if (bbr->pacing_gain > BBR_UNIT)
return is_full_length &&
(rs->losses || /* perhaps pacing_gain*BDP won't fit */
inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));
/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
* probing didn't find more bw. If inflight falls to match BDP then we
* estimate queue is drained; persisting would underutilize the pipe.
*/
return is_full_length ||
inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
}
static void bbr_advance_cycle_phase(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
bbr->cycle_mstamp = tp->delivered_mstamp;
bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
}
/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
static void bbr_update_cycle_phase(struct sock *sk,
const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
bbr_is_next_cycle_phase(sk, rs))
bbr_advance_cycle_phase(sk);
}
static void bbr_reset_startup_mode(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_STARTUP;
bbr->pacing_gain = bbr_high_gain;
bbr->cwnd_gain = bbr_high_gain;
}
static void bbr_reset_probe_bw_mode(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_PROBE_BW;
bbr->pacing_gain = BBR_UNIT;
bbr->cwnd_gain = bbr_cwnd_gain;
bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */
}
static void bbr_reset_mode(struct sock *sk)
{
if (!bbr_full_bw_reached(sk))
bbr_reset_startup_mode(sk);
else
bbr_reset_probe_bw_mode(sk);
}
/* Start a new long-term sampling interval. */
static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->lt_last_stamp = div_u64(tp->delivered_mstamp, USEC_PER_MSEC);
bbr->lt_last_delivered = tp->delivered;
bbr->lt_last_lost = tp->lost;
bbr->lt_rtt_cnt = 0;
}
/* Completely reset long-term bandwidth sampling. */
static void bbr_reset_lt_bw_sampling(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->lt_bw = 0;
bbr->lt_use_bw = 0;
bbr->lt_is_sampling = false;
bbr_reset_lt_bw_sampling_interval(sk);
}
/* Long-term bw sampling interval is done. Estimate whether we're policed. */
static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 diff;
if (bbr->lt_bw) { /* do we have bw from a previous interval? */
/* Is new bw close to the lt_bw from the previous interval? */
diff = abs(bw - bbr->lt_bw);
if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
(bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
bbr_lt_bw_diff)) {
/* All criteria are met; estimate we're policed. */
bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */
bbr->lt_use_bw = 1;
bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */
bbr->lt_rtt_cnt = 0;
return;
}
}
bbr->lt_bw = bw;
bbr_reset_lt_bw_sampling_interval(sk);
}
/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
* Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
* explicitly models their policed rate, to reduce unnecessary losses. We
* estimate that we're policed if we see 2 consecutive sampling intervals with
* consistent throughput and high packet loss. If we think we're being policed,
* set lt_bw to the "long-term" average delivery rate from those 2 intervals.
*/
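/* Worked example, assuming the default bbr_lt_loss_thresh = 50 and
 * BBR_SCALE = 8 (illustrative numbers, not from a trace): an interval is
 * "lossy" when lost/delivered >= 50/256 ~= 20%, so delivered = 1000 and
 * lost = 200 give (200 << 8) = 51200 >= 50 * 1000 = 50000 and the interval
 * qualifies; two consecutive such intervals with consistent bw trigger the
 * policed-rate model below.
 */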
static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 lost, delivered;
u64 bw;
u32 t;
if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */
if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */
bbr_reset_probe_bw_mode(sk); /* restart gain cycling */
}
return;
}
/* Wait for the first loss before sampling, to let the policer exhaust
* its tokens and estimate the steady-state rate allowed by the policer.
* Starting samples earlier includes bursts that over-estimate the bw.
*/
if (!bbr->lt_is_sampling) {
if (!rs->losses)
return;
bbr_reset_lt_bw_sampling_interval(sk);
bbr->lt_is_sampling = true;
}
/* To avoid underestimates, reset sampling if we run out of data. */
if (rs->is_app_limited) {
bbr_reset_lt_bw_sampling(sk);
return;
}
if (bbr->round_start)
bbr->lt_rtt_cnt++; /* count round trips in this interval */
if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
return; /* sampling interval needs to be longer */
if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
bbr_reset_lt_bw_sampling(sk); /* interval is too long */
return;
}
/* End sampling interval when a packet is lost, so we estimate the
* policer tokens were exhausted. Stopping the sampling before the
* tokens are exhausted under-estimates the policed rate.
*/
if (!rs->losses)
return;
/* Calculate packets lost and delivered in sampling interval. */
lost = tp->lost - bbr->lt_last_lost;
delivered = tp->delivered - bbr->lt_last_delivered;
/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
return;
/* Find average delivery rate in this sampling interval. */
t = div_u64(tp->delivered_mstamp, USEC_PER_MSEC) - bbr->lt_last_stamp;
if ((s32)t < 1)
return; /* interval is less than one ms, so wait */
/* Check if we can multiply without overflow */
if (t >= ~0U / USEC_PER_MSEC) {
bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */
return;
}
t *= USEC_PER_MSEC;
bw = (u64)delivered * BW_UNIT;
do_div(bw, t);
bbr_lt_bw_interval_done(sk, bw);
}
/* Estimate the bandwidth based on how fast packets are delivered */
static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw;
bbr->round_start = 0;
if (rs->delivered < 0 || rs->interval_us <= 0)
return; /* Not a valid observation */
/* See if we've reached the next RTT */
if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
bbr->next_rtt_delivered = tp->delivered;
bbr->rtt_cnt++;
bbr->round_start = 1;
bbr->packet_conservation = 0;
}
bbr_lt_bw_sampling(sk, rs);
/* Divide delivered by the interval to find a (lower bound) bottleneck
* bandwidth sample. Delivered is in packets and interval_us in uS and
* ratio will be <<1 for most connections. So delivered is first scaled.
*/
bw = (u64)rs->delivered * BW_UNIT;
do_div(bw, rs->interval_us);
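/* Illustrative numbers: rs->delivered = 100 pkts over rs->interval_us =
 * 20000 gives bw = 100 * 2^24 / 20000 ~= 83886, i.e. 0.005 pkt/us =
 * 5000 pkt/s, roughly 60 Mbit/s with 1500-byte packets.
 */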
/* If this sample is application-limited, it is likely to have a very
* low delivered count that represents application behavior rather than
* the available network rate. Such a sample could drag down estimated
* bw, causing needless slow-down. Thus, to continue to send at the
* last measured network rate, we filter out app-limited samples unless
* they describe the path bw at least as well as our bw model.
*
* So the goal during app-limited phase is to proceed with the best
* network rate no matter how long. We automatically leave this
* phase when app writes faster than the network can deliver :)
*/
if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
/* Incorporate new sample into our max bw filter. */
minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
}
}
/* Estimate when the pipe is full, using the change in delivery rate: BBR
* estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
* at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
* rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
* higher rwin, 3: we get higher delivery rate samples. Or transient
* cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
* design goal, but uses delay and inter-ACK spacing instead of bandwidth.
*/
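/* Worked example (illustrative): with bbr_full_bw_thresh = 5/4 in BBR_UNIT
 * terms, a prior full_bw of 1000 sets bw_thresh = 1250; if bbr_max_bw()
 * stays below 1250 for bbr_full_bw_cnt = 3 consecutive non-app-limited
 * round starts, bbr_full_bw_reached() becomes true and STARTUP ends.
 */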
static void bbr_check_full_bw_reached(struct sock *sk,
const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw_thresh;
if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
return;
bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
if (bbr_max_bw(sk) >= bw_thresh) {
bbr->full_bw = bbr_max_bw(sk);
bbr->full_bw_cnt = 0;
return;
}
++bbr->full_bw_cnt;
}
/* If pipe is probably full, drain the queue and then enter steady-state. */
static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */
}
/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
* periodically drain the bottleneck queue, to converge to measure the true
* min_rtt (unloaded propagation delay). This allows the flows to keep queues
* small (reducing queuing delay and packet loss) and achieve fairness among
* BBR flows.
*
* The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
* we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
* After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed
* round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and
* re-enter the previous mode. BBR uses 200ms to approximately bound the
* performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s).
*
* Note that flows need only pay 2% if they are busy sending over the last 10
* seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
* natural silences or low-rate periods within 10 seconds where the rate is low
* enough for long enough to drain its queue in the bottleneck. We pick up
* these min RTT measurements opportunistically with our min_rtt filter. :-)
*/
static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool filter_expired;
/* Track min RTT seen in the min_rtt_win_sec filter window: */
filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
(rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
bbr->min_rtt_us = rs->rtt_us;
bbr->min_rtt_stamp = tcp_jiffies32;
}
if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
!bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */
bbr->pacing_gain = BBR_UNIT;
bbr->cwnd_gain = BBR_UNIT;
bbr_save_cwnd(sk); /* note cwnd so we can restore it */
bbr->probe_rtt_done_stamp = 0;
}
if (bbr->mode == BBR_PROBE_RTT) {
/* Ignore low rate samples during this mode. */
tp->app_limited =
(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
/* Maintain min packets in flight for max(200 ms, 1 round). */
if (!bbr->probe_rtt_done_stamp &&
tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
bbr->probe_rtt_done_stamp = tcp_jiffies32 +
msecs_to_jiffies(bbr_probe_rtt_mode_ms);
bbr->probe_rtt_round_done = 0;
bbr->next_rtt_delivered = tp->delivered;
} else if (bbr->probe_rtt_done_stamp) {
if (bbr->round_start)
bbr->probe_rtt_round_done = 1;
if (bbr->probe_rtt_round_done &&
after(tcp_jiffies32, bbr->probe_rtt_done_stamp)) {
bbr->min_rtt_stamp = tcp_jiffies32;
bbr->restore_cwnd = 1; /* snap to prior_cwnd */
bbr_reset_mode(sk);
}
}
}
bbr->idle_restart = 0;
}
static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
{
bbr_update_bw(sk, rs);
bbr_update_cycle_phase(sk, rs);
bbr_check_full_bw_reached(sk, rs);
bbr_check_drain(sk, rs);
bbr_update_min_rtt(sk, rs);
}
static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
bbr_update_model(sk, rs);
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
bbr->probe_rtt_done_stamp = 0;
bbr->probe_rtt_round_done = 0;
bbr->min_rtt_us = tcp_min_rtt(tp);
bbr->min_rtt_stamp = tcp_jiffies32;
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
bbr->has_seen_rtt = 0;
bbr_init_pacing_rate_from_rtt(sk);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
bbr->cycle_mstamp = 0;
bbr->cycle_idx = 0;
bbr_reset_lt_bw_sampling(sk);
bbr_reset_startup_mode(sk);
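/* Ask the stack to pace this socket: BBR depends on pacing (the fq qdisc,
 * or internal TCP pacing on kernels that provide it).
 */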
cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}
static u32 bbr_sndbuf_expand(struct sock *sk)
{
/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
return 3;
}
/* In theory BBR does not need to undo the cwnd since it does not
* always reduce cwnd on losses (see bbr_main()). Keep it for now.
*/
static u32 bbr_undo_cwnd(struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw = bbr_bw(sk);
bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
memset(&info->bbr, 0, sizeof(info->bbr));
info->bbr.bbr_bw_lo = (u32)bw;
info->bbr.bbr_bw_hi = (u32)(bw >> 32);
info->bbr.bbr_min_rtt = bbr->min_rtt_us;
info->bbr.bbr_pacing_gain = bbr->pacing_gain;
info->bbr.bbr_cwnd_gain = bbr->cwnd_gain;
*attr = INET_DIAG_BBRINFO;
return sizeof(info->bbr);
}
return 0;
}
static void bbr_set_state(struct sock *sk, u8 new_state)
{
struct bbr *bbr = inet_csk_ca(sk);
if (new_state == TCP_CA_Loss) {
struct rate_sample rs = { .losses = 1 };
bbr->prev_ca_state = TCP_CA_Loss;
bbr->full_bw = 0;
bbr->round_start = 1; /* treat RTO like end of a round */
bbr_lt_bw_sampling(sk, &rs);
}
}
static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.flags = TCP_CONG_NON_RESTRICTED,
.name = "nanqinlang",
.owner = THIS_MODULE,
.init = bbr_init,
.cong_control = bbr_main,
.sndbuf_expand = bbr_sndbuf_expand,
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
.tso_segs_goal = bbr_tso_segs_goal,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
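/* Once the module is loaded, this algorithm can be selected through the
 * usual interface, e.g. sysctl -w net.ipv4.tcp_congestion_control=nanqinlang
 * (matching the .name field above).
 */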
static int __init bbr_register(void)
{
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}
static void __exit bbr_unregister(void)
{
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
module_init(bbr_register);
module_exit(bbr_unregister);
MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
MODULE_AUTHOR("Nanqinlang <https://sometimesnaive.org>");

@@ -1,46 +0,0 @@
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
PKG_NAME:=tcp-bbr-tsunami
PKG_RELEASE:=1
include $(INCLUDE_DIR)/package.mk
define KernelPackage/$(PKG_NAME)
SUBMENU:=Network Support
TITLE:=Modified bbr tcp congestion control
DEPENDS:=@LINUX_4_14
FILES:=$(PKG_BUILD_DIR)/tcp_bbr_tsunami.ko
KCONFIG:=
endef
define KernelPackage/$(PKG_NAME)/description
Kernel module of modified BBR tcp congestion control
endef
EXTRA_KCONFIG:= \
EXTRA_CFLAGS:= \
$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=m,%,$(filter %=m,$(EXTRA_KCONFIG)))) \
$(patsubst CONFIG_%, -DCONFIG_%=1, $(patsubst %=y,%,$(filter %=y,$(EXTRA_KCONFIG)))) \
MAKE_OPTS:= \
ARCH="$(LINUX_KARCH)" \
CROSS_COMPILE="$(TARGET_CROSS)" \
SUBDIRS="$(PKG_BUILD_DIR)" \
EXTRA_CFLAGS="$(EXTRA_CFLAGS)" \
$(EXTRA_KCONFIG)
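# Note: EXTRA_KCONFIG is left empty above, so EXTRA_CFLAGS expands to nothing.
# If it were set to e.g. CONFIG_TCP_CONG_TSUNAMI=m (a hypothetical symbol),
# the patsubst chain would emit -DCONFIG_TCP_CONG_TSUNAMI=1 for the build.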
define Build/Prepare
mkdir -p $(PKG_BUILD_DIR)
$(CP) ./src/* $(PKG_BUILD_DIR)/
endef
define Build/Compile
$(MAKE) -C "$(LINUX_DIR)" \
$(MAKE_OPTS) \
modules
endef
$(eval $(call KernelPackage,$(PKG_NAME)))

@@ -1 +0,0 @@
obj-m := tcp_bbr_tsunami.o

@@ -1,905 +0,0 @@
/* Bottleneck Bandwidth and RTT (BBR) congestion control
*
* BBR congestion control computes the sending rate based on the delivery
* rate (throughput) estimated from ACKs. In a nutshell:
*
* On each ACK, update our model of the network path:
* bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
* min_rtt = windowed_min(rtt, 10 seconds)
* pacing_rate = pacing_gain * bottleneck_bandwidth
* cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
*
* The core algorithm does not react directly to packet losses or delays,
* although BBR may adjust the size of next send per ACK when loss is
* observed, or adjust the sending rate if it estimates there is a
* traffic policer, in order to keep the drop rate reasonable.
*
* BBR is described in detail in:
* "BBR: Congestion-Based Congestion Control",
* Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
* Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
*
* There is a public e-mail list for discussing BBR development and testing:
* https://groups.google.com/forum/#!forum/bbr-dev
*
* NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
* since pacing is integral to the BBR design and implementation.
* BBR without pacing would not function properly, and may incur unnecessary
* high packet loss rates.
*/
#include <linux/module.h>
#include <net/tcp.h>
#include <linux/inet_diag.h>
#include <linux/inet.h>
#include <linux/random.h>
#include <linux/win_minmax.h>
/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
* estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
* This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
* Since the minimum window is >=4 packets, the lower bound isn't
* an issue. The upper bound isn't an issue with existing technologies.
*/
#define BW_SCALE 24
#define BW_UNIT (1 << BW_SCALE)
#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */
#define BBR_UNIT (1 << BBR_SCALE)
/* BBR has the following modes for deciding how fast to send: */
enum bbr_mode {
BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */
BBR_DRAIN, /* drain any queue created during startup */
BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */
BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */
};
/* BBR congestion control block */
struct bbr {
u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */
//deprecated u32 rtt_us;
u32 min_rtt_stamp; /* timestamp of min_rtt_us */
u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */
//deprecated struct minmax max_rtt;
struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */
u32 rtt_cnt; /* count of packet-timed rounds elapsed */
u32 next_rtt_delivered; /* scb->tx.delivered at end of round */
struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */
u32 mode:3, /* current bbr_mode in state machine */
prev_ca_state:3, /* CA state on previous ACK */
packet_conservation:1, /* use packet conservation? */
restore_cwnd:1, /* decided to revert cwnd to old value */
round_start:1, /* start of packet-timed tx->ack round? */
tso_segs_goal:7, /* segments we want in each skb we send */
idle_restart:1, /* restarting after idle? */
probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */
unused:5,
lt_is_sampling:1, /* taking long-term ("LT") samples now? */
lt_rtt_cnt:7, /* round trips in long-term interval */
lt_use_bw:1; /* use lt_bw as our bw estimate? */
u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */
u32 lt_last_delivered; /* LT intvl start: tp->delivered */
u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */
u32 lt_last_lost; /* LT intvl start: tp->lost */
u32 pacing_gain:10, /* current gain for setting pacing rate */
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_cnt:3, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
unused_b:6;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */
/* Window length of bw filter (in rounds): */
static const int bbr_bw_rtts = CYCLE_LEN + 7;
/* Window length of min_rtt filter (in sec): */
static const u32 bbr_min_rtt_win_sec = 20;
/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
static const u32 bbr_probe_rtt_mode_ms = 200;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;
/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
* that will allow a smoothly increasing pacing rate that will double each RTT
* and send the same number of packets per RTT that an un-paced, slow-starting
* Reno or CUBIC flow would:
*/
static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1;
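/* With BBR_UNIT = 256 this works out to 256 * 2885 / 1000 + 1 = 739,
 * i.e. a gain of 739/256 ~= 2.887, just above 2/ln(2) ~= 2.885.
 */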
/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
* the queue created in BBR_STARTUP in a single round:
*/
static const int bbr_drain_gain = BBR_UNIT * 1200 / 2885;
/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
static const int bbr_cwnd_gain = BBR_UNIT * 2;
/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
static const int bbr_pacing_gain[] = {
BBR_UNIT * 3 / 2, /* probe for more available bw */
BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */
BBR_UNIT * 9 / 8, BBR_UNIT * 9 / 8, BBR_UNIT * 9 / 8, /* this mod cruises at 9/8*bw */
BBR_UNIT * 9 / 8, BBR_UNIT * 9 / 8, BBR_UNIT * 9 / 8 /* (upstream BBR cruises at 1.0*bw here to avoid excess queue) */
};
/* Randomize the starting gain cycling phase over N phases: */
static const u32 bbr_cycle_rand = 7;
/* Try to keep at least this many packets in flight, if things go smoothly. For
* smooth functioning, a sliding window protocol ACKing every other packet
* needs at least 4 packets in flight:
*/
static const u32 bbr_cwnd_min_target = 4;
/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
/* If bw has increased significantly (1.25x), there may be more bw available: */
static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
static const u32 bbr_full_bw_cnt = 3;
/* "long-term" ("LT") bandwidth estimator parameters... */
/* The minimum number of rounds in an LT bw sampling interval: */
static const u32 bbr_lt_intvl_min_rtts = 4;
/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
static const u32 bbr_lt_loss_thresh = 50;
/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
static const u32 bbr_lt_bw_diff = 4000 / 8;
/* If we estimate we're policed, use lt_bw for this many round trips: */
static const u32 bbr_lt_bw_max_rtts = 48;
/* Do we estimate that STARTUP filled the pipe? */
static bool bbr_full_bw_reached(const struct sock *sk)
{
const struct bbr *bbr = inet_csk_ca(sk);
return bbr->full_bw_cnt >= bbr_full_bw_cnt;
}
/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
static u32 bbr_max_bw(const struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return minmax_get(&bbr->bw);
}
/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
static u32 bbr_bw(const struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
}
/* Return rate in bytes per second, optionally with a gain.
* The order here is chosen carefully to avoid overflow of u64. This should
* work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
*/
static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
rate *= gain;
rate >>= BBR_SCALE;
rate *= USEC_PER_SEC;
return rate >> BW_SCALE;
}
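/* Sanity-check example (illustrative): for rate = BW_UNIT (1 pkt/us), a
 * 1500-byte MTU and gain = BBR_UNIT, the result is 1500 bytes/us =
 * 1.5e9 bytes/sec (12 Gbit/s), with every intermediate product staying
 * well inside u64.
 */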
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
* estimated bandwidth. This is an important aspect of the design. In this
* implementation this slightly lower pacing rate is achieved implicitly by not
* including link-layer headers in the packet size used for the pacing rate.
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
struct bbr *bbr = inet_csk_ca(sk);
u64 rate = bw;
rate = bbr_rate_bytes_per_sec(sk, rate, gain);
rate = min_t(u64, rate, sk->sk_max_pacing_rate);
if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
/* Return count of segments we want in the skbs we send, or 0 for default. */
static u32 bbr_tso_segs_goal(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
return bbr->tso_segs_goal;
}
static void bbr_set_tso_segs_goal(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 min_segs;
min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
0x7FU);
}
/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
static void bbr_save_cwnd(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
}
static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
if (event == CA_EVENT_TX_START && tp->app_limited) {
bbr->idle_restart = 1;
/* Avoid pointless buffer overflows: pace at est. bw if we don't
* need more speed (we're restarting from idle and app-limited).
*/
if (bbr->mode == BBR_PROBE_BW)
bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
}
}
/* Find target cwnd. Right-size the cwnd based on min RTT and the
* estimated bottleneck bandwidth:
*
* cwnd = bw * min_rtt * gain = BDP * gain
*
* The key factor, gain, controls the amount of queue. While a small gain
* builds a smaller queue, it becomes more vulnerable to noise in RTT
* measurements (e.g., delayed ACKs or other ACK compression effects). This
* noise may cause BBR to under-estimate the rate.
*
* To achieve full performance in high-speed paths, we budget enough cwnd to
* fit full-sized skbs in-flight on both end hosts to fully utilize the path:
* - one skb in sending host Qdisc,
* - one skb in sending host TSO/GSO engine
* - one skb being received by receiver host LRO/GRO/delayed-ACK engine
* Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
* in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
* which allows 2 outstanding 2-packet sequences, to try to keep pipe
* full even with ACK-every-other-packet delayed ACKs.
*/
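/* Worked example (illustrative): 100 Mbit/s with 1500-byte packets is
 * ~0.00833 pkt/us, so bw ~= 0.00833 * 2^24 ~= 139810; with
 * min_rtt_us = 40000 the BDP is ~333 packets, and gain = 2*BBR_UNIT
 * yields cwnd ~= 667 before the tso and even-number adjustments below.
 */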
static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 cwnd;
u64 w;
/* If we've never had a valid RTT sample, cap cwnd at the initial
* default. This should only happen when the connection is not using TCP
* timestamps and has retransmitted all of the SYN/SYNACK/data packets
* ACKed so far. In this case, an RTO can cut cwnd to 1, in which
* case we need to slow-start up toward something safe: TCP_INIT_CWND.
*/
if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */
return TCP_INIT_CWND; /* be safe: cap at default initial cwnd */
w = (u64)bw * bbr->min_rtt_us;
/* Apply a gain to the given value, then remove the BW_SCALE shift. */
cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
/* Allow enough full-sized skbs in flight to utilize end systems. */
cwnd += 3 * bbr->tso_segs_goal;
/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
cwnd = (cwnd + 1) & ~1U;
return cwnd;
}
/* An optimization in BBR to reduce losses: On the first round of recovery, we
* follow the packet conservation principle: send P packets per P packets acked.
* After that, we slow-start and send at most 2*P packets per P packets acked.
* After recovery finishes, or upon undo, we restore the cwnd we had when
* recovery started (capped by the target cwnd based on estimated BDP).
*
* TODO(ycheng/ncardwell): implement a rate-based approach.
*/
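/* Example (illustrative): with 80 packets in flight when recovery starts
 * and an ACK for 10, packet conservation sets cwnd = 80 + 10 = 90, so we
 * send roughly one packet per packet ACKed until the first round ends.
 */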
static bool bbr_set_cwnd_to_recover_or_restore(
struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
u32 cwnd = tp->snd_cwnd;
/* An ACK for P pkts should release at most 2*P packets. We do this
* in two steps. First, here we deduct the number of lost packets.
* Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
*/
if (rs->losses > 0)
cwnd = max_t(s32, cwnd - rs->losses, 1);
if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
/* Starting 1st round of Recovery, so do packet conservation. */
bbr->packet_conservation = 1;
bbr->next_rtt_delivered = tp->delivered; /* start round now */
/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
cwnd = tcp_packets_in_flight(tp) + acked;
} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
/* Exiting loss recovery; restore cwnd saved before recovery. */
bbr->restore_cwnd = 1;
bbr->packet_conservation = 0;
}
bbr->prev_ca_state = state;
if (bbr->restore_cwnd) {
/* Restore cwnd after exiting loss recovery or PROBE_RTT. */
cwnd = max(cwnd, bbr->prior_cwnd);
bbr->restore_cwnd = 0;
}
if (bbr->packet_conservation) {
*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
return true; /* yes, using packet conservation */
}
*new_cwnd = cwnd;
return false;
}
/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
* has drawn us down below target), or snap down to target if we're above it.
*/
static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
u32 acked, u32 bw, int gain)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 cwnd = 0, target_cwnd = 0;
if (!acked)
return;
if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
goto done;
/* If we're below target cwnd, slow start cwnd toward target cwnd. */
target_cwnd = bbr_target_cwnd(sk, bw, gain);
if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */
cwnd = min(cwnd + acked, target_cwnd);
else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
cwnd = cwnd + acked;
cwnd = max(cwnd, bbr_cwnd_min_target);
done:
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
tp->snd_cwnd = max(tp->snd_cwnd >> 1, bbr_cwnd_min_target);
}
/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
static bool bbr_is_next_cycle_phase(struct sock *sk,
const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bool is_full_length =
skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
bbr->min_rtt_us;
u32 inflight, bw;
/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
* use the pipe without increasing the queue.
*/
if (bbr->pacing_gain == BBR_UNIT)
return is_full_length; /* just use wall clock time */
inflight = rs->prior_in_flight; /* what was in-flight before ACK? */
bw = bbr_max_bw(sk);
/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
* least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
* small (e.g. on a LAN). We do not persist if packets are lost, since
* a path with small buffers may not hold that much.
*/
if (bbr->pacing_gain > BBR_UNIT)
return is_full_length &&
(rs->losses || /* perhaps pacing_gain*BDP won't fit */
inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));
/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
* probing didn't find more bw. If inflight falls to match BDP then we
* estimate queue is drained; persisting would underutilize the pipe.
*/
return is_full_length ||
inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
}
static void bbr_advance_cycle_phase(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
bbr->cycle_mstamp = tp->delivered_mstamp;
bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
}
/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
static void bbr_update_cycle_phase(struct sock *sk,
const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
bbr_is_next_cycle_phase(sk, rs))
bbr_advance_cycle_phase(sk);
}
static void bbr_reset_startup_mode(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_STARTUP;
bbr->pacing_gain = bbr_high_gain;
bbr->cwnd_gain = bbr_high_gain;
}
static void bbr_reset_probe_bw_mode(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_PROBE_BW;
bbr->pacing_gain = BBR_UNIT;
bbr->cwnd_gain = bbr_cwnd_gain;
bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */
}
static void bbr_reset_mode(struct sock *sk)
{
if (!bbr_full_bw_reached(sk))
bbr_reset_startup_mode(sk);
else
bbr_reset_probe_bw_mode(sk);
}
/* Start a new long-term sampling interval. */
static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
bbr->lt_last_delivered = tp->delivered;
bbr->lt_last_lost = tp->lost;
bbr->lt_rtt_cnt = 0;
}
/* Completely reset long-term bandwidth sampling. */
static void bbr_reset_lt_bw_sampling(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
bbr->lt_bw = 0;
bbr->lt_use_bw = 0;
bbr->lt_is_sampling = false;
bbr_reset_lt_bw_sampling_interval(sk);
}
/* Long-term bw sampling interval is done. Estimate whether we're policed. */
static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 diff;
if (bbr->lt_bw) { /* do we have bw from a previous interval? */
/* Is new bw close to the lt_bw from the previous interval? */
diff = abs(bw - bbr->lt_bw);
if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
(bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
bbr_lt_bw_diff)) {
/* All criteria are met; estimate we're policed. */
bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */
bbr->lt_use_bw = 1;
bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */
bbr->lt_rtt_cnt = 0;
return;
}
}
bbr->lt_bw = bw;
bbr_reset_lt_bw_sampling_interval(sk);
}
/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
* Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
* explicitly models their policed rate, to reduce unnecessary losses. We
* estimate that we're policed if we see 2 consecutive sampling intervals with
* consistent throughput and high packet loss. If we think we're being policed,
* set lt_bw to the "long-term" average delivery rate from those 2 intervals.
*/
static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u32 lost, delivered;
u64 bw;
s32 t;
if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */
if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */
bbr_reset_probe_bw_mode(sk); /* restart gain cycling */
}
return;
}
/* Wait for the first loss before sampling, to let the policer exhaust
* its tokens and estimate the steady-state rate allowed by the policer.
* Starting samples earlier includes bursts that over-estimate the bw.
*/
if (!bbr->lt_is_sampling) {
if (!rs->losses)
return;
bbr_reset_lt_bw_sampling_interval(sk);
bbr->lt_is_sampling = true;
}
/* To avoid underestimates, reset sampling if we run out of data. */
if (rs->is_app_limited) {
bbr_reset_lt_bw_sampling(sk);
return;
}
if (bbr->round_start)
bbr->lt_rtt_cnt++; /* count round trips in this interval */
if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
return; /* sampling interval needs to be longer */
if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
bbr_reset_lt_bw_sampling(sk); /* interval is too long */
return;
}
/* End sampling interval when a packet is lost, so we estimate the
* policer tokens were exhausted. Stopping the sampling before the
* tokens are exhausted under-estimates the policed rate.
*/
if (!rs->losses)
return;
/* Calculate packets lost and delivered in sampling interval. */
lost = tp->lost - bbr->lt_last_lost;
delivered = tp->delivered - bbr->lt_last_delivered;
/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
return;
/* Find average delivery rate in this sampling interval. */
t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
if (t < 1)
return; /* interval is less than one jiffy, so wait */
t = jiffies_to_usecs(t);
/* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
if (t < 1) {
bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */
return;
}
bw = (u64)delivered * BW_UNIT;
do_div(bw, t);
bbr_lt_bw_interval_done(sk, bw);
}
/* Estimate the bandwidth based on how fast packets are delivered */
static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw;
bbr->round_start = 0;
if (rs->delivered < 0 || rs->interval_us <= 0)
return; /* Not a valid observation */
/* See if we've reached the next RTT */
if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
bbr->next_rtt_delivered = tp->delivered;
bbr->rtt_cnt++;
bbr->round_start = 1;
bbr->packet_conservation = 0;
}
bbr_lt_bw_sampling(sk, rs);
/* Divide delivered by the interval to find a (lower bound) bottleneck
* bandwidth sample. Delivered is in packets and interval_us in uS and
* ratio will be <<1 for most connections. So delivered is first scaled.
*/
bw = (u64)rs->delivered * BW_UNIT;
do_div(bw, rs->interval_us);
/* If this sample is application-limited, it is likely to have a very
* low delivered count that represents application behavior rather than
* the available network rate. Such a sample could drag down estimated
* bw, causing needless slow-down. Thus, to continue to send at the
* last measured network rate, we filter out app-limited samples unless
* they describe the path bw at least as well as our bw model.
*
* So the goal during app-limited phase is to proceed with the best
* network rate no matter how long. We automatically leave this
* phase when app writes faster than the network can deliver :)
*/
if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
/* Incorporate new sample into our max bw filter. */
minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
}
}
/* Estimate when the pipe is full, using the change in delivery rate: BBR
* estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
* at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
* rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
* higher rwin, 3: we get higher delivery rate samples. Or transient
* cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
* design goal, but uses delay and inter-ACK spacing instead of bandwidth.
*/
static void bbr_check_full_bw_reached(struct sock *sk,
const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw_thresh;
if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
return;
bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
if (bbr_max_bw(sk) >= bw_thresh) {
bbr->full_bw = bbr_max_bw(sk);
bbr->full_bw_cnt = 0;
return;
}
++bbr->full_bw_cnt;
}
/* If pipe is probably full, drain the queue and then enter steady-state. */
static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
bbr->mode = BBR_DRAIN; /* drain queue we created */
bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */
bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */
} /* fall through to check if in-flight is already small: */
if (bbr->mode == BBR_DRAIN &&
tcp_packets_in_flight(tcp_sk(sk)) <=
bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */
}
/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
* periodically drain the bottleneck queue, to converge to measure the true
* min_rtt (unloaded propagation delay). This allows the flows to keep queues
* small (reducing queuing delay and packet loss) and achieve fairness among
* BBR flows.
*
* The min_rtt filter window is bbr_min_rtt_win_sec (20 seconds in this mod;
* upstream BBR uses 10). When the min_rtt estimate expires, we enter
* PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
* After at least bbr_probe_rtt_mode_ms/2 (100ms here; see below) and at
* least one packet-timed round trip elapsed with that flight size <= 4, we
* leave PROBE_RTT mode and re-enter the previous mode. Upstream BBR's 200ms
* over a 10s window bounds the penalty of PROBE_RTT's cwnd capping to
* roughly 2%.
*
* Note that flows need only pay 2% if they are busy sending over the last 10
* seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
* natural silences or low-rate periods within 10 seconds where the rate is low
* enough for long enough to drain its queue in the bottleneck. We pick up
* these min RTT measurements opportunistically with our min_rtt filter. :-)
*/
static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
//deprecated u32 rtt_prior = 0;
bool filter_expired;
/* Track min RTT seen in the min_rtt_win_sec filter window: */
filter_expired = after(tcp_time_stamp,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
(rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
bbr->min_rtt_us = rs->rtt_us;
bbr->min_rtt_stamp = tcp_time_stamp;
//deprecated bbr->rtt_us = rs->rtt_us;
}
//deprecated bbr->rtt_us = rs->rtt_us;
//deprecated rtt_prior = minmax_get(&bbr->max_rtt);
//deprecated bbr->rtt_us = min(bbr->rtt_us, rtt_prior);
//deprecated minmax_running_max(&bbr->max_rtt, bbr_bw_rtts, bbr->rtt_cnt, rs->rtt_us);
if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
!bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */
bbr->pacing_gain = BBR_UNIT;
bbr->cwnd_gain = BBR_UNIT;
bbr_save_cwnd(sk); /* note cwnd so we can restore it */
bbr->probe_rtt_done_stamp = 0;
}
if (bbr->mode == BBR_PROBE_RTT) {
/* Ignore low rate samples during this mode. */
tp->app_limited =
(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
/* Maintain min packets in flight for max(bbr_probe_rtt_mode_ms/2 = 100 ms, 1 round). */
if (!bbr->probe_rtt_done_stamp &&
tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
bbr->probe_rtt_done_stamp = tcp_time_stamp +
msecs_to_jiffies(bbr_probe_rtt_mode_ms >> 1);
bbr->probe_rtt_round_done = 0;
bbr->next_rtt_delivered = tp->delivered;
} else if (bbr->probe_rtt_done_stamp) {
if (bbr->round_start)
bbr->probe_rtt_round_done = 1;
if (bbr->probe_rtt_round_done &&
after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) {
bbr->min_rtt_stamp = tcp_time_stamp;
bbr->restore_cwnd = 1; /* snap to prior_cwnd */
bbr_reset_mode(sk);
}
}
}
bbr->idle_restart = 0;
}
static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
{
bbr_update_bw(sk, rs);
bbr_update_cycle_phase(sk, rs);
bbr_check_full_bw_reached(sk, rs);
bbr_check_drain(sk, rs);
bbr_update_min_rtt(sk, rs);
}
static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
bbr_update_model(sk, rs);
bw = bbr_bw(sk);
bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
bbr_set_tso_segs_goal(sk);
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw;
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
bbr->rtt_cnt = 0;
bbr->next_rtt_delivered = 0;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
bbr->probe_rtt_done_stamp = 0;
bbr->probe_rtt_round_done = 0;
bbr->min_rtt_us = tcp_min_rtt(tp);
bbr->min_rtt_stamp = tcp_time_stamp;
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
bw = (u64)tp->snd_cwnd * BW_UNIT;
do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
bbr_set_pacing_rate(sk, bw, bbr_high_gain);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
bbr->idle_restart = 0;
bbr->full_bw = 0;
bbr->full_bw_cnt = 0;
bbr->cycle_mstamp.v64 = 0;
bbr->cycle_idx = 0;
bbr_reset_lt_bw_sampling(sk);
bbr_reset_startup_mode(sk);
}
static u32 bbr_sndbuf_expand(struct sock *sk)
{
/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
return 3;
}
/* In theory BBR does not need to undo the cwnd since it does not
* always reduce cwnd on losses (see bbr_main()). Keep it for now.
*/
static u32 bbr_undo_cwnd(struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */
}
static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u64 bw = bbr_bw(sk);
bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
memset(&info->bbr, 0, sizeof(info->bbr));
info->bbr.bbr_bw_lo = (u32)bw;
info->bbr.bbr_bw_hi = (u32)(bw >> 32);
info->bbr.bbr_min_rtt = bbr->min_rtt_us;
info->bbr.bbr_pacing_gain = bbr->pacing_gain;
info->bbr.bbr_cwnd_gain = bbr->cwnd_gain;
*attr = INET_DIAG_BBRINFO;
return sizeof(info->bbr);
}
return 0;
}
static void bbr_set_state(struct sock *sk, u8 new_state)
{
struct bbr *bbr = inet_csk_ca(sk);
if (new_state == TCP_CA_Loss) {
struct rate_sample rs = { .losses = 1 };
bbr->prev_ca_state = TCP_CA_Loss;
bbr->full_bw = 0;
bbr->round_start = 1; /* treat RTO like end of a round */
bbr_lt_bw_sampling(sk, &rs);
}
}
static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
.flags = TCP_CONG_NON_RESTRICTED,
.name = "tsunami",
.owner = THIS_MODULE,
.init = bbr_init,
.cong_control = bbr_main,
.sndbuf_expand = bbr_sndbuf_expand,
.undo_cwnd = bbr_undo_cwnd,
.cwnd_event = bbr_cwnd_event,
.ssthresh = bbr_ssthresh,
.tso_segs_goal = bbr_tso_segs_goal,
.get_info = bbr_get_info,
.set_state = bbr_set_state,
};
static int __init bbr_register(void)
{
BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
return tcp_register_congestion_control(&tcp_bbr_cong_ops);
}
static void __exit bbr_unregister(void)
{
tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
}
module_init(bbr_register);
module_exit(bbr_unregister);
MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");

@@ -1,78 +0,0 @@
#
# Copyright (C) 2016-2017 Jian Chang <aa65535@live.com>
#
# This is free software, licensed under the GNU General Public License v3.
# See /LICENSE for more information.
#
include $(TOPDIR)/rules.mk
PKG_NAME:=luci-app-macvlan
PKG_VERSION:=1.0.0
PKG_RELEASE:=1
PKG_LICENSE:=GPLv3
PKG_LICENSE_FILES:=LICENSE
PKG_MAINTAINER:=Chen Minqiang <ptpt52@gmail.com>
PKG_BUILD_DIR:=$(BUILD_DIR)/$(PKG_NAME)
include $(INCLUDE_DIR)/package.mk
define Package/luci-app-macvlan
CATEGORY:=LuCI
SUBMENU:=3. Applications
TITLE:=LuCI Support for macvlan
PKGARCH:=all
DEPENDS:=+kmod-macvlan
endef
define Package/luci-app-macvlan/description
LuCI Support for macvlan.
endef
define Build/Prepare
$(foreach po,$(wildcard ${CURDIR}/files/luci/i18n/*.po), \
po2lmo $(po) $(PKG_BUILD_DIR)/$(patsubst %.po,%.lmo,$(notdir $(po)));)
endef
define Build/Configure
endef
define Build/Compile
endef
define Package/luci-app-macvlan/postinst
#!/bin/sh
if [ -z "$${IPKG_INSTROOT}" ]; then
rm -rf /tmp/luci-indexcache /tmp/luci-modulecache
fi
if [ -z "$$IPKG_INSTROOT" ]; then
( . /etc/uci-defaults/40_luci-app-macvlan )
rm -f /etc/uci-defaults/40_luci-app-macvlan
fi
exit 0
endef
define Package/luci-app-macvlan/conffiles
/etc/config/macvlan
endef
define Package/luci-app-macvlan/install
$(INSTALL_DIR) $(1)/usr/lib/lua/luci/i18n
$(INSTALL_DATA) $(PKG_BUILD_DIR)/macvlan.*.lmo $(1)/usr/lib/lua/luci/i18n/
$(INSTALL_DIR) $(1)/usr/lib/lua/luci/controller
$(INSTALL_DATA) ./files/luci/controller/*.lua $(1)/usr/lib/lua/luci/controller/
$(INSTALL_DIR) $(1)/usr/lib/lua/luci/model/cbi/macvlan
$(INSTALL_DATA) ./files/luci/model/cbi/macvlan/*.lua $(1)/usr/lib/lua/luci/model/cbi/macvlan/
$(INSTALL_DIR) $(1)/etc/config
$(INSTALL_DATA) ./files/root/etc/config/macvlan $(1)/etc/config/macvlan
$(INSTALL_DIR) $(1)/etc/init.d
$(INSTALL_BIN) ./files/root/etc/init.d/macvlan $(1)/etc/init.d/macvlan
$(INSTALL_DIR) $(1)/etc/uci-defaults
$(INSTALL_DATA) ./files/root/etc/uci-defaults/40_luci-app-macvlan $(1)/etc/uci-defaults/40_luci-app-macvlan
endef
$(eval $(call BuildPackage,luci-app-macvlan))

@@ -1,12 +0,0 @@
-- Copyright 2008 Steven Barth <steven@midlink.org>
-- Copyright 2008 Jo-Philipp Wich <jow@openwrt.org>
-- Licensed to the public under the Apache License 2.0.
module("luci.controller.macvlan", package.seeall)
function index()
local page
page = entry({"admin", "services", "macvlan"}, cbi("macvlan/macvlan"), _("Macvlan"))
page.leaf = true
end

@@ -1,14 +0,0 @@
msgid ""
msgstr "Content-Type: text/plain; charset=UTF-8\n"
msgid "Macvlan"
msgstr "Macvlan"
msgid "Macvlan Settings"
msgstr "Macvlan设置"
msgid "Interface"
msgstr "网络接口"
msgid "Index"
msgstr "序号"

@@ -1,20 +0,0 @@
-- Copyright 2008 Steven Barth <steven@midlink.org>
-- Copyright 2010-2015 Jo-Philipp Wich <jow@openwrt.org>
-- Licensed to the public under the Apache License 2.0.
m = Map("macvlan", translate("Macvlan"))
s = m:section(TypedSection, "macvlan", translate("Macvlan Settings"))
s.addremove = true
s.anonymous = true
s.template = "cbi/tblsection"
hn = s:option(Value, "ifname", translate("Interface"))
hn.datatype = "string"
hn.rmempty = false
ip = s:option(Value, "macvlan", translate("Index"))
ip.datatype = "and(uinteger,min(0),max(31))"
ip.rmempty = false
return m

@@ -1,59 +0,0 @@
#!/bin/sh /etc/rc.common
START=19
MC=/tmp/macvlan_cleanup.sh
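# $MC accumulates "ip link del ..." cleanup commands as links are created by
# add_macvlan(); stop_macvlan() replays them in reverse order (sort -nr on a
# prepended index) so stacked vlan/macvlan devices are deleted child-first.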
stop_macvlan() {
local idx=0
test -f $MC && {
cat $MC | while read line; do
echo "$idx#$line"
idx=$((idx+1))
done | sort -nr | while read line; do
cmd=`echo "$line" | cut -d"#" -f2`
$cmd
done
rm -f $MC
}
}
add_macvlan() {
local cfg="$1"
local ifname macvlan
local mvname
config_get ifname "$cfg" ifname
config_get macvlan "$cfg" macvlan
test -n "$ifname" || return 1
test -n "$macvlan" || return 1
mvname="mvlan${macvlan}${ifname}"
echo "$ifname" | grep -q "^[a-zA-Z].*\.[0-9]*$" && {
ifn=`echo $ifname | cut -d"." -f 1`
iid=`echo $ifname | cut -d"." -f 2`
test -n "$ifn" && ifconfig $ifn >/dev/null 2>&1 || return 1
ifconfig $ifn up 2>/dev/null
vconfig set_name_type DEV_PLUS_VID_NO_PAD
vconfig add $ifn $iid 2>/dev/null && echo ip link del $ifname >>$MC
mvname="mvlan${macvlan}${ifn}t${iid}"
}
ifconfig $ifname >/dev/null 2>&1 || return 1
ifconfig $ifname up 2>/dev/null
ip link add link $ifname $mvname type macvlan 2>/dev/null && echo ip link del $mvname >>$MC
}
start() {
stop_macvlan
echo -n >$MC
config_load macvlan
config_foreach add_macvlan macvlan
}
stop() {
stop_macvlan
}

@@ -1,11 +0,0 @@
#!/bin/sh
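# Register macvlan with ucitrack so LuCI re-runs the macvlan init script
# whenever /etc/config/macvlan is changed through the web UI.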
uci -q batch <<-EOF >/dev/null
delete ucitrack.@macvlan[-1]
add ucitrack macvlan
set ucitrack.@macvlan[-1].init=macvlan
commit ucitrack
EOF
rm -f /tmp/luci-indexcache
exit 0

@@ -1,43 +0,0 @@
include $(TOPDIR)/rules.mk
PKG_NAME:=luci-app-shadowsocksr-alex
PKG_VERSION=1.7.62
PKG_RELEASE:=1
PKG_MAINTAINER:=Alex Zhuo <1886090@gmail.com>
PKG_BUILD_DIR:=$(BUILD_DIR)/$(PKG_NAME)
include $(INCLUDE_DIR)/package.mk
define Package/$(PKG_NAME)
SECTION:=luci
CATEGORY:=LuCI
SUBMENU:=3. Applications
PKGARCH:=all
TITLE:=luci for shadowsocksR
DEPENDS:=+shadowsocks-libev-ss-local +shadowsocks-libev-ss-redir +shadowsocks-libev-ss-tunnel +dnsforwarder +ipset +ip +iptables-mod-tproxy +kmod-ipt-tproxy +iptables-mod-nat-extra +coreutils-nohup
endef
define Package/$(PKG_NAME)/description
A luci app for shadowsocksR (Alex Zhuo)
endef
define Build/Prepare
$(foreach po,$(wildcard ${CURDIR}/i18n/zh_Hans/*.po), \
po2lmo $(po) $(PKG_BUILD_DIR)/$(patsubst %.po,%.lmo,$(notdir $(po)));)
endef
define Build/Configure
endef
define Build/Compile
endef
define Package/$(PKG_NAME)/install
$(INSTALL_DIR) $(1)/usr/lib/lua/luci/i18n
$(INSTALL_DATA) $(PKG_BUILD_DIR)/shadowsocksR.*.lmo $(1)/usr/lib/lua/luci/i18n/
$(CP) ./files/* $(1)/
endef
$(eval $(call BuildPackage,$(PKG_NAME)))

@@ -1,98 +0,0 @@
OpenWrt LuCI for ShadowsocksR-libev
===
A smart transparent proxy tool for bypassing network censorship
Introduction
---
This package is the LuCI front-end for [shadowsocksR-libev][openwrt-shadowsocksR], with built-in traffic splitting based on GFWList, the China route table and more.
Features
1. Smart traffic splitting based on GFWList
2. Smart traffic splitting based on the China route table
3. Watching China-only video sites such as Youku from abroad
4. Smart DNS resolution based on GFWList
5. Support for newer obfuscation protocols such as `auth_sha1_v4`, `auth_aes128_md5`, `auth_aes128_sha1` and `auth_chain_a`, plus the `none` cipher
6. Support for obfuscation and protocol parameters
7. Game mode: global + China-split UDP forwarding
8. Adbyby and KoolProxy compatibility modes
9. Manual updates of the GFWList blacklist and the China route table
10. TCP DNS proxy resolution via [dnsforwarder][dnsforwarder]
11. Server can be specified by domain name or IP address
12. Multi-server load balancing with HAProxy, or multiple backup servers for high availability ([details][haproxy])
13. Optional KCPTUN integration to improve link quality ([details][kcptun])
14. LAN access control (in Adbyby/KoolProxy mode, use those tools' own access control instead, otherwise the rules will conflict)
15. One-click updates of the China route table and GFWList
16. User-defined lists of IPs forced through the proxy, IPs forced to bypass it, domains forced through, and domains forced to bypass
17. Optional ss-local instance providing a SOCKS5 proxy port
18. Built-in [Redsocks2][redsocks2] support for transparently proxying Socks4, Socks5 and HTTP ports (Redsocks2 must be built separately)
Dependencies
---
Normal operation requires `iptables` and `ipset` (for traffic redirection),
`dnsforwarder` (DNS requests over TCP, forwarded to the SSR server; build it from [openwrt-dnsforwarder][dnsforwarder]),
and `ip-full`, `iptables-mod-tproxy`, `kmod-ipt-tproxy`, `iptables-mod-nat-extra` (for UDP forwarding).
Configuration
---
The package's configuration file lives at `/etc/config/shadowsocksr`.
In most cases you only need to fill in the server IP or domain, port, password, cipher and obfuscation protocol; the default smart mode then splits domestic and foreign traffic automatically, with no further setup required.
Build
---
Build from the OpenWrt [SDK][openwrt-sdk]:
```bash
# Unpack the downloaded SDK
tar xjf OpenWrt-SDK-ar71xx-for-linux-x86_64-gcc-4.8-linaro_uClibc-0.9.33.2.tar.bz2
cd OpenWrt-SDK-ar71xx-*
# Clone the project
git clone https://github.com/AlexZhuo/luci-app-shadowsocksR.git package/luci-app-shadowsocksR
# Build po2lmo (skip if you already have it)
pushd package/luci-app-shadowsocksR/tools/po2lmo
make && sudo make install
popd
# Select the package to build: NetWork -> LuCI -> luci-app-shadowsocksR
make menuconfig
# Start the build
make package/luci-app-shadowsocksR/compile V=99
```
Screenshots
---
![demo](https://github.com/AlexZhuo/luci-app-shadowsocksR/raw/master/screencapture1.png)
![demo](https://github.com/AlexZhuo/luci-app-shadowsocksR/raw/master/screencapture2.png)
[O]: http://www.right.com.cn/forum/thread-198649-1-1.html
[openwrt-shadowsocksR]: https://github.com/AlexZhuo/openwrt-shadowsocksr
[openwrt-sdk]: https://wiki.openwrt.org/doc/howto/obtain.firmware.sdk
[haproxy]: https://github.com/AlexZhuo/luci-app-haproxy-tcp
[kcptun]: https://github.com/AlexZhuo/luci-app-kcptun
[dnsforwarder]: https://github.com/AlexZhuo/openwrt-dnsforwarder
[redsocks2]: https://github.com/AlexZhuo/openwrt-redsocks2

@@ -1,17 +0,0 @@
config shadowsocksr
option gfwlist 'china-banned'
option safe_dns_tcp '0'
option enabled '0'
option server '1.2.3.4'
option server_port '443'
option password 'Alex666666'
option method 'rc4-md5'
option protocol 'origin'
option obfs 'plain'
option more '1'
option proxy_mode 'M'
option safe_dns '8.8.4.4'
option safe_dns_port '53'
option dns_mode 'tcp_gfwlist'
option adbyby '0'

@@ -1,614 +0,0 @@
#!/bin/sh /etc/rc.common
START=99
SS_REDIR_PORT=7070
SS_TUNNEL_PORT=7071
SS_LOCAL_PORT=7072
SS_REDIR_PIDFILE=/var/run/ssrr-redir-go.pid
SS_TUNNEL_PIDFILE=/var/run/ssrr-tunnel-go.pid
SS_LOCAL_PIDFILE=/var/run/ssrr-local-go.pid
PDNSD_LOCAL_PORT=5053 # alex: avoid conflicting with a standalone pdnsd service
SSR_CONF=/etc/ssrr/shadowsocksr.json
dnsforwarder_pid=/var/run/dnsforwarder/dns.pid
CRON_FILE=/etc/crontabs/root
vt_gfwlist=china-banned
vt_np_ipset="chinaip" # Must be global variable
vt_local_ipset="localip"
vt_remote_ipset="remoteip"
WHITE_SET=whiteset # ipset of destinations forced to bypass the proxy
start()
{
local vt_enabled=`uci get ssrr.@shadowsocksr[0].enabled 2>/dev/null`
local vt_server_addr=`uci get ssrr.@shadowsocksr[0].server`
local vt_server_port=`uci get ssrr.@shadowsocksr[0].server_port`
local vt_password=`uci get ssrr.@shadowsocksr[0].password 2>/dev/null`
local vt_method=`uci get ssrr.@shadowsocksr[0].method 2>/dev/null`
local vt_protocol=`uci get ssrr.@shadowsocksr[0].protocol 2>/dev/null`
local vt_obfs=`uci get ssrr.@shadowsocksr[0].obfs 2>/dev/null`
local vt_obfs_param=`uci get ssrr.@shadowsocksr[0].obfs_param 2>/dev/null`
local vt_protocol_param=`uci get ssrr.@shadowsocksr[0].protocol_param 2>/dev/null`
local vt_timeout=`uci get ssrr.@shadowsocksr[0].timeout 2>/dev/null`
local vt_safe_dns=`uci get ssrr.@shadowsocksr[0].safe_dns 2>/dev/null`
local vt_safe_dns_port=`uci get ssrr.@shadowsocksr[0].safe_dns_port 2>/dev/null`
local vt_proxy_mode=`uci get ssrr.@shadowsocksr[0].proxy_mode 2>/dev/null`
local vt_dns_mode=`uci get ssrr.@shadowsocksr[0].dns_mode 2>/dev/null`
local adbyby=`uci get ssrr.@shadowsocksr[0].adbyby 2>/dev/null`
local white=`uci get ssrr.@shadowsocksr[0].white 2>/dev/null`
local tool=`uci get ssrr.@shadowsocksr[0].tool 2>/dev/null`
local red_type=`uci get ssrr.@shadowsocksr[0].red_type 2>/dev/null`
local username=`uci get ssrr.@shadowsocksr[0].username 2>/dev/null`
local enable_local=`uci get ssrr.@shadowsocksr[0].enable_local 2>/dev/null`
local ssr_local_port=`uci get ssrr.@shadowsocksr[0].ssr_local_port 2>/dev/null`
# $covered_subnets, $local_addresses are not required
local covered_subnets=`uci get ssrr.@shadowsocksr[0].covered_subnets 2>/dev/null`
local local_addresses=`uci get ssrr.@shadowsocksr[0].local_addresses 2>/dev/null`
# -----------------------------------------------------------------
if [ "$vt_enabled" = 0 ]; then
echo "WARNING: Shadowsocksr is disabled."
return 1
fi
if [ -z "$vt_server_addr" -o -z "$vt_server_port" ]; then
echo "WARNING: Shadowsocksr not fully configured, not starting."
return 1
fi
[ -z "$vt_proxy_mode" ] && vt_proxy_mode=S #默认是境外IP模式
[ -z "$vt_dns_mode" ] && vt_dns_mode=tcp_gfwlist #默认是GFWList的DNS模式
[ -z "$vt_method" ] && vt_method=table
[ -z "$vt_timeout" ] && vt_timeout=60
[ -z "$tool" ] && tool=ShadowsocksR
case "$vt_proxy_mode" in
M|S|G|GAME)
[ -z "$vt_safe_dns" ] && vt_safe_dns="208.67.222.222"
;;
esac
[ -z "$vt_safe_dns_port" ] && vt_safe_dns_port=443
# Get LAN settings as default parameters
[ -f /lib/functions/network.sh ] && . /lib/functions/network.sh
[ -z "$covered_subnets" ] && network_get_subnet covered_subnets lan
[ -z "$local_addresses" ] && network_get_ipaddr local_addresses lan
# -----------------------------------------------------------------
case "$tool" in
ShadowsocksR)
###### shadowsocksr ######
cat > $SSR_CONF <<EOF
{
"server": "$vt_server_addr",
"server_port": $vt_server_port,
"local_address": "0.0.0.0",
"password": "$vt_password",
"method": "$vt_method",
"timeout": "$vt_timeout",
"protocol": "$vt_protocol",
"protocol_param": "$vt_protocol_param",
"obfs": "$vt_obfs",
"obfs_param": "$vt_obfs_param",
"fast_open": false
}
EOF
sleep 1
/usr/bin/ssr-redir -c $SSR_CONF -u -b0.0.0.0 -l$SS_REDIR_PORT -s$vt_server_addr -p$vt_server_port \
-k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_REDIR_PIDFILE || return 1
[ "$enable_local" = 1 ] && [ "$ssr_local_port" -gt "1" ] && {
echo ssrr-local enabled!
/usr/bin/ssr-local -c $SSR_CONF -u -b0.0.0.0 -l$ssr_local_port -s$vt_server_addr -p$vt_server_port \
-k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_LOCAL_PIDFILE || return 1
}
;;
Shadowsocks)
cat > $SSR_CONF <<EOF
{
"server": "$vt_server_addr",
"server_port": $vt_server_port,
"local_address": "0.0.0.0",
"password": "$vt_password",
"method": "$vt_method",
"timeout": "$vt_timeout",
"fast_open": false
}
EOF
sleep 1
/usr/bin/ss-redir -c $SSR_CONF -u -b0.0.0.0 -l$SS_REDIR_PORT -s$vt_server_addr -p$vt_server_port \
-k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_REDIR_PIDFILE || return 1
[ "$enable_local" = 1 ] && [ "$ssr_local_port" -gt "1" ] && {
echo ssrr-local enabled!
/usr/bin/ss-local -u -b0.0.0.0 -l$ssr_local_port -s$vt_server_addr -p$vt_server_port \
-k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_LOCAL_PIDFILE || return 1
}
;;
Redsocks2)
cat > $SSR_CONF <<EOF
base {
log_debug = off;
log_info = on;
daemon = on;
redirector= iptables;
}
redsocks {
local_ip = 0.0.0.0;
local_port = $SS_REDIR_PORT;
ip = $vt_server_addr;
port = $vt_server_port;
type = $red_type;
autoproxy = 0;
timeout = 13;
EOF
[ -n "$username" ] && {
echo "login = $username;" >> $SSR_CONF
echo "password = $vt_password;" >> $SSR_CONF
}
echo "}" >> $SSR_CONF
if [ "$red_type" = "socks5" ]; then
echo enable redsocks udp
cat >> $SSR_CONF <<EOF
redudp {
local_ip = 0.0.0.0;
local_port = $SS_REDIR_PORT;
ip = $vt_server_addr;
port = $vt_server_port;
type = $red_type;
udp_timeout = 20;
EOF
[ -n "$username" ] && {
echo "login = $username;" >> $SSR_CONF
echo "password = $vt_password;" >> $SSR_CONF
}
echo "}" >> $SSR_CONF
fi
redsocks2 -c $SSR_CONF -p $SS_REDIR_PIDFILE || return 1
;;
esac
# IPv4 firewall rules
iptables -t nat -N ssrr_pre
iptables -t nat -F ssrr_pre
iptables -t mangle -N SSRUDP
iptables -t mangle -F SSRUDP
china_file="/etc/ssrr/china_route"
user_local_file="/etc/ssrr/user_local_ip"
user_remote_file="/etc/ssrr/user_remote_ip"
[ -f $user_local_file ] && {
echo add local ip $user_local_file $vt_local_ipset
ipset create $vt_local_ipset hash:net family inet hashsize 1024 maxelem 65536
awk '{system("ipset add localip "$0)}' $user_local_file
}
[ -f $user_remote_file ] && {
echo add remote ip $user_remote_file $vt_remote_ipset
ipset create $vt_remote_ipset hash:net family inet hashsize 1024 maxelem 65536
awk '{system("ipset add remoteip "$0)}' $user_remote_file
}
[ -f $china_file ] && {
ipset create $vt_np_ipset hash:net family inet hashsize 1024 maxelem 65536
}
iptables -t nat -A ssrr_pre -m set --match-set $vt_local_ipset dst -j RETURN || { #fallback for users without ipset installed
iptables -t nat -A ssrr_pre -d 10.0.0.0/8 -j RETURN
iptables -t nat -A ssrr_pre -d 127.0.0.0/8 -j RETURN
iptables -t nat -A ssrr_pre -d 172.16.0.0/12 -j RETURN
iptables -t nat -A ssrr_pre -d 192.168.0.0/16 -j RETURN
iptables -t nat -A ssrr_pre -d 224.0.0.0/3 -j RETURN
}
iptables -t mangle -A SSRUDP -m set --match-set $vt_local_ipset dst -j RETURN || { #fallback for users without ipset installed
iptables -t mangle -A SSRUDP -d 10.0.0.0/8 -j RETURN
iptables -t mangle -A SSRUDP -d 127.0.0.0/8 -j RETURN
iptables -t mangle -A SSRUDP -d 172.16.0.0/12 -j RETURN
iptables -t mangle -A SSRUDP -d 192.168.0.0/16 -j RETURN
iptables -t mangle -A SSRUDP -d 224.0.0.0/3 -j RETURN
}
if [ "$white" = 1 ]; then #强制不代理域名
ipset create $WHITE_SET hash:net family inet hashsize 1024 maxelem 65536 2>/dev/null
iptables -t nat -A ssrr_pre -m set --match-set $WHITE_SET dst -j RETURN
iptables -t mangle -A SSRUDP -m set --match-set $WHITE_SET dst -j RETURN
fi
ip rule add fwmark 1 lookup 100
ip route add local default dev lo table 100
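# TPROXY plumbing: UDP packets marked 0x01 by the TPROXY rules below match the
# fwmark rule above, get looked up in table 100, and its single "local" route
# delivers them to loopback so the redirector on $SS_REDIR_PORT can accept them.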
iptables -t nat -A ssrr_pre -d $vt_server_addr -j RETURN
iptables -t nat -A ssrr_pre -p tcp -m set --match-set $vt_remote_ipset dst -j REDIRECT --to $SS_REDIR_PORT #强制走代理的IP
iptables -t mangle -A SSRUDP -d $vt_server_addr -j RETURN
iptables -t mangle -A SSRUDP -p udp --dport 53 -j RETURN
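# Walk the ssrr.@lan_hosts[n] uci sections until an index no longer resolves;
# each host gets its own proxy mode (direct, gfwlist, nochina, oversea, game,
# all) ahead of the subnet-wide rules appended after the loop.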
COUNTER=0 #per-host LAN access control
while true
do
local host=`uci get ssrr.@lan_hosts[$COUNTER].host 2>/dev/null`
local lan_enable=`uci get ssrr.@lan_hosts[$COUNTER].enable 2>/dev/null`
local mType=`uci get ssrr.@lan_hosts[$COUNTER].type 2>/dev/null`
if [ -z "$host" ] || [ -z "$mType" ]; then
echo $COUNTER lan devices
break
fi
echo now is $host
COUNTER=$(($COUNTER+1))
if [ "$lan_enable" = "0" ]; then
continue
fi
case $mType in
direct)
iptables -t nat -A ssrr_pre -s $host -j RETURN
iptables -t mangle -A SSRUDP -s $host -j RETURN
;;
gfwlist)
ipset create $vt_gfwlist hash:net family inet hashsize 1024 maxelem 65536 2>/dev/null
iptables -t nat -A ssrr_pre -s $host -m set ! --match-set $vt_gfwlist dst -j RETURN
iptables -t nat -A ssrr_pre -s $host -m set --match-set $vt_np_ipset dst -j RETURN
iptables -t nat -A ssrr_pre -s $host -p udp --dport 53 -j REDIRECT --to-ports 53
iptables -t mangle -A SSRUDP -s $host -j RETURN
echo this $host is gfwlist
#start dnsforwarder
start_dnsforwarder "$vt_safe_dns" "$vt_dns_mode"
;;
nochina)#bypass mainland-China IPs
iptables -t nat -A ssrr_pre -s $host -m set --match-set $vt_np_ipset dst -j RETURN
iptables -t mangle -A SSRUDP -s $host -j RETURN
#start dnsforwarder
start_dnsforwarder "$vt_safe_dns" "$vt_dns_mode"
;;
oversea)#proxy only mainland-China IPs
iptables -t nat -A ssrr_pre -s $host -m set ! --match-set $vt_np_ipset dst -j RETURN
iptables -t mangle -A SSRUDP -s $host -j RETURN
;;
game)
iptables -t nat -A ssrr_pre -s $host -m set --match-set $vt_np_ipset dst -j RETURN
iptables -t mangle -A SSRUDP -s $host -m set --match-set $vt_np_ipset dst -j RETURN
iptables -t mangle -A SSRUDP -s $host -p udp -j TPROXY --on-port $SS_REDIR_PORT --tproxy-mark 0x01/0x01
;;
all)
;;
esac
iptables -t nat -A ssrr_pre -s $host -p tcp -j REDIRECT --to $SS_REDIR_PORT #LAN access control
done
case "$vt_proxy_mode" in
G) #global
;;
S)#alex: all non-China IPs
iptables -t nat -A ssrr_pre -m set --match-set $vt_np_ipset dst -j RETURN
;;
M)#alex:gfwlist
ipset create $vt_gfwlist hash:net family inet hashsize 1024 maxelem 65536 2>/dev/null
iptables -t nat -A ssrr_pre -m set ! --match-set $vt_gfwlist dst -j RETURN
iptables -t nat -A ssrr_pre -m set --match-set $vt_np_ipset dst -j RETURN
;;
V)#ross: proxy only mainland-China IPs
iptables -t nat -A ssrr_pre -m set ! --match-set $vt_np_ipset dst -j RETURN
;;
GAME)#alex: game mode
iptables -t nat -A ssrr_pre -m set --match-set $vt_np_ipset dst -j RETURN
iptables -t mangle -A SSRUDP -m set --match-set $vt_np_ipset dst -j RETURN
iptables -t mangle -A SSRUDP -p udp -j TPROXY --on-port $SS_REDIR_PORT --tproxy-mark 0x01/0x01
;;
DIRECT)#alex: direct mode for access control
iptables -t nat -A ssrr_pre -p tcp -j RETURN
;;
esac
local subnet
for subnet in $covered_subnets; do
iptables -t nat -A ssrr_pre -s $subnet -p tcp -j REDIRECT --to $SS_REDIR_PORT
done
if [ "$adbyby" = '1' ];then
iptables -t nat -A OUTPUT -p tcp -m multiport --dports 80,443 -j ssrr_pre
PR_NU=`iptables -nvL PREROUTING -t nat |sed 1,2d | sed -n '/KOOLPROXY/='`
if [ -z "$PR_NU" ]; then
PR_NU=1
else
let PR_NU+=1
fi
iptables -t nat -I PREROUTING $PR_NU -j ssrr_pre
else
iptables -t nat -I prerouting_rule -j ssrr_pre
fi
iptables -t mangle -A PREROUTING -j SSRUDP
# -----------------------------------------------------------------
###### Anti-pollution configuration ######
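# Mode summary: tcp_gfwlist/tcp_proxy resolve via dnsforwarder over TCP
# (redirected through the proxy), safe_only DNATs all port-53 queries to the
# user-chosen safe DNS, and the tunnel_* modes are deprecated ss(r)-tunnel paths.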
case "$vt_dns_mode" in
tcp_gfwlist)
start_dnsforwarder "$vt_safe_dns" "$vt_dns_mode"
;;
tcp_proxy)
start_dnsforwarder "$vt_safe_dns" "$vt_dns_mode"
;;
tunnel_gfwlist) #deprecated
case "$tool" in
ShadowsocksR)
/usr/bin/ssr-tunnel -c $SSR_CONF -u -b0.0.0.0 -l$SS_TUNNEL_PORT -s$vt_server_addr -p$vt_server_port -k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_TUNNEL_PIDFILE -L $vt_safe_dns:$vt_safe_dns_port
awk -vs="127.0.0.1#$SS_TUNNEL_PORT" '!/^$/&&!/^#/{printf("server=/%s/%s\n",$0,s)}' \
/etc/gfwlist/$vt_gfwlist > /var/etc/dnsmasq-go.d/01-pollution.conf
awk -vs="127.0.0.1#$PDNSD_LOCAL_PORT" '!/^$/&&!/^#/{printf("server=/%s/%s\n",$0,s)}' \
/etc/gfwlist/userlist >> /var/etc/dnsmasq-go.d/01-pollution.conf
uci set dhcp.@dnsmasq[0].resolvfile=/tmp/resolv.conf.auto
uci delete dhcp.@dnsmasq[0].noresolv
uci commit dhcp
;;
Shadowsocks)
/usr/bin/ss-tunnel -c $SSR_CONF -u -b0.0.0.0 -l$SS_TUNNEL_PORT -s$vt_server_addr -p$vt_server_port -k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_TUNNEL_PIDFILE -L $vt_safe_dns:$vt_safe_dns_port
awk -vs="127.0.0.1#$SS_TUNNEL_PORT" '!/^$/&&!/^#/{printf("server=/%s/%s\n",$0,s)}' \
/etc/gfwlist/$vt_gfwlist > /var/etc/dnsmasq-go.d/01-pollution.conf
awk -vs="127.0.0.1#$PDNSD_LOCAL_PORT" '!/^$/&&!/^#/{printf("server=/%s/%s\n",$0,s)}' \
/etc/gfwlist/userlist >> /var/etc/dnsmasq-go.d/01-pollution.conf
uci set dhcp.@dnsmasq[0].resolvfile=/tmp/resolv.conf.auto
uci delete dhcp.@dnsmasq[0].noresolv
uci commit dhcp
;;
Redsocks2)
/usr/bin/ssr-tunnel -c $SSR_CONF -u -b0.0.0.0 -l$SS_TUNNEL_PORT -s$vt_server_addr -p$vt_server_port -k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_TUNNEL_PIDFILE -L $vt_safe_dns:$vt_safe_dns_port
awk -vs="127.0.0.1#$SS_TUNNEL_PORT" '!/^$/&&!/^#/{printf("server=/%s/%s\n",$0,s)}' \
/etc/gfwlist/$vt_gfwlist > /var/etc/dnsmasq-go.d/01-pollution.conf
awk -vs="127.0.0.1#$PDNSD_LOCAL_PORT" '!/^$/&&!/^#/{printf("server=/%s/%s\n",$0,s)}' \
/etc/gfwlist/userlist >> /var/etc/dnsmasq-go.d/01-pollution.conf
uci set dhcp.@dnsmasq[0].resolvfile=/tmp/resolv.conf.auto
uci delete dhcp.@dnsmasq[0].noresolv
uci commit dhcp
;;
esac
;;
safe_only) #forward all DNS queries straight to the user-specified safe DNS
iptables -t nat -A ssrr_pre -p udp --dport 53 -j DNAT --to-destination $vt_safe_dns:$vt_safe_dns_port
;;
tunnel_all) #deprecated
case "$tool" in
ShadowsocksR)
/usr/bin/ssr-tunnel -c $SSR_CONF -u -b0.0.0.0 -l$SS_TUNNEL_PORT -s$vt_server_addr -p$vt_server_port -k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_TUNNEL_PIDFILE -L $vt_safe_dns:$vt_safe_dns_port
echo server=127.0.0.1#$SS_TUNNEL_PORT > /var/etc/dnsmasq-go.d/01-pollution.conf
uci delete dhcp.@dnsmasq[0].resolvfile
uci set dhcp.@dnsmasq[0].noresolv=1
uci commit dhcp
;;
Shadowsocks)
/usr/bin/ss-tunnel -c $SSR_CONF -u -b0.0.0.0 -l$SS_TUNNEL_PORT -s$vt_server_addr -p$vt_server_port -k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_TUNNEL_PIDFILE -L $vt_safe_dns:$vt_safe_dns_port
echo server=127.0.0.1#$SS_TUNNEL_PORT > /var/etc/dnsmasq-go.d/01-pollution.conf
uci delete dhcp.@dnsmasq[0].resolvfile
uci set dhcp.@dnsmasq[0].noresolv=1
uci commit dhcp
;;
Redsocks2)
/usr/bin/ssr-tunnel -c $SSR_CONF -u -b0.0.0.0 -l$SS_TUNNEL_PORT -s$vt_server_addr -p$vt_server_port -k"$vt_password" -m$vt_method -t$vt_timeout -f $SS_TUNNEL_PIDFILE -L $vt_safe_dns:$vt_safe_dns_port
echo server=127.0.0.1#$SS_TUNNEL_PORT > /var/etc/dnsmasq-go.d/01-pollution.conf
uci delete dhcp.@dnsmasq[0].resolvfile
uci set dhcp.@dnsmasq[0].noresolv=1
uci commit dhcp
;;
esac
;;
esac
local ipcount=`ipset list $vt_np_ipset | wc -l`
echo china ips count is $ipcount
[ "${ipcount:-0}" -lt 100 ] && {
echo add china ip $china_file $vt_np_ipset
awk '{system("ipset add chinaip "$0)}' $china_file
}
if [ "$vt_enabled" = 1 ]; then
start_cron
fi
}
stop()
{
# -----------------------------------------------------------------
if iptables -t nat -F ssrr_pre 2>/dev/null; then
while iptables -t nat -D prerouting_rule -j ssrr_pre 2>/dev/null; do :; done
while iptables -t nat -D PREROUTING -j ssrr_pre 2>/dev/null; do :; done
while iptables -t nat -D OUTPUT -p tcp -m multiport --dports 80,443 -j ssrr_pre 2>/dev/null; do :; done
iptables -t nat -X ssrr_pre 2>/dev/null
fi
#alex: game mode cleanup
if iptables -t mangle -F SSRUDP 2>/dev/null; then
while iptables -t mangle -D PREROUTING -j SSRUDP 2>/dev/null; do :; done
iptables -t mangle -X SSRUDP 2>/dev/null
fi
echo clearing ipset
ipset destroy $vt_local_ipset
ipset destroy $vt_remote_ipset
[ "$keep_chinaip" = 0 ] && ipset destroy $vt_np_ipset
stop_dnsforwarder
if [ -f $SS_REDIR_PIDFILE ]; then
kill -9 `cat $SS_REDIR_PIDFILE`
rm -f $SS_REDIR_PIDFILE
fi
if [ -f $SS_TUNNEL_PIDFILE ]; then
kill -9 `cat $SS_TUNNEL_PIDFILE`
rm -f $SS_TUNNEL_PIDFILE
fi
if [ -f $SS_LOCAL_PIDFILE ]; then
kill -9 `cat $SS_LOCAL_PIDFILE`
rm -f $SS_LOCAL_PIDFILE
fi
stop_cron
}
keep_chinaip=0
restart()
{
keep_chinaip=1
stop
start
}
# $1: upstream DNS server
start_dnsforwarder()
{
echo ready to start dnsforwarder by ssr
local safe_dns="$1"
local dns_mode="$2"
local white=`uci get ssrr.@shadowsocksr[0].white 2>/dev/null`
case "$dns_mode" in
tcp_gfwlist)
if iptables -t nat -N pdnsd_output; then
echo gfwlist dns mode
iptables -t nat -A pdnsd_output -p tcp -j REDIRECT --to $SS_REDIR_PORT
iptables -t nat -I OUTPUT -p tcp --dport 53 -j pdnsd_output
iptables -t nat -A ssrr_pre -p udp --dport 53 -j REDIRECT --to-ports 53
fi
;;
tcp_proxy)
if iptables -t nat -N pdnsd_output; then
echo tcp proxy dns mode
iptables -t nat -A pdnsd_output -m set --match-set $vt_np_ipset dst -j RETURN
iptables -t nat -A pdnsd_output -p tcp -j REDIRECT --to $SS_REDIR_PORT
iptables -t nat -I OUTPUT -p tcp --dport 53 -j pdnsd_output
iptables -t nat -A ssrr_pre -p udp --dport 53 -j REDIRECT --to-ports $PDNSD_LOCAL_PORT
fi
;;
esac
uci set dnsforwarder.@arguments[0].enabled=1
uci set dnsforwarder.@arguments[0].dnsmasq=1
uci set dnsforwarder.@arguments[0].addr=127.0.0.1:$PDNSD_LOCAL_PORT
uci set dnsforwarder.@arguments[0].mode=gfw_user
uci set dnsforwarder.@arguments[0].ipset=1
uci set dnsforwarder.@arguments[0].ipset_name=china-banned
[ "$white" = 1 ] && { #启用强制不代理列表
uci set dnsforwarder.@arguments[0].white=1
uci set dnsforwarder.@arguments[0].whiteset=$WHITE_SET
uci set dnsforwarder.@arguments[0].whitedns=114.114.114.114
}
uci commit dnsforwarder
dns_pid1=`ps | awk '$5 ~ /\[dnsforwarder\]/ {print $1}'`
dns_pid2=`cat $dnsforwarder_pid 2>/dev/null`
[ "$dns_pid1" -gt 1 ] && {
echo dnsforwarder is running,need not start!
return
}
[ "$dns_pid2" -gt 1 ] && {
echo dnsforwarder has been started,need not start!
return
}
echo safe dns = $safe_dns dns mode is $dns_mode
local tcp_dns_list="1.0.0.1,1.1.1.1,4.2.2.1,4.2.2.2,8.8.4.4,8.8.8.8,9.9.9.9,208.67.222.222,208.67.220.220,1.0.0.1:443,1.1.1.1:443,1.0.0.1:853,1.1.1.1:853,208.67.222.222:443,208.67.220.220:443,208.67.222.222:5353,208.67.220.220:5353,2001:4860:4860::8844,2001:4860:4860::8888,2606:4700:4700::1111,2606:4700:4700::1001" #alex: reliable foreign DNS servers for pdnsd
case "$dns_mode" in
tcp_gfwlist)
[ -n "$safe_dns" ] && tcp_dns_list="$safe_dns,$tcp_dns_list"
safe_dns="114.114.114.114,223.5.5.5,114.114.115.115,223.6.6.6"
;;
tcp_proxy)
[ -n "$safe_dns" ] && tcp_dns_list="$safe_dns,$tcp_dns_list"
;;
esac
[ ! -f "/etc/dnsforwarder/dnsforwarder.conf.bak" ] && {
cp /etc/dnsforwarder/dnsforwarder.conf /etc/dnsforwarder/dnsforwarder.conf.bak
}
cat > /etc/dnsforwarder/dnsforwarder.conf <<EOF
LogOn true
LogFileThresholdLength 102400
LogFileFolder /var/log
UDPLocal 0.0.0.0:$PDNSD_LOCAL_PORT
TCPGroup $tcp_dns_list * no
GroupFile
BlockIP 243.185.187.39,46.82.174.68,37.61.54.158,93.46.8.89,59.24.3.173,203.98.7.65,8.7.198.45,78.16.49.15,159.106.121.75,69.63.187.12,31.13.76.8,31.13.64.49
IPSubstituting
BlockNegativeResponse false
Hosts
HostsUpdateInterval 18000
HostsDownloadPath
HostsScript
HostsRetryInterval 30
AppendHosts
BlockIpv6WhenIpv4Exists false
UseCache true
CacheSize 1048576
MemoryCache true
CacheFile
IgnoreTTL false
OverrideTTL -1
MultipleTTL 1
ReloadCache false
OverwriteCache false
DisabledType
DisabledDomain
DisabledList
DomainStatistic false
DomainStatisticTempletFile
StatisticUpdateInterval 29
EOF
/etc/init.d/dnsforwarder restart
}
stop_dnsforwarder()
{
if iptables -t nat -F pdnsd_output 2>/dev/null; then
while iptables -t nat -D OUTPUT -p tcp --dport 53 -j pdnsd_output 2>/dev/null; do :; done
iptables -t nat -X pdnsd_output 2>/dev/null
fi
uci set dnsforwarder.@arguments[0].enabled=0
uci set dnsforwarder.@arguments[0].dnsmasq=0
uci commit dnsforwarder
/etc/init.d/dnsforwarder restart
[ -f "/etc/dnsforwarder/dnsforwarder.conf.bak" ] && cp /etc/dnsforwarder/dnsforwarder.conf.bak /etc/dnsforwarder/dnsforwarder.conf
rm -f /etc/dnsforwarder/dnsforwarder.conf.bak
}
start_cron(){
sed -i '/shadowsocksr_watchdog.log/d' $CRON_FILE
echo '0 */1 * * * /etc/ssrr/ssrr_watchdog >> /tmp/shadowsocksr_watchdog.log 2>&1' >> $CRON_FILE
echo '0 1 * * 0 echo "" > /tmp/shadowsocksr_watchdog.log' >> $CRON_FILE
crontab $CRON_FILE
}
stop_cron(){
sed -i '/shadowsocksr_watchdog.log/d' $CRON_FILE
/etc/init.d/cron restart
}
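# Manual sanity check (not invoked by this script): after "/etc/init.d/ssrr
# start", the chains and sets created above should exist and show counters:
#   iptables -t nat -L ssrr_pre -nv
#   iptables -t mangle -L SSRUDP -nv
#   ipset list chinaip | head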

View File

@ -1,14 +0,0 @@
{
"server": "hk1.betaclouds.org",
"server_port": 14094,
"local_address": "0.0.0.0",
"local_port": 7070,
"password": "a9edF2",
"method": "chacha20",
"timeout": "60",
"protocol": "auth_chain_a",
"protocol_param": "",
"obfs": "tls1.2_ticket_auth",
"obfs_param": "",
"fast_open": false
}

View File

@ -1,18 +0,0 @@
#!/bin/sh
LOGTIME=$(date "+%Y-%m-%d %H:%M:%S")
/usr/bin/wget --spider --quiet --tries=1 --timeout=3 www.gstatic.com/generate_204
if [ "$?" == "0" ]; then
echo '['$LOGTIME'] shadowsocksr No Problem.'
else
/usr/bin/wget --spider --quiet --tries=1 --timeout=3 www.baidu.com
if [ "$?" == "0" ]; then
echo '['$LOGTIME'] Problem decteted, restarting shadowsocksr...'
/etc/init.d/ssrr restart
else
echo '['$LOGTIME'] Network Problem. Do nothing.'
fi
fi

View File

@ -1,22 +0,0 @@
#!/bin/sh
TMP_DIR="/tmp"
SSR_DIR="/etc/ssrr"
FILE_NAME="china_route"
echo ready to update china route!!
logger -t alex ready to update china route!!
wget -O- 'http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-latest' | grep ipv4 | grep CN | awk -F\| '{ printf("%s/%d\n", $4, 32-log($5)/log(2)) }' > $TMP_DIR/$FILE_NAME
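# APNIC records look like "apnic|CN|ipv4|1.0.1.0|256|20110414|allocated":
# field 4 is the base address and field 5 the address count, so the awk above
# derives the prefix length as 32 - log2(count).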
[ -f $TMP_DIR/$FILE_NAME ] && {
count=`grep "" -c $TMP_DIR/$FILE_NAME`
if [ "$count" -gt "1000" ];then
echo download completed!
logger -t alex download completed!
cp $TMP_DIR/$FILE_NAME $SSR_DIR/
echo update completed!
logger -t alex update completed!
rm -f $TMP_DIR/$FILE_NAME
fi
} || {
logger -t alex download failed!
echo download failed!
exit
}

View File

@ -1,6 +0,0 @@
172.17.0.0/24
10.0.0.0/8
127.0.0.0/8
172.16.0.0/12
192.168.0.0/16
224.0.0.0/3

View File

@ -1,7 +0,0 @@
149.154.160.0/20
149.154.164.0/22
149.154.168.0/21
67.198.55.0/24
91.108.4.0/22
91.108.56.0/22
109.239.140.0/24

View File

@ -1,10 +0,0 @@
module("luci.controller.ssrr", package.seeall)
function index()
if not nixio.fs.access("/etc/config/ssrr") then
return
end
entry({"admin", "services", "ssrr"},alias("admin", "services", "ssrr","general"),_("ShadowsocksR")).dependent = true
entry({"admin", "services", "ssrr","general"}, cbi("shadowsocksr/general"),_("General"),10).leaf = true
entry({"admin", "services", "ssrr","gfwlist"}, cbi("shadowsocksr/gfwlist"),_("GFWlist"),20).leaf = true
end

View File

@ -1,221 +0,0 @@
--[[
Shadowsocksr LuCI Configuration Page.
References:
https://github.com/ravageralpha/my_openwrt_mod - by RA-MOD
http://www.v2ex.com/t/139438 - by imcczy
https://github.com/rssnsj/network-feeds - by Justin Liu
]]--
local fs = require "nixio.fs"
local state_msg = ""
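-- Probe for a running proxy binary: pidof exits 0 when the process exists, so
-- each flag below is true only while the matching process is alive.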
local ssr_redir_on = (luci.sys.call("pidof ssr-redir > /dev/null") == 0)
local ss_redir_on = (luci.sys.call("pidof ss-redir > /dev/null") == 0)
local redsocks2_on = (luci.sys.call("pidof redsocks2 > /dev/null") == 0)
if ssr_redir_on then
state_msg = "<b><font color=\"green\">" .. translate("SSR is Running") .. "</font></b>"
elseif ss_redir_on then
state_msg = "<b><font color=\"green\">" .. translate("SS is Running") .. "</font></b>"
elseif redsocks2_on then
state_msg = state_msg .. "<b> <font color=\"green\">" .. translate("Redsocks2 is Running") .. "</font></b>"
else
state_msg = "<b><font color=\"red\">" .. translate("Not running") .. "</font></b>"
end
m = Map("ssrr", translate("Shadowsocksr Transparent Proxy"),
translate("A fast secure tunnel proxy that help you get through firewalls on your router").."<br><br>状态 - "..state_msg)
s = m:section(TypedSection, "shadowsocksr", translate("Settings"))
s.anonymous = true
-- ---------------------------------------------------
switch = s:option(Flag, "enabled", translate("Enable"))
switch.rmempty = false
server = s:option(Value, "server", translate("Server Address"))
server.optional = false
server.datatype = "host"
server.rmempty = false
server_port = s:option(Value, "server_port", translate("Server Port"))
server_port.datatype = "range(1,65535)"
server_port.optional = false
server_port.rmempty = false
tool = s:option(ListValue, "tool", translate("Proxy Tool"))
tool:value("ShadowsocksR")
tool:value("Shadowsocks")
tool:value("Redsocks2")
red_type=s:option(ListValue,"red_type",translate("Proxy Server Type"))
red_type:value("socks5",translate("Socks5"))
red_type:value("socks4",translate("Socks4代理"))
red_type:value("http-relay",translate("http代理"))
red_type:value("http-connect",translate("Https代理"))
red_type:depends({tool="Redsocks2"})
username = s:option(Value, "username", translate("Proxy User Name"))
username:depends({tool="Redsocks2"})
password = s:option(Value, "password", translate("Password"))
password.password = true
method = s:option(ListValue, "method", translate("Encryption Method"))
method:depends("tool","ShadowsocksR")
method:depends("tool","Shadowsocks")
method:value("table")
method:value("none")
method:value("aes-128-ctr")
method:value("aes-192-ctr")
method:value("aes-256-ctr")
method:value("aes-128-cfb")
method:value("aes-192-cfb")
method:value("aes-256-cfb")
method:value("aes-128-gcm")
method:value("aes-192-gcm")
method:value("aes-256-gcm")
method:value("rc4")
method:value("rc4-md5")
method:value("rc4-md5-6")
method:value("salsa20")
method:value("chacha20")
method:value("chacha20-ietf")
method:value("chacha20-ietf-poly1305")
method:value("xchacha20-ietf-poly1305")
protocol = s:option(ListValue, "protocol", translate("Protocol"))
protocol:depends("tool","ShadowsocksR")
protocol:value("origin")
protocol:value("verify_deflate")
protocol:value("auth_sha1_v4")
protocol:value("auth_aes128_md5")
protocol:value("auth_aes128_sha1")
protocol:value("auth_chain_a")
protocol:value("auth_chain_b")
protocol:value("auth_chain_c")
protocol:value("auth_chain_d")
protocol:value("auth_chain_e")
protocol:value("auth_chain_f")
protocol_param = s:option(Value, "protocol_param", translate("Protocol Param"),
translate("leave it empty is well"))
protocol_param:depends({tool="ShadowsocksR"})
obfs = s:option(ListValue, "obfs", translate("Obfs"))
obfs:depends("tool","ShadowsocksR")
obfs:value("plain")
obfs:value("http_simple")
obfs:value("http_post")
obfs:value("random_head")
obfs:value("tls1.2_ticket_auth")
obfs:value("tls1.2_ticket_fastauth")
obfs_param= s:option(Value, "obfs_param", translate("Obfs Param"),
translate("leave it empty is well"))
obfs_param:depends({tool="ShadowsocksR"})
enable_local = s:option(Flag, "enable_local", translate("Enable ssr-local") ,translate("Open ssr-local port as well"))
enable_local.rmempty = false
enable_local:depends("tool","ShadowsocksR")
enable_local:depends("tool","Shadowsocks")
ssr_local_port = s:option(Value, "ssr_local_port", translate("ssr-local port"))
ssr_local_port:depends("enable_local","1")
ssr_local_port.default="1080"
s:option(Flag, "more", translate("More Options"),
translate("Options for advanced users"))
timeout = s:option(Value, "timeout", translate("Timeout"))
timeout.datatype = "range(0,10000)"
timeout.placeholder = "60"
timeout.optional = false
timeout:depends("more", "1")
-- fast_open = s:option(Flag, "fast_open", translate("TCP Fast Open"),
-- translate("Enable TCP fast open, only available on kernel > 3.7.0"))
proxy_mode = s:option(ListValue, "proxy_mode", translate("Proxy Mode"),
translate("GFW-List mode requires flushing DNS cache") .. "<br /> " ..
"<a href=\"" .. luci.dispatcher.build_url("admin", "services","dnsforwarder","gfwlist") .. "\">" ..
translate("Click here to customize your GFW-List") ..
"</a>")
proxy_mode:value("S", translate("All non-China IPs"))
proxy_mode:value("M", translate("GFW-List based auto-proxy"))
proxy_mode:value("V", translate("Oversea Mode"))
proxy_mode:value("G", translate("All Public IPs"))
proxy_mode:value("GAME", translate("Game Mode"))--alex:添加游戏模式
proxy_mode:value("DIRECT", translate("Direct (No Proxy)"))--alex:添加访问控制
proxy_mode:depends("more", "1")
safe_dns = s:option(Value, "safe_dns", translate("Safe DNS"),
translate("recommend OpenDNS"))
safe_dns.datatype = "ip4addr"
safe_dns.optional = false
safe_dns.placeholder = "8.8.4.4"
safe_dns:depends("more", "1")
safe_dns_port = s:option(Value, "safe_dns_port", translate("Safe DNS Port"),
translate("Foreign DNS on UDP port 53 might be polluted"))
safe_dns_port.datatype = "range(1,65535)"
safe_dns_port.placeholder = "53"
safe_dns_port.optional = false
safe_dns_port:depends("more", "1")
dns_mode = s:option(ListValue, "dns_mode", translate("DNS Mode"),
translate("Suggest using GFW-List based auto-proxy"))
dns_mode:value("tcp_gfwlist", translate("GFW-List based auto-proxy"))
dns_mode:value("tcp_proxy", translate("Remote TCP mode"))
dns_mode:value("safe_only", translate("Local safe DNS"))
dns_mode:value("local", translate("System default"))
dns_mode:depends("more", "1")
adbyby=s:option(Flag,"adbyby",translate("Use together with Adbyby or koolproxy"),translate("Do not check this unless Adbyby or koolproxy is enabled"))
adbyby:depends("more", "1")
adbyby.rmempty=false
whitedomain=s:option(Flag,"white",translate("Enable the force-bypass website list"),translate("Works together with the force-bypass list in dnsforwarder"))
whitedomain:depends("more", "1")
-- [[ LAN Hosts ]]--
s = m:section(TypedSection, "lan_hosts", translate("LAN Hosts"))
s.template = "cbi/tblsection"
s.addremove = true
s.anonymous = true
o = s:option(Value, "host", translate("Host"))
luci.ip.neighbors({family = 4}, function(neighbor)
if neighbor.reachable then
o:value(neighbor.dest:string(), "%s (%s)" %{neighbor.dest:string(), neighbor.mac})
end
end)
o.datatype = "ip4addr"
o.rmempty = false
o = s:option(ListValue, "type", translate("Proxy Mode"))
o:value("direct", translate("Direct (No Proxy)"))
o:value("normal", translate("Normal"))
o:value("gfwlist", translate("GFW-List based auto-proxy"))
o:value("nochina", translate("All non-China IPs"))
o:value("oversea", translate("Oversea Mode"))
o:value("game", translate("Game Mode"))
o:value("all", translate("All Public IPs"))
o.rmempty = false
o = s:option(Flag, "enable", translate("Enable"))
o.default = "1"
o.rmempty = false
-- ---------------------------------------------------
local apply = luci.http.formvalue("cbi.apply")
if apply then
os.execute("/etc/init.d/ssrr restart >/dev/null 2>&1 &")
end
return m

View File

@ -1,69 +0,0 @@
local fs = require "nixio.fs"
local china_file = "/etc/ssrr/china_route"
local user_local_file = "/etc/ssrr/user_local_ip"
local user_remote_file = "/etc/ssrr/user_remote_ip"
function sync_value_to_file(value, file)
value = value:gsub("\r\n?", "\n")
local old_value = nixio.fs.readfile(file)
if value ~= old_value then
nixio.fs.writefile(file, value)
end
end
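-- Note: sync_value_to_file rewrites its target only when the content changed;
-- the TextValue writers below call fs.writefile directly instead.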
m = Map("ssrr", translate("路由表"),translate("指定国内外路由表,并且可以设置强制走和不走的网段"))
s = m:section(TypedSection, "shadowsocksr", translate("Settings"))
s.anonymous = true
button_update_route = s:option (Button, "_button_update_chinaroute", translate("Update China route table"),translate("After clicking, please wait about 30 seconds; unless you have a special need, the table does not require updating"))
local route_count = luci.sys.exec("grep -c '' " .. china_file)
button_update_route.inputtitle = translate("Current rule count: " .. route_count .. ", click to update")
button_update_route.inputstyle = "apply"
function button_update_route.write (self, section, value)
luci.sys.call ( "nohup sh /etc/ssrr/update_chinaroute.sh > /tmp/gfwupdate.log 2>&1 &")
end
china_route = s:option(TextValue, "china_route", translate("China IP subnets"), nil)
china_route.description = translate("This list is the main basis for China/abroad traffic splitting; its content is overwritten on each update")
china_route.rows = 13
china_route.wrap = "off"
china_route.cfgvalue = function(self, section)
return fs.readfile(china_file) or ""
end
china_route.write = function(self, section, value)
fs.writefile(china_file, value:gsub("\r\n", "\n"))
end
user_local = s:option(TextValue, "user_local", translate("Subnets forced to bypass the proxy"), nil)
user_local.description = translate("Do not delete entries casually; fill in LAN subnets")
user_local.rows = 13
user_local.wrap = "off"
user_local.cfgvalue = function(self, section)
return fs.readfile(user_local_file) or ""
end
user_local.write = function(self, section, value)
fs.writefile(user_local_file, value:gsub("\r\n", "\n"))
end
user_remote = s:option(TextValue, "user_remote", translate("Subnets forced through the proxy"), nil)
user_remote.description = translate("Lower priority than the bypass list; typically used for server IPs of apps such as Telegram")
user_remote.rows = 13
user_remote.wrap = "off"
user_remote.cfgvalue = function(self, section)
return fs.readfile(user_remote_file) or ""
end
user_remote.write = function(self, section, value)
fs.writefile(user_remote_file, value:gsub("\r\n", "\n"))
end
-- ---------------------------------------------------
local apply = luci.http.formvalue("cbi.apply")
if apply then
os.execute("/etc/init.d/ssrr restart >/dev/null 2>&1 &")
end
return m

View File

@ -1,161 +0,0 @@
msgid "Settings"
msgstr "配置"
msgid "Enable"
msgstr "启用"
msgid "Server Address"
msgstr "服务器地址"
msgid "Server Port"
msgstr "服务器端口"
msgid "Password"
msgstr "密码"
msgid "Encryption Method"
msgstr "加密方式"
msgid "Protocol"
msgstr "混淆协议"
msgid "Obfs"
msgstr "混淆方式"
msgid "Clear Client Log"
msgstr "清理客户端日志"
msgid "Timeout"
msgstr "超时时间(秒)"
msgid "Proxy Mode"
msgstr "代理方式"
msgid "GFW-List based auto-proxy"
msgstr "基于GFWList的自动代理"
msgid "All non-China IPs"
msgstr "所有非大陆IP"
msgid "All Public IPs"
msgstr "全局代理"
msgid "Watching Youku overseas"
msgstr "海外看优酷"
msgid "Game Mode"
msgstr "游戏模式"
msgid "Clients List"
msgstr "客户端列表"
msgid "Safe DNS"
msgstr "安全DNS服务器地址"
msgid "recommend OpenDNS"
msgstr "建议使用OpenDNS的服务器"
msgid "Safe DNS Port"
msgstr "安全DNS的端口"
msgid "Foreign DNS on UDP port 53 might be polluted"
msgstr "使用53端口可能会被污染"
msgid "DNS Mode"
msgstr "DNS 解析模式"
msgid "Protocol Param"
msgstr "协议参数"
msgid "Obfs Param"
msgstr "混淆参数"
msgid "userlist"
msgstr "用户自定义网站黑名单"
msgid "china-banned"
msgstr "GFWList黑名单"
msgid "unblock-youku"
msgstr "海外看视频网站名单"
msgid "Domain Lists"
msgstr "黑/白名单列表"
msgid "Running"
msgstr "运行中"
msgid "More Options"
msgstr "更多选项"
msgid "Not running"
msgstr "未运行"
msgid "Shadowsocksr Transparent Proxy"
msgstr "影梭透明代理"
msgid "A fast secure tunnel proxy that help you get through firewalls on your router"
msgstr "一个帮你翻墙的快速代理工具"
msgid "Domain Lists Settings"
msgstr "黑/白名单配置"
msgid "ShadowsocksR"
msgstr "影梭SSR"
msgid "General"
msgstr "代理设置"
msgid "GFWlist"
msgstr "黑/白名单"
msgid "Options for advanced users"
msgstr "供高级玩家使用,普通用户不推荐修改"
msgid "Click here to customize your GFW-List"
msgstr "点击此处自定义网站黑名单"
msgid "GFW-List mode requires flushing DNS cache"
msgstr "GFWList模式需要客户端清空DNS缓存"
msgid "Direct (No Proxy)"
msgstr "直接连接(不代理)"
msgid "Local TCP mode"
msgstr "本地TCP解析模式"
msgid "Normal"
msgstr "正常代理"
msgid "Remote TCP mode"
msgstr "全局代理TCP解析模式"
msgid "Tunnel mode based on GFWLIST"
msgstr "基于GFWList隧道模式"
msgid "Tunnel mode for all"
msgstr "全局隧道模式"
msgid "Local safe DNS"
msgstr "本地请求安全DNS"
msgid "System default"
msgstr "系统默认模式(会污染)"
msgid "Suggest using GFW-List based auto-proxy"
msgstr "推荐使用GFWList自动模式,可分流国内外"
msgid "leave it empty is well"
msgstr "一般不填"
msgid "Enable ssr-local"
msgstr "同时开启ssr-local"
msgid "Open ssr-local port as well"
msgstr "开启ssr-local有利于排查问题"
msgid "LAN Hosts"
msgstr "局域网主机"
msgid "Oversea Mode"
msgstr "海外模式"

View File

@ -1,12 +0,0 @@
INSTALL = install
PREFIX = /usr/bin
po2lmo: src/po2lmo.o src/template_lmo.o
$(CC) $(LDFLAGS) -o src/po2lmo src/po2lmo.o src/template_lmo.o
install:
$(INSTALL) -m 755 src/po2lmo $(PREFIX)
clean:
$(RM) src/po2lmo src/*.o

View File

@ -1,247 +0,0 @@
/*
* lmo - Lua Machine Objects - PO to LMO conversion tool
*
* Copyright (C) 2009-2012 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "template_lmo.h"
static void die(const char *msg)
{
fprintf(stderr, "Error: %s\n", msg);
exit(1);
}
static void usage(const char *name)
{
fprintf(stderr, "Usage: %s input.po output.lmo\n", name);
exit(1);
}
static void print(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
if( fwrite(ptr, size, nmemb, stream) == 0 )
die("Failed to write stdout");
}
static int extract_string(const char *src, char *dest, int len)
{
int pos = 0;
int esc = 0;
int off = -1;
for( pos = 0; (pos < strlen(src)) && (pos < len); pos++ )
{
if( (off == -1) && (src[pos] == '"') )
{
off = pos + 1;
}
else if( off >= 0 )
{
if( esc == 1 )
{
switch (src[pos])
{
case '"':
case '\\':
off++;
break;
}
dest[pos-off] = src[pos];
esc = 0;
}
else if( src[pos] == '\\' )
{
dest[pos-off] = src[pos];
esc = 1;
}
else if( src[pos] != '"' )
{
dest[pos-off] = src[pos];
}
else
{
dest[pos-off] = '\0';
break;
}
}
}
return (off > -1) ? strlen(dest) : -1;
}
static int cmp_index(const void *a, const void *b)
{
uint32_t x = ((const lmo_entry_t *)a)->key_id;
uint32_t y = ((const lmo_entry_t *)b)->key_id;
if (x < y)
return -1;
else if (x > y)
return 1;
return 0;
}
static void print_uint32(uint32_t x, FILE *out)
{
uint32_t y = htonl(x);
print(&y, sizeof(uint32_t), 1, out);
}
static void print_index(void *array, int n, FILE *out)
{
lmo_entry_t *e;
qsort(array, n, sizeof(*e), cmp_index);
for (e = array; n > 0; n--, e++)
{
print_uint32(e->key_id, out);
print_uint32(e->val_id, out);
print_uint32(e->offset, out);
print_uint32(e->length, out);
}
}
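/*
 * Resulting .lmo layout as written by main() below: the translated strings
 * come first, each padded to a 4-byte boundary, then the index sorted by key
 * hash, and finally a single uint32 holding the offset of that index -- the
 * value lmo_open() in template_lmo.c reads back from the end of the file.
 */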
int main(int argc, char *argv[])
{
char line[4096];
char key[4096];
char val[4096];
char tmp[4096];
int state = 0;
int offset = 0;
int length = 0;
int n_entries = 0;
void *array = NULL;
lmo_entry_t *entry = NULL;
uint32_t key_id, val_id;
FILE *in;
FILE *out;
if( (argc != 3) || ((in = fopen(argv[1], "r")) == NULL) || ((out = fopen(argv[2], "w")) == NULL) )
usage(argv[0]);
memset(line, 0, sizeof(line));
memset(key, 0, sizeof(key));
memset(val, 0, sizeof(val));
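/* Line-driven state machine: 0 = waiting for a msgid, 1/2 = collecting
 * (possibly multi-line) msgid text, 3 = collecting msgstr continuation
 * lines, 4 = hash the pair and emit an index entry. */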
while( (NULL != fgets(line, sizeof(line), in)) || (state >= 2 && feof(in)) )
{
if( state == 0 && strstr(line, "msgid \"") == line )
{
switch(extract_string(line, key, sizeof(key)))
{
case -1:
die("Syntax error in msgid");
case 0:
state = 1;
break;
default:
state = 2;
}
}
else if( state == 1 || state == 2 )
{
if( strstr(line, "msgstr \"") == line || state == 2 )
{
switch(extract_string(line, val, sizeof(val)))
{
case -1:
state = 4;
break;
default:
state = 3;
}
}
else
{
switch(extract_string(line, tmp, sizeof(tmp)))
{
case -1:
state = 2;
break;
default:
strcat(key, tmp);
}
}
}
else if( state == 3 )
{
switch(extract_string(line, tmp, sizeof(tmp)))
{
case -1:
state = 4;
break;
default:
strcat(val, tmp);
}
}
if( state == 4 )
{
if( strlen(key) > 0 && strlen(val) > 0 )
{
key_id = sfh_hash(key, strlen(key));
val_id = sfh_hash(val, strlen(val));
if( key_id != val_id )
{
n_entries++;
array = realloc(array, n_entries * sizeof(lmo_entry_t));
if (!array)
die("Out of memory");
entry = (lmo_entry_t *)array + n_entries - 1;
entry->key_id = key_id;
entry->val_id = val_id;
entry->offset = offset;
entry->length = strlen(val);
length = strlen(val) + ((4 - (strlen(val) % 4)) % 4);
print(val, length, 1, out);
offset += length;
}
}
state = 0;
memset(key, 0, sizeof(key));
memset(val, 0, sizeof(val));
}
memset(line, 0, sizeof(line));
}
print_index(array, n_entries, out);
if( offset > 0 )
{
print_uint32(offset, out);
fsync(fileno(out));
fclose(out);
}
else
{
fclose(out);
unlink(argv[2]);
}
fclose(in);
return(0);
}

View File

@ -1,328 +0,0 @@
/*
* lmo - Lua Machine Objects - Base functions
*
* Copyright (C) 2009-2010 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "template_lmo.h"
/*
* Hash function from http://www.azillionmonkeys.com/qed/hash.html
* Copyright (C) 2004-2008 by Paul Hsieh
*/
uint32_t sfh_hash(const char *data, int len)
{
uint32_t hash = len, tmp;
int rem;
if (len <= 0 || data == NULL) return 0;
rem = len & 3;
len >>= 2;
/* Main loop */
for (;len > 0; len--) {
hash += sfh_get16(data);
tmp = (sfh_get16(data+2) << 11) ^ hash;
hash = (hash << 16) ^ tmp;
data += 2*sizeof(uint16_t);
hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
case 3: hash += sfh_get16(data);
hash ^= hash << 16;
hash ^= data[sizeof(uint16_t)] << 18;
hash += hash >> 11;
break;
case 2: hash += sfh_get16(data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1: hash += *data;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}
uint32_t lmo_canon_hash(const char *str, int len)
{
char res[4096];
char *ptr, prev;
int off;
if (!str || len >= sizeof(res))
return 0;
for (prev = ' ', ptr = res, off = 0; off < len; prev = *str, off++, str++)
{
if (isspace(*str))
{
if (!isspace(prev))
*ptr++ = ' ';
}
else
{
*ptr++ = *str;
}
}
if ((ptr > res) && isspace(*(ptr-1)))
ptr--;
return sfh_hash(res, ptr - res);
}
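/* An archive is mmap()ed read-only; the trailing uint32 stores the offset of
 * the entry index, and everything before that offset is padded string data. */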
lmo_archive_t * lmo_open(const char *file)
{
int in = -1;
uint32_t idx_offset = 0;
struct stat s;
lmo_archive_t *ar = NULL;
if (stat(file, &s) == -1)
goto err;
if ((in = open(file, O_RDONLY)) == -1)
goto err;
if ((ar = (lmo_archive_t *)malloc(sizeof(*ar))) != NULL)
{
memset(ar, 0, sizeof(*ar));
ar->fd = in;
ar->size = s.st_size;
fcntl(ar->fd, F_SETFD, fcntl(ar->fd, F_GETFD) | FD_CLOEXEC);
if ((ar->mmap = mmap(NULL, ar->size, PROT_READ, MAP_SHARED, ar->fd, 0)) == MAP_FAILED)
goto err;
idx_offset = ntohl(*((const uint32_t *)
(ar->mmap + ar->size - sizeof(uint32_t))));
if (idx_offset >= ar->size)
goto err;
ar->index = (lmo_entry_t *)(ar->mmap + idx_offset);
ar->length = (ar->size - idx_offset - sizeof(uint32_t)) / sizeof(lmo_entry_t);
ar->end = ar->mmap + ar->size;
return ar;
}
err:
if (in > -1)
close(in);
if (ar != NULL)
{
if ((ar->mmap != NULL) && (ar->mmap != MAP_FAILED))
munmap(ar->mmap, ar->size);
free(ar);
}
return NULL;
}
void lmo_close(lmo_archive_t *ar)
{
if (ar != NULL)
{
if ((ar->mmap != NULL) && (ar->mmap != MAP_FAILED))
munmap(ar->mmap, ar->size);
close(ar->fd);
free(ar);
ar = NULL;
}
}
lmo_catalog_t *_lmo_catalogs = NULL;
lmo_catalog_t *_lmo_active_catalog = NULL;
int lmo_load_catalog(const char *lang, const char *dir)
{
DIR *dh = NULL;
char pattern[16];
char path[PATH_MAX];
struct dirent *de = NULL;
lmo_archive_t *ar = NULL;
lmo_catalog_t *cat = NULL;
if (!lmo_change_catalog(lang))
return 0;
if (!dir || !(dh = opendir(dir)))
goto err;
if (!(cat = malloc(sizeof(*cat))))
goto err;
memset(cat, 0, sizeof(*cat));
snprintf(cat->lang, sizeof(cat->lang), "%s", lang);
snprintf(pattern, sizeof(pattern), "*.%s.lmo", lang);
while ((de = readdir(dh)) != NULL)
{
if (!fnmatch(pattern, de->d_name, 0))
{
snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
ar = lmo_open(path);
if (ar)
{
ar->next = cat->archives;
cat->archives = ar;
}
}
}
closedir(dh);
cat->next = _lmo_catalogs;
_lmo_catalogs = cat;
if (!_lmo_active_catalog)
_lmo_active_catalog = cat;
return 0;
err:
if (dh) closedir(dh);
if (cat) free(cat);
return -1;
}
int lmo_change_catalog(const char *lang)
{
lmo_catalog_t *cat;
for (cat = _lmo_catalogs; cat; cat = cat->next)
{
if (!strncmp(cat->lang, lang, sizeof(cat->lang)))
{
_lmo_active_catalog = cat;
return 0;
}
}
return -1;
}
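/* The index written by po2lmo is sorted by key hash, stored big-endian --
 * hence the ntohl() calls -- so entries can be located by binary search. */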
static lmo_entry_t * lmo_find_entry(lmo_archive_t *ar, uint32_t hash)
{
unsigned int m, l, r;
uint32_t k;
l = 0;
r = ar->length - 1;
while (1)
{
m = l + ((r - l) / 2);
if (r < l)
break;
k = ntohl(ar->index[m].key_id);
if (k == hash)
return &ar->index[m];
if (k > hash)
{
if (!m)
break;
r = m - 1;
}
else
{
l = m + 1;
}
}
return NULL;
}
int lmo_translate(const char *key, int keylen, char **out, int *outlen)
{
uint32_t hash;
lmo_entry_t *e;
lmo_archive_t *ar;
if (!key || !_lmo_active_catalog)
return -2;
hash = lmo_canon_hash(key, keylen);
for (ar = _lmo_active_catalog->archives; ar; ar = ar->next)
{
if ((e = lmo_find_entry(ar, hash)) != NULL)
{
*out = ar->mmap + ntohl(e->offset);
*outlen = ntohl(e->length);
return 0;
}
}
return -1;
}
void lmo_close_catalog(const char *lang)
{
lmo_archive_t *ar, *next;
lmo_catalog_t *cat, *prev;
for (prev = NULL, cat = _lmo_catalogs; cat; prev = cat, cat = cat->next)
{
if (!strncmp(cat->lang, lang, sizeof(cat->lang)))
{
if (prev)
prev->next = cat->next;
else
_lmo_catalogs = cat->next;
for (ar = cat->archives; ar; ar = next)
{
next = ar->next;
lmo_close(ar);
}
free(cat);
break;
}
}
}

View File

@ -1,92 +0,0 @@
/*
* lmo - Lua Machine Objects - General header
*
* Copyright (C) 2009-2012 Jo-Philipp Wich <xm@subsignal.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _TEMPLATE_LMO_H_
#define _TEMPLATE_LMO_H_
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <errno.h>
#include <fnmatch.h>
#include <dirent.h>
#include <ctype.h>
#include <limits.h>
#if (defined(__GNUC__) && defined(__i386__))
#define sfh_get16(d) (*((const uint16_t *) (d)))
#else
#define sfh_get16(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
struct lmo_entry {
uint32_t key_id;
uint32_t val_id;
uint32_t offset;
uint32_t length;
} __attribute__((packed));
typedef struct lmo_entry lmo_entry_t;
struct lmo_archive {
int fd;
int length;
uint32_t size;
lmo_entry_t *index;
char *mmap;
char *end;
struct lmo_archive *next;
};
typedef struct lmo_archive lmo_archive_t;
struct lmo_catalog {
char lang[6];
struct lmo_archive *archives;
struct lmo_catalog *next;
};
typedef struct lmo_catalog lmo_catalog_t;
uint32_t sfh_hash(const char *data, int len);
uint32_t lmo_canon_hash(const char *data, int len);
lmo_archive_t * lmo_open(const char *file);
void lmo_close(lmo_archive_t *ar);
extern lmo_catalog_t *_lmo_catalogs;
extern lmo_catalog_t *_lmo_active_catalog;
int lmo_load_catalog(const char *lang, const char *dir);
int lmo_change_catalog(const char *lang);
int lmo_translate(const char *key, int keylen, char **out, int *outlen);
void lmo_close_catalog(const char *lang);
#endif