aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/patches-3.3/041-codel-use-Newton-method-instead-of-sqrt-and-divides.patch
blob: f91b42eda22136ac130c911711fdf679c5d1977d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
From 4a8056dfeef49b306ad6af24a5563d7d6867aae0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 12 May 2012 03:32:13 +0000
Subject: [PATCH] codel: use Newton method instead of sqrt() and divides

commit 536edd67109df5e0cdb2c4ee759e9bade7976367 upstream.

As Van pointed out, interval/sqrt(count) can be implemented using
multiplies only.

http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots

This patch implements the Newton method and reciprocal divide.

Total cost is 15 cycles instead of 120 on my Corei5 machine (64bit
kernel).

There is a small 'error' for count values < 5, but we don't really care.

I reuse a hole in struct codel_vars :
 - pack the dropping boolean into one bit
 - use 31bit to store the reciprocal value of sqrt(count).

Suggested-by: Van Jacobson <van@pollere.net>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Dave Taht <dave.taht@bufferbloat.net>
Cc: Kathleen Nichols <nichols@pollere.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/codel.h |   68 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 31 deletions(-)

--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -46,6 +46,7 @@
 #include <linux/skbuff.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
+#include <linux/reciprocal_div.h>
 
 /* Controlling Queue Delay (CoDel) algorithm
  * =========================================
@@ -123,6 +124,7 @@ struct codel_params {
  *			entered dropping state
  * @lastcount:		count at entry to dropping state
  * @dropping:		set to true if in dropping state
+ * @rec_inv_sqrt:	reciprocal value of sqrt(count) >> 1
  * @first_above_time:	when we went (or will go) continuously above target
  *			for interval
  * @drop_next:		time to drop next packet, or when we dropped last
@@ -131,7 +133,8 @@ struct codel_params {
 struct codel_vars {
 	u32		count;
 	u32		lastcount;
-	bool		dropping;
+	bool		dropping:1;
+	u32		rec_inv_sqrt:31;
 	codel_time_t	first_above_time;
 	codel_time_t	drop_next;
 	codel_time_t	ldelay;
@@ -158,11 +161,7 @@ static void codel_params_init(struct cod
 
 static void codel_vars_init(struct codel_vars *vars)
 {
-	vars->drop_next = 0;
-	vars->first_above_time = 0;
-	vars->dropping = false; /* exit dropping state */
-	vars->count = 0;
-	vars->lastcount = 0;
+	memset(vars, 0, sizeof(*vars));
 }
 
 static void codel_stats_init(struct codel_stats *stats)
@@ -170,38 +169,37 @@ static void codel_stats_init(struct code
 	stats->maxpacket = 256;
 }
 
-/* return interval/sqrt(x) with good precision
- * relies on int_sqrt(unsigned long x) kernel implementation
+/*
+ * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots
+ * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
+ *
+ * Here, invsqrt is a fixed point number (< 1.0), 31bit mantissa)
  */
-static u32 codel_inv_sqrt(u32 _interval, u32 _x)
+static void codel_Newton_step(struct codel_vars *vars)
 {
-	u64 interval = _interval;
-	unsigned long x = _x;
-
-	/* Scale operands for max precision */
+	u32 invsqrt = vars->rec_inv_sqrt;
+	u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 31;
+	u64 val = (3LL << 31) - ((u64)vars->count * invsqrt2);
 
-#if BITS_PER_LONG == 64
-	x <<= 32; /* On 64bit arches, we can prescale x by 32bits */
-	interval <<= 16;
-#endif
+	val = (val * invsqrt) >> 32;
 
-	while (x < (1UL << (BITS_PER_LONG - 2))) {
-		x <<= 2;
-		interval <<= 1;
-	}
-	do_div(interval, int_sqrt(x));
-	return (u32)interval;
+	vars->rec_inv_sqrt = val;
 }
 
+/*
+ * CoDel control_law is t + interval/sqrt(count)
+ * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
+ * both sqrt() and divide operation.
+ */
 static codel_time_t codel_control_law(codel_time_t t,
 				      codel_time_t interval,
-				      u32 count)
+				      u32 rec_inv_sqrt)
 {
-	return t + codel_inv_sqrt(interval, count);
+	return t + reciprocal_divide(interval, rec_inv_sqrt << 1);
 }
 
 
-static bool codel_should_drop(struct sk_buff *skb,
+static bool codel_should_drop(const struct sk_buff *skb,
 			      unsigned int *backlog,
 			      struct codel_vars *vars,
 			      struct codel_params *params,
@@ -274,14 +272,16 @@ static struct sk_buff *codel_dequeue(str
 			 */
 			while (vars->dropping &&
 			       codel_time_after_eq(now, vars->drop_next)) {
-				if (++vars->count == 0) /* avoid zero divides */
-					vars->count = ~0U;
+				vars->count++; /* dont care of possible wrap
+						* since there is no more divide
+						*/
+				codel_Newton_step(vars);
 				if (params->ecn && INET_ECN_set_ce(skb)) {
 					stats->ecn_mark++;
 					vars->drop_next =
 						codel_control_law(vars->drop_next,
 								  params->interval,
-								  vars->count);
+								  vars->rec_inv_sqrt);
 					goto end;
 				}
 				qdisc_drop(skb, sch);
@@ -296,7 +296,7 @@ static struct sk_buff *codel_dequeue(str
 					vars->drop_next =
 						codel_control_law(vars->drop_next,
 								  params->interval,
-								  vars->count);
+								  vars->rec_inv_sqrt);
 				}
 			}
 		}
@@ -319,12 +319,18 @@ static struct sk_buff *codel_dequeue(str
 		if (codel_time_before(now - vars->drop_next,
 				      16 * params->interval)) {
 			vars->count = (vars->count - vars->lastcount) | 1;
+			/* we dont care if rec_inv_sqrt approximation
+			 * is not very precise :
+			 * Next Newton steps will correct it quadratically.
+			 */
+			codel_Newton_step(vars);
 		} else {
 			vars->count = 1;
+			vars->rec_inv_sqrt = 0x7fffffff;
 		}
 		vars->lastcount = vars->count;
 		vars->drop_next = codel_control_law(now, params->interval,
-						    vars->count);
+						    vars->rec_inv_sqrt);
 	}
 end:
 	return skb;