Commit | Line | Data |
---|---|---|
1d8206b9 TT |
1 | /* |
2 | * linux/net/sunrpc/svc_xprt.c | |
3 | * | |
4 | * Author: Tom Tucker <tom@opengridcomputing.com> | |
5 | */ | |
6 | ||
7 | #include <linux/sched.h> | |
8 | #include <linux/errno.h> | |
9 | #include <linux/fcntl.h> | |
10 | #include <linux/net.h> | |
11 | #include <linux/in.h> | |
12 | #include <linux/inet.h> | |
13 | #include <linux/udp.h> | |
14 | #include <linux/tcp.h> | |
15 | #include <linux/unistd.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/netdevice.h> | |
18 | #include <linux/skbuff.h> | |
19 | #include <linux/file.h> | |
20 | #include <linux/freezer.h> | |
21 | #include <net/sock.h> | |
22 | #include <net/checksum.h> | |
23 | #include <net/ip.h> | |
24 | #include <net/ipv6.h> | |
25 | #include <net/tcp_states.h> | |
26 | #include <linux/uaccess.h> | |
27 | #include <asm/ioctls.h> | |
28 | ||
29 | #include <linux/sunrpc/types.h> | |
30 | #include <linux/sunrpc/clnt.h> | |
31 | #include <linux/sunrpc/xdr.h> | |
1d8206b9 TT |
32 | #include <linux/sunrpc/stats.h> |
33 | #include <linux/sunrpc/svc_xprt.h> | |
34 | ||
35 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | |
36 | ||
0f0257ea TT |
37 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); |
38 | static int svc_deferred_recv(struct svc_rqst *rqstp); | |
39 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | |
40 | static void svc_age_temp_xprts(unsigned long closure); | |
41 | ||
42 | /* apparently the "standard" is that clients close | |
43 | * idle connections after 5 minutes, servers after | |
44 | * 6 minutes | |
45 | * http://www.connectathon.org/talks96/nfstcp.pdf | |
46 | */ | |
47 | static int svc_conn_age_period = 6*60; | |
48 | ||
1d8206b9 TT |
49 | /* List of registered transport classes */ |
50 | static DEFINE_SPINLOCK(svc_xprt_class_lock); | |
51 | static LIST_HEAD(svc_xprt_class_list); | |
52 | ||
0f0257ea TT |
53 | /* SMP locking strategy: |
54 | * | |
55 | * svc_pool->sp_lock protects most of the fields of that pool. | |
56 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | |
57 | * when both need to be taken (rare), svc_serv->sv_lock is first. | |
58 | * BKL protects svc_serv->sv_nrthread. | |
59 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | |
60 | * and the ->sk_info_authunix cache. | |
61 | * | |
62 | * The XPT_BUSY bit in xprt->xpt_flags prevents a transport being | |
63 | * enqueued multiply. During normal transport processing this bit | |
64 | * is set by svc_xprt_enqueue and cleared by svc_xprt_received. | |
65 | * Providers should not manipulate this bit directly. | |
66 | * | |
67 | * Some flags can be set to certain values at any time | |
68 | * providing that certain rules are followed: | |
69 | * | |
70 | * XPT_CONN, XPT_DATA: | |
71 | * - Can be set or cleared at any time. | |
72 | * - After a set, svc_xprt_enqueue must be called to enqueue | |
73 | * the transport for processing. | |
74 | * - After a clear, the transport must be read/accepted. | |
75 | * If this succeeds, it must be set again. | |
76 | * XPT_CLOSE: | |
77 | * - Can be set at any time. It is never cleared. | |
78 | * XPT_DEAD: | |
79 | * - Can only be set while XPT_BUSY is held which ensures | |
80 | * that no other thread will be using the transport or will | |
81 | * try to set XPT_DEAD. | |
82 | */ | |
83 | ||
1d8206b9 TT |
84 | int svc_reg_xprt_class(struct svc_xprt_class *xcl) |
85 | { | |
86 | struct svc_xprt_class *cl; | |
87 | int res = -EEXIST; | |
88 | ||
89 | dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name); | |
90 | ||
91 | INIT_LIST_HEAD(&xcl->xcl_list); | |
92 | spin_lock(&svc_xprt_class_lock); | |
93 | /* Make sure there isn't already a class with the same name */ | |
94 | list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) { | |
95 | if (strcmp(xcl->xcl_name, cl->xcl_name) == 0) | |
96 | goto out; | |
97 | } | |
98 | list_add_tail(&xcl->xcl_list, &svc_xprt_class_list); | |
99 | res = 0; | |
100 | out: | |
101 | spin_unlock(&svc_xprt_class_lock); | |
102 | return res; | |
103 | } | |
104 | EXPORT_SYMBOL_GPL(svc_reg_xprt_class); | |
105 | ||
106 | void svc_unreg_xprt_class(struct svc_xprt_class *xcl) | |
107 | { | |
108 | dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name); | |
109 | spin_lock(&svc_xprt_class_lock); | |
110 | list_del_init(&xcl->xcl_list); | |
111 | spin_unlock(&svc_xprt_class_lock); | |
112 | } | |
113 | EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); | |
114 | ||
e1b3157f TT |
115 | static void svc_xprt_free(struct kref *kref) |
116 | { | |
117 | struct svc_xprt *xprt = | |
118 | container_of(kref, struct svc_xprt, xpt_ref); | |
119 | struct module *owner = xprt->xpt_class->xcl_owner; | |
def13d74 TT |
120 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) |
121 | && xprt->xpt_auth_cache != NULL) | |
122 | svcauth_unix_info_release(xprt->xpt_auth_cache); | |
e1b3157f TT |
123 | xprt->xpt_ops->xpo_free(xprt); |
124 | module_put(owner); | |
125 | } | |
126 | ||
127 | void svc_xprt_put(struct svc_xprt *xprt) | |
128 | { | |
129 | kref_put(&xprt->xpt_ref, svc_xprt_free); | |
130 | } | |
131 | EXPORT_SYMBOL_GPL(svc_xprt_put); | |
132 | ||
1d8206b9 TT |
133 | /* |
134 | * Called by transport drivers to initialize the transport independent | |
135 | * portion of the transport instance. | |
136 | */ | |
bb5cf160 TT |
137 | void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, |
138 | struct svc_serv *serv) | |
1d8206b9 TT |
139 | { |
140 | memset(xprt, 0, sizeof(*xprt)); | |
141 | xprt->xpt_class = xcl; | |
142 | xprt->xpt_ops = xcl->xcl_ops; | |
e1b3157f | 143 | kref_init(&xprt->xpt_ref); |
bb5cf160 | 144 | xprt->xpt_server = serv; |
7a182083 TT |
145 | INIT_LIST_HEAD(&xprt->xpt_list); |
146 | INIT_LIST_HEAD(&xprt->xpt_ready); | |
8c7b0172 | 147 | INIT_LIST_HEAD(&xprt->xpt_deferred); |
a50fea26 | 148 | mutex_init(&xprt->xpt_mutex); |
def13d74 | 149 | spin_lock_init(&xprt->xpt_lock); |
4e5caaa5 | 150 | set_bit(XPT_BUSY, &xprt->xpt_flags); |
1d8206b9 TT |
151 | } |
152 | EXPORT_SYMBOL_GPL(svc_xprt_init); | |
b700cbb1 TT |
153 | |
154 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | |
155 | int flags) | |
156 | { | |
157 | struct svc_xprt_class *xcl; | |
b700cbb1 TT |
158 | struct sockaddr_in sin = { |
159 | .sin_family = AF_INET, | |
160 | .sin_addr.s_addr = INADDR_ANY, | |
161 | .sin_port = htons(port), | |
162 | }; | |
163 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); | |
164 | spin_lock(&svc_xprt_class_lock); | |
165 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { | |
4e5caaa5 TT |
166 | struct svc_xprt *newxprt; |
167 | ||
168 | if (strcmp(xprt_name, xcl->xcl_name)) | |
169 | continue; | |
170 | ||
171 | if (!try_module_get(xcl->xcl_owner)) | |
172 | goto err; | |
173 | ||
174 | spin_unlock(&svc_xprt_class_lock); | |
175 | newxprt = xcl->xcl_ops-> | |
176 | xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), | |
177 | flags); | |
178 | if (IS_ERR(newxprt)) { | |
179 | module_put(xcl->xcl_owner); | |
180 | return PTR_ERR(newxprt); | |
b700cbb1 | 181 | } |
4e5caaa5 TT |
182 | |
183 | clear_bit(XPT_TEMP, &newxprt->xpt_flags); | |
184 | spin_lock_bh(&serv->sv_lock); | |
185 | list_add(&newxprt->xpt_list, &serv->sv_permsocks); | |
186 | spin_unlock_bh(&serv->sv_lock); | |
187 | clear_bit(XPT_BUSY, &newxprt->xpt_flags); | |
188 | return svc_xprt_local_port(newxprt); | |
b700cbb1 | 189 | } |
4e5caaa5 | 190 | err: |
b700cbb1 TT |
191 | spin_unlock(&svc_xprt_class_lock); |
192 | dprintk("svc: transport %s not found\n", xprt_name); | |
4e5caaa5 | 193 | return -ENOENT; |
b700cbb1 TT |
194 | } |
195 | EXPORT_SYMBOL_GPL(svc_create_xprt); | |
9dbc240f TT |
196 | |
197 | /* | |
198 | * Copy the local and remote xprt addresses to the rqstp structure | |
199 | */ | |
200 | void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) | |
201 | { | |
202 | struct sockaddr *sin; | |
203 | ||
204 | memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen); | |
205 | rqstp->rq_addrlen = xprt->xpt_remotelen; | |
206 | ||
207 | /* | |
208 | * Destination address in request is needed for binding the | |
209 | * source address in RPC replies/callbacks later. | |
210 | */ | |
211 | sin = (struct sockaddr *)&xprt->xpt_local; | |
212 | switch (sin->sa_family) { | |
213 | case AF_INET: | |
214 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | |
215 | break; | |
216 | case AF_INET6: | |
217 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | |
218 | break; | |
219 | } | |
220 | } | |
221 | EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs); | |
222 | ||
0f0257ea TT |
223 | /** |
224 | * svc_print_addr - Format rq_addr field for printing | |
225 | * @rqstp: svc_rqst struct containing address to print | |
226 | * @buf: target buffer for formatted address | |
227 | * @len: length of target buffer | |
228 | * | |
229 | */ | |
230 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | |
231 | { | |
232 | return __svc_print_addr(svc_addr(rqstp), buf, len); | |
233 | } | |
234 | EXPORT_SYMBOL_GPL(svc_print_addr); | |
235 | ||
236 | /* | |
237 | * Queue up an idle server thread. Must have pool->sp_lock held. | |
238 | * Note: this is really a stack rather than a queue, so that we only | |
239 | * use as many different threads as we need, and the rest don't pollute | |
240 | * the cache. | |
241 | */ | |
242 | static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | |
243 | { | |
244 | list_add(&rqstp->rq_list, &pool->sp_threads); | |
245 | } | |
246 | ||
247 | /* | |
248 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | |
249 | */ | |
250 | static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | |
251 | { | |
252 | list_del(&rqstp->rq_list); | |
253 | } | |
254 | ||
255 | /* | |
256 | * Queue up a transport with data pending. If there are idle nfsd | |
257 | * processes, wake 'em up. | |
258 | * | |
259 | */ | |
260 | void svc_xprt_enqueue(struct svc_xprt *xprt) | |
261 | { | |
262 | struct svc_serv *serv = xprt->xpt_server; | |
263 | struct svc_pool *pool; | |
264 | struct svc_rqst *rqstp; | |
265 | int cpu; | |
266 | ||
267 | if (!(xprt->xpt_flags & | |
268 | ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) | |
269 | return; | |
270 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) | |
271 | return; | |
272 | ||
273 | cpu = get_cpu(); | |
274 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | |
275 | put_cpu(); | |
276 | ||
277 | spin_lock_bh(&pool->sp_lock); | |
278 | ||
279 | if (!list_empty(&pool->sp_threads) && | |
280 | !list_empty(&pool->sp_sockets)) | |
281 | printk(KERN_ERR | |
282 | "svc_xprt_enqueue: " | |
283 | "threads and transports both waiting??\n"); | |
284 | ||
285 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { | |
286 | /* Don't enqueue dead transports */ | |
287 | dprintk("svc: transport %p is dead, not enqueued\n", xprt); | |
288 | goto out_unlock; | |
289 | } | |
290 | ||
291 | /* Mark transport as busy. It will remain in this state until | |
292 | * the provider calls svc_xprt_received. We update XPT_BUSY | |
293 | * atomically because it also guards against trying to enqueue | |
294 | * the transport twice. | |
295 | */ | |
296 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { | |
297 | /* Don't enqueue transport while already enqueued */ | |
298 | dprintk("svc: transport %p busy, not enqueued\n", xprt); | |
299 | goto out_unlock; | |
300 | } | |
301 | BUG_ON(xprt->xpt_pool != NULL); | |
302 | xprt->xpt_pool = pool; | |
303 | ||
304 | /* Handle pending connection */ | |
305 | if (test_bit(XPT_CONN, &xprt->xpt_flags)) | |
306 | goto process; | |
307 | ||
308 | /* Handle close in-progress */ | |
309 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | |
310 | goto process; | |
311 | ||
312 | /* Check if we have space to reply to a request */ | |
313 | if (!xprt->xpt_ops->xpo_has_wspace(xprt)) { | |
314 | /* Don't enqueue while not enough space for reply */ | |
315 | dprintk("svc: no write space, transport %p not enqueued\n", | |
316 | xprt); | |
317 | xprt->xpt_pool = NULL; | |
318 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | |
319 | goto out_unlock; | |
320 | } | |
321 | ||
322 | process: | |
323 | if (!list_empty(&pool->sp_threads)) { | |
324 | rqstp = list_entry(pool->sp_threads.next, | |
325 | struct svc_rqst, | |
326 | rq_list); | |
327 | dprintk("svc: transport %p served by daemon %p\n", | |
328 | xprt, rqstp); | |
329 | svc_thread_dequeue(pool, rqstp); | |
330 | if (rqstp->rq_xprt) | |
331 | printk(KERN_ERR | |
332 | "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", | |
333 | rqstp, rqstp->rq_xprt); | |
334 | rqstp->rq_xprt = xprt; | |
335 | svc_xprt_get(xprt); | |
336 | rqstp->rq_reserved = serv->sv_max_mesg; | |
337 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | |
338 | BUG_ON(xprt->xpt_pool != pool); | |
339 | wake_up(&rqstp->rq_wait); | |
340 | } else { | |
341 | dprintk("svc: transport %p put into queue\n", xprt); | |
342 | list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); | |
343 | BUG_ON(xprt->xpt_pool != pool); | |
344 | } | |
345 | ||
346 | out_unlock: | |
347 | spin_unlock_bh(&pool->sp_lock); | |
348 | } | |
349 | EXPORT_SYMBOL_GPL(svc_xprt_enqueue); | |
350 | ||
351 | /* | |
352 | * Dequeue the first transport. Must be called with the pool->sp_lock held. | |
353 | */ | |
354 | static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) | |
355 | { | |
356 | struct svc_xprt *xprt; | |
357 | ||
358 | if (list_empty(&pool->sp_sockets)) | |
359 | return NULL; | |
360 | ||
361 | xprt = list_entry(pool->sp_sockets.next, | |
362 | struct svc_xprt, xpt_ready); | |
363 | list_del_init(&xprt->xpt_ready); | |
364 | ||
365 | dprintk("svc: transport %p dequeued, inuse=%d\n", | |
366 | xprt, atomic_read(&xprt->xpt_ref.refcount)); | |
367 | ||
368 | return xprt; | |
369 | } | |
370 | ||
371 | /* | |
372 | * svc_xprt_received conditionally queues the transport for processing | |
373 | * by another thread. The caller must hold the XPT_BUSY bit and must | |
374 | * not thereafter touch transport data. | |
375 | * | |
376 | * Note: XPT_DATA only gets cleared when a read-attempt finds no (or | |
377 | * insufficient) data. | |
378 | */ | |
379 | void svc_xprt_received(struct svc_xprt *xprt) | |
380 | { | |
381 | BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); | |
382 | xprt->xpt_pool = NULL; | |
383 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | |
384 | svc_xprt_enqueue(xprt); | |
385 | } | |
386 | EXPORT_SYMBOL_GPL(svc_xprt_received); | |
387 | ||
388 | /** | |
389 | * svc_reserve - change the space reserved for the reply to a request. | |
390 | * @rqstp: The request in question | |
391 | * @space: new max space to reserve | |
392 | * | |
393 | * Each request reserves some space on the output queue of the transport | |
394 | * to make sure the reply fits. This function reduces that reserved | |
395 | * space to be the amount of space used already, plus @space. | |
396 | * | |
397 | */ | |
398 | void svc_reserve(struct svc_rqst *rqstp, int space) | |
399 | { | |
400 | space += rqstp->rq_res.head[0].iov_len; | |
401 | ||
402 | if (space < rqstp->rq_reserved) { | |
403 | struct svc_xprt *xprt = rqstp->rq_xprt; | |
404 | atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); | |
405 | rqstp->rq_reserved = space; | |
406 | ||
407 | svc_xprt_enqueue(xprt); | |
408 | } | |
409 | } | |
410 | ||
411 | static void svc_xprt_release(struct svc_rqst *rqstp) | |
412 | { | |
413 | struct svc_xprt *xprt = rqstp->rq_xprt; | |
414 | ||
415 | rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); | |
416 | ||
417 | svc_free_res_pages(rqstp); | |
418 | rqstp->rq_res.page_len = 0; | |
419 | rqstp->rq_res.page_base = 0; | |
420 | ||
421 | /* Reset response buffer and release | |
422 | * the reservation. | |
423 | * But first, check that enough space was reserved | |
424 | * for the reply, otherwise we have a bug! | |
425 | */ | |
426 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | |
427 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | |
428 | rqstp->rq_reserved, | |
429 | rqstp->rq_res.len); | |
430 | ||
431 | rqstp->rq_res.head[0].iov_len = 0; | |
432 | svc_reserve(rqstp, 0); | |
433 | rqstp->rq_xprt = NULL; | |
434 | ||
435 | svc_xprt_put(xprt); | |
436 | } | |
437 | ||
438 | /* | |
439 | * External function to wake up a server waiting for data | |
440 | * This really only makes sense for services like lockd | |
441 | * which have exactly one thread anyway. | |
442 | */ | |
443 | void svc_wake_up(struct svc_serv *serv) | |
444 | { | |
445 | struct svc_rqst *rqstp; | |
446 | unsigned int i; | |
447 | struct svc_pool *pool; | |
448 | ||
449 | for (i = 0; i < serv->sv_nrpools; i++) { | |
450 | pool = &serv->sv_pools[i]; | |
451 | ||
452 | spin_lock_bh(&pool->sp_lock); | |
453 | if (!list_empty(&pool->sp_threads)) { | |
454 | rqstp = list_entry(pool->sp_threads.next, | |
455 | struct svc_rqst, | |
456 | rq_list); | |
457 | dprintk("svc: daemon %p woken up.\n", rqstp); | |
458 | /* | |
459 | svc_thread_dequeue(pool, rqstp); | |
460 | rqstp->rq_xprt = NULL; | |
461 | */ | |
462 | wake_up(&rqstp->rq_wait); | |
463 | } | |
464 | spin_unlock_bh(&pool->sp_lock); | |
465 | } | |
466 | } | |
467 | ||
468 | int svc_port_is_privileged(struct sockaddr *sin) | |
469 | { | |
470 | switch (sin->sa_family) { | |
471 | case AF_INET: | |
472 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | |
473 | < PROT_SOCK; | |
474 | case AF_INET6: | |
475 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | |
476 | < PROT_SOCK; | |
477 | default: | |
478 | return 0; | |
479 | } | |
480 | } | |
481 | ||
482 | /* | |
483 | * Make sure that we don't have too many active connections. If we | |
484 | * have, something must be dropped. | |
485 | * | |
486 | * There's no point in trying to do random drop here for DoS | |
487 | * prevention. The NFS clients does 1 reconnect in 15 seconds. An | |
488 | * attacker can easily beat that. | |
489 | * | |
490 | * The only somewhat efficient mechanism would be if drop old | |
491 | * connections from the same IP first. But right now we don't even | |
492 | * record the client IP in svc_sock. | |
493 | */ | |
494 | static void svc_check_conn_limits(struct svc_serv *serv) | |
495 | { | |
496 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | |
497 | struct svc_xprt *xprt = NULL; | |
498 | spin_lock_bh(&serv->sv_lock); | |
499 | if (!list_empty(&serv->sv_tempsocks)) { | |
500 | if (net_ratelimit()) { | |
501 | /* Try to help the admin */ | |
502 | printk(KERN_NOTICE "%s: too many open " | |
503 | "connections, consider increasing the " | |
504 | "number of nfsd threads\n", | |
505 | serv->sv_name); | |
506 | } | |
507 | /* | |
508 | * Always select the oldest connection. It's not fair, | |
509 | * but so is life | |
510 | */ | |
511 | xprt = list_entry(serv->sv_tempsocks.prev, | |
512 | struct svc_xprt, | |
513 | xpt_list); | |
514 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | |
515 | svc_xprt_get(xprt); | |
516 | } | |
517 | spin_unlock_bh(&serv->sv_lock); | |
518 | ||
519 | if (xprt) { | |
520 | svc_xprt_enqueue(xprt); | |
521 | svc_xprt_put(xprt); | |
522 | } | |
523 | } | |
524 | } | |
525 | ||
526 | /* | |
527 | * Receive the next request on any transport. This code is carefully | |
528 | * organised not to touch any cachelines in the shared svc_serv | |
529 | * structure, only cachelines in the local svc_pool. | |
530 | */ | |
531 | int svc_recv(struct svc_rqst *rqstp, long timeout) | |
532 | { | |
533 | struct svc_xprt *xprt = NULL; | |
534 | struct svc_serv *serv = rqstp->rq_server; | |
535 | struct svc_pool *pool = rqstp->rq_pool; | |
536 | int len, i; | |
537 | int pages; | |
538 | struct xdr_buf *arg; | |
539 | DECLARE_WAITQUEUE(wait, current); | |
540 | ||
541 | dprintk("svc: server %p waiting for data (to = %ld)\n", | |
542 | rqstp, timeout); | |
543 | ||
544 | if (rqstp->rq_xprt) | |
545 | printk(KERN_ERR | |
546 | "svc_recv: service %p, transport not NULL!\n", | |
547 | rqstp); | |
548 | if (waitqueue_active(&rqstp->rq_wait)) | |
549 | printk(KERN_ERR | |
550 | "svc_recv: service %p, wait queue active!\n", | |
551 | rqstp); | |
552 | ||
553 | /* now allocate needed pages. If we get a failure, sleep briefly */ | |
554 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | |
555 | for (i = 0; i < pages ; i++) | |
556 | while (rqstp->rq_pages[i] == NULL) { | |
557 | struct page *p = alloc_page(GFP_KERNEL); | |
558 | if (!p) { | |
559 | int j = msecs_to_jiffies(500); | |
560 | schedule_timeout_uninterruptible(j); | |
561 | } | |
562 | rqstp->rq_pages[i] = p; | |
563 | } | |
564 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | |
565 | BUG_ON(pages >= RPCSVC_MAXPAGES); | |
566 | ||
567 | /* Make arg->head point to first page and arg->pages point to rest */ | |
568 | arg = &rqstp->rq_arg; | |
569 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | |
570 | arg->head[0].iov_len = PAGE_SIZE; | |
571 | arg->pages = rqstp->rq_pages + 1; | |
572 | arg->page_base = 0; | |
573 | /* save at least one page for response */ | |
574 | arg->page_len = (pages-2)*PAGE_SIZE; | |
575 | arg->len = (pages-1)*PAGE_SIZE; | |
576 | arg->tail[0].iov_len = 0; | |
577 | ||
578 | try_to_freeze(); | |
579 | cond_resched(); | |
580 | if (signalled()) | |
581 | return -EINTR; | |
582 | ||
583 | spin_lock_bh(&pool->sp_lock); | |
584 | xprt = svc_xprt_dequeue(pool); | |
585 | if (xprt) { | |
586 | rqstp->rq_xprt = xprt; | |
587 | svc_xprt_get(xprt); | |
588 | rqstp->rq_reserved = serv->sv_max_mesg; | |
589 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | |
590 | } else { | |
591 | /* No data pending. Go to sleep */ | |
592 | svc_thread_enqueue(pool, rqstp); | |
593 | ||
594 | /* | |
595 | * We have to be able to interrupt this wait | |
596 | * to bring down the daemons ... | |
597 | */ | |
598 | set_current_state(TASK_INTERRUPTIBLE); | |
599 | add_wait_queue(&rqstp->rq_wait, &wait); | |
600 | spin_unlock_bh(&pool->sp_lock); | |
601 | ||
602 | schedule_timeout(timeout); | |
603 | ||
604 | try_to_freeze(); | |
605 | ||
606 | spin_lock_bh(&pool->sp_lock); | |
607 | remove_wait_queue(&rqstp->rq_wait, &wait); | |
608 | ||
609 | xprt = rqstp->rq_xprt; | |
610 | if (!xprt) { | |
611 | svc_thread_dequeue(pool, rqstp); | |
612 | spin_unlock_bh(&pool->sp_lock); | |
613 | dprintk("svc: server %p, no data yet\n", rqstp); | |
614 | return signalled()? -EINTR : -EAGAIN; | |
615 | } | |
616 | } | |
617 | spin_unlock_bh(&pool->sp_lock); | |
618 | ||
619 | len = 0; | |
620 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { | |
621 | dprintk("svc_recv: found XPT_CLOSE\n"); | |
622 | svc_delete_xprt(xprt); | |
623 | } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { | |
624 | struct svc_xprt *newxpt; | |
625 | newxpt = xprt->xpt_ops->xpo_accept(xprt); | |
626 | if (newxpt) { | |
627 | /* | |
628 | * We know this module_get will succeed because the | |
629 | * listener holds a reference too | |
630 | */ | |
631 | __module_get(newxpt->xpt_class->xcl_owner); | |
632 | svc_check_conn_limits(xprt->xpt_server); | |
633 | spin_lock_bh(&serv->sv_lock); | |
634 | set_bit(XPT_TEMP, &newxpt->xpt_flags); | |
635 | list_add(&newxpt->xpt_list, &serv->sv_tempsocks); | |
636 | serv->sv_tmpcnt++; | |
637 | if (serv->sv_temptimer.function == NULL) { | |
638 | /* setup timer to age temp transports */ | |
639 | setup_timer(&serv->sv_temptimer, | |
640 | svc_age_temp_xprts, | |
641 | (unsigned long)serv); | |
642 | mod_timer(&serv->sv_temptimer, | |
643 | jiffies + svc_conn_age_period * HZ); | |
644 | } | |
645 | spin_unlock_bh(&serv->sv_lock); | |
646 | svc_xprt_received(newxpt); | |
647 | } | |
648 | svc_xprt_received(xprt); | |
649 | } else { | |
650 | dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", | |
651 | rqstp, pool->sp_id, xprt, | |
652 | atomic_read(&xprt->xpt_ref.refcount)); | |
653 | rqstp->rq_deferred = svc_deferred_dequeue(xprt); | |
654 | if (rqstp->rq_deferred) { | |
655 | svc_xprt_received(xprt); | |
656 | len = svc_deferred_recv(rqstp); | |
657 | } else | |
658 | len = xprt->xpt_ops->xpo_recvfrom(rqstp); | |
659 | dprintk("svc: got len=%d\n", len); | |
660 | } | |
661 | ||
662 | /* No data, incomplete (TCP) read, or accept() */ | |
663 | if (len == 0 || len == -EAGAIN) { | |
664 | rqstp->rq_res.len = 0; | |
665 | svc_xprt_release(rqstp); | |
666 | return -EAGAIN; | |
667 | } | |
668 | clear_bit(XPT_OLD, &xprt->xpt_flags); | |
669 | ||
670 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | |
671 | rqstp->rq_chandle.defer = svc_defer; | |
672 | ||
673 | if (serv->sv_stats) | |
674 | serv->sv_stats->netcnt++; | |
675 | return len; | |
676 | } | |
677 | ||
678 | /* | |
679 | * Drop request | |
680 | */ | |
681 | void svc_drop(struct svc_rqst *rqstp) | |
682 | { | |
683 | dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); | |
684 | svc_xprt_release(rqstp); | |
685 | } | |
686 | ||
/*
 * Return reply to client.
 *
 * Returns -EFAULT if the request has no transport.  Otherwise returns
 * the result of the transport's xpo_sendto method, except that
 * -ECONNREFUSED, -ENOTCONN and -EAGAIN are reported as 0.  A dead
 * transport is not sent to and counts as -ENOTCONN.
 */
int svc_send(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct xdr_buf *reply;
	int len;

	if (xprt == NULL)
		return -EFAULT;

	/* release the receive skb before sending the reply */
	xprt->xpt_ops->xpo_release_rqst(rqstp);

	/* calculate over-all length */
	reply = &rqstp->rq_res;
	reply->len = reply->head[0].iov_len +
		     reply->page_len +
		     reply->tail[0].iov_len;

	/* Grab mutex to serialize outgoing data. */
	mutex_lock(&xprt->xpt_mutex);
	if (!test_bit(XPT_DEAD, &xprt->xpt_flags))
		len = xprt->xpt_ops->xpo_sendto(rqstp);
	else
		len = -ENOTCONN;
	mutex_unlock(&xprt->xpt_mutex);
	svc_xprt_release(rqstp);

	switch (len) {
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -EAGAIN:
		/* These are not reported to the caller as errors */
		return 0;
	default:
		return len;
	}
}
722 | ||
723 | /* | |
724 | * Timer function to close old temporary transports, using | |
725 | * a mark-and-sweep algorithm. | |
726 | */ | |
727 | static void svc_age_temp_xprts(unsigned long closure) | |
728 | { | |
729 | struct svc_serv *serv = (struct svc_serv *)closure; | |
730 | struct svc_xprt *xprt; | |
731 | struct list_head *le, *next; | |
732 | LIST_HEAD(to_be_aged); | |
733 | ||
734 | dprintk("svc_age_temp_xprts\n"); | |
735 | ||
736 | if (!spin_trylock_bh(&serv->sv_lock)) { | |
737 | /* busy, try again 1 sec later */ | |
738 | dprintk("svc_age_temp_xprts: busy\n"); | |
739 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | |
740 | return; | |
741 | } | |
742 | ||
743 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | |
744 | xprt = list_entry(le, struct svc_xprt, xpt_list); | |
745 | ||
746 | /* First time through, just mark it OLD. Second time | |
747 | * through, close it. */ | |
748 | if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) | |
749 | continue; | |
750 | if (atomic_read(&xprt->xpt_ref.refcount) > 1 | |
751 | || test_bit(XPT_BUSY, &xprt->xpt_flags)) | |
752 | continue; | |
753 | svc_xprt_get(xprt); | |
754 | list_move(le, &to_be_aged); | |
755 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | |
756 | set_bit(XPT_DETACHED, &xprt->xpt_flags); | |
757 | } | |
758 | spin_unlock_bh(&serv->sv_lock); | |
759 | ||
760 | while (!list_empty(&to_be_aged)) { | |
761 | le = to_be_aged.next; | |
762 | /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ | |
763 | list_del_init(le); | |
764 | xprt = list_entry(le, struct svc_xprt, xpt_list); | |
765 | ||
766 | dprintk("queuing xprt %p for closing\n", xprt); | |
767 | ||
768 | /* a thread will dequeue and close it soon */ | |
769 | svc_xprt_enqueue(xprt); | |
770 | svc_xprt_put(xprt); | |
771 | } | |
772 | ||
773 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | |
774 | } | |
775 | ||
776 | /* | |
777 | * Remove a dead transport | |
778 | */ | |
779 | void svc_delete_xprt(struct svc_xprt *xprt) | |
780 | { | |
781 | struct svc_serv *serv = xprt->xpt_server; | |
782 | ||
783 | dprintk("svc: svc_delete_xprt(%p)\n", xprt); | |
784 | xprt->xpt_ops->xpo_detach(xprt); | |
785 | ||
786 | spin_lock_bh(&serv->sv_lock); | |
787 | if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) | |
788 | list_del_init(&xprt->xpt_list); | |
789 | /* | |
790 | * We used to delete the transport from whichever list | |
791 | * it's sk_xprt.xpt_ready node was on, but we don't actually | |
792 | * need to. This is because the only time we're called | |
793 | * while still attached to a queue, the queue itself | |
794 | * is about to be destroyed (in svc_destroy). | |
795 | */ | |
796 | if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) { | |
797 | BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2); | |
798 | if (test_bit(XPT_TEMP, &xprt->xpt_flags)) | |
799 | serv->sv_tmpcnt--; | |
800 | svc_xprt_put(xprt); | |
801 | } | |
802 | spin_unlock_bh(&serv->sv_lock); | |
803 | } | |
804 | ||
805 | void svc_close_xprt(struct svc_xprt *xprt) | |
806 | { | |
807 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | |
808 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) | |
809 | /* someone else will have to effect the close */ | |
810 | return; | |
811 | ||
812 | svc_xprt_get(xprt); | |
813 | svc_delete_xprt(xprt); | |
814 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | |
815 | svc_xprt_put(xprt); | |
816 | } | |
817 | ||
818 | void svc_close_all(struct list_head *xprt_list) | |
819 | { | |
820 | struct svc_xprt *xprt; | |
821 | struct svc_xprt *tmp; | |
822 | ||
823 | list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { | |
824 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | |
825 | if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { | |
826 | /* Waiting to be processed, but no threads left, | |
827 | * So just remove it from the waiting list | |
828 | */ | |
829 | list_del_init(&xprt->xpt_ready); | |
830 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | |
831 | } | |
832 | svc_close_xprt(xprt); | |
833 | } | |
834 | } | |
835 | ||
836 | /* | |
837 | * Handle defer and revisit of requests | |
838 | */ | |
839 | ||
840 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | |
841 | { | |
842 | struct svc_deferred_req *dr = | |
843 | container_of(dreq, struct svc_deferred_req, handle); | |
844 | struct svc_xprt *xprt = dr->xprt; | |
845 | ||
846 | if (too_many) { | |
847 | svc_xprt_put(xprt); | |
848 | kfree(dr); | |
849 | return; | |
850 | } | |
851 | dprintk("revisit queued\n"); | |
852 | dr->xprt = NULL; | |
853 | spin_lock(&xprt->xpt_lock); | |
854 | list_add(&dr->handle.recent, &xprt->xpt_deferred); | |
855 | spin_unlock(&xprt->xpt_lock); | |
856 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); | |
857 | svc_xprt_enqueue(xprt); | |
858 | svc_xprt_put(xprt); | |
859 | } | |
860 | ||
260c1d12 TT |
861 | /* |
862 | * Save the request off for later processing. The request buffer looks | |
863 | * like this: | |
864 | * | |
865 | * <xprt-header><rpc-header><rpc-pagelist><rpc-tail> | |
866 | * | |
867 | * This code can only handle requests that consist of an xprt-header | |
868 | * and rpc-header. | |
869 | */ | |
0f0257ea TT |
870 | static struct cache_deferred_req *svc_defer(struct cache_req *req) |
871 | { | |
872 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | |
0f0257ea TT |
873 | struct svc_deferred_req *dr; |
874 | ||
875 | if (rqstp->rq_arg.page_len) | |
876 | return NULL; /* if more than a page, give up FIXME */ | |
877 | if (rqstp->rq_deferred) { | |
878 | dr = rqstp->rq_deferred; | |
879 | rqstp->rq_deferred = NULL; | |
880 | } else { | |
260c1d12 TT |
881 | size_t skip; |
882 | size_t size; | |
0f0257ea | 883 | /* FIXME maybe discard if size too large */ |
260c1d12 | 884 | size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len; |
0f0257ea TT |
885 | dr = kmalloc(size, GFP_KERNEL); |
886 | if (dr == NULL) | |
887 | return NULL; | |
888 | ||
889 | dr->handle.owner = rqstp->rq_server; | |
890 | dr->prot = rqstp->rq_prot; | |
891 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | |
892 | dr->addrlen = rqstp->rq_addrlen; | |
893 | dr->daddr = rqstp->rq_daddr; | |
894 | dr->argslen = rqstp->rq_arg.len >> 2; | |
260c1d12 TT |
895 | dr->xprt_hlen = rqstp->rq_xprt_hlen; |
896 | ||
897 | /* back up head to the start of the buffer and copy */ | |
898 | skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | |
899 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip, | |
900 | dr->argslen << 2); | |
0f0257ea TT |
901 | } |
902 | svc_xprt_get(rqstp->rq_xprt); | |
903 | dr->xprt = rqstp->rq_xprt; | |
904 | ||
905 | dr->handle.revisit = svc_revisit; | |
906 | return &dr->handle; | |
907 | } | |
908 | ||
909 | /* | |
910 | * recv data from a deferred request into an active one | |
911 | */ | |
912 | static int svc_deferred_recv(struct svc_rqst *rqstp) | |
913 | { | |
914 | struct svc_deferred_req *dr = rqstp->rq_deferred; | |
915 | ||
260c1d12 TT |
916 | /* setup iov_base past transport header */ |
917 | rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); | |
918 | /* The iov_len does not include the transport header bytes */ | |
919 | rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen; | |
0f0257ea | 920 | rqstp->rq_arg.page_len = 0; |
260c1d12 TT |
921 | /* The rq_arg.len includes the transport header bytes */ |
922 | rqstp->rq_arg.len = dr->argslen<<2; | |
0f0257ea TT |
923 | rqstp->rq_prot = dr->prot; |
924 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | |
925 | rqstp->rq_addrlen = dr->addrlen; | |
260c1d12 TT |
926 | /* Save off transport header len in case we get deferred again */ |
927 | rqstp->rq_xprt_hlen = dr->xprt_hlen; | |
0f0257ea TT |
928 | rqstp->rq_daddr = dr->daddr; |
929 | rqstp->rq_respages = rqstp->rq_pages; | |
260c1d12 | 930 | return (dr->argslen<<2) - dr->xprt_hlen; |
0f0257ea TT |
931 | } |
932 | ||
933 | ||
934 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) | |
935 | { | |
936 | struct svc_deferred_req *dr = NULL; | |
937 | ||
938 | if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) | |
939 | return NULL; | |
940 | spin_lock(&xprt->xpt_lock); | |
941 | clear_bit(XPT_DEFERRED, &xprt->xpt_flags); | |
942 | if (!list_empty(&xprt->xpt_deferred)) { | |
943 | dr = list_entry(xprt->xpt_deferred.next, | |
944 | struct svc_deferred_req, | |
945 | handle.recent); | |
946 | list_del_init(&dr->handle.recent); | |
947 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); | |
948 | } | |
949 | spin_unlock(&xprt->xpt_lock); | |
950 | return dr; | |
951 | } |