Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
6a5b99a4 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 | 19 | * |
d7e09d03 PT |
20 | * GPL HEADER END |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
26 | * Copyright (c) 2011, 2012, Intel Corporation. | |
27 | */ | |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
30 | * Lustre is a trademark of Sun Microsystems, Inc. | |
31 | */ | |
32 | ||
33 | #include "socklnd.h" | |
34 | ||
d7e09d03 | 35 | int |
ff13fd40 | 36 | ksocknal_lib_get_conn_addrs(struct ksock_conn *conn) |
d7e09d03 | 37 | { |
1ad6a73e JS |
38 | int rc = lnet_sock_getaddr(conn->ksnc_sock, 1, &conn->ksnc_ipaddr, |
39 | &conn->ksnc_port); | |
d7e09d03 PT |
40 | |
41 | /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ | |
978b9b35 | 42 | LASSERT(!conn->ksnc_closing); |
d7e09d03 | 43 | |
5fd88337 | 44 | if (rc) { |
978b9b35 | 45 | CERROR("Error %d getting sock peer IP\n", rc); |
d7e09d03 PT |
46 | return rc; |
47 | } | |
48 | ||
1ad6a73e | 49 | rc = lnet_sock_getaddr(conn->ksnc_sock, 0, &conn->ksnc_myipaddr, NULL); |
5fd88337 | 50 | if (rc) { |
978b9b35 | 51 | CERROR("Error %d getting sock local IP\n", rc); |
d7e09d03 PT |
52 | return rc; |
53 | } | |
54 | ||
55 | return 0; | |
56 | } | |
57 | ||
58 | int | |
ff13fd40 | 59 | ksocknal_lib_zc_capable(struct ksock_conn *conn) |
d7e09d03 | 60 | { |
97d10d0a | 61 | int caps = conn->ksnc_sock->sk->sk_route_caps; |
d7e09d03 PT |
62 | |
63 | if (conn->ksnc_proto == &ksocknal_protocol_v1x) | |
64 | return 0; | |
65 | ||
4420cfd3 JS |
66 | /* |
67 | * ZC if the socket supports scatter/gather and doesn't need software | |
68 | * checksums | |
69 | */ | |
5fd88337 | 70 | return ((caps & NETIF_F_SG) && (caps & NETIF_F_CSUM_MASK)); |
d7e09d03 PT |
71 | } |
72 | ||
73 | int | |
ff13fd40 | 74 | ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx) |
d7e09d03 | 75 | { |
805560e8 | 76 | struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; |
d7e09d03 | 77 | struct socket *sock = conn->ksnc_sock; |
805560e8 | 78 | int nob, i; |
d7e09d03 PT |
79 | |
80 | if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ | |
81 | conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ | |
82 | tx->tx_nob == tx->tx_resid && /* frist sending */ | |
5fd88337 | 83 | !tx->tx_msg.ksm_csum) /* not checksummed */ |
d7e09d03 PT |
84 | ksocknal_lib_csum_tx(tx); |
85 | ||
805560e8 AV |
86 | for (nob = i = 0; i < tx->tx_niov; i++) |
87 | nob += tx->tx_iov[i].iov_len; | |
88 | ||
89 | if (!list_empty(&conn->ksnc_tx_queue) || | |
90 | nob < tx->tx_resid) | |
91 | msg.msg_flags |= MSG_MORE; | |
92 | ||
93 | iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, | |
94 | tx->tx_iov, tx->tx_niov, nob); | |
95 | return sock_sendmsg(sock, &msg); | |
d7e09d03 PT |
96 | } |
97 | ||
98 | int | |
ff13fd40 | 99 | ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx) |
d7e09d03 PT |
100 | { |
101 | struct socket *sock = conn->ksnc_sock; | |
97d10d0a MS |
102 | lnet_kiov_t *kiov = tx->tx_kiov; |
103 | int rc; | |
104 | int nob; | |
d7e09d03 PT |
105 | |
106 | /* Not NOOP message */ | |
06ace26e | 107 | LASSERT(tx->tx_lnetmsg); |
d7e09d03 | 108 | |
5fd88337 | 109 | if (tx->tx_msg.ksm_zc_cookies[0]) { |
d7e09d03 | 110 | /* Zero copy is enabled */ |
97d10d0a | 111 | struct sock *sk = sock->sk; |
65ffc679 AV |
112 | struct page *page = kiov->bv_page; |
113 | int offset = kiov->bv_offset; | |
114 | int fragsize = kiov->bv_len; | |
97d10d0a | 115 | int msgflg = MSG_DONTWAIT; |
d7e09d03 PT |
116 | |
117 | CDEBUG(D_NET, "page %p + offset %x for %d\n", | |
65ffc679 | 118 | page, offset, kiov->bv_len); |
d7e09d03 PT |
119 | |
120 | if (!list_empty(&conn->ksnc_tx_queue) || | |
121 | fragsize < tx->tx_resid) | |
122 | msgflg |= MSG_MORE; | |
123 | ||
06ace26e | 124 | if (sk->sk_prot->sendpage) { |
d7e09d03 PT |
125 | rc = sk->sk_prot->sendpage(sk, page, |
126 | offset, fragsize, msgflg); | |
127 | } else { | |
d664d1fd | 128 | rc = tcp_sendpage(sk, page, offset, fragsize, msgflg); |
d7e09d03 PT |
129 | } |
130 | } else { | |
480f40de | 131 | struct msghdr msg = {.msg_flags = MSG_DONTWAIT}; |
97d10d0a | 132 | int i; |
d7e09d03 | 133 | |
1b4e992f AV |
134 | for (nob = i = 0; i < tx->tx_nkiov; i++) |
135 | nob += kiov[i].bv_len; | |
d7e09d03 PT |
136 | |
137 | if (!list_empty(&conn->ksnc_tx_queue) || | |
138 | nob < tx->tx_resid) | |
139 | msg.msg_flags |= MSG_MORE; | |
140 | ||
1b4e992f AV |
141 | iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, |
142 | kiov, tx->tx_nkiov, nob); | |
143 | rc = sock_sendmsg(sock, &msg); | |
d7e09d03 PT |
144 | } |
145 | return rc; | |
146 | } | |
147 | ||
148 | void | |
ff13fd40 | 149 | ksocknal_lib_eager_ack(struct ksock_conn *conn) |
d7e09d03 | 150 | { |
97d10d0a | 151 | int opt = 1; |
d7e09d03 PT |
152 | struct socket *sock = conn->ksnc_sock; |
153 | ||
4420cfd3 JS |
154 | /* |
155 | * Remind the socket to ACK eagerly. If I don't, the socket might | |
d7e09d03 PT |
156 | * think I'm about to send something it could piggy-back the ACK |
157 | * on, introducing delay in completing zero-copy sends in my | |
4420cfd3 JS |
158 | * peer. |
159 | */ | |
c314c319 JS |
160 | kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char *)&opt, |
161 | sizeof(opt)); | |
d7e09d03 PT |
162 | } |
163 | ||
164 | int | |
ff13fd40 | 165 | ksocknal_lib_recv_iov(struct ksock_conn *conn) |
d7e09d03 | 166 | { |
97d10d0a | 167 | unsigned int niov = conn->ksnc_rx_niov; |
f351bad2 | 168 | struct kvec *iov = conn->ksnc_rx_iov; |
d7e09d03 | 169 | struct msghdr msg = { |
97d10d0a | 170 | .msg_flags = 0 |
d7e09d03 | 171 | }; |
97d10d0a MS |
172 | int nob; |
173 | int i; | |
174 | int rc; | |
175 | int fragnob; | |
176 | int sum; | |
177 | __u32 saved_csum; | |
d7e09d03 | 178 | |
978b9b35 | 179 | LASSERT(niov > 0); |
d7e09d03 | 180 | |
8040ddfb AV |
181 | for (nob = i = 0; i < niov; i++) |
182 | nob += iov[i].iov_len; | |
183 | ||
978b9b35 | 184 | LASSERT(nob <= conn->ksnc_rx_nob_wanted); |
d7e09d03 | 185 | |
8040ddfb AV |
186 | iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob); |
187 | rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); | |
d7e09d03 PT |
188 | |
189 | saved_csum = 0; | |
190 | if (conn->ksnc_proto == &ksocknal_protocol_v2x) { | |
191 | saved_csum = conn->ksnc_msg.ksm_csum; | |
192 | conn->ksnc_msg.ksm_csum = 0; | |
193 | } | |
194 | ||
5fd88337 | 195 | if (saved_csum) { |
d7e09d03 PT |
196 | /* accumulate checksum */ |
197 | for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { | |
978b9b35 | 198 | LASSERT(i < niov); |
d7e09d03 PT |
199 | |
200 | fragnob = iov[i].iov_len; | |
201 | if (fragnob > sum) | |
202 | fragnob = sum; | |
203 | ||
204 | conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, | |
205 | iov[i].iov_base, fragnob); | |
206 | } | |
207 | conn->ksnc_msg.ksm_csum = saved_csum; | |
208 | } | |
209 | ||
210 | return rc; | |
211 | } | |
212 | ||
d7e09d03 | 213 | int |
ff13fd40 | 214 | ksocknal_lib_recv_kiov(struct ksock_conn *conn) |
d7e09d03 | 215 | { |
97d10d0a | 216 | unsigned int niov = conn->ksnc_rx_nkiov; |
d7e09d03 PT |
217 | lnet_kiov_t *kiov = conn->ksnc_rx_kiov; |
218 | struct msghdr msg = { | |
97d10d0a | 219 | .msg_flags = 0 |
d7e09d03 | 220 | }; |
97d10d0a MS |
221 | int nob; |
222 | int i; | |
223 | int rc; | |
224 | void *base; | |
97d10d0a MS |
225 | int sum; |
226 | int fragnob; | |
d7e09d03 | 227 | |
65ffc679 AV |
228 | for (nob = i = 0; i < niov; i++) |
229 | nob += kiov[i].bv_len; | |
d7e09d03 | 230 | |
978b9b35 | 231 | LASSERT(nob <= conn->ksnc_rx_nob_wanted); |
d7e09d03 | 232 | |
65ffc679 | 233 | iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob); |
28ac4ad0 | 234 | rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); |
d7e09d03 | 235 | |
5fd88337 | 236 | if (conn->ksnc_msg.ksm_csum) { |
d7e09d03 | 237 | for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { |
978b9b35 | 238 | LASSERT(i < niov); |
d7e09d03 | 239 | |
65ffc679 AV |
240 | base = kmap(kiov[i].bv_page) + kiov[i].bv_offset; |
241 | fragnob = kiov[i].bv_len; | |
d7e09d03 PT |
242 | if (fragnob > sum) |
243 | fragnob = sum; | |
244 | ||
245 | conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, | |
246 | base, fragnob); | |
247 | ||
65ffc679 | 248 | kunmap(kiov[i].bv_page); |
d7e09d03 PT |
249 | } |
250 | } | |
71397095 | 251 | return rc; |
d7e09d03 PT |
252 | } |
253 | ||
254 | void | |
ff13fd40 | 255 | ksocknal_lib_csum_tx(struct ksock_tx *tx) |
d7e09d03 | 256 | { |
97d10d0a MS |
257 | int i; |
258 | __u32 csum; | |
259 | void *base; | |
d7e09d03 | 260 | |
f351bad2 | 261 | LASSERT(tx->tx_iov[0].iov_base == &tx->tx_msg); |
06ace26e | 262 | LASSERT(tx->tx_conn); |
d7e09d03 PT |
263 | LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); |
264 | ||
265 | tx->tx_msg.ksm_csum = 0; | |
266 | ||
f351bad2 | 267 | csum = ksocknal_csum(~0, tx->tx_iov[0].iov_base, |
d7e09d03 PT |
268 | tx->tx_iov[0].iov_len); |
269 | ||
06ace26e | 270 | if (tx->tx_kiov) { |
d7e09d03 | 271 | for (i = 0; i < tx->tx_nkiov; i++) { |
65ffc679 AV |
272 | base = kmap(tx->tx_kiov[i].bv_page) + |
273 | tx->tx_kiov[i].bv_offset; | |
d7e09d03 | 274 | |
65ffc679 | 275 | csum = ksocknal_csum(csum, base, tx->tx_kiov[i].bv_len); |
d7e09d03 | 276 | |
65ffc679 | 277 | kunmap(tx->tx_kiov[i].bv_page); |
d7e09d03 PT |
278 | } |
279 | } else { | |
280 | for (i = 1; i < tx->tx_niov; i++) | |
281 | csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base, | |
282 | tx->tx_iov[i].iov_len); | |
283 | } | |
284 | ||
285 | if (*ksocknal_tunables.ksnd_inject_csum_error) { | |
286 | csum++; | |
287 | *ksocknal_tunables.ksnd_inject_csum_error = 0; | |
288 | } | |
289 | ||
290 | tx->tx_msg.ksm_csum = csum; | |
291 | } | |
292 | ||
293 | int | |
ff13fd40 | 294 | ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle) |
d7e09d03 | 295 | { |
d7e09d03 | 296 | struct socket *sock = conn->ksnc_sock; |
97d10d0a MS |
297 | int len; |
298 | int rc; | |
d7e09d03 PT |
299 | |
300 | rc = ksocknal_connsock_addref(conn); | |
5fd88337 | 301 | if (rc) { |
978b9b35 | 302 | LASSERT(conn->ksnc_closing); |
d7e09d03 | 303 | *txmem = *rxmem = *nagle = 0; |
71397095 | 304 | return -ESHUTDOWN; |
d7e09d03 PT |
305 | } |
306 | ||
1ad6a73e | 307 | rc = lnet_sock_getbuf(sock, txmem, rxmem); |
5fd88337 | 308 | if (!rc) { |
d7e09d03 | 309 | len = sizeof(*nagle); |
80db2734 | 310 | rc = kernel_getsockopt(sock, SOL_TCP, TCP_NODELAY, |
c314c319 | 311 | (char *)nagle, &len); |
d7e09d03 PT |
312 | } |
313 | ||
314 | ksocknal_connsock_decref(conn); | |
315 | ||
5fd88337 | 316 | if (!rc) |
d7e09d03 PT |
317 | *nagle = !*nagle; |
318 | else | |
319 | *txmem = *rxmem = *nagle = 0; | |
320 | ||
71397095 | 321 | return rc; |
d7e09d03 PT |
322 | } |
323 | ||
324 | int | |
978b9b35 | 325 | ksocknal_lib_setup_sock(struct socket *sock) |
d7e09d03 | 326 | { |
97d10d0a MS |
327 | int rc; |
328 | int option; | |
329 | int keep_idle; | |
330 | int keep_intvl; | |
331 | int keep_count; | |
332 | int do_keepalive; | |
333 | struct linger linger; | |
d7e09d03 PT |
334 | |
335 | sock->sk->sk_allocation = GFP_NOFS; | |
336 | ||
4420cfd3 JS |
337 | /* |
338 | * Ensure this socket aborts active sends immediately when we close | |
339 | * it. | |
340 | */ | |
d7e09d03 PT |
341 | linger.l_onoff = 0; |
342 | linger.l_linger = 0; | |
343 | ||
c314c319 JS |
344 | rc = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, (char *)&linger, |
345 | sizeof(linger)); | |
5fd88337 | 346 | if (rc) { |
978b9b35 | 347 | CERROR("Can't set SO_LINGER: %d\n", rc); |
71397095 | 348 | return rc; |
d7e09d03 PT |
349 | } |
350 | ||
351 | option = -1; | |
c314c319 JS |
352 | rc = kernel_setsockopt(sock, SOL_TCP, TCP_LINGER2, (char *)&option, |
353 | sizeof(option)); | |
5fd88337 | 354 | if (rc) { |
978b9b35 | 355 | CERROR("Can't set SO_LINGER2: %d\n", rc); |
71397095 | 356 | return rc; |
d7e09d03 PT |
357 | } |
358 | ||
359 | if (!*ksocknal_tunables.ksnd_nagle) { | |
360 | option = 1; | |
361 | ||
80db2734 | 362 | rc = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, |
c314c319 | 363 | (char *)&option, sizeof(option)); |
5fd88337 | 364 | if (rc) { |
978b9b35 | 365 | CERROR("Can't disable nagle: %d\n", rc); |
71397095 | 366 | return rc; |
d7e09d03 PT |
367 | } |
368 | } | |
369 | ||
1ad6a73e JS |
370 | rc = lnet_sock_setbuf(sock, *ksocknal_tunables.ksnd_tx_buffer_size, |
371 | *ksocknal_tunables.ksnd_rx_buffer_size); | |
5fd88337 | 372 | if (rc) { |
978b9b35 | 373 | CERROR("Can't set buffer tx %d, rx %d buffers: %d\n", |
c314c319 JS |
374 | *ksocknal_tunables.ksnd_tx_buffer_size, |
375 | *ksocknal_tunables.ksnd_rx_buffer_size, rc); | |
71397095 | 376 | return rc; |
d7e09d03 PT |
377 | } |
378 | ||
379 | /* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */ | |
380 | ||
381 | /* snapshot tunables */ | |
382 | keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; | |
383 | keep_count = *ksocknal_tunables.ksnd_keepalive_count; | |
384 | keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; | |
385 | ||
386 | do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); | |
387 | ||
388 | option = (do_keepalive ? 1 : 0); | |
c314c319 JS |
389 | rc = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&option, |
390 | sizeof(option)); | |
5fd88337 | 391 | if (rc) { |
978b9b35 | 392 | CERROR("Can't set SO_KEEPALIVE: %d\n", rc); |
71397095 | 393 | return rc; |
d7e09d03 PT |
394 | } |
395 | ||
396 | if (!do_keepalive) | |
71397095 | 397 | return 0; |
d7e09d03 | 398 | |
c314c319 JS |
399 | rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE, (char *)&keep_idle, |
400 | sizeof(keep_idle)); | |
5fd88337 | 401 | if (rc) { |
978b9b35 | 402 | CERROR("Can't set TCP_KEEPIDLE: %d\n", rc); |
71397095 | 403 | return rc; |
d7e09d03 PT |
404 | } |
405 | ||
80db2734 | 406 | rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL, |
c314c319 | 407 | (char *)&keep_intvl, sizeof(keep_intvl)); |
5fd88337 | 408 | if (rc) { |
978b9b35 | 409 | CERROR("Can't set TCP_KEEPINTVL: %d\n", rc); |
71397095 | 410 | return rc; |
d7e09d03 PT |
411 | } |
412 | ||
c314c319 JS |
413 | rc = kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keep_count, |
414 | sizeof(keep_count)); | |
5fd88337 | 415 | if (rc) { |
978b9b35 | 416 | CERROR("Can't set TCP_KEEPCNT: %d\n", rc); |
71397095 | 417 | return rc; |
d7e09d03 PT |
418 | } |
419 | ||
71397095 | 420 | return 0; |
d7e09d03 PT |
421 | } |
422 | ||
423 | void | |
ff13fd40 | 424 | ksocknal_lib_push_conn(struct ksock_conn *conn) |
d7e09d03 | 425 | { |
97d10d0a | 426 | struct sock *sk; |
d7e09d03 | 427 | struct tcp_sock *tp; |
97d10d0a MS |
428 | int nonagle; |
429 | int val = 1; | |
430 | int rc; | |
d7e09d03 PT |
431 | |
432 | rc = ksocknal_connsock_addref(conn); | |
5fd88337 | 433 | if (rc) /* being shut down */ |
d7e09d03 PT |
434 | return; |
435 | ||
436 | sk = conn->ksnc_sock->sk; | |
437 | tp = tcp_sk(sk); | |
438 | ||
978b9b35 | 439 | lock_sock(sk); |
d7e09d03 PT |
440 | nonagle = tp->nonagle; |
441 | tp->nonagle = 1; | |
978b9b35 | 442 | release_sock(sk); |
d7e09d03 | 443 | |
80db2734 | 444 | rc = kernel_setsockopt(conn->ksnc_sock, SOL_TCP, TCP_NODELAY, |
c314c319 | 445 | (char *)&val, sizeof(val)); |
5fd88337 | 446 | LASSERT(!rc); |
d7e09d03 | 447 | |
978b9b35 | 448 | lock_sock(sk); |
d7e09d03 | 449 | tp->nonagle = nonagle; |
978b9b35 | 450 | release_sock(sk); |
d7e09d03 PT |
451 | |
452 | ksocknal_connsock_decref(conn); | |
453 | } | |
454 | ||
d7e09d03 PT |
455 | /* |
456 | * Socket callbacks on Linux | |
457 | */ | |
458 | static void | |
978b9b35 | 459 | ksocknal_data_ready(struct sock *sk) |
d7e09d03 | 460 | { |
ff13fd40 | 461 | struct ksock_conn *conn; |
d7e09d03 PT |
462 | |
463 | /* interleave correctly with closing sockets... */ | |
464 | LASSERT(!in_irq()); | |
465 | read_lock(&ksocknal_data.ksnd_global_lock); | |
466 | ||
467 | conn = sk->sk_user_data; | |
06ace26e | 468 | if (!conn) { /* raced with ksocknal_terminate_conn */ |
978b9b35 HE |
469 | LASSERT(sk->sk_data_ready != &ksocknal_data_ready); |
470 | sk->sk_data_ready(sk); | |
06f2f2f2 | 471 | } else { |
d7e09d03 | 472 | ksocknal_read_callback(conn); |
06f2f2f2 | 473 | } |
d7e09d03 PT |
474 | |
475 | read_unlock(&ksocknal_data.ksnd_global_lock); | |
d7e09d03 PT |
476 | } |
477 | ||
478 | static void | |
978b9b35 | 479 | ksocknal_write_space(struct sock *sk) |
d7e09d03 | 480 | { |
ff13fd40 | 481 | struct ksock_conn *conn; |
97d10d0a MS |
482 | int wspace; |
483 | int min_wpace; | |
d7e09d03 PT |
484 | |
485 | /* interleave correctly with closing sockets... */ | |
486 | LASSERT(!in_irq()); | |
487 | read_lock(&ksocknal_data.ksnd_global_lock); | |
488 | ||
489 | conn = sk->sk_user_data; | |
12c41f00 JH |
490 | wspace = sk_stream_wspace(sk); |
491 | min_wpace = sk_stream_min_wspace(sk); | |
d7e09d03 PT |
492 | |
493 | CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", | |
494 | sk, wspace, min_wpace, conn, | |
06ace26e | 495 | !conn ? "" : (conn->ksnc_tx_ready ? |
d7e09d03 | 496 | " ready" : " blocked"), |
06ace26e | 497 | !conn ? "" : (conn->ksnc_tx_scheduled ? |
d7e09d03 | 498 | " scheduled" : " idle"), |
06ace26e | 499 | !conn ? "" : (list_empty(&conn->ksnc_tx_queue) ? |
d7e09d03 PT |
500 | " empty" : " queued")); |
501 | ||
06ace26e | 502 | if (!conn) { /* raced with ksocknal_terminate_conn */ |
978b9b35 HE |
503 | LASSERT(sk->sk_write_space != &ksocknal_write_space); |
504 | sk->sk_write_space(sk); | |
d7e09d03 PT |
505 | |
506 | read_unlock(&ksocknal_data.ksnd_global_lock); | |
507 | return; | |
508 | } | |
509 | ||
510 | if (wspace >= min_wpace) { /* got enough space */ | |
511 | ksocknal_write_callback(conn); | |
512 | ||
4420cfd3 JS |
513 | /* |
514 | * Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the | |
d7e09d03 | 515 | * ENOMEM check in ksocknal_transmit is race-free (think about |
4420cfd3 JS |
516 | * it). |
517 | */ | |
978b9b35 | 518 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
d7e09d03 PT |
519 | } |
520 | ||
521 | read_unlock(&ksocknal_data.ksnd_global_lock); | |
522 | } | |
523 | ||
524 | void | |
ff13fd40 | 525 | ksocknal_lib_save_callback(struct socket *sock, struct ksock_conn *conn) |
d7e09d03 PT |
526 | { |
527 | conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; | |
528 | conn->ksnc_saved_write_space = sock->sk->sk_write_space; | |
529 | } | |
530 | ||
531 | void | |
ff13fd40 | 532 | ksocknal_lib_set_callback(struct socket *sock, struct ksock_conn *conn) |
d7e09d03 PT |
533 | { |
534 | sock->sk->sk_user_data = conn; | |
535 | sock->sk->sk_data_ready = ksocknal_data_ready; | |
536 | sock->sk->sk_write_space = ksocknal_write_space; | |
d7e09d03 PT |
537 | } |
538 | ||
539 | void | |
ff13fd40 | 540 | ksocknal_lib_reset_callback(struct socket *sock, struct ksock_conn *conn) |
d7e09d03 | 541 | { |
4420cfd3 JS |
542 | /* |
543 | * Remove conn's network callbacks. | |
d7e09d03 | 544 | * NB I _have_ to restore the callback, rather than storing a noop, |
4420cfd3 JS |
545 | * since the socket could survive past this module being unloaded!! |
546 | */ | |
d7e09d03 PT |
547 | sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; |
548 | sock->sk->sk_write_space = conn->ksnc_saved_write_space; | |
549 | ||
4420cfd3 JS |
550 | /* |
551 | * A callback could be in progress already; they hold a read lock | |
d7e09d03 | 552 | * on ksnd_global_lock (to serialise with me) and NOOP if |
4420cfd3 JS |
553 | * sk_user_data is NULL. |
554 | */ | |
d7e09d03 | 555 | sock->sk->sk_user_data = NULL; |
d7e09d03 PT |
556 | } |
557 | ||
558 | int | |
ff13fd40 | 559 | ksocknal_lib_memory_pressure(struct ksock_conn *conn) |
d7e09d03 | 560 | { |
97d10d0a | 561 | int rc = 0; |
ff13fd40 | 562 | struct ksock_sched *sched; |
d7e09d03 PT |
563 | |
564 | sched = conn->ksnc_scheduler; | |
565 | spin_lock_bh(&sched->kss_lock); | |
566 | ||
fb4a1539 | 567 | if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) && |
d7e09d03 | 568 | !conn->ksnc_tx_ready) { |
4420cfd3 JS |
569 | /* |
570 | * SOCK_NOSPACE is set when the socket fills | |
d7e09d03 PT |
571 | * and cleared in the write_space callback |
572 | * (which also sets ksnc_tx_ready). If | |
573 | * SOCK_NOSPACE and ksnc_tx_ready are BOTH | |
574 | * zero, I didn't fill the socket and | |
575 | * write_space won't reschedule me, so I | |
576 | * return -ENOMEM to get my caller to retry | |
4420cfd3 JS |
577 | * after a timeout |
578 | */ | |
d7e09d03 PT |
579 | rc = -ENOMEM; |
580 | } | |
581 | ||
582 | spin_unlock_bh(&sched->kss_lock); | |
583 | ||
584 | return rc; | |
585 | } |