Merge tag 'driver-core-4.6-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git...
[deliverable/linux.git] / drivers / staging / lustre / lnet / lnet / lib-socket.c
CommitLineData
d7e09d03
PT
1/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
60ecf96e 18 * http://www.gnu.org/licenses/gpl-2.0.html
d7e09d03
PT
19 *
20 * GPL HEADER END
21 */
22/*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
1dc563a6 26 * Copyright (c) 2012, 2015, Intel Corporation.
d7e09d03
PT
27 */
28/*
29 * This file is part of Lustre, http://www.lustre.org/
60ecf96e 30 * Lustre is a trademark of Seagate, Inc.
d7e09d03
PT
31 */
32#define DEBUG_SUBSYSTEM S_LNET
33
d7e09d03
PT
34#include <linux/if.h>
35#include <linux/in.h>
5c2414ef 36#include <linux/net.h>
d7e09d03 37#include <linux/file.h>
5c2414ef 38#include <linux/pagemap.h>
d7e09d03
PT
39/* For sys_open & sys_close */
40#include <linux/syscalls.h>
5c2414ef
JS
41#include <net/sock.h>
42
43#include "../../include/linux/libcfs/libcfs.h"
44#include "../../include/linux/lnet/lib-lnet.h"
45
46static int
47kernel_sock_unlocked_ioctl(struct file *filp, int cmd, unsigned long arg)
48{
49 mm_segment_t oldfs = get_fs();
50 int err;
51
52 set_fs(KERNEL_DS);
53 err = filp->f_op->unlocked_ioctl(filp, cmd, arg);
54 set_fs(oldfs);
55
56 return err;
57}
d7e09d03 58
2a74b9bd 59static int
1ad6a73e 60lnet_sock_ioctl(int cmd, unsigned long arg)
d7e09d03 61{
73092892
JS
62 struct file *sock_filp;
63 struct socket *sock;
64 int rc;
d7e09d03 65
73092892 66 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
5fd88337 67 if (rc) {
73092892 68 CERROR("Can't create socket: %d\n", rc);
d7e09d03
PT
69 return rc;
70 }
3eeb821e
OD
71
72 sock_filp = sock_alloc_file(sock, 0, NULL);
73 if (IS_ERR(sock_filp)) {
74 sock_release(sock);
75 rc = PTR_ERR(sock_filp);
76 goto out;
77 }
78
5c2414ef 79 rc = kernel_sock_unlocked_ioctl(sock_filp, cmd, arg);
3eeb821e
OD
80
81 fput(sock_filp);
82out:
d7e09d03
PT
83 return rc;
84}
85
86int
73092892 87lnet_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
d7e09d03 88{
73092892
JS
89 struct ifreq ifr;
90 int nob;
91 int rc;
92 __u32 val;
d7e09d03
PT
93
94 nob = strnlen(name, IFNAMSIZ);
95 if (nob == IFNAMSIZ) {
96 CERROR("Interface name %s too long\n", name);
97 return -EINVAL;
98 }
99
73092892 100 CLASSERT(sizeof(ifr.ifr_name) >= IFNAMSIZ);
d7e09d03 101
661489ad
SB
102 if (strlen(name) > sizeof(ifr.ifr_name) - 1)
103 return -E2BIG;
104 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
105
1ad6a73e 106 rc = lnet_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
5fd88337 107 if (rc) {
d7e09d03
PT
108 CERROR("Can't get flags for interface %s\n", name);
109 return rc;
110 }
111
5fd88337 112 if (!(ifr.ifr_flags & IFF_UP)) {
d7e09d03
PT
113 CDEBUG(D_NET, "Interface %s down\n", name);
114 *up = 0;
115 *ip = *mask = 0;
116 return 0;
117 }
d7e09d03
PT
118 *up = 1;
119
661489ad
SB
120 if (strlen(name) > sizeof(ifr.ifr_name) - 1)
121 return -E2BIG;
122 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
123
d7e09d03 124 ifr.ifr_addr.sa_family = AF_INET;
1ad6a73e 125 rc = lnet_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
5fd88337 126 if (rc) {
d7e09d03
PT
127 CERROR("Can't get IP address for interface %s\n", name);
128 return rc;
129 }
130
131 val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
132 *ip = ntohl(val);
133
661489ad
SB
134 if (strlen(name) > sizeof(ifr.ifr_name) - 1)
135 return -E2BIG;
136 strncpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
137
d7e09d03 138 ifr.ifr_addr.sa_family = AF_INET;
1ad6a73e 139 rc = lnet_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
5fd88337 140 if (rc) {
d7e09d03
PT
141 CERROR("Can't get netmask for interface %s\n", name);
142 return rc;
143 }
144
145 val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
146 *mask = ntohl(val);
147
148 return 0;
149}
1ad6a73e 150EXPORT_SYMBOL(lnet_ipif_query);
d7e09d03
PT
151
152int
73092892 153lnet_ipif_enumerate(char ***namesp)
d7e09d03
PT
154{
155 /* Allocate and fill in 'names', returning # interfaces/error */
73092892
JS
156 char **names;
157 int toobig;
158 int nalloc;
159 int nfound;
160 struct ifreq *ifr;
161 struct ifconf ifc;
162 int rc;
163 int nob;
164 int i;
d7e09d03
PT
165
166 nalloc = 16; /* first guess at max interfaces */
167 toobig = 0;
168 for (;;) {
09cbfeaf 169 if (nalloc * sizeof(*ifr) > PAGE_SIZE) {
d7e09d03 170 toobig = 1;
09cbfeaf 171 nalloc = PAGE_SIZE / sizeof(*ifr);
d7e09d03
PT
172 CWARN("Too many interfaces: only enumerating first %d\n",
173 nalloc);
174 }
175
176 LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
06ace26e 177 if (!ifr) {
73092892
JS
178 CERROR("ENOMEM enumerating up to %d interfaces\n",
179 nalloc);
d7e09d03
PT
180 rc = -ENOMEM;
181 goto out0;
182 }
183
184 ifc.ifc_buf = (char *)ifr;
185 ifc.ifc_len = nalloc * sizeof(*ifr);
186
1ad6a73e 187 rc = lnet_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
d7e09d03 188 if (rc < 0) {
73092892 189 CERROR("Error %d enumerating interfaces\n", rc);
d7e09d03
PT
190 goto out1;
191 }
192
5fd88337 193 LASSERT(!rc);
d7e09d03 194
51078e25 195 nfound = ifc.ifc_len / sizeof(*ifr);
73092892 196 LASSERT(nfound <= nalloc);
d7e09d03
PT
197
198 if (nfound < nalloc || toobig)
199 break;
200
201 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
202 nalloc *= 2;
203 }
204
5fd88337 205 if (!nfound)
d7e09d03
PT
206 goto out1;
207
208 LIBCFS_ALLOC(names, nfound * sizeof(*names));
06ace26e 209 if (!names) {
d7e09d03
PT
210 rc = -ENOMEM;
211 goto out1;
212 }
d7e09d03
PT
213
214 for (i = 0; i < nfound; i++) {
73092892 215 nob = strnlen(ifr[i].ifr_name, IFNAMSIZ);
d7e09d03
PT
216 if (nob == IFNAMSIZ) {
217 /* no space for terminating NULL */
218 CERROR("interface name %.*s too long (%d max)\n",
219 nob, ifr[i].ifr_name, IFNAMSIZ);
220 rc = -ENAMETOOLONG;
221 goto out2;
222 }
223
224 LIBCFS_ALLOC(names[i], IFNAMSIZ);
06ace26e 225 if (!names[i]) {
d7e09d03
PT
226 rc = -ENOMEM;
227 goto out2;
228 }
229
230 memcpy(names[i], ifr[i].ifr_name, nob);
231 names[i][nob] = 0;
232 }
233
234 *namesp = names;
235 rc = nfound;
236
73092892 237out2:
d7e09d03 238 if (rc < 0)
1ad6a73e 239 lnet_ipif_free_enumeration(names, nfound);
73092892 240out1:
d7e09d03 241 LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
73092892 242out0:
d7e09d03
PT
243 return rc;
244}
1ad6a73e 245EXPORT_SYMBOL(lnet_ipif_enumerate);
d7e09d03
PT
246
247void
73092892 248lnet_ipif_free_enumeration(char **names, int n)
d7e09d03 249{
73092892 250 int i;
d7e09d03 251
73092892 252 LASSERT(n > 0);
d7e09d03 253
06ace26e 254 for (i = 0; i < n && names[i]; i++)
d7e09d03
PT
255 LIBCFS_FREE(names[i], IFNAMSIZ);
256
257 LIBCFS_FREE(names, n * sizeof(*names));
258}
1ad6a73e 259EXPORT_SYMBOL(lnet_ipif_free_enumeration);
d7e09d03
PT
260
261int
73092892 262lnet_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
d7e09d03 263{
73092892 264 int rc;
27d81ace 265 long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
73092892 266 unsigned long then;
d7e09d03
PT
267 struct timeval tv;
268
73092892 269 LASSERT(nob > 0);
4420cfd3
JS
270 /*
271 * Caller may pass a zero timeout if she thinks the socket buffer is
272 * empty enough to take the whole message immediately
273 */
d7e09d03 274 for (;;) {
480f40de 275 struct kvec iov = {
d7e09d03
PT
276 .iov_base = buffer,
277 .iov_len = nob
278 };
279 struct msghdr msg = {
5fd88337 280 .msg_flags = !timeout ? MSG_DONTWAIT : 0
d7e09d03
PT
281 };
282
5fd88337 283 if (timeout) {
d7e09d03 284 /* Set send timeout to remaining time */
27d81ace 285 jiffies_to_timeval(jiffies_left, &tv);
80db2734 286 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
73092892 287 (char *)&tv, sizeof(tv));
5fd88337 288 if (rc) {
2d00bd17 289 CERROR("Can't set socket send timeout %ld.%06d: %d\n",
d7e09d03
PT
290 (long)tv.tv_sec, (int)tv.tv_usec, rc);
291 return rc;
292 }
293 }
294
d7e09d03 295 then = jiffies;
480f40de 296 rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
27d81ace 297 jiffies_left -= jiffies - then;
d7e09d03
PT
298
299 if (rc == nob)
300 return 0;
301
302 if (rc < 0)
303 return rc;
304
5fd88337 305 if (!rc) {
73092892 306 CERROR("Unexpected zero rc\n");
fbe7c6c7 307 return -ECONNABORTED;
d7e09d03
PT
308 }
309
27d81ace 310 if (jiffies_left <= 0)
d7e09d03
PT
311 return -EAGAIN;
312
313 buffer = ((char *)buffer) + rc;
314 nob -= rc;
315 }
fbe7c6c7 316 return 0;
d7e09d03 317}
1ad6a73e 318EXPORT_SYMBOL(lnet_sock_write);
d7e09d03
PT
319
320int
73092892 321lnet_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
d7e09d03 322{
73092892 323 int rc;
27d81ace 324 long jiffies_left = timeout * msecs_to_jiffies(MSEC_PER_SEC);
73092892 325 unsigned long then;
d7e09d03
PT
326 struct timeval tv;
327
73092892 328 LASSERT(nob > 0);
27d81ace 329 LASSERT(jiffies_left > 0);
d7e09d03
PT
330
331 for (;;) {
b2f42cfe 332 struct kvec iov = {
d7e09d03
PT
333 .iov_base = buffer,
334 .iov_len = nob
335 };
336 struct msghdr msg = {
73092892 337 .msg_flags = 0
d7e09d03
PT
338 };
339
340 /* Set receive timeout to remaining time */
27d81ace 341 jiffies_to_timeval(jiffies_left, &tv);
80db2734 342 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
73092892 343 (char *)&tv, sizeof(tv));
5fd88337 344 if (rc) {
d7e09d03
PT
345 CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
346 (long)tv.tv_sec, (int)tv.tv_usec, rc);
347 return rc;
348 }
349
d7e09d03 350 then = jiffies;
b2f42cfe 351 rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
27d81ace 352 jiffies_left -= jiffies - then;
d7e09d03
PT
353
354 if (rc < 0)
355 return rc;
356
5fd88337 357 if (!rc)
d7e09d03
PT
358 return -ECONNRESET;
359
360 buffer = ((char *)buffer) + rc;
361 nob -= rc;
362
5fd88337 363 if (!nob)
d7e09d03
PT
364 return 0;
365
27d81ace 366 if (jiffies_left <= 0)
d7e09d03
PT
367 return -ETIMEDOUT;
368 }
369}
1ad6a73e 370EXPORT_SYMBOL(lnet_sock_read);
d7e09d03
PT
371
372static int
73092892
JS
373lnet_sock_create(struct socket **sockp, int *fatal, __u32 local_ip,
374 int local_port)
d7e09d03 375{
73092892
JS
376 struct sockaddr_in locaddr;
377 struct socket *sock;
378 int rc;
379 int option;
d7e09d03
PT
380
381 /* All errors are fatal except bind failure if the port is in use */
382 *fatal = 1;
383
73092892 384 rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
d7e09d03 385 *sockp = sock;
5fd88337 386 if (rc) {
73092892 387 CERROR("Can't create socket: %d\n", rc);
fbe7c6c7 388 return rc;
d7e09d03
PT
389 }
390
d7e09d03 391 option = 1;
80db2734 392 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
73092892 393 (char *)&option, sizeof(option));
5fd88337 394 if (rc) {
d7e09d03
PT
395 CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
396 goto failed;
397 }
398
5fd88337 399 if (local_ip || local_port) {
d7e09d03
PT
400 memset(&locaddr, 0, sizeof(locaddr));
401 locaddr.sin_family = AF_INET;
402 locaddr.sin_port = htons(local_port);
5fd88337 403 locaddr.sin_addr.s_addr = !local_ip ?
d7e09d03
PT
404 INADDR_ANY : htonl(local_ip);
405
5c2414ef
JS
406 rc = kernel_bind(sock, (struct sockaddr *)&locaddr,
407 sizeof(locaddr));
d7e09d03
PT
408 if (rc == -EADDRINUSE) {
409 CDEBUG(D_NET, "Port %d already in use\n", local_port);
410 *fatal = 0;
411 goto failed;
412 }
5fd88337 413 if (rc) {
d7e09d03
PT
414 CERROR("Error trying to bind to port %d: %d\n",
415 local_port, rc);
416 goto failed;
417 }
418 }
d7e09d03
PT
419 return 0;
420
73092892 421failed:
d7e09d03
PT
422 sock_release(sock);
423 return rc;
424}
425
426int
73092892 427lnet_sock_setbuf(struct socket *sock, int txbufsize, int rxbufsize)
d7e09d03 428{
73092892
JS
429 int option;
430 int rc;
d7e09d03 431
5fd88337 432 if (txbufsize) {
d7e09d03 433 option = txbufsize;
80db2734 434 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
73092892 435 (char *)&option, sizeof(option));
5fd88337 436 if (rc) {
73092892
JS
437 CERROR("Can't set send buffer %d: %d\n",
438 option, rc);
fbe7c6c7 439 return rc;
d7e09d03
PT
440 }
441 }
442
5fd88337 443 if (rxbufsize) {
d7e09d03 444 option = rxbufsize;
80db2734 445 rc = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
c314c319 446 (char *)&option, sizeof(option));
5fd88337 447 if (rc) {
73092892
JS
448 CERROR("Can't set receive buffer %d: %d\n",
449 option, rc);
fbe7c6c7 450 return rc;
d7e09d03
PT
451 }
452 }
d7e09d03
PT
453 return 0;
454}
1ad6a73e 455EXPORT_SYMBOL(lnet_sock_setbuf);
d7e09d03
PT
456
457int
73092892 458lnet_sock_getaddr(struct socket *sock, bool remote, __u32 *ip, int *port)
d7e09d03
PT
459{
460 struct sockaddr_in sin;
73092892
JS
461 int len = sizeof(sin);
462 int rc;
d7e09d03 463
5c2414ef
JS
464 if (remote)
465 rc = kernel_getpeername(sock, (struct sockaddr *)&sin, &len);
466 else
467 rc = kernel_getsockname(sock, (struct sockaddr *)&sin, &len);
5fd88337 468 if (rc) {
73092892
JS
469 CERROR("Error %d getting sock %s IP/port\n",
470 rc, remote ? "peer" : "local");
d7e09d03
PT
471 return rc;
472 }
473
06ace26e 474 if (ip)
73092892 475 *ip = ntohl(sin.sin_addr.s_addr);
d7e09d03 476
06ace26e 477 if (port)
73092892 478 *port = ntohs(sin.sin_port);
d7e09d03
PT
479
480 return 0;
481}
1ad6a73e 482EXPORT_SYMBOL(lnet_sock_getaddr);
d7e09d03
PT
483
484int
73092892 485lnet_sock_getbuf(struct socket *sock, int *txbufsize, int *rxbufsize)
d7e09d03 486{
06ace26e 487 if (txbufsize)
d7e09d03 488 *txbufsize = sock->sk->sk_sndbuf;
d7e09d03 489
06ace26e 490 if (rxbufsize)
d7e09d03 491 *rxbufsize = sock->sk->sk_rcvbuf;
d7e09d03
PT
492
493 return 0;
494}
1ad6a73e 495EXPORT_SYMBOL(lnet_sock_getbuf);
d7e09d03
PT
496
497int
73092892
JS
498lnet_sock_listen(struct socket **sockp, __u32 local_ip, int local_port,
499 int backlog)
d7e09d03 500{
73092892
JS
501 int fatal;
502 int rc;
d7e09d03 503
1ad6a73e 504 rc = lnet_sock_create(sockp, &fatal, local_ip, local_port);
5fd88337 505 if (rc) {
d7e09d03
PT
506 if (!fatal)
507 CERROR("Can't create socket: port %d already in use\n",
508 local_port);
509 return rc;
510 }
511
5c2414ef 512 rc = kernel_listen(*sockp, backlog);
5fd88337 513 if (!rc)
d7e09d03
PT
514 return 0;
515
516 CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
517 sock_release(*sockp);
518 return rc;
519}
d7e09d03
PT
520
521int
73092892 522lnet_sock_accept(struct socket **newsockp, struct socket *sock)
d7e09d03 523{
73092892 524 wait_queue_t wait;
d7e09d03 525 struct socket *newsock;
73092892 526 int rc;
d7e09d03 527
4420cfd3
JS
528 /*
529 * XXX this should add a ref to sock->ops->owner, if
530 * TCP could be a module
531 */
d7e09d03
PT
532 rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
533 if (rc) {
534 CERROR("Can't allocate socket\n");
535 return rc;
536 }
537
538 newsock->ops = sock->ops;
539
d7e09d03
PT
540 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
541 if (rc == -EAGAIN) {
542 /* Nothing ready, so wait for activity */
933d36ba 543 init_waitqueue_entry(&wait, current);
834fe2e1 544 add_wait_queue(sk_sleep(sock->sk), &wait);
933d36ba 545 set_current_state(TASK_INTERRUPTIBLE);
d7e09d03 546 schedule();
834fe2e1 547 remove_wait_queue(sk_sleep(sock->sk), &wait);
d7e09d03
PT
548 rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
549 }
550
5fd88337 551 if (rc)
d7e09d03
PT
552 goto failed;
553
554 *newsockp = newsock;
555 return 0;
556
73092892 557failed:
d7e09d03
PT
558 sock_release(newsock);
559 return rc;
560}
d7e09d03 561
d7e09d03 562int
73092892
JS
563lnet_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip,
564 int local_port, __u32 peer_ip, int peer_port)
d7e09d03 565{
73092892
JS
566 struct sockaddr_in srvaddr;
567 int rc;
d7e09d03 568
1ad6a73e 569 rc = lnet_sock_create(sockp, fatal, local_ip, local_port);
5fd88337 570 if (rc)
d7e09d03
PT
571 return rc;
572
73092892 573 memset(&srvaddr, 0, sizeof(srvaddr));
d7e09d03
PT
574 srvaddr.sin_family = AF_INET;
575 srvaddr.sin_port = htons(peer_port);
576 srvaddr.sin_addr.s_addr = htonl(peer_ip);
577
5c2414ef
JS
578 rc = kernel_connect(*sockp, (struct sockaddr *)&srvaddr,
579 sizeof(srvaddr), 0);
5fd88337 580 if (!rc)
d7e09d03
PT
581 return 0;
582
4420cfd3
JS
583 /*
584 * EADDRNOTAVAIL probably means we're already connected to the same
d7e09d03
PT
585 * peer/port on the same local port on a differently typed
586 * connection. Let our caller retry with a different local
4420cfd3
JS
587 * port...
588 */
d7e09d03
PT
589 *fatal = !(rc == -EADDRNOTAVAIL);
590
591 CDEBUG_LIMIT(*fatal ? D_NETERROR : D_NET,
73092892
JS
592 "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc,
593 &local_ip, local_port, &peer_ip, peer_port);
d7e09d03
PT
594
595 sock_release(*sockp);
596 return rc;
597}
This page took 0.408504 seconds and 5 git commands to generate.