Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
6a5b99a4 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 | 19 | * |
d7e09d03 PT |
20 | * GPL HEADER END |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
1dc563a6 | 26 | * Copyright (c) 2011, 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
30 | * Lustre is a trademark of Sun Microsystems, Inc. | |
31 | * | |
32 | * lnet/klnds/o2iblnd/o2iblnd.h | |
33 | * | |
34 | * Author: Eric Barton <eric@bartonsoftware.com> | |
35 | */ | |
36 | ||
37 | #include <linux/module.h> | |
38 | #include <linux/kernel.h> | |
39 | #include <linux/mm.h> | |
40 | #include <linux/string.h> | |
41 | #include <linux/stat.h> | |
42 | #include <linux/errno.h> | |
43 | #include <linux/unistd.h> | |
44 | #include <linux/uio.h> | |
e8fd99fd | 45 | #include <linux/uaccess.h> |
d7e09d03 | 46 | |
6255049d | 47 | #include <linux/io.h> |
d7e09d03 | 48 | |
d7e09d03 PT |
49 | #include <linux/fs.h> |
50 | #include <linux/file.h> | |
d7e09d03 PT |
51 | #include <linux/list.h> |
52 | #include <linux/kmod.h> | |
53 | #include <linux/sysctl.h> | |
54 | #include <linux/pci.h> | |
55 | ||
56 | #include <net/sock.h> | |
57 | #include <linux/in.h> | |
58 | ||
18ddb13c JS |
59 | #include <rdma/rdma_cm.h> |
60 | #include <rdma/ib_cm.h> | |
61 | #include <rdma/ib_verbs.h> | |
62 | #include <rdma/ib_fmr_pool.h> | |
63 | ||
d7e09d03 PT |
64 | #define DEBUG_SUBSYSTEM S_LND |
65 | ||
490e7dd4 GKH |
66 | #include "../../../include/linux/libcfs/libcfs.h" |
67 | #include "../../../include/linux/lnet/lnet.h" | |
68 | #include "../../../include/linux/lnet/lib-lnet.h" | |
d7e09d03 | 69 | |
d7e09d03 PT |
70 | #define IBLND_PEER_HASH_SIZE 101 /* # peer lists */ |
71 | /* # scheduler loops before reschedule */ | |
72 | #define IBLND_RESCHED 100 | |
73 | ||
74 | #define IBLND_N_SCHED 2 | |
75 | #define IBLND_N_SCHED_HIGH 4 | |
76 | ||
/*
 * Module tunables: each field points at the backing module parameter so
 * runtime changes via sysfs are visible to all users of this struct.
 */
struct kib_tunables {
	int *kib_dev_failover;		 /* HCA failover */
	unsigned int *kib_service;	 /* IB service number */
	int *kib_min_reconnect_interval; /* first failed connection retry (seconds) */
	int *kib_max_reconnect_interval; /* retry interval backs off exponentially to this */
	int *kib_cksum;			 /* checksum struct kib_msg? */
	int *kib_timeout;		 /* comms timeout (seconds) */
	int *kib_keepalive;		 /* keepalive timeout (seconds) */
	int *kib_ntx;			 /* # tx descs */
	char **kib_default_ipif;	 /* default IPoIB interface */
	int *kib_retry_count;
	int *kib_rnr_retry_count;
	int *kib_ib_mtu;		 /* IB MTU */
	int *kib_require_priv_port;	 /* accept only privileged ports */
	int *kib_use_priv_port;		 /* use privileged port for active connect */
	int *kib_nscheds;		 /* # threads on each CPT */
};
d7e09d03 | 94 | |
8d9de3f4 | 95 | extern struct kib_tunables kiblnd_tunables; |
d7e09d03 | 96 | |
ec3d17c0 MS |
97 | #define IBLND_MSG_QUEUE_SIZE_V1 8 /* V1 only : # messages/RDMAs in-flight */ |
98 | #define IBLND_CREDIT_HIGHWATER_V1 7 /* V1 only : when eagerly to return credits */ | |
d7e09d03 | 99 | |
ec3d17c0 | 100 | #define IBLND_CREDITS_DEFAULT 8 /* default # of peer credits */ |
9797fb0e | 101 | #define IBLND_CREDITS_MAX ((typeof(((struct kib_msg *)0)->ibm_credits)) - 1) /* Max # of peer credits */ |
d7e09d03 | 102 | |
32c8deb8 AS |
103 | /* when eagerly to return credits */ |
104 | #define IBLND_CREDITS_HIGHWATER(t, v) ((v) == IBLND_MSG_VERSION_1 ? \ | |
105 | IBLND_CREDIT_HIGHWATER_V1 : \ | |
106 | t->lnd_peercredits_hiw) | |
d7e09d03 | 107 | |
494025c6 | 108 | #define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(current->nsproxy->net_ns, \ |
fa20105e GS |
109 | cb, dev, \ |
110 | ps, qpt) | |
d7e09d03 | 111 | |
d7e09d03 PT |
112 | /* 2 OOB shall suffice for 1 keepalive and 1 returning credits */ |
113 | #define IBLND_OOB_CAPABLE(v) ((v) != IBLND_MSG_VERSION_1) | |
114 | #define IBLND_OOB_MSGS(v) (IBLND_OOB_CAPABLE(v) ? 2 : 0) | |
115 | ||
bbc2d82f JS |
116 | #define IBLND_FRAG_SHIFT (PAGE_SHIFT - 12) /* frag size on wire is in 4K units */ |
117 | #define IBLND_MSG_SIZE (4 << 10) /* max size of queued messages (inc hdr) */ | |
118 | #define IBLND_MAX_RDMA_FRAGS (LNET_MAX_PAYLOAD >> 12)/* max # of fragments supported in 4K size */ | |
d7e09d03 PT |
119 | |
120 | /************************/ | |
121 | /* derived constants... */ | |
122 | /* Pools (shared by connections on each CPT) */ | |
123 | /* These pools can grow at runtime, so don't need give a very large value */ | |
124 | #define IBLND_TX_POOL 256 | |
d7e09d03 PT |
125 | #define IBLND_FMR_POOL 256 |
126 | #define IBLND_FMR_POOL_FLUSH 192 | |
127 | ||
2fb44f2b JF |
128 | #define IBLND_RX_MSGS(c) \ |
129 | ((c->ibc_queue_depth) * 2 + IBLND_OOB_MSGS(c->ibc_version)) | |
130 | #define IBLND_RX_MSG_BYTES(c) (IBLND_RX_MSGS(c) * IBLND_MSG_SIZE) | |
131 | #define IBLND_RX_MSG_PAGES(c) \ | |
132 | ((IBLND_RX_MSG_BYTES(c) + PAGE_SIZE - 1) / PAGE_SIZE) | |
d7e09d03 PT |
133 | |
134 | /* WRs and CQEs (per connection) */ | |
2fb44f2b JF |
135 | #define IBLND_RECV_WRS(c) IBLND_RX_MSGS(c) |
136 | #define IBLND_SEND_WRS(c) \ | |
bbc2d82f JS |
137 | (((c->ibc_max_frags + 1) << IBLND_FRAG_SHIFT) * \ |
138 | kiblnd_concurrent_sends(c->ibc_version, c->ibc_peer->ibp_ni)) | |
2fb44f2b | 139 | #define IBLND_CQ_ENTRIES(c) (IBLND_RECV_WRS(c) + IBLND_SEND_WRS(c)) |
d7e09d03 PT |
140 | |
141 | struct kib_hca_dev; | |
142 | ||
143 | /* o2iblnd can run over aliased interface */ | |
144 | #ifdef IFALIASZ | |
145 | #define KIB_IFNAME_SIZE IFALIASZ | |
146 | #else | |
147 | #define KIB_IFNAME_SIZE 256 | |
148 | #endif | |
149 | ||
/*
 * One IPoIB network interface usable by o2iblnd; chained on the global
 * kiblnd_data.kib_devs list and owning the HCA handle it runs over.
 */
struct kib_dev {
	struct list_head ibd_list;	/* chain on kib_devs */
	struct list_head ibd_fail_list;	/* chain on kib_failed_devs */
	__u32 ibd_ifip;			/* IPoIB interface IP */

	/* IPoIB interface name */
	char ibd_ifname[KIB_IFNAME_SIZE];
	int ibd_nnets;			/* # nets extant */

	unsigned long ibd_next_failover;	/* jiffies of next failover attempt */
	int ibd_failed_failover;	/* # failover failures */
	unsigned int ibd_failover;	/* failover in progress */
	unsigned int ibd_can_failover;	/* IPoIB interface is a bonding master */
	struct list_head ibd_nets;	/* kib_net instances on this device */
	struct kib_hca_dev *ibd_hdev;	/* current HCA binding */
};
d7e09d03 | 166 | |
/*
 * Refcounted per-HCA state; replaced wholesale on failover so in-flight
 * users keep a valid reference to the old HCA until they drop it.
 */
struct kib_hca_dev {
	struct rdma_cm_id *ibh_cmid;	/* listener cmid */
	struct ib_device *ibh_ibdev;	/* IB device */
	int ibh_page_shift;		/* page shift of current HCA */
	int ibh_page_size;		/* page size of current HCA */
	__u64 ibh_page_mask;		/* page mask of current HCA */
	int ibh_mr_shift;		/* bits shift of max MR size */
	__u64 ibh_mr_size;		/* size of MR */
	struct ib_mr *ibh_mrs;		/* global MR */
	struct ib_pd *ibh_pd;		/* PD */
	struct kib_dev *ibh_dev;	/* owner */
	atomic_t ibh_ref;		/* refcount; destroy on last put */
};
d7e09d03 PT |
180 | |
181 | /** # of seconds to keep pool alive */ | |
182 | #define IBLND_POOL_DEADLINE 300 | |
183 | /** # of seconds to retry if allocation failed */ | |
184 | #define IBLND_POOL_RETRY 1 | |
185 | ||
/*
 * Header for a run of premapped message pages; the page pointers are
 * allocated inline after the struct ([0] is the pre-C99 trailing-array
 * idiom — NOTE(review): a C99 flexible array member [] would be the
 * modern equivalent, left unchanged to preserve the layout tokens).
 */
struct kib_pages {
	int ibp_npages;			/* # pages */
	struct page *ibp_pages[0];	/* page array */
};
d7e09d03 | 190 | |
d7e09d03 PT |
191 | struct kib_pool; |
192 | struct kib_poolset; | |
193 | ||
194 | typedef int (*kib_ps_pool_create_t)(struct kib_poolset *ps, | |
195 | int inc, struct kib_pool **pp_po); | |
196 | typedef void (*kib_ps_pool_destroy_t)(struct kib_pool *po); | |
197 | typedef void (*kib_ps_node_init_t)(struct kib_pool *po, struct list_head *node); | |
198 | typedef void (*kib_ps_node_fini_t)(struct kib_pool *po, struct list_head *node); | |
199 | ||
200 | struct kib_net; | |
201 | ||
202 | #define IBLND_POOL_NAME_LEN 32 | |
203 | ||
/*
 * Generic pool-set: a named collection of kib_pool instances plus the
 * per-type create/destroy/init/fini callbacks used to grow it at runtime.
 */
struct kib_poolset {
	spinlock_t ps_lock;			/* serialize */
	struct kib_net *ps_net;			/* network it belongs to */
	char ps_name[IBLND_POOL_NAME_LEN];	/* pool set name */
	struct list_head ps_pool_list;		/* list of pools */
	struct list_head ps_failed_pool_list;	/* failed pool list */
	unsigned long ps_next_retry;		/* time stamp for retry if failed to allocate */
	int ps_increasing;			/* is allocating new pool */
	int ps_pool_size;			/* new pool size */
	int ps_cpt;				/* CPT id */

	kib_ps_pool_create_t ps_pool_create;	/* create a new pool */
	kib_ps_pool_destroy_t ps_pool_destroy;	/* destroy a pool */
	kib_ps_node_init_t ps_node_init;	/* initialize new allocated node */
	kib_ps_node_fini_t ps_node_fini;	/* finalize node */
};
d7e09d03 | 221 | |
/*
 * One pool within a kib_poolset: a batch of pre-allocated nodes with an
 * idle deadline after which an unused pool may be reclaimed.
 */
struct kib_pool {
	struct list_head po_list;	/* chain on pool list */
	struct list_head po_free_list;	/* pre-allocated node */
	struct kib_poolset *po_owner;	/* pool_set of this pool */
	unsigned long po_deadline;	/* deadline of this pool */
	int po_allocated;		/* # of elements in use */
	int po_failed;			/* pool is created on failed HCA */
	int po_size;			/* # of pre-allocated elements */
};
d7e09d03 | 231 | |
/* TX pool-set: the generic pool-set plus a monotonically increasing cookie. */
struct kib_tx_poolset {
	struct kib_poolset tps_poolset;	/* pool-set */
	__u64 tps_next_tx_cookie;	/* cookie of TX */
};
d7e09d03 | 236 | |
/* One TX pool: its descriptors and the premapped pages backing their msgs. */
struct kib_tx_pool {
	struct kib_pool tpo_pool;	/* pool */
	struct kib_hca_dev *tpo_hdev;	/* device for this pool */
	struct kib_tx *tpo_tx_descs;	/* all the tx descriptors */
	struct kib_pages *tpo_tx_pages;	/* premapped tx msg pages */
};
d7e09d03 | 243 | |
/*
 * FMR pool-set: like kib_poolset but specialised for FMR/fast-reg pools;
 * fps_version stamps pools so stale ones can be detected after failover.
 */
struct kib_fmr_poolset {
	spinlock_t fps_lock;			/* serialize */
	struct kib_net *fps_net;		/* IB network */
	struct list_head fps_pool_list;		/* FMR pool list */
	struct list_head fps_failed_pool_list;	/* failed FMR pool list */
	__u64 fps_version;			/* validity stamp */
	int fps_cpt;				/* CPT id */
	int fps_pool_size;
	int fps_flush_trigger;
	int fps_cache;
	int fps_increasing;			/* is allocating new pool */
	unsigned long fps_next_retry;		/* time stamp for retry if failed to allocate */
};
d7e09d03 | 258 | |
/* One fast-registration MR plus the pre-built invalidate/register WRs. */
struct kib_fast_reg_descriptor { /* For fast registration */
	struct list_head frd_list;	/* chain on fast_reg pool list */
	struct ib_send_wr frd_inv_wr;	/* local-invalidate work request */
	struct ib_reg_wr frd_fastreg_wr; /* fast-registration work request */
	struct ib_mr *frd_mr;		/* the MR being (in)validated */
	bool frd_valid;			/* MR currently holds a valid mapping */
};
266 | ||
/*
 * One FMR pool: either a classic ib_fmr_pool or a list of fast-reg
 * descriptors, discriminated by fpo_is_fmr.
 */
struct kib_fmr_pool {
	struct list_head fpo_list;	/* chain on pool list */
	struct kib_hca_dev *fpo_hdev;	/* device for this pool */
	struct kib_fmr_poolset *fpo_owner; /* owner of this pool */
	union {
		struct {
			struct ib_fmr_pool *fpo_fmr_pool; /* IB FMR pool */
		} fmr;
		struct { /* For fast registration */
			struct list_head fpo_pool_list;
			int fpo_pool_size;
		} fast_reg;
	};
	unsigned long fpo_deadline;	/* deadline of this pool */
	int fpo_failed;			/* fmr pool is failed */
	int fpo_map_count;		/* # of mapped FMR */
	int fpo_is_fmr;			/* selects fmr vs fast_reg in the union */
};
d7e09d03 | 285 | |
/* Handle for one mapped FMR/fast-reg region, tied back to its pool. */
struct kib_fmr {
	struct kib_fmr_pool *fmr_pool;		/* pool of FMR */
	struct ib_pool_fmr *fmr_pfmr;		/* IB pool fmr */
	struct kib_fast_reg_descriptor *fmr_frd; /* fast-reg descriptor (if not FMR) */
	u32 fmr_key;				/* rkey for the mapping */
};
d7e09d03 | 292 | |
/* One LNet network instance running on a kib_dev, with per-CPT pool-sets. */
struct kib_net {
	struct list_head ibn_list;	/* chain on struct kib_dev::ibd_nets */
	__u64 ibn_incarnation;		/* my epoch */
	int ibn_init;			/* initialisation state */
	int ibn_shutdown;		/* shutting down? */

	atomic_t ibn_npeers;		/* # peers extant */
	atomic_t ibn_nconns;		/* # connections extant */

	struct kib_tx_poolset **ibn_tx_ps;	/* tx pool-set (per CPT) */
	struct kib_fmr_poolset **ibn_fmr_ps;	/* fmr pool-set (per CPT) */

	struct kib_dev *ibn_dev;	/* underlying IB device */
};
d7e09d03 PT |
307 | |
308 | #define KIB_THREAD_SHIFT 16 | |
309 | #define KIB_THREAD_ID(cpt, tid) ((cpt) << KIB_THREAD_SHIFT | (tid)) | |
310 | #define KIB_THREAD_CPT(id) ((id) >> KIB_THREAD_SHIFT) | |
311 | #define KIB_THREAD_TID(id) ((id) & ((1UL << KIB_THREAD_SHIFT) - 1)) | |
312 | ||
/* Per-CPT scheduler state: the conns needing attention and their threads. */
struct kib_sched_info {
	spinlock_t ibs_lock;		/* serialise */
	wait_queue_head_t ibs_waitq;	/* schedulers sleep here */
	struct list_head ibs_conns;	/* conns to check for rx completions */
	int ibs_nthreads;		/* number of scheduler threads */
	int ibs_nthreads_max;		/* max allowed scheduler threads */
	int ibs_cpt;			/* CPT id */
};
321 | ||
/* Global o2iblnd state (single instance: kiblnd_data). */
struct kib_data {
	int kib_init;			/* initialisation state */
	int kib_shutdown;		/* shut down? */
	struct list_head kib_devs;	/* IB devices extant */
	struct list_head kib_failed_devs; /* list head of failed devices */
	wait_queue_head_t kib_failover_waitq; /* schedulers sleep here */
	atomic_t kib_nthreads;		/* # live threads */
	rwlock_t kib_global_lock;	/* stabilize net/dev/peer/conn ops */
	struct list_head *kib_peers;	/* hash table of all my known peers */
	int kib_peer_hash_size;		/* size of kib_peers */
	void *kib_connd;		/* the connd task (serialisation assertions) */
	struct list_head kib_connd_conns; /* connections to setup/teardown */
	struct list_head kib_connd_zombies; /* connections with zero refcount */
	/* connections to reconnect */
	struct list_head kib_reconn_list;
	/* peers wait for reconnection */
	struct list_head kib_reconn_wait;
	/**
	 * The second that peers are pulled out from \a kib_reconn_wait
	 * for reconnection.
	 */
	time64_t kib_reconn_sec;

	wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */
	spinlock_t kib_connd_lock;	/* serialise */
	struct ib_qp_attr kib_error_qpa; /* QP->ERROR */
	struct kib_sched_info **kib_scheds; /* percpt data for schedulers */
};
d7e09d03 | 350 | |
ec3d17c0 MS |
351 | #define IBLND_INIT_NOTHING 0 |
352 | #define IBLND_INIT_DATA 1 | |
353 | #define IBLND_INIT_ALL 2 | |
d7e09d03 PT |
354 | |
355 | /************************************************************************ | |
356 | * IB Wire message format. | |
357 | * These are sent in sender's byte order (i.e. receiver flips). | |
358 | */ | |
359 | ||
/* Wire format (sender byte order): connection parameters. */
struct kib_connparams {
	__u16 ibcp_queue_depth;		/* # messages/RDMAs in flight */
	__u16 ibcp_max_frags;		/* max RDMA fragments */
	__u32 ibcp_max_msg_size;	/* max immediate message size */
} WIRE_ATTR;
d7e09d03 | 365 | |
/* Wire format: immediate message — LNet header plus inline payload. */
struct kib_immediate_msg {
	lnet_hdr_t ibim_hdr;		/* portals header */
	char ibim_payload[0];		/* piggy-backed payload */
} WIRE_ATTR;
d7e09d03 | 370 | |
/* Wire format: one RDMA fragment. */
struct kib_rdma_frag {
	__u32 rf_nob;			/* # bytes this frag */
	__u64 rf_addr;			/* CAVEAT EMPTOR: misaligned!! */
} WIRE_ATTR;
d7e09d03 | 375 | |
/* Wire format: RDMA descriptor — key plus a trailing fragment array. */
struct kib_rdma_desc {
	__u32 rd_key;			/* local/remote key */
	__u32 rd_nfrags;		/* # fragments */
	struct kib_rdma_frag rd_frags[0]; /* buffer frags */
} WIRE_ATTR;
d7e09d03 | 381 | |
/* Wire format: PUT request (src->sink). */
struct kib_putreq_msg {
	lnet_hdr_t ibprm_hdr;		/* portals header */
	__u64 ibprm_cookie;		/* opaque completion cookie */
} WIRE_ATTR;
d7e09d03 | 386 | |
/* Wire format: PUT acknowledge (sink->src), carrying the sink buffer. */
struct kib_putack_msg {
	__u64 ibpam_src_cookie;		/* reflected completion cookie */
	__u64 ibpam_dst_cookie;		/* opaque completion cookie */
	struct kib_rdma_desc ibpam_rd;	/* sender's sink buffer */
} WIRE_ATTR;
d7e09d03 | 392 | |
/* Wire format: GET request (sink->src), carrying the sink's RDMA desc. */
struct kib_get_msg {
	lnet_hdr_t ibgm_hdr;		/* portals header */
	__u64 ibgm_cookie;		/* opaque completion cookie */
	struct kib_rdma_desc ibgm_rd;	/* rdma descriptor */
} WIRE_ATTR;
d7e09d03 | 398 | |
/* Wire format: completion notification for a PUT/GET. */
struct kib_completion_msg {
	__u64 ibcm_cookie;		/* opaque completion cookie */
	__s32 ibcm_status;		/* < 0 failure: >= 0 length */
} WIRE_ATTR;
d7e09d03 | 403 | |
/*
 * Wire format: every o2iblnd message. Sent in sender's byte order;
 * the receiver flips. The magic/version pair must never move.
 */
struct kib_msg {
	/* First 2 fields fixed FOR ALL TIME */
	__u32 ibm_magic;		/* I'm an ibnal message */
	__u16 ibm_version;		/* this is my version number */

	__u8 ibm_type;			/* msg type */
	__u8 ibm_credits;		/* returned credits */
	__u32 ibm_nob;			/* # bytes in whole message */
	__u32 ibm_cksum;		/* checksum (0 == no checksum) */
	__u64 ibm_srcnid;		/* sender's NID */
	__u64 ibm_srcstamp;		/* sender's incarnation */
	__u64 ibm_dstnid;		/* destination's NID */
	__u64 ibm_dststamp;		/* destination's incarnation */

	union {
		struct kib_connparams connparams;
		struct kib_immediate_msg immediate;
		struct kib_putreq_msg putreq;
		struct kib_putack_msg putack;
		struct kib_get_msg get;
		struct kib_completion_msg completion;
	} WIRE_ATTR ibm_u;
} WIRE_ATTR;
d7e09d03 | 427 | |
ec3d17c0 | 428 | #define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */ |
d7e09d03 | 429 | |
ec3d17c0 MS |
430 | #define IBLND_MSG_VERSION_1 0x11 |
431 | #define IBLND_MSG_VERSION_2 0x12 | |
432 | #define IBLND_MSG_VERSION IBLND_MSG_VERSION_2 | |
d7e09d03 | 433 | |
ec3d17c0 MS |
434 | #define IBLND_MSG_CONNREQ 0xc0 /* connection request */ |
435 | #define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */ | |
436 | #define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */ | |
437 | #define IBLND_MSG_IMMEDIATE 0xd1 /* immediate */ | |
438 | #define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ | |
439 | #define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ | |
440 | #define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ | |
441 | #define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ | |
442 | #define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ | |
443 | #define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ | |
d7e09d03 | 444 | |
/* Wire format: connection-reject private data. */
struct kib_rej {
	__u32 ibr_magic;		/* sender's magic */
	__u16 ibr_version;		/* sender's version */
	__u8 ibr_why;			/* reject reason */
	__u8 ibr_padding;		/* padding */
	__u64 ibr_incarnation;		/* incarnation of peer */
	struct kib_connparams ibr_cp;	/* connection parameters */
} WIRE_ATTR;
d7e09d03 PT |
453 | |
454 | /* connection rejection reasons */ | |
ec3d17c0 MS |
455 | #define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */ |
456 | #define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */ | |
457 | #define IBLND_REJECT_FATAL 3 /* Anything else */ | |
458 | #define IBLND_REJECT_CONN_UNCOMPAT 4 /* incompatible version peer */ | |
459 | #define IBLND_REJECT_CONN_STALE 5 /* stale peer */ | |
2fb44f2b JF |
460 | /* peer's rdma frags doesn't match mine */ |
461 | #define IBLND_REJECT_RDMA_FRAGS 6 | |
462 | /* peer's msg queue size doesn't match mine */ | |
463 | #define IBLND_REJECT_MSG_QUEUE_SIZE 7 | |
d7e09d03 PT |
464 | |
465 | /***********************************************************************/ | |
466 | ||
/* Receive descriptor: one posted receive buffer on a connection. */
struct kib_rx { /* receive message */
	struct list_head rx_list;	/* queue for attention */
	struct kib_conn *rx_conn;	/* owning conn */
	int rx_nob;			/* # bytes received (-1 while posted) */
	enum ib_wc_status rx_status;	/* completion status */
	struct kib_msg *rx_msg;		/* message buffer (host vaddr) */
	__u64 rx_msgaddr;		/* message buffer (I/O addr) */
	DECLARE_PCI_UNMAP_ADDR(rx_msgunmap); /* for dma_unmap_single() */
	struct ib_recv_wr rx_wrq;	/* receive work item... */
	struct ib_sge rx_sge;		/* ...and its memory */
};
d7e09d03 | 478 | |
d0bed035 JB |
479 | #define IBLND_POSTRX_DONT_POST 0 /* don't post */ |
480 | #define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */ | |
481 | #define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */ | |
482 | #define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give self back 1 reserved credit */ | |
d7e09d03 | 483 | |
/* Transmit descriptor: one in-flight (or idle) send, owned by a tx pool. */
struct kib_tx { /* transmit message */
	struct list_head tx_list;	/* queue on idle_txs ibc_tx_queue etc. */
	struct kib_tx_pool *tx_pool;	/* pool I'm from */
	struct kib_conn *tx_conn;	/* owning conn */
	short tx_sending;		/* # tx callbacks outstanding */
	short tx_queued;		/* queued for sending */
	short tx_waiting;		/* waiting for peer */
	int tx_status;			/* LNET completion status */
	unsigned long tx_deadline;	/* completion deadline */
	__u64 tx_cookie;		/* completion cookie */
	lnet_msg_t *tx_lntmsg[2];	/* lnet msgs to finalize on completion */
	struct kib_msg *tx_msg;		/* message buffer (host vaddr) */
	__u64 tx_msgaddr;		/* message buffer (I/O addr) */
	DECLARE_PCI_UNMAP_ADDR(tx_msgunmap); /* for dma_unmap_single() */
	int tx_nwrq;			/* # send work items */
	struct ib_rdma_wr *tx_wrq;	/* send work items... */
	struct ib_sge *tx_sge;		/* ...and their memory */
	struct kib_rdma_desc *tx_rd;	/* rdma descriptor */
	int tx_nfrags;			/* # entries in... */
	struct scatterlist *tx_frags;	/* dma_map_sg descriptor */
	__u64 *tx_pages;		/* rdma phys page addrs */
	struct kib_fmr fmr;		/* FMR */
	int tx_dmadir;			/* dma direction */
};
d7e09d03 | 508 | |
/* Scratch state used only while a connection is being established. */
struct kib_connvars {
	struct kib_msg cv_msg;	/* connection-in-progress variables */
};
d7e09d03 | 512 | |
/* One RDMA connection to a peer: credits, tx queues, rx buffers, CM state. */
struct kib_conn {
	struct kib_sched_info *ibc_sched;	/* scheduler information */
	struct kib_peer *ibc_peer;		/* owning peer */
	struct kib_hca_dev *ibc_hdev;		/* HCA bound on */
	struct list_head ibc_list;		/* stash on peer's conn list */
	struct list_head ibc_sched_list;	/* schedule for attention */
	__u16 ibc_version;			/* version of connection */
	/* reconnect later */
	__u16 ibc_reconnect:1;
	__u64 ibc_incarnation;			/* which instance of the peer */
	atomic_t ibc_refcount;			/* # users */
	int ibc_state;				/* what's happening */
	int ibc_nsends_posted;			/* # uncompleted sends */
	int ibc_noops_posted;			/* # uncompleted NOOPs */
	int ibc_credits;			/* # credits I have */
	int ibc_outstanding_credits;		/* # credits to return */
	int ibc_reserved_credits;		/* # ACK/DONE msg credits */
	int ibc_comms_error;			/* set on comms error */
	/* connections queue depth */
	__u16 ibc_queue_depth;
	/* connections max frags */
	__u16 ibc_max_frags;
	unsigned int ibc_nrx:16;		/* receive buffers owned */
	unsigned int ibc_scheduled:1;		/* scheduled for attention */
	unsigned int ibc_ready:1;		/* CQ callback fired */
	unsigned long ibc_last_send;		/* time of last send */
	struct list_head ibc_connd_list;	/* link chain for kiblnd_check_conns only */
	struct list_head ibc_early_rxs;		/* rxs completed before ESTABLISHED */
	struct list_head ibc_tx_noops;		/* IBLND_MSG_NOOPs for IBLND_MSG_VERSION_1 */
	struct list_head ibc_tx_queue;		/* sends that need a credit */
	struct list_head ibc_tx_queue_nocred;	/* sends that don't need a credit */
	struct list_head ibc_tx_queue_rsrvd;	/* sends that need to reserve an ACK/DONE msg */
	struct list_head ibc_active_txs;	/* active tx awaiting completion */
	spinlock_t ibc_lock;			/* serialise */
	struct kib_rx *ibc_rxs;			/* the rx descs */
	struct kib_pages *ibc_rx_pages;		/* premapped rx msg pages */

	struct rdma_cm_id *ibc_cmid;		/* CM id */
	struct ib_cq *ibc_cq;			/* completion queue */

	struct kib_connvars *ibc_connvars;	/* in-progress connection state */
};
d7e09d03 | 559 | |
ec3d17c0 MS |
560 | #define IBLND_CONN_INIT 0 /* being initialised */ |
561 | #define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */ | |
562 | #define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */ | |
563 | #define IBLND_CONN_ESTABLISHED 3 /* connection established */ | |
564 | #define IBLND_CONN_CLOSING 4 /* being closed */ | |
565 | #define IBLND_CONN_DISCONNECTED 5 /* disconnected */ | |
d7e09d03 | 566 | |
/* One known peer NID: its connections, pending txs and (re)connect state. */
struct kib_peer {
	struct list_head ibp_list;	/* stash on global peer list */
	lnet_nid_t ibp_nid;		/* who's on the other end(s) */
	lnet_ni_t *ibp_ni;		/* LNet interface */
	struct list_head ibp_conns;	/* all active connections */
	struct list_head ibp_tx_queue;	/* msgs waiting for a conn */
	__u64 ibp_incarnation;		/* incarnation of peer */
	/* when (in jiffies) I was last alive */
	unsigned long ibp_last_alive;
	/* # users */
	atomic_t ibp_refcount;
	/* version of peer */
	__u16 ibp_version;
	/* current passive connection attempts */
	unsigned short ibp_accepting;
	/* current active connection attempts */
	unsigned short ibp_connecting;
	/* reconnect this peer later */
	unsigned short ibp_reconnecting:1;
	/* counter of how many times we triggered a conn race */
	unsigned char ibp_races;
	/* # consecutive reconnection attempts to this peer */
	unsigned int ibp_reconnected;
	/* errno on closing this peer */
	int ibp_error;
	/* max map_on_demand */
	__u16 ibp_max_frags;
	/* max_peer_credits */
	__u16 ibp_queue_depth;
};
d7e09d03 | 597 | |
8d9de3f4 | 598 | extern struct kib_data kiblnd_data; |
d7e09d03 | 599 | |
8d9de3f4 | 600 | void kiblnd_hdev_destroy(struct kib_hca_dev *hdev); |
d7e09d03 | 601 | |
9e7d5bf3 AS |
602 | int kiblnd_msg_queue_size(int version, struct lnet_ni *ni); |
603 | ||
604 | /* max # of fragments configured by user */ | |
605 | static inline int | |
606 | kiblnd_cfg_rdma_frags(struct lnet_ni *ni) | |
607 | { | |
32c8deb8 AS |
608 | struct lnet_ioctl_config_o2iblnd_tunables *tunables; |
609 | int mod; | |
610 | ||
611 | tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; | |
612 | mod = tunables->lnd_map_on_demand; | |
bbc2d82f | 613 | return mod ? mod : IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT; |
9e7d5bf3 AS |
614 | } |
615 | ||
616 | static inline int | |
617 | kiblnd_rdma_frags(int version, struct lnet_ni *ni) | |
618 | { | |
619 | return version == IBLND_MSG_VERSION_1 ? | |
bbc2d82f | 620 | (IBLND_MAX_RDMA_FRAGS >> IBLND_FRAG_SHIFT) : |
9e7d5bf3 AS |
621 | kiblnd_cfg_rdma_frags(ni); |
622 | } | |
623 | ||
624 | static inline int | |
625 | kiblnd_concurrent_sends(int version, struct lnet_ni *ni) | |
626 | { | |
32c8deb8 | 627 | struct lnet_ioctl_config_o2iblnd_tunables *tunables; |
9e7d5bf3 AS |
628 | int concurrent_sends; |
629 | ||
32c8deb8 AS |
630 | tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib; |
631 | concurrent_sends = tunables->lnd_concurrent_sends; | |
9e7d5bf3 AS |
632 | |
633 | if (version == IBLND_MSG_VERSION_1) { | |
634 | if (concurrent_sends > IBLND_MSG_QUEUE_SIZE_V1 * 2) | |
635 | return IBLND_MSG_QUEUE_SIZE_V1 * 2; | |
636 | ||
637 | if (concurrent_sends < IBLND_MSG_QUEUE_SIZE_V1 / 2) | |
638 | return IBLND_MSG_QUEUE_SIZE_V1 / 2; | |
639 | } | |
640 | ||
641 | return concurrent_sends; | |
642 | } | |
643 | ||
/* Take a reference on an HCA dev; the caller must already hold one
 * (asserted), hence "locked" — no lock is needed for the increment. */
static inline void
kiblnd_hdev_addref_locked(struct kib_hca_dev *hdev)
{
	LASSERT(atomic_read(&hdev->ibh_ref) > 0);
	atomic_inc(&hdev->ibh_ref);
}
650 | ||
/* Drop a reference on an HCA dev; destroys it when the count hits zero. */
static inline void
kiblnd_hdev_decref(struct kib_hca_dev *hdev)
{
	LASSERT(atomic_read(&hdev->ibh_ref) > 0);
	if (atomic_dec_and_test(&hdev->ibh_ref))
		kiblnd_hdev_destroy(hdev);
}
658 | ||
659 | static inline int | |
8d9de3f4 | 660 | kiblnd_dev_can_failover(struct kib_dev *dev) |
d7e09d03 PT |
661 | { |
662 | if (!list_empty(&dev->ibd_fail_list)) /* already scheduled */ | |
663 | return 0; | |
664 | ||
5fd88337 | 665 | if (!*kiblnd_tunables.kib_dev_failover) /* disabled */ |
d7e09d03 PT |
666 | return 0; |
667 | ||
668 | if (*kiblnd_tunables.kib_dev_failover > 1) /* force failover */ | |
669 | return 1; | |
670 | ||
671 | return dev->ibd_can_failover; | |
672 | } | |
673 | ||
674 | #define kiblnd_conn_addref(conn) \ | |
675 | do { \ | |
676 | CDEBUG(D_NET, "conn[%p] (%d)++\n", \ | |
677 | (conn), atomic_read(&(conn)->ibc_refcount)); \ | |
678 | atomic_inc(&(conn)->ibc_refcount); \ | |
679 | } while (0) | |
680 | ||
/* Drop a connection reference.  The final put does not free the
 * connection directly: it is moved to the connd zombie list and the
 * connd thread is woken to perform the actual teardown.
 */
#define kiblnd_conn_decref(conn)					\
do {								    \
	unsigned long flags;					    \
									\
	CDEBUG(D_NET, "conn[%p] (%d)--\n",			      \
	       (conn), atomic_read(&(conn)->ibc_refcount));	     \
	LASSERT_ATOMIC_POS(&(conn)->ibc_refcount);		      \
	if (atomic_dec_and_test(&(conn)->ibc_refcount)) {	       \
		spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);  \
		list_add_tail(&(conn)->ibc_list,			\
			      &kiblnd_data.kib_connd_zombies);	  \
		wake_up(&kiblnd_data.kib_connd_waitq);		  \
		spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags);\
	}							       \
} while (0)
696 | ||
/* Take a reference on a peer, logging the pre-increment count. */
#define kiblnd_peer_addref(peer)				\
do {							    \
	CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n",		\
	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
	       atomic_read(&(peer)->ibp_refcount));	\
	atomic_inc(&(peer)->ibp_refcount);		  \
} while (0)
704 | ||
/* Drop a peer reference; unlike connections, the final put destroys
 * the peer synchronously via kiblnd_destroy_peer().
 */
#define kiblnd_peer_decref(peer)				\
do {							    \
	CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n",		\
	       (peer), libcfs_nid2str((peer)->ibp_nid),	 \
	       atomic_read(&(peer)->ibp_refcount));	\
	LASSERT_ATOMIC_POS(&(peer)->ibp_refcount);	      \
	if (atomic_dec_and_test(&(peer)->ibp_refcount))     \
		kiblnd_destroy_peer(peer);		      \
} while (0)
714 | ||
4d99b258 | 715 | static inline bool |
8d9de3f4 | 716 | kiblnd_peer_connecting(struct kib_peer *peer) |
4d99b258 LZ |
717 | { |
718 | return peer->ibp_connecting || | |
719 | peer->ibp_reconnecting || | |
720 | peer->ibp_accepting; | |
721 | } | |
722 | ||
/* A peer is idle when it has no established connections and none in
 * the process of being set up.
 */
static inline bool
kiblnd_peer_idle(struct kib_peer *peer)
{
	return !kiblnd_peer_connecting(peer) && list_empty(&peer->ibp_conns);
}
728 | ||
d7e09d03 | 729 | static inline struct list_head * |
270f0c31 | 730 | kiblnd_nid2peerlist(lnet_nid_t nid) |
d7e09d03 PT |
731 | { |
732 | unsigned int hash = | |
733 | ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size; | |
734 | ||
8a1d7b09 | 735 | return &kiblnd_data.kib_peers[hash]; |
d7e09d03 PT |
736 | } |
737 | ||
/* Is this peer currently linked into the peer hash table?  Non-empty
 * list linkage means it is.
 */
static inline int
kiblnd_peer_active(struct kib_peer *peer)
{
	/* Am I in the peer hash table? */
	return !list_empty(&peer->ibp_list);
}
744 | ||
8d9de3f4 JS |
745 | static inline struct kib_conn * |
746 | kiblnd_get_conn_locked(struct kib_peer *peer) | |
d7e09d03 | 747 | { |
270f0c31 | 748 | LASSERT(!list_empty(&peer->ibp_conns)); |
d7e09d03 PT |
749 | |
750 | /* just return the first connection */ | |
8d9de3f4 | 751 | return list_entry(peer->ibp_conns.next, struct kib_conn, ibc_list); |
d7e09d03 PT |
752 | } |
753 | ||
754 | static inline int | |
8d9de3f4 | 755 | kiblnd_send_keepalive(struct kib_conn *conn) |
d7e09d03 PT |
756 | { |
757 | return (*kiblnd_tunables.kib_keepalive > 0) && | |
758 | cfs_time_after(jiffies, conn->ibc_last_send + | |
27d81ace JY |
759 | msecs_to_jiffies(*kiblnd_tunables.kib_keepalive * |
760 | MSEC_PER_SEC)); | |
d7e09d03 PT |
761 | } |
762 | ||
/* Decide whether an explicit NOOP message must be sent to return
 * credits to the peer.  Returns non-zero when a NOOP is needed, 0 when
 * no NOOP is required or one can be piggybacked on queued traffic.
 * NOTE(review): the ordering of the credit checks below appears
 * deliberate — do not reorder without understanding the credit flow.
 */
static inline int
kiblnd_need_noop(struct kib_conn *conn)
{
	struct lnet_ioctl_config_o2iblnd_tunables *tunables;
	lnet_ni_t *ni = conn->ibc_peer->ibp_ni;

	LASSERT(conn->ibc_state >= IBLND_CONN_ESTABLISHED);
	tunables = &ni->ni_lnd_tunables->lt_tun_u.lt_o2ib;

	/* below the high-water mark and no keepalive due: nothing to do */
	if (conn->ibc_outstanding_credits <
	    IBLND_CREDITS_HIGHWATER(tunables, conn->ibc_version) &&
	    !kiblnd_send_keepalive(conn))
		return 0; /* No need to send NOOP */

	if (IBLND_OOB_CAPABLE(conn->ibc_version)) {
		if (!list_empty(&conn->ibc_tx_queue_nocred))
			return 0; /* NOOP can be piggybacked */

		/* No tx to piggyback NOOP onto or no credit to send a tx */
		return (list_empty(&conn->ibc_tx_queue) ||
			!conn->ibc_credits);
	}

	if (!list_empty(&conn->ibc_tx_noops) || /* NOOP already queued */
	    !list_empty(&conn->ibc_tx_queue_nocred) || /* piggyback NOOP */
	    !conn->ibc_credits) /* no credit */
		return 0;

	if (conn->ibc_credits == 1 && /* last credit reserved for */
	    !conn->ibc_outstanding_credits) /* giving back credits */
		return 0;

	/* No tx to piggyback NOOP onto or no credit to send a tx */
	return (list_empty(&conn->ibc_tx_queue) || conn->ibc_credits == 1);
}
798 | ||
/* Abort receives by forcing the connection's QP into the error state;
 * posted receive work requests then complete (flushed) per IB verbs
 * semantics.
 */
static inline void
kiblnd_abort_receives(struct kib_conn *conn)
{
	ib_modify_qp(conn->ibc_cmid->qp,
		     &kiblnd_data.kib_error_qpa, IB_QP_STATE);
}
805 | ||
806 | static inline const char * | |
8d9de3f4 | 807 | kiblnd_queue2str(struct kib_conn *conn, struct list_head *q) |
d7e09d03 PT |
808 | { |
809 | if (q == &conn->ibc_tx_queue) | |
810 | return "tx_queue"; | |
811 | ||
812 | if (q == &conn->ibc_tx_queue_rsrvd) | |
813 | return "tx_queue_rsrvd"; | |
814 | ||
815 | if (q == &conn->ibc_tx_queue_nocred) | |
816 | return "tx_queue_nocred"; | |
817 | ||
818 | if (q == &conn->ibc_active_txs) | |
819 | return "active_txs"; | |
820 | ||
821 | LBUG(); | |
822 | return NULL; | |
823 | } | |
824 | ||
d0bed035 JB |
825 | /* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the */ |
826 | /* lowest bits of the work request id to stash the work item type. */ | |
d7e09d03 | 827 | |
82fffff4 LZ |
828 | #define IBLND_WID_INVAL 0 |
829 | #define IBLND_WID_TX 1 | |
830 | #define IBLND_WID_RX 2 | |
831 | #define IBLND_WID_RDMA 3 | |
4d65730b DE |
832 | #define IBLND_WID_MR 4 |
833 | #define IBLND_WID_MASK 7UL | |
d7e09d03 PT |
834 | |
835 | static inline __u64 | |
270f0c31 | 836 | kiblnd_ptr2wreqid(void *ptr, int type) |
d7e09d03 PT |
837 | { |
838 | unsigned long lptr = (unsigned long)ptr; | |
839 | ||
5fd88337 JS |
840 | LASSERT(!(lptr & IBLND_WID_MASK)); |
841 | LASSERT(!(type & ~IBLND_WID_MASK)); | |
d7e09d03 PT |
842 | return (__u64)(lptr | type); |
843 | } | |
844 | ||
/* Recover the original pointer from a packed work request id by
 * masking off the stashed type bits.
 */
static inline void *
kiblnd_wreqid2ptr(__u64 wreqid)
{
	return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK);
}
850 | ||
/* Extract the IBLND_WID_* work item type from a packed work request id. */
static inline int
kiblnd_wreqid2type(__u64 wreqid)
{
	return wreqid & IBLND_WID_MASK;
}
856 | ||
/* Set the connection state.  The full memory barrier orders the state
 * store against subsequent memory accesses — presumably so other CPUs
 * observe the new state before later work; confirm against readers.
 */
static inline void
kiblnd_set_conn_state(struct kib_conn *conn, int state)
{
	conn->ibc_state = state;
	mb();
}
863 | ||
/* Initialise a message header: set its type and total size (header
 * plus @body_nob bytes of the ibm_u union payload).
 */
static inline void
kiblnd_init_msg(struct kib_msg *msg, int type, int body_nob)
{
	msg->ibm_type = type;
	msg->ibm_nob = offsetof(struct kib_msg, ibm_u) + body_nob;
}
870 | ||
871 | static inline int | |
8d9de3f4 | 872 | kiblnd_rd_size(struct kib_rdma_desc *rd) |
d7e09d03 PT |
873 | { |
874 | int i; | |
875 | int size; | |
876 | ||
877 | for (i = size = 0; i < rd->rd_nfrags; i++) | |
878 | size += rd->rd_frags[i].rf_nob; | |
879 | ||
880 | return size; | |
881 | } | |
882 | ||
/* Address of fragment @index of an RDMA descriptor. */
static inline __u64
kiblnd_rd_frag_addr(struct kib_rdma_desc *rd, int index)
{
	return rd->rd_frags[index].rf_addr;
}
888 | ||
/* Size in bytes of fragment @index of an RDMA descriptor. */
static inline __u32
kiblnd_rd_frag_size(struct kib_rdma_desc *rd, int index)
{
	return rd->rd_frags[index].rf_nob;
}
894 | ||
/* Memory key for fragment @index.  A single key covers the whole
 * descriptor, so @index is deliberately unused.
 */
static inline __u32
kiblnd_rd_frag_key(struct kib_rdma_desc *rd, int index)
{
	return rd->rd_key;
}
900 | ||
901 | static inline int | |
8d9de3f4 | 902 | kiblnd_rd_consume_frag(struct kib_rdma_desc *rd, int index, __u32 nob) |
d7e09d03 PT |
903 | { |
904 | if (nob < rd->rd_frags[index].rf_nob) { | |
905 | rd->rd_frags[index].rf_addr += nob; | |
906 | rd->rd_frags[index].rf_nob -= nob; | |
907 | } else { | |
83b912c6 | 908 | index++; |
d7e09d03 PT |
909 | } |
910 | ||
911 | return index; | |
912 | } | |
913 | ||
914 | static inline int | |
8d9de3f4 | 915 | kiblnd_rd_msg_size(struct kib_rdma_desc *rd, int msgtype, int n) |
d7e09d03 | 916 | { |
270f0c31 MBD |
917 | LASSERT(msgtype == IBLND_MSG_GET_REQ || |
918 | msgtype == IBLND_MSG_PUT_ACK); | |
d7e09d03 PT |
919 | |
920 | return msgtype == IBLND_MSG_GET_REQ ? | |
8d9de3f4 JS |
921 | offsetof(struct kib_get_msg, ibgm_rd.rd_frags[n]) : |
922 | offsetof(struct kib_putack_msg, ibpam_rd.rd_frags[n]); | |
d7e09d03 PT |
923 | } |
924 | ||
d7e09d03 PT |
925 | static inline __u64 |
926 | kiblnd_dma_mapping_error(struct ib_device *dev, u64 dma_addr) | |
927 | { | |
928 | return ib_dma_mapping_error(dev, dma_addr); | |
929 | } | |
930 | ||
/* Thin wrapper over ib_dma_map_single(). */
static inline __u64 kiblnd_dma_map_single(struct ib_device *dev,
					  void *msg, size_t size,
					  enum dma_data_direction direction)
{
	return ib_dma_map_single(dev, msg, size, direction);
}
937 | ||
/* Thin wrapper over ib_dma_unmap_single(). */
static inline void kiblnd_dma_unmap_single(struct ib_device *dev,
					   __u64 addr, size_t size,
					   enum dma_data_direction direction)
{
	ib_dma_unmap_single(dev, addr, size, direction);
}
944 | ||
/* With the ib_dma_* API no per-mapping unmap state is recorded: the
 * "set" is a no-op and the address is used directly.
 */
#define KIBLND_UNMAP_ADDR_SET(p, m, a) do {} while (0)
#define KIBLND_UNMAP_ADDR(p, m, a) (a)
947 | ||
/* Thin wrapper over ib_dma_map_sg(). */
static inline int kiblnd_dma_map_sg(struct ib_device *dev,
				    struct scatterlist *sg, int nents,
				    enum dma_data_direction direction)
{
	return ib_dma_map_sg(dev, sg, nents, direction);
}
954 | ||
/* Thin wrapper over ib_dma_unmap_sg(). */
static inline void kiblnd_dma_unmap_sg(struct ib_device *dev,
				       struct scatterlist *sg, int nents,
				       enum dma_data_direction direction)
{
	ib_dma_unmap_sg(dev, sg, nents, direction);
}
961 | ||
/* Thin wrapper over ib_sg_dma_address(). */
static inline __u64 kiblnd_sg_dma_address(struct ib_device *dev,
					  struct scatterlist *sg)
{
	return ib_sg_dma_address(dev, sg);
}
967 | ||
/* Thin wrapper over ib_sg_dma_len(). */
static inline unsigned int kiblnd_sg_dma_len(struct ib_device *dev,
					     struct scatterlist *sg)
{
	return ib_sg_dma_len(dev, sg);
}
973 | ||
d0bed035 JB |
974 | /* XXX We use KIBLND_CONN_PARAM(e) as writable buffer, it's not strictly */ |
975 | /* right because OFED1.2 defines it as const, to use it we have to add */ | |
976 | /* (void *) cast to overcome "const" */ | |
d7e09d03 | 977 | |
ec3d17c0 MS |
978 | #define KIBLND_CONN_PARAM(e) ((e)->param.conn.private_data) |
979 | #define KIBLND_CONN_PARAM_LEN(e) ((e)->param.conn.private_data_len) | |
d7e09d03 | 980 | |
8d9de3f4 | 981 | struct ib_mr *kiblnd_find_rd_dma_mr(struct lnet_ni *ni, struct kib_rdma_desc *rd, |
2fb44f2b | 982 | int negotiated_nfrags); |
8d9de3f4 JS |
983 | void kiblnd_map_rx_descs(struct kib_conn *conn); |
984 | void kiblnd_unmap_rx_descs(struct kib_conn *conn); | |
985 | void kiblnd_pool_free_node(struct kib_pool *pool, struct list_head *node); | |
986 | struct list_head *kiblnd_pool_alloc_node(struct kib_poolset *ps); | |
d7e09d03 | 987 | |
8d9de3f4 JS |
988 | int kiblnd_fmr_pool_map(struct kib_fmr_poolset *fps, struct kib_tx *tx, |
989 | struct kib_rdma_desc *rd, __u32 nob, __u64 iov, | |
990 | struct kib_fmr *fmr); | |
991 | void kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status); | |
d7e09d03 | 992 | |
f6e50066 | 993 | int kiblnd_tunables_setup(struct lnet_ni *ni); |
025ba826 | 994 | void kiblnd_tunables_init(void); |
d7e09d03 | 995 | |
270f0c31 | 996 | int kiblnd_connd(void *arg); |
d7e09d03 PT |
997 | int kiblnd_scheduler(void *arg); |
998 | int kiblnd_thread_start(int (*fn)(void *arg), void *arg, char *name); | |
270f0c31 | 999 | int kiblnd_failover_thread(void *arg); |
d7e09d03 | 1000 | |
8d9de3f4 | 1001 | int kiblnd_alloc_pages(struct kib_pages **pp, int cpt, int npages); |
d7e09d03 PT |
1002 | |
1003 | int kiblnd_cm_callback(struct rdma_cm_id *cmid, | |
1004 | struct rdma_cm_event *event); | |
1005 | int kiblnd_translate_mtu(int value); | |
1006 | ||
8d9de3f4 JS |
1007 | int kiblnd_dev_failover(struct kib_dev *dev); |
1008 | int kiblnd_create_peer(lnet_ni_t *ni, struct kib_peer **peerp, lnet_nid_t nid); | |
1009 | void kiblnd_destroy_peer(struct kib_peer *peer); | |
1010 | bool kiblnd_reconnect_peer(struct kib_peer *peer); | |
1011 | void kiblnd_destroy_dev(struct kib_dev *dev); | |
1012 | void kiblnd_unlink_peer_locked(struct kib_peer *peer); | |
1013 | struct kib_peer *kiblnd_find_peer_locked(lnet_nid_t nid); | |
1014 | int kiblnd_close_stale_conns_locked(struct kib_peer *peer, | |
c314c319 | 1015 | int version, __u64 incarnation); |
8d9de3f4 | 1016 | int kiblnd_close_peer_conns_locked(struct kib_peer *peer, int why); |
d7e09d03 | 1017 | |
8d9de3f4 JS |
1018 | struct kib_conn *kiblnd_create_conn(struct kib_peer *peer, |
1019 | struct rdma_cm_id *cmid, | |
1020 | int state, int version); | |
1021 | void kiblnd_destroy_conn(struct kib_conn *conn, bool free_conn); | |
1022 | void kiblnd_close_conn(struct kib_conn *conn, int error); | |
1023 | void kiblnd_close_conn_locked(struct kib_conn *conn, int error); | |
d7e09d03 | 1024 | |
8d9de3f4 | 1025 | void kiblnd_launch_tx(lnet_ni_t *ni, struct kib_tx *tx, lnet_nid_t nid); |
270f0c31 | 1026 | void kiblnd_txlist_done(lnet_ni_t *ni, struct list_head *txlist, |
c314c319 | 1027 | int status); |
d7e09d03 PT |
1028 | |
1029 | void kiblnd_qp_event(struct ib_event *event, void *arg); | |
1030 | void kiblnd_cq_event(struct ib_event *event, void *arg); | |
1031 | void kiblnd_cq_completion(struct ib_cq *cq, void *arg); | |
1032 | ||
8d9de3f4 | 1033 | void kiblnd_pack_msg(lnet_ni_t *ni, struct kib_msg *msg, int version, |
c314c319 | 1034 | int credits, lnet_nid_t dstnid, __u64 dststamp); |
8d9de3f4 JS |
1035 | int kiblnd_unpack_msg(struct kib_msg *msg, int nob); |
1036 | int kiblnd_post_rx(struct kib_rx *rx, int credit); | |
d7e09d03 PT |
1037 | |
1038 | int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); | |
1039 | int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, | |
c1b7b8eb | 1040 | struct iov_iter *to, unsigned int rlen); |