Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
4f3ca893 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 PT |
19 | * |
20 | * GPL HEADER END | |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
1dc563a6 | 26 | * Copyright (c) 2012, 2015, Intel Corporation. |
d7e09d03 PT |
27 | */ |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
4f3ca893 | 30 | * Lustre is a trademark of Seagate, Inc. |
d7e09d03 PT |
31 | * |
32 | * lnet/include/lnet/lib-types.h | |
d7e09d03 PT |
33 | */ |
34 | ||
35 | #ifndef __LNET_LIB_TYPES_H__ | |
36 | #define __LNET_LIB_TYPES_H__ | |
37 | ||
db18b8e9 JS |
38 | #include <linux/kthread.h> |
39 | #include <linux/uio.h> | |
40 | #include <linux/types.h> | |
21ca52bb | 41 | #include <linux/completion.h> |
d7e09d03 | 42 | |
db18b8e9 | 43 | #include "types.h" |
0fbbced2 | 44 | #include "lnetctl.h" |
d7e09d03 | 45 | |
db18b8e9 JS |
46 | /* Max payload size; the build fails unless LNET_MTU <= LNET_MAX_PAYLOAD <= PAGE_SIZE * LNET_MAX_IOV */ |
47 | #define LNET_MAX_PAYLOAD CONFIG_LNET_MAX_PAYLOAD | |
48 | #if (LNET_MAX_PAYLOAD < LNET_MTU) | |
49 | # error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" | |
50 | #elif (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) | |
188acc61 | 51 | # error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" |
db18b8e9 | 52 | #endif |
d7e09d03 PT |
53 | |
54 | /* forward refs */ | |
55 | struct lnet_libmd; | |
56 | ||
57 | typedef struct lnet_msg { /* state kept for one LNet message: addressing, commit/credit flags, peers, MD, payload fragments, event and header */ |
188acc61 JS |
58 | struct list_head msg_activelist; /* list linkage; presumably the list msg_onactivelist refers to - confirm */ |
59 | struct list_head msg_list; /* Q for credits/MD */ |
d7e09d03 | 60 | |
188acc61 | 61 | lnet_process_id_t msg_target; |
d7e09d03 PT |
62 | /* source NID; used only when building the event */ |
63 | lnet_nid_t msg_from; | |
64 | __u32 msg_type; | |
65 | ||
253d50eb | 66 | /* committed for sending */ |
d7e09d03 PT |
67 | unsigned int msg_tx_committed:1; |
68 | /* CPT # this message committed for sending */ | |
69 | unsigned int msg_tx_cpt:15; | |
253d50eb | 70 | /* committed for receiving */ |
d7e09d03 PT |
71 | unsigned int msg_rx_committed:1; |
72 | /* CPT # this message committed for receiving */ | |
73 | unsigned int msg_rx_cpt:15; | |
74 | /* queued for tx credit */ | |
75 | unsigned int msg_tx_delayed:1; | |
76 | /* queued for RX buffer */ | |
77 | unsigned int msg_rx_delayed:1; | |
78 | /* ready for pending on RX delay list */ | |
79 | unsigned int msg_rx_ready_delay:1; | |
80 | ||
188acc61 JS |
81 | unsigned int msg_vmflush:1; /* VM trying to free memory */ |
82 | unsigned int msg_target_is_router:1; /* sending to a router */ | |
83 | unsigned int msg_routing:1; /* being forwarded */ | |
84 | unsigned int msg_ack:1; /* ack on finalize (PUT) */ | |
85 | unsigned int msg_sending:1; /* outgoing message */ | |
86 | unsigned int msg_receiving:1; /* being received */ | |
87 | unsigned int msg_txcredit:1; /* taken an NI send credit */ | |
88 | unsigned int msg_peertxcredit:1; /* taken a peer send credit */ | |
4420cfd3 | 89 | unsigned int msg_rtrcredit:1; /* taken a global router credit */ |
188acc61 JS |
90 | unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ |
91 | unsigned int msg_onactivelist:1; /* on the activelist */ | |
b7acfc95 | 92 | unsigned int msg_rdma_get:1; /* NOTE(review): undocumented; presumably marks an RDMA GET - confirm */ |
188acc61 JS |
93 | |
94 | struct lnet_peer *msg_txpeer; /* peer I'm sending to */ | |
95 | struct lnet_peer *msg_rxpeer; /* peer I received from */ | |
96 | ||
97 | void *msg_private; | |
98 | struct lnet_libmd *msg_md; | |
99 | ||
100 | unsigned int msg_len; | |
101 | unsigned int msg_wanted; | |
102 | unsigned int msg_offset; | |
103 | unsigned int msg_niov; | |
104 | struct kvec *msg_iov; | |
105 | lnet_kiov_t *msg_kiov; | |
106 | ||
107 | lnet_event_t msg_ev; | |
108 | lnet_hdr_t msg_hdr; | |
d7e09d03 PT |
109 | } lnet_msg_t; |
110 | ||
d7e09d03 | 111 | typedef struct lnet_libhandle { /* handle embedded in EQ, ME and MD objects (eq_lh, me_lh, md_lh) */ |
188acc61 JS |
112 | struct list_head lh_hash_chain; /* presumably chains the handle on a lnet_res_container::rec_lh_hash bucket - confirm */ |
113 | __u64 lh_cookie; | |
d7e09d03 PT |
114 | } lnet_libhandle_t; |
115 | ||
/*
 * lh_entry - map a pointer to an embedded member back to its container.
 * @ptr:    address of the 'member' field inside an object of type 'type'
 * @type:   the containing structure type
 * @member: name of the field inside 'type' that @ptr points at
 *
 * Evaluates to a 'type *' pointing at the enclosing object.  The offset is
 * taken with offsetof() instead of the address of a member of a NULL
 * object, which is undefined behaviour in ISO C.
 */
#define lh_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
d7e09d03 PT |
118 | |
119 | typedef struct lnet_eq { /* EQ object: handle, enqueue/dequeue sequence numbers, callback and event array */ |
188acc61 JS |
120 | struct list_head eq_list; |
121 | lnet_libhandle_t eq_lh; | |
122 | lnet_seq_t eq_enq_seq; | |
123 | lnet_seq_t eq_deq_seq; | |
124 | unsigned int eq_size; /* presumably the number of entries in eq_events - confirm */ | |
125 | lnet_eq_handler_t eq_callback; | |
126 | lnet_event_t *eq_events; | |
d7e09d03 PT |
127 | int **eq_refs; /* percpt refcount for EQ */ |
128 | } lnet_eq_t; | |
129 | ||
130 | typedef struct lnet_me { /* ME object: handle, match id/bits and the MD it points at */ |
188acc61 JS |
131 | struct list_head me_list; |
132 | lnet_libhandle_t me_lh; | |
133 | lnet_process_id_t me_match_id; | |
134 | unsigned int me_portal; | |
135 | unsigned int me_pos; /* hash offset in lnet_match_table::mt_mhash */ | |
136 | __u64 me_match_bits; | |
137 | __u64 me_ignore_bits; | |
138 | lnet_unlink_t me_unlink; | |
139 | struct lnet_libmd *me_md; /* MD for this ME; lnet_libmd::md_me points back */ | |
d7e09d03 PT |
140 | } lnet_me_t; |
141 | ||
142 | typedef struct lnet_libmd { /* MD object: handle, owning ME, buffer geometry, options/flags, EQ and fragment array */ |
188acc61 JS |
143 | struct list_head md_list; |
144 | lnet_libhandle_t md_lh; | |
145 | lnet_me_t *md_me; | |
146 | char *md_start; | |
147 | unsigned int md_offset; | |
148 | unsigned int md_length; | |
149 | unsigned int md_max_size; | |
150 | int md_threshold; | |
151 | int md_refcount; | |
152 | unsigned int md_options; | |
153 | unsigned int md_flags; /* presumably LNET_MD_FLAG_* bits - confirm */ | |
154 | void *md_user_ptr; | |
155 | lnet_eq_t *md_eq; | |
156 | unsigned int md_niov; /* # frags */ | |
d7e09d03 | 157 | union { /* fragments as either kvec or kiov entries; both arrays hold LNET_MAX_IOV slots */ |
188acc61 JS |
158 | struct kvec iov[LNET_MAX_IOV]; |
159 | lnet_kiov_t kiov[LNET_MAX_IOV]; | |
d7e09d03 PT |
160 | } md_iov; |
161 | } lnet_libmd_t; | |
162 | ||
188acc61 JS |
163 | #define LNET_MD_FLAG_ZOMBIE (1 << 0) /* distinct single-bit flags; presumably stored in lnet_libmd_t::md_flags - confirm */ |
164 | #define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) | |
165 | #define LNET_MD_FLAG_ABORTED (1 << 2) | |
d7e09d03 | 166 | |
d7e09d03 PT |
167 | typedef struct { |
168 | /* failure simulation: one entry per peer we are trying to fail */ | |
188acc61 JS |
169 | struct list_head tp_list; /* chain on lnet_t::ln_test_peers */ |
170 | lnet_nid_t tp_nid; /* matching nid */ | |
171 | unsigned int tp_threshold; /* # failures to simulate */ | |
d7e09d03 PT |
172 | } lnet_test_peer_t; |
173 | ||
188acc61 JS |
174 | #define LNET_COOKIE_TYPE_MD 1 /* cookie type codes; each fits in LNET_COOKIE_TYPE_BITS bits */ |
175 | #define LNET_COOKIE_TYPE_ME 2 | |
176 | #define LNET_COOKIE_TYPE_EQ 3 | |
177 | #define LNET_COOKIE_TYPE_BITS 2 | |
d7e09d03 PT |
178 | #define LNET_COOKIE_MASK ((1ULL << LNET_COOKIE_TYPE_BITS) - 1ULL) /* == 3: selects the low LNET_COOKIE_TYPE_BITS bits */ |
179 | ||
188acc61 | 180 | struct lnet_ni; /* forward ref */ |
d7e09d03 | 181 | |
3b77f472 | 182 | typedef struct lnet_lnd { /* LND descriptor: bookkeeping fields plus the callback table an LND fills in */ |
d7e09d03 | 183 | /* fields managed by portals */ |
188acc61 JS |
184 | struct list_head lnd_list; /* stash in the LND table */ |
185 | int lnd_refcount; /* # active instances */ | |
d7e09d03 PT |
186 | |
187 | /* fields initialised by the LND */ | |
db18b8e9 | 188 | __u32 lnd_type; |
d7e09d03 | 189 | |
b11866b3 AO |
190 | int (*lnd_startup)(struct lnet_ni *ni); |
191 | void (*lnd_shutdown)(struct lnet_ni *ni); | |
d7e09d03 PT |
192 | int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); |
193 | ||
4420cfd3 JS |
194 | /* |
195 | * In data movement APIs below, payload buffers are described as a set | |
d7e09d03 PT |
196 | * of 'niov' fragments which are... |
197 | * EITHER | |
198 | * in virtual memory (struct kvec *iov != NULL, cf. lnet_msg::msg_iov) | |
199 | * OR | |
200 | * in pages (kernel only: lnet_kiov_t *kiov != NULL). | |
201 | * The LND may NOT overwrite these fragment descriptors. | |
202 | * An 'offset' may specify a byte offset within the set of | |
203 | * fragments to start from | |
204 | */ | |
205 | ||
4420cfd3 JS |
206 | /* |
207 | * Start sending a preformatted message. 'private' is NULL for PUT and | |
d7e09d03 PT |
208 | * GET messages; otherwise this is a response to an incoming message |
209 | * and 'private' is the 'private' passed to lnet_parse(). Return | |
210 | * non-zero for immediate failure, otherwise complete later with | |
4420cfd3 JS |
211 | * lnet_finalize() |
212 | */ | |
d7e09d03 PT |
213 | int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); |
214 | ||
4420cfd3 JS |
215 | /* |
216 | * Start receiving 'mlen' bytes of payload data, skipping the following | |
d7e09d03 | 217 | * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to |
d766b4b5 | 218 | * lnet_parse(). Return non-zero for immediate failure, otherwise |
d7e09d03 | 219 | * complete later with lnet_finalize(). This also gives back a receive |
4420cfd3 JS |
220 | * credit if the LND does flow control. NOTE(review): 'mlen' is not a parameter below; presumably it is the space described by 'to' - confirm. |
221 | */ | |
d7e09d03 | 222 | int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, |
c1b7b8eb | 223 | int delayed, struct iov_iter *to, unsigned int rlen); |
d7e09d03 | 224 | |
4420cfd3 JS |
225 | /* |
226 | * lnet_parse() has had to delay processing of this message | |
d7e09d03 PT |
227 | * (e.g. waiting for a forwarding buffer or send credits). Give the |
228 | * LND a chance to free urgently needed resources. If called, return 0 | |
229 | * for success and do NOT give back a receive credit; that has to wait | |
230 | * until lnd_recv() gets called. On failure return < 0 and | |
4420cfd3 JS |
231 | * release resources; lnd_recv() will not be called. |
232 | */ | |
188acc61 JS |
233 | int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, |
234 | lnet_msg_t *msg, void **new_privatep); | |
d7e09d03 PT |
235 | |
236 | /* notification of peer health */ | |
237 | void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); | |
238 | ||
239 | /* query of peer aliveness */ | |
188acc61 JS |
240 | void (*lnd_query)(struct lnet_ni *ni, lnet_nid_t peer, |
241 | unsigned long *when); | |
d7e09d03 PT |
242 | |
243 | /* accept a new connection */ | |
e327dc88 | 244 | int (*lnd_accept)(struct lnet_ni *ni, struct socket *sock); |
d7e09d03 PT |
245 | } lnd_t; |
246 | ||
d7e09d03 PT |
247 | struct lnet_tx_queue { /* TX credit accounting; lnet_ni_t::ni_tx_queues is a percpt array of these */ |
248 | int tq_credits; /* # tx credits free */ | |
249 | int tq_credits_min; /* lowest tq_credits has been */ | |
250 | int tq_credits_max; /* total # tx credits */ | |
188acc61 | 251 | struct list_head tq_delayed; /* delayed TXs */ |
d7e09d03 PT |
252 | }; |
253 | ||
d7e09d03 | 254 | typedef struct lnet_ni { /* one network interface (NI): credits, CPT binding, NID, owning LND and percpt queues/refs */ |
188acc61 JS |
255 | spinlock_t ni_lock; |
256 | struct list_head ni_list; /* chain on ln_nis */ | |
257 | struct list_head ni_cptlist; /* chain on ln_nis_cpt */ | |
258 | int ni_maxtxcredits; /* # tx credits */ | |
d7e09d03 | 259 | /* # per-peer send credits */ |
188acc61 | 260 | int ni_peertxcredits; |
d7e09d03 | 261 | /* # per-peer router buffer credits */ |
188acc61 | 262 | int ni_peerrtrcredits; |
d7e09d03 | 263 | /* seconds to consider peer dead; lnet_peer_aliveness_enabled() requires this to be > 0 */ |
188acc61 JS |
264 | int ni_peertimeout; |
265 | int ni_ncpts; /* number of CPTs */ | |
266 | __u32 *ni_cpts; /* bond NI on some CPTs */ | |
267 | lnet_nid_t ni_nid; /* interface's NID */ | |
268 | void *ni_data; /* instance-specific data */ | |
269 | lnd_t *ni_lnd; /* procedural interface */ | |
d7e09d03 PT |
270 | struct lnet_tx_queue **ni_tx_queues; /* percpt TX queues */ |
271 | int **ni_refs; /* percpt reference count */ | |
ec0067d1 | 272 | time64_t ni_last_alive;/* when I was last alive */ |
188acc61 | 273 | lnet_ni_status_t *ni_status; /* my health status */ |
243a941c AS |
274 | /* per NI LND tunables */ |
275 | struct lnet_ioctl_config_lnd_tunables *ni_lnd_tunables; | |
d7e09d03 | 276 | /* equivalent interfaces to use */ |
188acc61 | 277 | char *ni_interfaces[LNET_MAX_INTERFACES]; |
d7e09d03 PT |
278 | } lnet_ni_t; |
279 | ||
280 | #define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL /* only bit 63 set */ | |
281 | ||
4420cfd3 JS |
282 | /* |
283 | * NB: the values of these features are equal to LNET_PROTO_PING_VERSION_x | |
284 | * of old LNet, so there shouldn't be any compatibility issue | |
285 | */ | |
d7e09d03 PT |
286 | #define LNET_PING_FEAT_INVAL (0) /* no feature */ |
287 | #define LNET_PING_FEAT_BASE (1 << 0) /* just a ping */ | |
288 | #define LNET_PING_FEAT_NI_STATUS (1 << 1) /* return NI status */ | |
86ef6250 | 289 | #define LNET_PING_FEAT_RTE_DISABLED (1 << 2) /* routing disabled */ |
d7e09d03 PT |
290 | |
291 | #define LNET_PING_FEAT_MASK (LNET_PING_FEAT_BASE | \ | |
292 | LNET_PING_FEAT_NI_STATUS) /* NOTE(review): LNET_PING_FEAT_RTE_DISABLED is not in the mask - confirm intended */ | |
293 | ||
d7e09d03 PT |
294 | /* router checker data, per router */ |
295 | #define LNET_MAX_RTR_NIS 16 | |
296 | #define LNET_PINGINFO_SIZE offsetof(lnet_ping_info_t, pi_ni[LNET_MAX_RTR_NIS]) /* bytes up to the end of LNET_MAX_RTR_NIS pi_ni entries */ | |
297 | typedef struct { | |
298 | /* chain on the_lnet.ln_rcd_zombie or ln_rcd_deathrow */ | |
188acc61 JS |
299 | struct list_head rcd_list; |
300 | lnet_handle_md_t rcd_mdh; /* ping buffer MD */ | |
d7e09d03 PT |
301 | struct lnet_peer *rcd_gateway; /* reference to gateway */ |
302 | lnet_ping_info_t *rcd_pinginfo; /* ping buffer */ | |
303 | } lnet_rc_data_t; | |
304 | ||
305 | typedef struct lnet_peer { /* one peer: hash/queue linkage, credits, aliveness and ping state, owning NI and refcounts */ |
188acc61 JS |
306 | struct list_head lp_hashlist; /* chain on peer hash */ |
307 | struct list_head lp_txq; /* messages blocking for | |
308 | tx credits */ | |
309 | struct list_head lp_rtrq; /* messages blocking for | |
310 | router credits */ | |
311 | struct list_head lp_rtr_list; /* chain on router list */ | |
312 | int lp_txcredits; /* # tx credits available */ | |
313 | int lp_mintxcredits; /* low water mark */ | |
314 | int lp_rtrcredits; /* # router credits */ | |
315 | int lp_minrtrcredits; /* low water mark */ | |
316 | unsigned int lp_alive:1; /* alive/dead? */ | |
317 | unsigned int lp_notify:1; /* notification outstanding? */ | |
318 | unsigned int lp_notifylnd:1;/* outstanding notification | |
319 | for LND? */ | |
320 | unsigned int lp_notifying:1; /* some thread is handling | |
321 | notification */ | |
322 | unsigned int lp_ping_notsent;/* SEND event outstanding | |
323 | from ping; NOTE(review): a full unsigned int, not a 1-bit field like its neighbours - confirm intended */ | |
324 | int lp_alive_count; /* # times router went | |
325 | dead<->alive */ | |
326 | long lp_txqnob; /* bytes queued for sending */ | |
327 | unsigned long lp_timestamp; /* time of last aliveness | |
328 | news */ | |
329 | unsigned long lp_ping_timestamp;/* time of last ping | |
330 | attempt */ | |
331 | unsigned long lp_ping_deadline; /* != 0 if ping reply | |
332 | expected */ | |
333 | unsigned long lp_last_alive; /* when I was last alive */ | |
334 | unsigned long lp_last_query; /* when lp_ni was queried | |
335 | last time */ | |
336 | lnet_ni_t *lp_ni; /* interface peer is on */ | |
337 | lnet_nid_t lp_nid; /* peer's NID */ | |
338 | int lp_refcount; /* # refs */ | |
339 | int lp_cpt; /* CPT this peer attached on */ | |
d7e09d03 | 340 | /* # refs from lnet_route_t::lr_gateway */ |
188acc61 | 341 | int lp_rtr_refcount; |
d7e09d03 | 342 | /* returned RC ping features */ |
188acc61 JS |
343 | unsigned int lp_ping_feats; |
344 | struct list_head lp_routes; /* routers on this peer; presumably the list lnet_route_t::lr_gwlist chains on - confirm */ | |
d7e09d03 PT |
345 | lnet_rc_data_t *lp_rcd; /* router checker state */ |
346 | } lnet_peer_t; | |
347 | ||
d7e09d03 | 348 | /* peer hash size: 2^LNET_PEER_HASH_BITS == 512 */ |
188acc61 JS |
349 | #define LNET_PEER_HASH_BITS 9 |
350 | #define LNET_PEER_HASH_SIZE (1 << LNET_PEER_HASH_BITS) | |
d7e09d03 PT |
351 | |
352 | /* peer hash table; lnet_t::ln_peer_tables points at an array of pointers to these */ | |
353 | struct lnet_peer_table { | |
188acc61 JS |
354 | int pt_version; /* /proc validity stamp */ |
355 | int pt_number; /* # peers extant */ | |
21602c7d AS |
356 | /* # zombies to go to deathrow (and not there yet) */ |
357 | int pt_zombies; | |
188acc61 JS |
358 | struct list_head pt_deathrow; /* zombie peers */ |
359 | struct list_head *pt_hash; /* NID->peer hash */ | |
d7e09d03 PT |
360 | }; |
361 | ||
4420cfd3 JS |
362 | /* |
363 | * peer aliveness is enabled only on routers for peers in a network where the | |
364 | * lnet_ni_t::ni_peertimeout has been set to a positive value. Non-zero when the_lnet.ln_routing is set and (lp)->lp_ni->ni_peertimeout > 0. | |
365 | */ | |
5fd88337 | 366 | #define lnet_peer_aliveness_enabled(lp) (the_lnet.ln_routing && \ |
d7e09d03 PT |
367 | (lp)->lp_ni->ni_peertimeout > 0) |
368 | ||
369 | typedef struct { /* one route to a remote network through a gateway peer */ |
188acc61 JS |
370 | struct list_head lr_list; /* chain on net */ |
371 | struct list_head lr_gwlist; /* chain on gateway */ | |
d7e09d03 | 372 | lnet_peer_t *lr_gateway; /* router node; counted in lnet_peer_t::lp_rtr_refcount */ |
188acc61 JS |
373 | __u32 lr_net; /* remote network number */ |
374 | int lr_seq; /* sequence for round-robin */ | |
375 | unsigned int lr_downis; /* number of down NIs */ | |
b9bbb61c | 376 | __u32 lr_hops; /* how far I am */ |
188acc61 | 377 | unsigned int lr_priority; /* route priority */ |
d7e09d03 PT |
378 | } lnet_route_t; |
379 | ||
380 | #define LNET_REMOTE_NETS_HASH_DEFAULT (1U << 7) /* 128 */ |
381 | #define LNET_REMOTE_NETS_HASH_MAX (1U << 16) /* 65536 */ | |
382 | #define LNET_REMOTE_NETS_HASH_SIZE (1 << the_lnet.ln_remote_nets_hbits) /* run-time size; NOTE(review): shifts a signed 1 while the limits above use 1U - confirm intended */ | |
383 | ||
384 | typedef struct { /* one remote network and the list of routes to it */ |
188acc61 JS |
385 | struct list_head lrn_list; /* chain on |
386 | ln_remote_nets_hash */ | |
387 | struct list_head lrn_routes; /* routes to me */ | |
388 | __u32 lrn_net; /* my net number */ | |
d7e09d03 PT |
389 | } lnet_remotenet_t; |
390 | ||
db18b8e9 JS |
391 | /** the LNet message has credit and can be submitted to the LND for send/receive */ |
392 | #define LNET_CREDIT_OK 0 | |
393 | /** the LNet message is waiting for credit */ | |
394 | #define LNET_CREDIT_WAIT 1 | |
395 | ||
d7e09d03 | 396 | typedef struct { /* router buffer pool: free buffers, blocked messages and credit accounting */ |
188acc61 JS |
397 | struct list_head rbp_bufs; /* my free buffer pool */ |
398 | struct list_head rbp_msgs; /* messages blocking | |
399 | for a buffer */ | |
400 | int rbp_npages; /* # pages in each buffer */ | |
95fc2938 AS |
401 | /* requested number of buffers */ |
402 | int rbp_req_nbuffers; | |
403 | /* # buffers actually allocated */ | |
404 | int rbp_nbuffers; | |
188acc61 JS |
405 | int rbp_credits; /* # free buffers / |
406 | blocked messages */ | |
407 | int rbp_mincredits; /* low water mark */ | |
d7e09d03 PT |
408 | } lnet_rtrbufpool_t; |
409 | ||
410 | typedef struct { /* one router buffer; the kiov array trails the fixed fields */ |
188acc61 JS |
411 | struct list_head rb_list; /* chain on rbp_bufs */ |
412 | lnet_rtrbufpool_t *rb_pool; /* owning pool */ | |
413 | lnet_kiov_t rb_kiov[0]; /* the buffer space: zero-length trailing array; presumably rb_pool->rbp_npages entries - confirm */ | |
d7e09d03 PT |
414 | } lnet_rtrbuf_t; |
415 | ||
188acc61 | 416 | #define LNET_PEER_HASHSIZE 503 /* prime! NOTE(review): distinct from LNET_PEER_HASH_SIZE (512) defined earlier in this header - confirm which one is live */ |
d7e09d03 | 417 | |
86ef6250 AS |
418 | #define LNET_TINY_BUF_IDX 0 |
419 | #define LNET_SMALL_BUF_IDX 1 | |
420 | #define LNET_LARGE_BUF_IDX 2 | |
421 | ||
422 | /* # different router buffer pools: one per index above, i.e. 3 */ | |
423 | #define LNET_NRBPOOLS (LNET_LARGE_BUF_IDX + 1) | |
d7e09d03 PT |
424 | |
425 | enum { /* MD match results; single-bit values so they can be OR-ed (see LNET_MATCHMD_FINISH) */ |
426 | /* Didn't match anything */ | |
427 | LNET_MATCHMD_NONE = (1 << 0), | |
428 | /* Matched OK */ | |
429 | LNET_MATCHMD_OK = (1 << 1), | |
430 | /* Must be discarded */ | |
431 | LNET_MATCHMD_DROP = (1 << 2), | |
432 | /* match and buffer is exhausted */ | |
188acc61 | 433 | LNET_MATCHMD_EXHAUSTED = (1 << 3), |
d7e09d03 | 434 | /* mask covering both terminal results: match or drop */ |
188acc61 | 435 | LNET_MATCHMD_FINISH = (LNET_MATCHMD_OK | LNET_MATCHMD_DROP), |
d7e09d03 PT |
436 | }; |
437 | ||
438 | /* Option bits for lnet_portal_t::ptl_options */ | |
188acc61 JS |
439 | #define LNET_PTL_LAZY (1 << 0) |
440 | #define LNET_PTL_MATCH_UNIQUE (1 << 1) /* unique match, for RDMA */ | |
441 | #define LNET_PTL_MATCH_WILDCARD (1 << 2) /* wildcard match, | |
442 | request portal */ | |
d7e09d03 PT |
443 | |
444 | /* parameters for matching operations (GET, PUT) */ | |
445 | struct lnet_match_info { | |
446 | __u64 mi_mbits; | |
447 | lnet_process_id_t mi_id; | |
448 | unsigned int mi_opc; | |
449 | unsigned int mi_portal; | |
450 | unsigned int mi_rlength; | |
451 | unsigned int mi_roffset; | |
452 | }; | |
453 | ||
454 | /* ME hash of RDMA portal: 2^8 == 256 buckets, mask 255 */ | |
455 | #define LNET_MT_HASH_BITS 8 | |
456 | #define LNET_MT_HASH_SIZE (1 << LNET_MT_HASH_BITS) | |
457 | #define LNET_MT_HASH_MASK (LNET_MT_HASH_SIZE - 1) | |
4420cfd3 JS |
458 | /* |
459 | * we allocate (LNET_MT_HASH_SIZE + 1) entries for lnet_match_table::mt_mhash, | |
460 | * the last entry is reserved for MEs with ignore-bits | |
461 | */ | |
d7e09d03 | 462 | #define LNET_MT_HASH_IGNORE LNET_MT_HASH_SIZE |
4420cfd3 JS |
463 | /* |
464 | * __u64 has 2^6 bits, so need 2^(LNET_MT_HASH_BITS - LNET_MT_BITS_U64) which | |
d7e09d03 | 465 | * is 4 __u64s as bit-map, and add an extra __u64 (only use one bit) for the |
4420cfd3 JS |
466 | * ME-list with ignore-bits, which is lnet_match_table::mt_mhash[LNET_MT_HASH_IGNORE] |
467 | */ | |
d7e09d03 PT |
468 | #define LNET_MT_BITS_U64 6 /* 2^6 bits */ |
469 | #define LNET_MT_EXHAUSTED_BITS (LNET_MT_HASH_BITS - LNET_MT_BITS_U64) | |
470 | #define LNET_MT_EXHAUSTED_BMAP ((1 << LNET_MT_EXHAUSTED_BITS) + 1) /* == 5 __u64 words */ | |
471 | ||
472 | /* portal match table; lnet_portal_t::ptl_mtables holds one per CPT */ | |
473 | struct lnet_match_table { | |
474 | /* reserved for upcoming patches, CPU partition ID */ | |
188acc61 JS |
475 | unsigned int mt_cpt; |
476 | unsigned int mt_portal; /* portal index */ | |
4420cfd3 JS |
477 | /* |
478 | * match table is set as "enabled" if there's non-exhausted MD | |
479 | * attached on mt_mhash, it's only valid for wildcard portal | |
480 | */ | |
188acc61 | 481 | unsigned int mt_enabled; |
d7e09d03 | 482 | /* bitmap to flag whether MEs on mt_mhash are exhausted or not */ |
188acc61 JS |
483 | __u64 mt_exhausted[LNET_MT_EXHAUSTED_BMAP]; |
484 | struct list_head *mt_mhash; /* matching hash */ | |
d7e09d03 PT |
485 | }; |
486 | ||
487 | /* rotor modes; these are only useful for wildcard portals (presumably values of lnet_portal_t::ptl_rotor - confirm) */ | |
488 | /* Turn off message rotor for wildcard portals */ | |
489 | #define LNET_PTL_ROTOR_OFF 0 | |
490 | /* round-robin dispatch all PUT messages for wildcard portals */ | |
491 | #define LNET_PTL_ROTOR_ON 1 | |
492 | /* round-robin dispatch routed PUT message for wildcard portals */ | |
493 | #define LNET_PTL_ROTOR_RR_RT 2 | |
494 | /* dispatch routed PUT message by hashing source NID for wildcard portals */ | |
495 | #define LNET_PTL_ROTOR_HASH_RT 3 | |
496 | ||
497 | typedef struct lnet_portal { /* one portal: options, pending message lists and per-CPT match tables */ |
188acc61 JS |
498 | spinlock_t ptl_lock; |
499 | unsigned int ptl_index; /* portal ID, reserved */ | |
d7e09d03 | 500 | /* flags on this portal: lazy, unique... (LNET_PTL_* option bits) */ |
188acc61 | 501 | unsigned int ptl_options; |
2b284326 | 502 | /* list of messages which are stealing buffer */ |
188acc61 | 503 | struct list_head ptl_msg_stealing; |
d7e09d03 | 504 | /* messages blocking for MD */ |
188acc61 | 505 | struct list_head ptl_msg_delayed; |
d7e09d03 PT |
506 | /* Match table for each CPT */ |
507 | struct lnet_match_table **ptl_mtables; | |
508 | /* spread rotor of incoming "PUT" */ | |
188acc61 | 509 | unsigned int ptl_rotor; |
d7e09d03 | 510 | /* # active entries for this portal */ |
188acc61 | 511 | int ptl_mt_nmaps; |
d7e09d03 | 512 | /* array of active entries' cpu-partition-id (zero-length trailing array) */ |
188acc61 | 513 | int ptl_mt_maps[0]; |
d7e09d03 PT |
514 | } lnet_portal_t; |
515 | ||
516 | #define LNET_LH_HASH_BITS 12 /* 2^12 == 4096; presumably sizes lnet_res_container::rec_lh_hash - confirm */ | |
517 | #define LNET_LH_HASH_SIZE (1ULL << LNET_LH_HASH_BITS) | |
518 | #define LNET_LH_HASH_MASK (LNET_LH_HASH_SIZE - 1) | |
519 | ||
520 | /* resource container (ME, MD, EQ); lnet_t keeps percpt ME and MD containers plus one EQ container */ | |
521 | struct lnet_res_container { | |
188acc61 JS |
522 | unsigned int rec_type; /* container type */ |
523 | __u64 rec_lh_cookie; /* cookie generator */ | |
524 | struct list_head rec_active; /* active resource list */ | |
525 | struct list_head *rec_lh_hash; /* handle hash */ | |
d7e09d03 PT |
526 | }; |
527 | ||
528 | /* message container; lnet_t::ln_msg_containers holds one per CPT */ | |
529 | struct lnet_msg_container { | |
188acc61 | 530 | int msc_init; /* initialized or not */ |
d7e09d03 | 531 | /* max # threads finalizing */ |
188acc61 | 532 | int msc_nfinalizers; |
d7e09d03 | 533 | /* msgs waiting to complete finalizing */ |
188acc61 JS |
534 | struct list_head msc_finalizing; |
535 | struct list_head msc_active; /* active message list */ | |
d7e09d03 PT |
536 | /* threads doing finalization */ |
537 | void **msc_finalizers; | |
d7e09d03 PT |
538 | }; |
539 | ||
540 | /* Router Checker states (presumably values of lnet_t::ln_rc_state - confirm) */ | |
541 | #define LNET_RC_STATE_SHUTDOWN 0 /* not started */ | |
542 | #define LNET_RC_STATE_RUNNING 1 /* started up OK */ | |
543 | #define LNET_RC_STATE_STOPPING 2 /* telling thread to stop */ | |
544 | ||
3b77f472 | 545 | typedef struct { /* LNet-wide state; presumably the type of the global 'the_lnet' used by macros in this header - confirm */ |
d7e09d03 | 546 | /* CPU partition table of LNet */ |
188acc61 | 547 | struct cfs_cpt_table *ln_cpt_table; |
d7e09d03 | 548 | /* number of CPTs in ln_cpt_table */ |
188acc61 JS |
549 | unsigned int ln_cpt_number; |
550 | unsigned int ln_cpt_bits; | |
d7e09d03 PT |
551 | |
552 | /* protect LNet resources (ME/MD/EQ) */ | |
188acc61 | 553 | struct cfs_percpt_lock *ln_res_lock; |
d7e09d03 | 554 | /* # portals */ |
188acc61 | 555 | int ln_nportals; |
d7e09d03 PT |
556 | /* the vector of portals */ |
557 | lnet_portal_t **ln_portals; | |
558 | /* percpt ME containers */ | |
559 | struct lnet_res_container **ln_me_containers; | |
560 | /* percpt MD container */ | |
561 | struct lnet_res_container **ln_md_containers; | |
562 | ||
563 | /* Event Queue container */ | |
188acc61 JS |
564 | struct lnet_res_container ln_eq_container; |
565 | wait_queue_head_t ln_eq_waitq; | |
566 | spinlock_t ln_eq_wait_lock; | |
567 | unsigned int ln_remote_nets_hbits; /* log2 of LNET_REMOTE_NETS_HASH_SIZE */ | |
d7e09d03 PT |
568 | |
569 | /* protect NI, peer table, credits, routers, rtrbuf... */ | |
188acc61 | 570 | struct cfs_percpt_lock *ln_net_lock; |
d7e09d03 PT |
571 | /* percpt message containers for active/finalizing/freed message */ |
572 | struct lnet_msg_container **ln_msg_containers; | |
573 | lnet_counters_t **ln_counters; | |
574 | struct lnet_peer_table **ln_peer_tables; | |
575 | /* failure simulation */ | |
188acc61 | 576 | struct list_head ln_test_peers; |
0fbbced2 | 577 | struct list_head ln_drop_rules; |
b7acfc95 | 578 | struct list_head ln_delay_rules; |
d7e09d03 | 579 | |
188acc61 | 580 | struct list_head ln_nis; /* LND instances */ |
d7e09d03 | 581 | /* NIs bond on specific CPT(s) */ |
188acc61 | 582 | struct list_head ln_nis_cpt; |
d7e09d03 | 583 | /* dying LND instances */ |
188acc61 JS |
584 | struct list_head ln_nis_zombie; |
585 | lnet_ni_t *ln_loni; /* the loopback NI */ | |
d7e09d03 PT |
586 | |
587 | /* remote networks with routes to them */ | |
188acc61 | 588 | struct list_head *ln_remote_nets_hash; |
d7e09d03 | 589 | /* validity stamp */ |
188acc61 | 590 | __u64 ln_remote_nets_version; |
d7e09d03 | 591 | /* list of all known routers */ |
188acc61 | 592 | struct list_head ln_routers; |
d7e09d03 | 593 | /* validity stamp */ |
188acc61 | 594 | __u64 ln_routers_version; |
d7e09d03 PT |
595 | /* percpt router buffer pools */ |
596 | lnet_rtrbufpool_t **ln_rtrpools; | |
597 | ||
188acc61 JS |
598 | lnet_handle_md_t ln_ping_target_md; |
599 | lnet_handle_eq_t ln_ping_target_eq; | |
600 | lnet_ping_info_t *ln_ping_info; | |
d7e09d03 PT |
601 | |
602 | /* router checker startup/shutdown state */ | |
188acc61 | 603 | int ln_rc_state; |
d7e09d03 | 604 | /* router checker's event queue */ |
188acc61 | 605 | lnet_handle_eq_t ln_rc_eqh; |
d7e09d03 | 606 | /* rcd still pending on net */ |
188acc61 | 607 | struct list_head ln_rcd_deathrow; |
d7e09d03 | 608 | /* rcd ready for free */ |
188acc61 | 609 | struct list_head ln_rcd_zombie; |
d7e09d03 | 610 | /* serialise startup/shutdown */ |
21ca52bb | 611 | struct completion ln_rc_signal; |
d7e09d03 | 612 | |
188acc61 JS |
613 | struct mutex ln_api_mutex; |
614 | struct mutex ln_lnd_mutex; | |
b7acfc95 | 615 | struct mutex ln_delay_mutex; |
d7e09d03 | 616 | /* Have I called LNetNIInit myself? */ |
188acc61 | 617 | int ln_niinit_self; |
d7e09d03 | 618 | /* LNetNIInit/LNetNIFini counter */ |
188acc61 | 619 | int ln_refcount; |
d7e09d03 | 620 | /* shutdown in progress */ |
188acc61 | 621 | int ln_shutdown; |
d7e09d03 | 622 | |
188acc61 JS |
623 | int ln_routing; /* am I a router? */ |
624 | lnet_pid_t ln_pid; /* requested pid */ | |
d7e09d03 | 625 | /* uniquely identifies this ni in this epoch */ |
188acc61 | 626 | __u64 ln_interface_cookie; |
d7e09d03 | 627 | /* registered LNDs */ |
188acc61 | 628 | struct list_head ln_lnds; |
d7e09d03 | 629 | |
d7e09d03 | 630 | /* test protocol compatibility flags */ |
188acc61 | 631 | int ln_testprotocompat; |
d7e09d03 | 632 | |
edeb5d8c AS |
633 | /* |
634 | * 0 - load the NIs from the mod params | |
635 | * 1 - do not load the NIs from the mod params | |
636 | * Reverse logic (despite the field name) to ensure that other | |
637 | * calls to LNetNIInit need no change | |
638 | */ | |
639 | bool ln_nis_from_mod_params; | |
640 | ||
7f8b70e0 AS |
641 | /* |
642 | * waitq for router checker. As long as there are no routes in | |
643 | * the list, the router checker will sleep on this queue. When | |
644 | * routes are added the thread will wake up | |
645 | */ | |
646 | wait_queue_head_t ln_rc_waitq; | |
647 | ||
d7e09d03 PT |
648 | } lnet_t; |
649 | ||
650 | #endif |