Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
6a5b99a4 | 18 | * http://www.gnu.org/licenses/gpl-2.0.html |
d7e09d03 | 19 | * |
d7e09d03 PT |
20 | * GPL HEADER END |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | * | |
26 | * Copyright (c) 2011, 2012, Intel Corporation. | |
27 | */ | |
28 | /* | |
29 | * This file is part of Lustre, http://www.lustre.org/ | |
30 | * Lustre is a trademark of Sun Microsystems, Inc. | |
31 | */ | |
32 | /** \defgroup obd_import PtlRPC import definitions | |
33 | * Imports are client-side representation of remote obd target. | |
34 | * | |
35 | * @{ | |
36 | */ | |
37 | ||
38 | #ifndef __IMPORT_H | |
39 | #define __IMPORT_H | |
40 | ||
41 | /** \defgroup import import | |
42 | * | |
43 | * @{ | |
44 | */ | |
45 | ||
1accaadf GKH |
46 | #include "lustre_handles.h" |
47 | #include "lustre/lustre_idl.h" | |
d7e09d03 | 48 | |
d7e09d03 PT |
49 | /** |
50 | * Adaptive Timeout stuff | |
51 | * | |
52 | * @{ | |
53 | */ | |
54 | #define D_ADAPTTO D_OTHER | |
55 | #define AT_BINS 4 /* "bin" means "N seconds of history" */ | |
56 | #define AT_FLG_NOHIST 0x1 /* use last reported value only */ | |
57 | ||
58 | struct adaptive_timeout { | |
0ac0478b | 59 | time64_t at_binstart; /* bin start time */ |
d7e09d03 PT |
60 | unsigned int at_hist[AT_BINS]; /* timeout history bins */ |
61 | unsigned int at_flags; /* stored from the flags argument of at_init(); presumably AT_FLG_* bits - confirm */ | |
62 | unsigned int at_current; /* current timeout value */ | |
63 | unsigned int at_worst_ever; /* worst-ever timeout value */ | |
0ac0478b | 64 | time64_t at_worst_time; /* worst-ever timeout timestamp; at_reset() fills it from ktime_get_real_seconds() */ |
d7e09d03 PT |
65 | spinlock_t at_lock; /* taken by at_reset() while it updates at_current, at_worst_ever and at_worst_time */ |
66 | }; | |
67 | ||
68 | struct ptlrpc_at_array { | |
69 | struct list_head *paa_reqs_array; /** array of list heads holding requests */ | |
70 | __u32 paa_size; /** the size of paa_reqs_array (presumably in entries - confirm) */ | |
71 | __u32 paa_count; /** the total count of reqs */ | |
219e6de6 | 72 | time64_t paa_deadline; /** the earliest deadline of reqs */ |
d7e09d03 PT |
73 | __u32 *paa_reqs_count; /** the count of reqs in each paa_reqs_array entry */ |
74 | }; | |
75 | ||
76 | #define IMP_AT_MAX_PORTALS 8 /* length of the two per-portal arrays in struct imp_at */ | |
77 | struct imp_at { | |
78 | int iat_portal[IMP_AT_MAX_PORTALS]; /* presumably the portal tracked by each slot, see import_at_get_index() - confirm */ | |
79 | struct adaptive_timeout iat_net_latency; /* a single estimate, not per portal */ | |
80 | struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS]; /* one estimate per slot; presumably parallel to iat_portal[] - confirm */ | |
81 | }; | |
82 | ||
d7e09d03 PT |
83 | /** @} */ |
84 | ||
85 | /** Possible import states. Values start at 1 and are used directly as indices into the name table of ptlrpc_import_state_name(). */ | |
86 | enum lustre_imp_state { | |
87 | LUSTRE_IMP_CLOSED = 1, | |
88 | LUSTRE_IMP_NEW = 2, | |
89 | LUSTRE_IMP_DISCON = 3, | |
90 | LUSTRE_IMP_CONNECTING = 4, | |
91 | LUSTRE_IMP_REPLAY = 5, | |
92 | LUSTRE_IMP_REPLAY_LOCKS = 6, | |
93 | LUSTRE_IMP_REPLAY_WAIT = 7, | |
94 | LUSTRE_IMP_RECOVER = 8, | |
95 | LUSTRE_IMP_FULL = 9, | |
96 | LUSTRE_IMP_EVICTED = 10, /* upper bound asserted by ptlrpc_import_state_name() */ | |
97 | }; | |
98 | ||
99 | /** Returns text string representation of numeric import state \a state. The table is indexed by the enum value; slot 0 is the UNKNOWN placeholder because the states start at 1. */ | |
727543d6 | 100 | static inline char *ptlrpc_import_state_name(enum lustre_imp_state state) |
d7e09d03 | 101 | { |
727543d6 | 102 | static char *import_state_names[] = { |
d7e09d03 PT |
103 | "<UNKNOWN>", "CLOSED", "NEW", "DISCONN", |
104 | "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", | |
105 | "RECOVER", "FULL", "EVICTED", | |
106 | }; | |
107 | ||
e9570b49 | 108 | LASSERT(state <= LUSTRE_IMP_EVICTED); /* keeps the index within the 11-entry table */ |
d7e09d03 PT |
109 | return import_state_names[state]; |
110 | } | |
111 | ||
112 | /** | |
113 | * List of import event types (consecutive values starting at 0x808001) | |
114 | */ | |
115 | enum obd_import_event { | |
116 | IMP_EVENT_DISCON = 0x808001, | |
117 | IMP_EVENT_INACTIVE = 0x808002, | |
118 | IMP_EVENT_INVALIDATE = 0x808003, | |
119 | IMP_EVENT_ACTIVE = 0x808004, | |
120 | IMP_EVENT_OCD = 0x808005, | |
121 | IMP_EVENT_DEACTIVATE = 0x808006, | |
122 | IMP_EVENT_ACTIVATE = 0x808007, | |
123 | }; | |
124 | ||
125 | /** | |
126 | * Definition of import connection structure: one connection an import can use; obd_import::imp_conn_current points at one of these | |
127 | */ | |
128 | struct obd_import_conn { | |
129 | /** Item for linking connections together (presumably on obd_import::imp_conn_list - confirm) */ | |
130 | struct list_head oic_item; | |
131 | /** Pointer to actual PortalRPC connection; for the current connection this is what obd_import::imp_connection refers to */ | |
132 | struct ptlrpc_connection *oic_conn; | |
133 | /** uuid of remote side */ | |
134 | struct obd_uuid oic_uuid; | |
135 | /** | |
136 | * Time (64 bit jiffies) of last connection attempt on this connection | |
137 | */ | |
138 | __u64 oic_last_attempt; | |
139 | }; | |
140 | ||
141 | /* state history: element type of obd_import::imp_state_hist[], which holds IMP_STATE_HIST_LEN entries */ | |
142 | #define IMP_STATE_HIST_LEN 16 | |
143 | struct import_state_hist { | |
144 | enum lustre_imp_state ish_state; /* recorded import state */ | |
74e489aa | 145 | time64_t ish_time; /* presumably when ish_state was recorded - confirm */ |
d7e09d03 PT |
146 | }; |
147 | ||
148 | /** | |
bd9070cb | 149 | * Definition of PortalRPC import structure. |
d7e09d03 PT |
150 | * An import represents the client-side view of a remote target. |
151 | */ | |
152 | struct obd_import { | |
153 | /** Local handle (== id) for this import. */ | |
154 | struct portals_handle imp_handle; | |
155 | /** Reference counter */ | |
156 | atomic_t imp_refcount; | |
157 | struct lustre_handle imp_dlm_handle; /* client's ldlm export */ | |
158 | /** Currently active connection */ | |
159 | struct ptlrpc_connection *imp_connection; | |
160 | /** PortalRPC client structure for this import */ | |
161 | struct ptlrpc_client *imp_client; | |
162 | /** List element for linking into pinger chain */ | |
163 | struct list_head imp_pinger_chain; | |
164 | /** List element for linking into chain for destruction */ | |
165 | struct list_head imp_zombie_chain; | |
166 | ||
167 | /** | |
168 | * Lists of requests that are retained for replay, waiting for a reply, | |
169 | * or waiting for recovery to complete, respectively. | |
170 | * @{ | |
171 | */ | |
172 | struct list_head imp_replay_list; | |
173 | struct list_head imp_sending_list; | |
174 | struct list_head imp_delayed_list; | |
175 | /** @} */ | |
176 | ||
63d42578 HZ |
177 | /** |
178 | * List of requests that are retained for committed open replay. Once | |
179 | * open is committed, open replay request will be moved from the | |
180 | * imp_replay_list into the imp_committed_list. | |
181 | * The imp_replay_cursor is for accelerating searching during replay. | |
182 | * @{ | |
183 | */ | |
184 | struct list_head imp_committed_list; | |
185 | struct list_head *imp_replay_cursor; | |
186 | /** @} */ | |
187 | ||
d7e09d03 PT |
188 | /** obd device for this import */ |
189 | struct obd_device *imp_obd; | |
190 | ||
191 | /** | |
192 | * some security-related fields | |
193 | * @{ | |
194 | */ | |
195 | struct ptlrpc_sec *imp_sec; | |
196 | struct mutex imp_sec_mutex; | |
986ef135 | 197 | time64_t imp_sec_expire; |
d7e09d03 PT |
198 | /** @} */ |
199 | ||
200 | /** Wait queue for those who need to wait for recovery completion */ | |
201 | wait_queue_head_t imp_recovery_waitq; | |
202 | ||
203 | /** Number of requests currently in-flight */ | |
204 | atomic_t imp_inflight; | |
205 | /** Number of requests currently unregistering */ | |
206 | atomic_t imp_unregistering; | |
207 | /** Number of replay requests inflight */ | |
208 | atomic_t imp_replay_inflight; | |
209 | /** Number of currently happening import invalidations */ | |
210 | atomic_t imp_inval_count; | |
211 | /** Number of request timeouts */ | |
212 | atomic_t imp_timeouts; | |
213 | /** Current import state */ | |
214 | enum lustre_imp_state imp_state; | |
502cb58e AS |
215 | /** Last replay state */ |
216 | enum lustre_imp_state imp_replay_state; | |
d7e09d03 PT |
217 | /** History of import states */ |
218 | struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN]; | |
219 | int imp_state_hist_idx; | |
220 | /** Current import generation. Incremented on every reconnect */ | |
221 | int imp_generation; | |
222 | /** Incremented every time we send reconnection request */ | |
223 | __u32 imp_conn_cnt; | |
224 | /** | |
225 | * \see ptlrpc_free_committed remembers imp_generation value here | |
226 | * after a check to save on unnecessary replay list iterations | |
227 | */ | |
228 | int imp_last_generation_checked; | |
bd9070cb | 229 | /** Last transno we replayed */ |
d7e09d03 PT |
230 | __u64 imp_last_replay_transno; |
231 | /** Last transno committed on remote side */ | |
232 | __u64 imp_peer_committed_transno; | |
233 | /** | |
234 | * \see ptlrpc_free_committed remembers last_transno since its last | |
235 | * check here and if last_transno did not change since last run of | |
236 | * ptlrpc_free_committed and import generation is the same, we can | |
237 | * skip looking for requests to remove from replay list as optimisation | |
238 | */ | |
239 | __u64 imp_last_transno_checked; | |
240 | /** | |
241 | * Remote export handle. This is how remote side knows what export | |
242 | * we are talking to. Filled from response to connect request | |
243 | */ | |
244 | struct lustre_handle imp_remote_handle; | |
245 | /** When to perform next ping. time in jiffies. */ | |
a649ad1d | 246 | unsigned long imp_next_ping; |
bd9070cb | 247 | /** When we last successfully connected. time in 64bit jiffies */ |
d7e09d03 PT |
248 | __u64 imp_last_success_conn; |
249 | ||
250 | /** List of all possible connections for import. */ | |
251 | struct list_head imp_conn_list; | |
252 | /** | |
253 | * Current connection. \a imp_connection is imp_conn_current->oic_conn | |
254 | */ | |
255 | struct obd_import_conn *imp_conn_current; | |
256 | ||
257 | /** Protects flags, level, generation, conn_cnt, *_list */ | |
258 | spinlock_t imp_lock; | |
259 | ||
260 | /* flags */ | |
261 | unsigned long imp_no_timeout:1, /* timeouts are disabled */ | |
262 | imp_invalid:1, /* evicted */ | |
263 | /* administratively disabled */ | |
264 | imp_deactive:1, | |
265 | /* try to recover the import */ | |
266 | imp_replayable:1, | |
267 | /* don't run recovery (timeout instead) */ | |
268 | imp_dlm_fake:1, | |
269 | /* use 1/2 timeout on MDS' OSCs */ | |
270 | imp_server_timeout:1, | |
271 | /* VBR: imp in delayed recovery */ | |
272 | imp_delayed_recovery:1, | |
273 | /* VBR: if gap was found then no lock replays | |
274 | */ | |
275 | imp_no_lock_replay:1, | |
276 | /* recovery by versions has failed */ | |
277 | imp_vbr_failed:1, | |
bd9070cb | 278 | /* force an immediate ping */ |
d7e09d03 PT |
279 | imp_force_verify:1, |
280 | /* force a scheduled ping */ | |
281 | imp_force_next_verify:1, | |
282 | /* pingable */ | |
283 | imp_pingable:1, | |
284 | /* resend for replay */ | |
285 | imp_resend_replay:1, | |
286 | /* disable normal recovery, for test only. */ | |
287 | imp_no_pinger_recover:1, | |
288 | /* need IR MNE swab */ | |
289 | imp_need_mne_swab:1, | |
290 | /* import must be reconnected instead of | |
c56e256d OD |
291 | * choosing new connection |
292 | */ | |
d7e09d03 PT |
293 | imp_force_reconnect:1, |
294 | /* import has tried to connect with server */ | |
295 | imp_connect_tried:1; | |
296 | __u32 imp_connect_op; | |
297 | struct obd_connect_data imp_connect_data; | |
298 | __u64 imp_connect_flags_orig; | |
299 | int imp_connect_error; | |
300 | ||
301 | __u32 imp_msg_magic; | |
302 | __u32 imp_msghdr_flags; /* adjusted based on server capability */ | |
303 | ||
d7e09d03 | 304 | struct imp_at imp_at; /* adaptive timeout data */ |
74e489aa | 305 | time64_t imp_last_reply_time; /* for health check */ |
d7e09d03 PT |
306 | }; |
307 | ||
d7e09d03 PT |
308 | /* import.c */ |
309 | static inline unsigned int at_est2timeout(unsigned int val) | |
310 | { | |
311 | /* estimate to timeout: 125% of val (val >> 2 is the truncated quarter) plus an arbitrary minimum of 5 sec */ | |
312 | return (val + (val >> 2) + 5); | |
313 | } | |
314 | ||
315 | static inline unsigned int at_timeout2est(unsigned int val) | |
316 | { | |
317 | /* restore estimate value from timeout, the inverse of at_est2timeout(): e=4/5(t-5), i.e. 4t/5 - 4; the max() against 5U keeps the result at 1 or more */ | |
318 | LASSERT(val); /* a zero timeout is not accepted */ | |
319 | return (max((val << 2) / 5, 5U) - 4); | |
320 | } | |
321 | ||
db7b4b39 SB |
322 | static inline void at_reset(struct adaptive_timeout *at, int val) |
323 | { /* set both the current and the worst-ever value to val under at_lock; at_hist[], at_binstart and at_flags are not touched */ |
f84d3d47 | 324 | spin_lock(&at->at_lock); |
d7e09d03 PT |
325 | at->at_current = val; |
326 | at->at_worst_ever = val; | |
0ac0478b | 327 | at->at_worst_time = ktime_get_real_seconds(); /* stamp the new worst-ever value with the current real time in seconds */ |
f84d3d47 | 328 | spin_unlock(&at->at_lock); |
d7e09d03 | 329 | } |
c9f6bb96 | 330 | |
db7b4b39 SB |
331 | static inline void at_init(struct adaptive_timeout *at, int val, int flags) |
332 | { /* initialise *at from scratch: everything zeroed, then flags stored and current/worst-ever set to val */ |
d7e09d03 PT |
333 | memset(at, 0, sizeof(*at)); /* clears at_hist[], at_binstart and every other member */ |
334 | spin_lock_init(&at->at_lock); /* must precede at_reset(), which takes this lock */ | |
335 | at->at_flags = flags; | |
336 | at_reset(at, val); | |
337 | } | |
c9f6bb96 | 338 | |
d7e09d03 | 339 | extern unsigned int at_min; /* lower bound applied by at_get() */
db7b4b39 SB |
340 | static inline int at_get(struct adaptive_timeout *at)
341 | { |
d7e09d03 PT |
342 | return (at->at_current > at_min) ? at->at_current : at_min; /* at_current, but never less than at_min; read without taking at_lock */ |
343 | } | |
c9f6bb96 | 344 | |
d7e09d03 PT |
345 | int at_measured(struct adaptive_timeout *at, unsigned int val); |
346 | int import_at_get_index(struct obd_import *imp, int portal); | |
347 | extern unsigned int at_max; | |
348 | #define AT_OFF (at_max == 0) /* true when at_max is 0 */ | |
349 | ||
350 | /* genops.c */ | |
351 | struct obd_export; /* forward declaration; only a pointer to it is used below */ | |
8150a97f | 352 | struct obd_import *class_exp2cliimp(struct obd_export *); |
d7e09d03 PT |
353 | |
354 | /** @} import */ | |
355 | ||
356 | #endif /* __IMPORT_H */ | |
357 | ||
358 | /** @} obd_import */ |