4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 #define DEBUG_SUBSYSTEM S_CLASS
38 #include "../include/obd_support.h"
39 #include "../include/obd.h"
40 #include "../include/lprocfs_status.h"
41 #include "../include/lustre/lustre_idl.h"
42 #include "../include/lustre_net.h"
43 #include "../include/obd_class.h"
44 #include "ptlrpc_internal.h"
46 static struct ll_rpc_opcode
{
49 } ll_rpc_opcode_table
[LUSTRE_MAX_OPCODES
] = {
50 { OST_REPLY
, "ost_reply" },
51 { OST_GETATTR
, "ost_getattr" },
52 { OST_SETATTR
, "ost_setattr" },
53 { OST_READ
, "ost_read" },
54 { OST_WRITE
, "ost_write" },
55 { OST_CREATE
, "ost_create" },
56 { OST_DESTROY
, "ost_destroy" },
57 { OST_GET_INFO
, "ost_get_info" },
58 { OST_CONNECT
, "ost_connect" },
59 { OST_DISCONNECT
, "ost_disconnect" },
60 { OST_PUNCH
, "ost_punch" },
61 { OST_OPEN
, "ost_open" },
62 { OST_CLOSE
, "ost_close" },
63 { OST_STATFS
, "ost_statfs" },
64 { 14, NULL
}, /* formerly OST_SAN_READ */
65 { 15, NULL
}, /* formerly OST_SAN_WRITE */
66 { OST_SYNC
, "ost_sync" },
67 { OST_SET_INFO
, "ost_set_info" },
68 { OST_QUOTACHECK
, "ost_quotacheck" },
69 { OST_QUOTACTL
, "ost_quotactl" },
70 { OST_QUOTA_ADJUST_QUNIT
, "ost_quota_adjust_qunit" },
71 { MDS_GETATTR
, "mds_getattr" },
72 { MDS_GETATTR_NAME
, "mds_getattr_lock" },
73 { MDS_CLOSE
, "mds_close" },
74 { MDS_REINT
, "mds_reint" },
75 { MDS_READPAGE
, "mds_readpage" },
76 { MDS_CONNECT
, "mds_connect" },
77 { MDS_DISCONNECT
, "mds_disconnect" },
78 { MDS_GETSTATUS
, "mds_getstatus" },
79 { MDS_STATFS
, "mds_statfs" },
80 { MDS_PIN
, "mds_pin" },
81 { MDS_UNPIN
, "mds_unpin" },
82 { MDS_SYNC
, "mds_sync" },
83 { MDS_DONE_WRITING
, "mds_done_writing" },
84 { MDS_SET_INFO
, "mds_set_info" },
85 { MDS_QUOTACHECK
, "mds_quotacheck" },
86 { MDS_QUOTACTL
, "mds_quotactl" },
87 { MDS_GETXATTR
, "mds_getxattr" },
88 { MDS_SETXATTR
, "mds_setxattr" },
89 { MDS_WRITEPAGE
, "mds_writepage" },
90 { MDS_IS_SUBDIR
, "mds_is_subdir" },
91 { MDS_GET_INFO
, "mds_get_info" },
92 { MDS_HSM_STATE_GET
, "mds_hsm_state_get" },
93 { MDS_HSM_STATE_SET
, "mds_hsm_state_set" },
94 { MDS_HSM_ACTION
, "mds_hsm_action" },
95 { MDS_HSM_PROGRESS
, "mds_hsm_progress" },
96 { MDS_HSM_REQUEST
, "mds_hsm_request" },
97 { MDS_HSM_CT_REGISTER
, "mds_hsm_ct_register" },
98 { MDS_HSM_CT_UNREGISTER
, "mds_hsm_ct_unregister" },
99 { MDS_SWAP_LAYOUTS
, "mds_swap_layouts" },
100 { LDLM_ENQUEUE
, "ldlm_enqueue" },
101 { LDLM_CONVERT
, "ldlm_convert" },
102 { LDLM_CANCEL
, "ldlm_cancel" },
103 { LDLM_BL_CALLBACK
, "ldlm_bl_callback" },
104 { LDLM_CP_CALLBACK
, "ldlm_cp_callback" },
105 { LDLM_GL_CALLBACK
, "ldlm_gl_callback" },
106 { LDLM_SET_INFO
, "ldlm_set_info" },
107 { MGS_CONNECT
, "mgs_connect" },
108 { MGS_DISCONNECT
, "mgs_disconnect" },
109 { MGS_EXCEPTION
, "mgs_exception" },
110 { MGS_TARGET_REG
, "mgs_target_reg" },
111 { MGS_TARGET_DEL
, "mgs_target_del" },
112 { MGS_SET_INFO
, "mgs_set_info" },
113 { MGS_CONFIG_READ
, "mgs_config_read" },
114 { OBD_PING
, "obd_ping" },
115 { OBD_LOG_CANCEL
, "llog_cancel" },
116 { OBD_QC_CALLBACK
, "obd_quota_callback" },
117 { OBD_IDX_READ
, "dt_index_read" },
118 { LLOG_ORIGIN_HANDLE_CREATE
, "llog_origin_handle_open" },
119 { LLOG_ORIGIN_HANDLE_NEXT_BLOCK
, "llog_origin_handle_next_block" },
120 { LLOG_ORIGIN_HANDLE_READ_HEADER
, "llog_origin_handle_read_header" },
121 { LLOG_ORIGIN_HANDLE_WRITE_REC
, "llog_origin_handle_write_rec" },
122 { LLOG_ORIGIN_HANDLE_CLOSE
, "llog_origin_handle_close" },
123 { LLOG_ORIGIN_CONNECT
, "llog_origin_connect" },
124 { LLOG_CATINFO
, "llog_catinfo" },
125 { LLOG_ORIGIN_HANDLE_PREV_BLOCK
, "llog_origin_handle_prev_block" },
126 { LLOG_ORIGIN_HANDLE_DESTROY
, "llog_origin_handle_destroy" },
127 { QUOTA_DQACQ
, "quota_acquire" },
128 { QUOTA_DQREL
, "quota_release" },
129 { SEQ_QUERY
, "seq_query" },
130 { SEC_CTX_INIT
, "sec_ctx_init" },
131 { SEC_CTX_INIT_CONT
, "sec_ctx_init_cont" },
132 { SEC_CTX_FINI
, "sec_ctx_fini" },
133 { FLD_QUERY
, "fld_query" },
136 static struct ll_eopcode
{
139 } ll_eopcode_table
[EXTRA_LAST_OPC
] = {
140 { LDLM_GLIMPSE_ENQUEUE
, "ldlm_glimpse_enqueue" },
141 { LDLM_PLAIN_ENQUEUE
, "ldlm_plain_enqueue" },
142 { LDLM_EXTENT_ENQUEUE
, "ldlm_extent_enqueue" },
143 { LDLM_FLOCK_ENQUEUE
, "ldlm_flock_enqueue" },
144 { LDLM_IBITS_ENQUEUE
, "ldlm_ibits_enqueue" },
145 { MDS_REINT_SETATTR
, "mds_reint_setattr" },
146 { MDS_REINT_CREATE
, "mds_reint_create" },
147 { MDS_REINT_LINK
, "mds_reint_link" },
148 { MDS_REINT_UNLINK
, "mds_reint_unlink" },
149 { MDS_REINT_RENAME
, "mds_reint_rename" },
150 { MDS_REINT_OPEN
, "mds_reint_open" },
151 { MDS_REINT_SETXATTR
, "mds_reint_setxattr" },
152 { BRW_READ_BYTES
, "read_bytes" },
153 { BRW_WRITE_BYTES
, "write_bytes" },
156 const char *ll_opcode2str(__u32 opcode
)
158 /* When one of the assertions below fail, chances are that:
159 * 1) A new opcode was added in include/lustre/lustre_idl.h,
160 * but is missing from the table above.
161 * or 2) The opcode space was renumbered or rearranged,
162 * and the opcode_offset() function in
163 * ptlrpc_internal.h needs to be modified.
165 __u32 offset
= opcode_offset(opcode
);
167 LASSERTF(offset
< LUSTRE_MAX_OPCODES
,
168 "offset %u >= LUSTRE_MAX_OPCODES %u\n",
169 offset
, LUSTRE_MAX_OPCODES
);
170 LASSERTF(ll_rpc_opcode_table
[offset
].opcode
== opcode
,
171 "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
172 offset
, ll_rpc_opcode_table
[offset
].opcode
, opcode
);
173 return ll_rpc_opcode_table
[offset
].opname
;
176 static const char *ll_eopcode2str(__u32 opcode
)
178 LASSERT(ll_eopcode_table
[opcode
].opcode
== opcode
);
179 return ll_eopcode_table
[opcode
].opname
;
183 ptlrpc_ldebugfs_register(struct dentry
*root
, char *dir
,
185 struct dentry
**debugfs_root_ret
,
186 struct lprocfs_stats
**stats_ret
)
188 struct dentry
*svc_debugfs_entry
;
189 struct lprocfs_stats
*svc_stats
;
191 unsigned int svc_counter_config
= LPROCFS_CNTR_AVGMINMAX
|
194 LASSERT(!*debugfs_root_ret
);
195 LASSERT(!*stats_ret
);
197 svc_stats
= lprocfs_alloc_stats(EXTRA_MAX_OPCODES
+LUSTRE_MAX_OPCODES
,
203 svc_debugfs_entry
= ldebugfs_register(dir
, root
, NULL
, NULL
);
204 if (IS_ERR(svc_debugfs_entry
)) {
205 lprocfs_free_stats(&svc_stats
);
209 svc_debugfs_entry
= root
;
212 lprocfs_counter_init(svc_stats
, PTLRPC_REQWAIT_CNTR
,
213 svc_counter_config
, "req_waittime", "usec");
214 lprocfs_counter_init(svc_stats
, PTLRPC_REQQDEPTH_CNTR
,
215 svc_counter_config
, "req_qdepth", "reqs");
216 lprocfs_counter_init(svc_stats
, PTLRPC_REQACTIVE_CNTR
,
217 svc_counter_config
, "req_active", "reqs");
218 lprocfs_counter_init(svc_stats
, PTLRPC_TIMEOUT
,
219 svc_counter_config
, "req_timeout", "sec");
220 lprocfs_counter_init(svc_stats
, PTLRPC_REQBUF_AVAIL_CNTR
,
221 svc_counter_config
, "reqbuf_avail", "bufs");
222 for (i
= 0; i
< EXTRA_LAST_OPC
; i
++) {
226 case BRW_WRITE_BYTES
:
234 lprocfs_counter_init(svc_stats
, PTLRPC_LAST_CNTR
+ i
,
236 ll_eopcode2str(i
), units
);
238 for (i
= 0; i
< LUSTRE_MAX_OPCODES
; i
++) {
239 __u32 opcode
= ll_rpc_opcode_table
[i
].opcode
;
241 lprocfs_counter_init(svc_stats
,
242 EXTRA_MAX_OPCODES
+ i
, svc_counter_config
,
243 ll_opcode2str(opcode
), "usec");
246 rc
= ldebugfs_register_stats(svc_debugfs_entry
, name
, svc_stats
);
249 ldebugfs_remove(&svc_debugfs_entry
);
250 lprocfs_free_stats(&svc_stats
);
253 *debugfs_root_ret
= svc_debugfs_entry
;
254 *stats_ret
= svc_stats
;
259 ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file
*m
, void *v
)
261 struct ptlrpc_service
*svc
= m
->private;
262 struct ptlrpc_service_part
*svcpt
;
266 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
267 total
+= svcpt
->scp_hist_nrqbds
;
269 seq_printf(m
, "%d\n", total
);
273 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len
);
276 ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file
*m
, void *n
)
278 struct ptlrpc_service
*svc
= m
->private;
279 struct ptlrpc_service_part
*svcpt
;
283 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
284 total
+= svc
->srv_hist_nrqbds_cpt_max
;
286 seq_printf(m
, "%d\n", total
);
291 ptlrpc_lprocfs_req_history_max_seq_write(struct file
*file
,
292 const char __user
*buffer
,
293 size_t count
, loff_t
*off
)
295 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
300 rc
= lprocfs_write_helper(buffer
, count
, &val
);
307 /* This sanity check is more of an insanity check; we can still
308 * hose a kernel by allowing the request history to grow too
311 bufpages
= (svc
->srv_buf_size
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
;
312 if (val
> totalram_pages
/ (2 * bufpages
))
315 spin_lock(&svc
->srv_lock
);
318 svc
->srv_hist_nrqbds_cpt_max
= 0;
320 svc
->srv_hist_nrqbds_cpt_max
= max(1, (val
/ svc
->srv_ncpts
));
322 spin_unlock(&svc
->srv_lock
);
327 LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max
);
329 static ssize_t
threads_min_show(struct kobject
*kobj
, struct attribute
*attr
,
332 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
335 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
);
338 static ssize_t
threads_min_store(struct kobject
*kobj
, struct attribute
*attr
,
339 const char *buffer
, size_t count
)
341 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
344 int rc
= kstrtoul(buffer
, 10, &val
);
349 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
352 spin_lock(&svc
->srv_lock
);
353 if (val
> svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
) {
354 spin_unlock(&svc
->srv_lock
);
358 svc
->srv_nthrs_cpt_init
= val
/ svc
->srv_ncpts
;
360 spin_unlock(&svc
->srv_lock
);
364 LUSTRE_RW_ATTR(threads_min
);
366 static ssize_t
threads_started_show(struct kobject
*kobj
,
367 struct attribute
*attr
,
370 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
372 struct ptlrpc_service_part
*svcpt
;
376 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
377 total
+= svcpt
->scp_nthrs_running
;
379 return sprintf(buf
, "%d\n", total
);
381 LUSTRE_RO_ATTR(threads_started
);
383 static ssize_t
threads_max_show(struct kobject
*kobj
, struct attribute
*attr
,
386 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
389 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
);
392 static ssize_t
threads_max_store(struct kobject
*kobj
, struct attribute
*attr
,
393 const char *buffer
, size_t count
)
395 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
398 int rc
= kstrtoul(buffer
, 10, &val
);
403 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
406 spin_lock(&svc
->srv_lock
);
407 if (val
< svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
) {
408 spin_unlock(&svc
->srv_lock
);
412 svc
->srv_nthrs_cpt_limit
= val
/ svc
->srv_ncpts
;
414 spin_unlock(&svc
->srv_lock
);
418 LUSTRE_RW_ATTR(threads_max
);
426 * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
428 * \param[in] state The policy state
430 static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state
)
435 case NRS_POL_STATE_INVALID
:
437 case NRS_POL_STATE_STOPPED
:
439 case NRS_POL_STATE_STOPPING
:
441 case NRS_POL_STATE_STARTING
:
443 case NRS_POL_STATE_STARTED
:
449 * Obtains status information for \a policy.
451 * Information is copied in \a info.
453 * \param[in] policy The policy
454 * \param[out] info Holds returned status information
456 static void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy
*policy
,
457 struct ptlrpc_nrs_pol_info
*info
)
459 assert_spin_locked(&policy
->pol_nrs
->nrs_lock
);
461 memcpy(info
->pi_name
, policy
->pol_desc
->pd_name
, NRS_POL_NAME_MAX
);
463 info
->pi_fallback
= !!(policy
->pol_flags
& PTLRPC_NRS_FL_FALLBACK
);
464 info
->pi_state
= policy
->pol_state
;
466 * XXX: These are accessed without holding
467 * ptlrpc_service_part::scp_req_lock.
469 info
->pi_req_queued
= policy
->pol_req_queued
;
470 info
->pi_req_started
= policy
->pol_req_started
;
474 * Reads and prints policy status information for all policies of a PTLRPC
477 static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file
*m
, void *n
)
479 struct ptlrpc_service
*svc
= m
->private;
480 struct ptlrpc_service_part
*svcpt
;
481 struct ptlrpc_nrs
*nrs
;
482 struct ptlrpc_nrs_policy
*policy
;
483 struct ptlrpc_nrs_pol_info
*infos
;
484 struct ptlrpc_nrs_pol_info tmp
;
486 unsigned pol_idx
= 0;
492 * Serialize NRS core lprocfs operations with policy registration/
495 mutex_lock(&nrs_core
.nrs_mutex
);
498 * Use the first service partition's regular NRS head in order to obtain
499 * the number of policies registered with NRS heads of this service. All
500 * service partitions will have the same number of policies.
502 nrs
= nrs_svcpt2nrs(svc
->srv_parts
[0], false);
504 spin_lock(&nrs
->nrs_lock
);
505 num_pols
= svc
->srv_parts
[0]->scp_nrs_reg
.nrs_num_pols
;
506 spin_unlock(&nrs
->nrs_lock
);
508 infos
= kcalloc(num_pols
, sizeof(*infos
), GFP_NOFS
);
515 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
516 nrs
= nrs_svcpt2nrs(svcpt
, hp
);
517 spin_lock(&nrs
->nrs_lock
);
521 list_for_each_entry(policy
, &nrs
->nrs_policy_list
, pol_list
) {
522 LASSERT(pol_idx
< num_pols
);
524 nrs_policy_get_info_locked(policy
, &tmp
);
526 * Copy values when handling the first service
530 memcpy(infos
[pol_idx
].pi_name
, tmp
.pi_name
,
532 memcpy(&infos
[pol_idx
].pi_state
, &tmp
.pi_state
,
533 sizeof(tmp
.pi_state
));
534 infos
[pol_idx
].pi_fallback
= tmp
.pi_fallback
;
536 * For the rest of the service partitions
537 * sanity-check the values we get.
540 LASSERT(strncmp(infos
[pol_idx
].pi_name
,
542 NRS_POL_NAME_MAX
) == 0);
544 * Not asserting ptlrpc_nrs_pol_info::pi_state,
545 * because it may be different between
546 * instances of the same policy in different
547 * service partitions.
549 LASSERT(infos
[pol_idx
].pi_fallback
==
553 infos
[pol_idx
].pi_req_queued
+= tmp
.pi_req_queued
;
554 infos
[pol_idx
].pi_req_started
+= tmp
.pi_req_started
;
558 spin_unlock(&nrs
->nrs_lock
);
562 * Policy status information output is in YAML format.
578 * high_priority_requests:
591 seq_printf(m
, "%s\n",
592 !hp
? "\nregular_requests:" : "high_priority_requests:");
594 for (pol_idx
= 0; pol_idx
< num_pols
; pol_idx
++) {
595 seq_printf(m
, " - name: %s\n"
599 " active: %-20d\n\n",
600 infos
[pol_idx
].pi_name
,
601 nrs_state2str(infos
[pol_idx
].pi_state
),
602 infos
[pol_idx
].pi_fallback
? "yes" : "no",
603 (int)infos
[pol_idx
].pi_req_queued
,
604 (int)infos
[pol_idx
].pi_req_started
);
607 if (!hp
&& nrs_svc_has_hp(svc
)) {
608 memset(infos
, 0, num_pols
* sizeof(*infos
));
611 * Redo the processing for the service's HP NRS heads' policies.
619 mutex_unlock(&nrs_core
.nrs_mutex
);
625 * The longest valid command string is the maximum policy name size, plus the
626 * length of the " reg" substring
628 #define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1)
631 * Starts and stops a given policy on a PTLRPC service.
633 * Commands consist of the policy name, followed by an optional [reg|hp] token;
634 * if the optional token is omitted, the operation is performed on both the
635 * regular and high-priority (if the service has one) NRS head.
637 static ssize_t
ptlrpc_lprocfs_nrs_seq_write(struct file
*file
,
638 const char __user
*buffer
,
639 size_t count
, loff_t
*off
)
641 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
642 enum ptlrpc_nrs_queue_type queue
= PTLRPC_NRS_QUEUE_BOTH
;
644 char *cmd_copy
= NULL
;
648 if (count
>= LPROCFS_NRS_WR_MAX_CMD
)
651 cmd
= kzalloc(LPROCFS_NRS_WR_MAX_CMD
, GFP_NOFS
);
655 * strsep() modifies its argument, so keep a copy
659 if (copy_from_user(cmd
, buffer
, count
)) {
666 token
= strsep(&cmd
, " ");
668 if (strlen(token
) > NRS_POL_NAME_MAX
- 1) {
674 * No [reg|hp] token has been specified
680 * The second token is either NULL, or an optional [reg|hp] string
682 if (strcmp(cmd
, "reg") == 0)
683 queue
= PTLRPC_NRS_QUEUE_REG
;
684 else if (strcmp(cmd
, "hp") == 0)
685 queue
= PTLRPC_NRS_QUEUE_HP
;
693 if (queue
== PTLRPC_NRS_QUEUE_HP
&& !nrs_svc_has_hp(svc
)) {
696 } else if (queue
== PTLRPC_NRS_QUEUE_BOTH
&& !nrs_svc_has_hp(svc
))
697 queue
= PTLRPC_NRS_QUEUE_REG
;
700 * Serialize NRS core lprocfs operations with policy registration/
703 mutex_lock(&nrs_core
.nrs_mutex
);
705 rc
= ptlrpc_nrs_policy_control(svc
, queue
, token
, PTLRPC_NRS_CTL_START
,
708 mutex_unlock(&nrs_core
.nrs_mutex
);
712 return rc
< 0 ? rc
: count
;
715 LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs
);
719 struct ptlrpc_srh_iterator
{
722 struct ptlrpc_request
*srhi_req
;
726 ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part
*svcpt
,
727 struct ptlrpc_srh_iterator
*srhi
,
731 struct ptlrpc_request
*req
;
733 if (srhi
->srhi_req
&& srhi
->srhi_seq
> svcpt
->scp_hist_seq_culled
&&
734 srhi
->srhi_seq
<= seq
) {
735 /* If srhi_req was set previously, hasn't been culled and
736 * we're searching for a seq on or after it (i.e. more
737 * recent), search from it onwards.
738 * Since the service history is LRU (i.e. culled reqs will
739 * be near the head), we shouldn't have to do long
742 LASSERTF(srhi
->srhi_seq
== srhi
->srhi_req
->rq_history_seq
,
743 "%s:%d: seek seq %llu, request seq %llu\n",
744 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
745 srhi
->srhi_seq
, srhi
->srhi_req
->rq_history_seq
);
746 LASSERTF(!list_empty(&svcpt
->scp_hist_reqs
),
747 "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n",
748 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
749 seq
, srhi
->srhi_seq
, svcpt
->scp_hist_seq_culled
);
750 e
= &srhi
->srhi_req
->rq_history_list
;
752 /* search from start */
753 e
= svcpt
->scp_hist_reqs
.next
;
756 while (e
!= &svcpt
->scp_hist_reqs
) {
757 req
= list_entry(e
, struct ptlrpc_request
, rq_history_list
);
759 if (req
->rq_history_seq
>= seq
) {
760 srhi
->srhi_seq
= req
->rq_history_seq
;
761 srhi
->srhi_req
= req
;
771 * ptlrpc history sequence is used as "position" of seq_file, in some case,
772 * seq_read() will increase "position" to indicate reading the next
773 * element, however, low bits of history sequence are reserved for CPT id
774 * (check the details from comments before ptlrpc_req_add_history), which
775 * means seq_read() might change CPT id of history sequence and never
776 * finish reading of requests on a CPT. To make it work, we have to shift
777 * CPT id to high bits and timestamp to low bits, so seq_read() will only
778 * increase timestamp which can correctly indicate the next position.
/*
 * Conversions between a history sequence number and a seq_file position.
 *
 * A position is the sequence rotated right by srv_cpt_bits, so that the
 * CPT id sits in the top srv_cpt_bits bits.  When srv_cpt_bits is 0 the
 * two representations are identical and the CPT is always 0.
 */

/* convert seq_file pos to cpt */
#define PTLRPC_REQ_POS2CPT(svc, pos)			\
	((svc)->srv_cpt_bits == 0 ? 0 :			\
	 (__u64)(pos) >> (64 - (svc)->srv_cpt_bits))

/*
 * make up seq_file pos from cpt
 *
 * The argument is widened to 64 bits before shifting: the shift count
 * (64 - srv_cpt_bits) is at least the width of a 32-bit int, so shifting
 * an int argument directly (such as a loop index) would be undefined
 * behaviour and would not place the CPT id in the top bits.
 */
#define PTLRPC_REQ_CPT2POS(svc, cpt)			\
	((svc)->srv_cpt_bits == 0 ? 0 :			\
	 (__u64)(cpt) << (64 - (svc)->srv_cpt_bits))

/* convert sequence to position */
#define PTLRPC_REQ_SEQ2POS(svc, seq)			\
	((svc)->srv_cpt_bits == 0 ? (seq) :		\
	 ((seq) >> (svc)->srv_cpt_bits) |		\
	 ((seq) << (64 - (svc)->srv_cpt_bits)))

/* convert position to sequence */
#define PTLRPC_REQ_POS2SEQ(svc, pos)			\
	((svc)->srv_cpt_bits == 0 ? (pos) :		\
	 ((__u64)(pos) << (svc)->srv_cpt_bits) |	\
	 ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)))
804 ptlrpc_lprocfs_svc_req_history_start(struct seq_file
*s
, loff_t
*pos
)
806 struct ptlrpc_service
*svc
= s
->private;
807 struct ptlrpc_service_part
*svcpt
;
808 struct ptlrpc_srh_iterator
*srhi
;
813 if (sizeof(loff_t
) != sizeof(__u64
)) { /* can't support */
814 CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n",
815 (int)sizeof(loff_t
));
819 srhi
= kzalloc(sizeof(*srhi
), GFP_NOFS
);
824 srhi
->srhi_req
= NULL
;
826 cpt
= PTLRPC_REQ_POS2CPT(svc
, *pos
);
828 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
829 if (i
< cpt
) /* skip */
831 if (i
> cpt
) /* make up the lowest position for this CPT */
832 *pos
= PTLRPC_REQ_CPT2POS(svc
, i
);
834 spin_lock(&svcpt
->scp_lock
);
835 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
,
836 PTLRPC_REQ_POS2SEQ(svc
, *pos
));
837 spin_unlock(&svcpt
->scp_lock
);
839 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
850 ptlrpc_lprocfs_svc_req_history_stop(struct seq_file
*s
, void *iter
)
852 struct ptlrpc_srh_iterator
*srhi
= iter
;
858 ptlrpc_lprocfs_svc_req_history_next(struct seq_file
*s
,
859 void *iter
, loff_t
*pos
)
861 struct ptlrpc_service
*svc
= s
->private;
862 struct ptlrpc_srh_iterator
*srhi
= iter
;
863 struct ptlrpc_service_part
*svcpt
;
868 for (i
= srhi
->srhi_idx
; i
< svc
->srv_ncpts
; i
++) {
869 svcpt
= svc
->srv_parts
[i
];
871 if (i
> srhi
->srhi_idx
) { /* reset iterator for a new CPT */
872 srhi
->srhi_req
= NULL
;
873 seq
= srhi
->srhi_seq
= 0;
874 } else { /* the next sequence */
875 seq
= srhi
->srhi_seq
+ (1 << svc
->srv_cpt_bits
);
878 spin_lock(&svcpt
->scp_lock
);
879 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, seq
);
880 spin_unlock(&svcpt
->scp_lock
);
882 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
892 static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file
*s
, void *iter
)
894 struct ptlrpc_service
*svc
= s
->private;
895 struct ptlrpc_srh_iterator
*srhi
= iter
;
896 struct ptlrpc_service_part
*svcpt
;
897 struct ptlrpc_request
*req
;
900 LASSERT(srhi
->srhi_idx
< svc
->srv_ncpts
);
902 svcpt
= svc
->srv_parts
[srhi
->srhi_idx
];
904 spin_lock(&svcpt
->scp_lock
);
906 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, srhi
->srhi_seq
);
909 char nidstr
[LNET_NIDSTR_SIZE
];
911 req
= srhi
->srhi_req
;
913 libcfs_nid2str_r(req
->rq_self
, nidstr
, sizeof(nidstr
));
914 /* Print common req fields.
915 * CAVEAT EMPTOR: we're racing with the service handler
916 * here. The request could contain any old crap, so you
917 * must be just as careful as the service's request
918 * parser. Currently I only print stuff here I know is OK
919 * to look at coz it was set up in request_in_callback()!!!
921 seq_printf(s
, "%lld:%s:%s:x%llu:%d:%s:%lld:%lds(%+lds) ",
922 req
->rq_history_seq
, nidstr
,
923 libcfs_id2str(req
->rq_peer
), req
->rq_xid
,
924 req
->rq_reqlen
, ptlrpc_rqphase2str(req
),
925 (s64
)req
->rq_arrival_time
.tv_sec
,
926 (long)(req
->rq_sent
- req
->rq_arrival_time
.tv_sec
),
927 (long)(req
->rq_sent
- req
->rq_deadline
));
928 if (!svc
->srv_ops
.so_req_printer
)
931 svc
->srv_ops
.so_req_printer(s
, srhi
->srhi_req
);
934 spin_unlock(&svcpt
->scp_lock
);
939 ptlrpc_lprocfs_svc_req_history_open(struct inode
*inode
, struct file
*file
)
941 static struct seq_operations sops
= {
942 .start
= ptlrpc_lprocfs_svc_req_history_start
,
943 .stop
= ptlrpc_lprocfs_svc_req_history_stop
,
944 .next
= ptlrpc_lprocfs_svc_req_history_next
,
945 .show
= ptlrpc_lprocfs_svc_req_history_show
,
947 struct seq_file
*seqf
;
950 rc
= seq_open(file
, &sops
);
954 seqf
= file
->private_data
;
955 seqf
->private = inode
->i_private
;
959 /* See also lprocfs_rd_timeouts */
960 static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file
*m
, void *n
)
962 struct ptlrpc_service
*svc
= m
->private;
963 struct ptlrpc_service_part
*svcpt
;
971 seq_printf(m
, "adaptive timeouts off, using obd_timeout %u\n",
976 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
977 cur
= at_get(&svcpt
->scp_at_estimate
);
978 worst
= svcpt
->scp_at_estimate
.at_worst_ever
;
979 worstt
= svcpt
->scp_at_estimate
.at_worst_time
;
980 s2dhms(&ts
, ktime_get_real_seconds() - worstt
);
982 seq_printf(m
, "%10s : cur %3u worst %3u (at %lld, "
983 DHMS_FMT
" ago) ", "service",
984 cur
, worst
, (s64
)worstt
, DHMS_VARS(&ts
));
986 lprocfs_at_hist_helper(m
, &svcpt
->scp_at_estimate
);
992 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts
);
994 static ssize_t
high_priority_ratio_show(struct kobject
*kobj
,
995 struct attribute
*attr
,
998 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1000 return sprintf(buf
, "%d\n", svc
->srv_hpreq_ratio
);
1003 static ssize_t
high_priority_ratio_store(struct kobject
*kobj
,
1004 struct attribute
*attr
,
1008 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1013 rc
= kstrtoint(buffer
, 10, &val
);
1020 spin_lock(&svc
->srv_lock
);
1021 svc
->srv_hpreq_ratio
= val
;
1022 spin_unlock(&svc
->srv_lock
);
1026 LUSTRE_RW_ATTR(high_priority_ratio
);
1028 static struct attribute
*ptlrpc_svc_attrs
[] = {
1029 &lustre_attr_threads_min
.attr
,
1030 &lustre_attr_threads_started
.attr
,
1031 &lustre_attr_threads_max
.attr
,
1032 &lustre_attr_high_priority_ratio
.attr
,
1036 static void ptlrpc_sysfs_svc_release(struct kobject
*kobj
)
1038 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1041 complete(&svc
->srv_kobj_unregister
);
1044 static struct kobj_type ptlrpc_svc_ktype
= {
1045 .default_attrs
= ptlrpc_svc_attrs
,
1046 .sysfs_ops
= &lustre_sysfs_ops
,
1047 .release
= ptlrpc_sysfs_svc_release
,
1050 void ptlrpc_sysfs_unregister_service(struct ptlrpc_service
*svc
)
1052 /* Let's see if we had a chance at initialization first */
1053 if (svc
->srv_kobj
.kset
) {
1054 kobject_put(&svc
->srv_kobj
);
1055 wait_for_completion(&svc
->srv_kobj_unregister
);
1059 int ptlrpc_sysfs_register_service(struct kset
*parent
,
1060 struct ptlrpc_service
*svc
)
1064 svc
->srv_kobj
.kset
= parent
;
1065 init_completion(&svc
->srv_kobj_unregister
);
1066 rc
= kobject_init_and_add(&svc
->srv_kobj
, &ptlrpc_svc_ktype
, NULL
,
1067 "%s", svc
->srv_name
);
1072 void ptlrpc_ldebugfs_register_service(struct dentry
*entry
,
1073 struct ptlrpc_service
*svc
)
1075 struct lprocfs_vars lproc_vars
[] = {
1076 {.name
= "req_buffer_history_len",
1077 .fops
= &ptlrpc_lprocfs_req_history_len_fops
,
1079 {.name
= "req_buffer_history_max",
1080 .fops
= &ptlrpc_lprocfs_req_history_max_fops
,
1082 {.name
= "timeouts",
1083 .fops
= &ptlrpc_lprocfs_timeouts_fops
,
1085 {.name
= "nrs_policies",
1086 .fops
= &ptlrpc_lprocfs_nrs_fops
,
1090 static const struct file_operations req_history_fops
= {
1091 .owner
= THIS_MODULE
,
1092 .open
= ptlrpc_lprocfs_svc_req_history_open
,
1094 .llseek
= seq_lseek
,
1095 .release
= lprocfs_seq_release
,
1100 ptlrpc_ldebugfs_register(entry
, svc
->srv_name
,
1101 "stats", &svc
->srv_debugfs_entry
,
1104 if (IS_ERR_OR_NULL(svc
->srv_debugfs_entry
))
1107 ldebugfs_add_vars(svc
->srv_debugfs_entry
, lproc_vars
, NULL
);
1109 rc
= ldebugfs_seq_create(svc
->srv_debugfs_entry
, "req_history",
1110 0400, &req_history_fops
, svc
);
1112 CWARN("Error adding the req_history file\n");
1115 void ptlrpc_lprocfs_register_obd(struct obd_device
*obddev
)
1117 ptlrpc_ldebugfs_register(obddev
->obd_debugfs_entry
, NULL
, "stats",
1118 &obddev
->obd_svc_debugfs_entry
,
1119 &obddev
->obd_svc_stats
);
1121 EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd
);
1123 void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request
*req
, long amount
)
1125 struct lprocfs_stats
*svc_stats
;
1126 __u32 op
= lustre_msg_get_opc(req
->rq_reqmsg
);
1127 int opc
= opcode_offset(op
);
1129 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1130 if (!svc_stats
|| opc
<= 0)
1132 LASSERT(opc
< LUSTRE_MAX_OPCODES
);
1133 if (!(op
== LDLM_ENQUEUE
|| op
== MDS_REINT
))
1134 lprocfs_counter_add(svc_stats
, opc
+ EXTRA_MAX_OPCODES
, amount
);
1137 void ptlrpc_lprocfs_brw(struct ptlrpc_request
*req
, int bytes
)
1139 struct lprocfs_stats
*svc_stats
;
1142 if (!req
->rq_import
)
1144 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1147 idx
= lustre_msg_get_opc(req
->rq_reqmsg
);
1150 idx
= BRW_READ_BYTES
+ PTLRPC_LAST_CNTR
;
1153 idx
= BRW_WRITE_BYTES
+ PTLRPC_LAST_CNTR
;
1156 LASSERTF(0, "unsupported opcode %u\n", idx
);
1160 lprocfs_counter_add(svc_stats
, idx
, bytes
);
1163 EXPORT_SYMBOL(ptlrpc_lprocfs_brw
);
1165 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service
*svc
)
1167 if (!IS_ERR_OR_NULL(svc
->srv_debugfs_entry
))
1168 ldebugfs_remove(&svc
->srv_debugfs_entry
);
1171 lprocfs_free_stats(&svc
->srv_stats
);
1174 void ptlrpc_lprocfs_unregister_obd(struct obd_device
*obd
)
1176 if (!IS_ERR_OR_NULL(obd
->obd_svc_debugfs_entry
))
1177 ldebugfs_remove(&obd
->obd_svc_debugfs_entry
);
1179 if (obd
->obd_svc_stats
)
1180 lprocfs_free_stats(&obd
->obd_svc_stats
);
1182 EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd
);
1186 int lprocfs_wr_ping(struct file
*file
, const char __user
*buffer
,
1187 size_t count
, loff_t
*off
)
1189 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1190 struct ptlrpc_request
*req
;
1193 rc
= lprocfs_climp_check(obd
);
1197 req
= ptlrpc_prep_ping(obd
->u
.cli
.cl_import
);
1198 up_read(&obd
->u
.cli
.cl_sem
);
1202 req
->rq_send_state
= LUSTRE_IMP_FULL
;
1204 rc
= ptlrpc_queue_wait(req
);
1206 ptlrpc_req_finished(req
);
1211 EXPORT_SYMBOL(lprocfs_wr_ping
);
1213 /* Write the connection UUID to this file to attempt to connect to that node.
1214 * The connection UUID is a node's primary NID. For example,
1215 * "echo connection=192.168.0.1@tcp0::instance > .../import".
1217 int lprocfs_wr_import(struct file
*file
, const char __user
*buffer
,
1218 size_t count
, loff_t
*off
)
1220 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1221 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1226 const char prefix
[] = "connection=";
1227 const int prefix_len
= sizeof(prefix
) - 1;
1229 if (count
> PAGE_SIZE
- 1 || count
<= prefix_len
)
1232 kbuf
= kzalloc(count
+ 1, GFP_NOFS
);
1236 if (copy_from_user(kbuf
, buffer
, count
)) {
1243 /* only support connection=uuid::instance now */
1244 if (strncmp(prefix
, kbuf
, prefix_len
) != 0) {
1249 uuid
= kbuf
+ prefix_len
;
1250 ptr
= strstr(uuid
, "::");
1257 ptr
+= strlen("::");
1258 inst
= simple_strtoul(ptr
, &endptr
, 10);
1260 CERROR("config: wrong instance # %s\n", ptr
);
1261 } else if (inst
!= imp
->imp_connect_data
.ocd_instance
) {
1262 CDEBUG(D_INFO
, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n",
1263 imp
->imp_obd
->obd_name
,
1264 imp
->imp_connect_data
.ocd_instance
, inst
);
1267 CDEBUG(D_INFO
, "IR: %s has already been connecting to new target(%u)\n",
1268 imp
->imp_obd
->obd_name
, inst
);
1273 ptlrpc_recover_import(imp
, uuid
, 1);
1279 EXPORT_SYMBOL(lprocfs_wr_import
);
1281 int lprocfs_rd_pinger_recov(struct seq_file
*m
, void *n
)
1283 struct obd_device
*obd
= m
->private;
1284 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1287 rc
= lprocfs_climp_check(obd
);
1291 seq_printf(m
, "%d\n", !imp
->imp_no_pinger_recover
);
1292 up_read(&obd
->u
.cli
.cl_sem
);
1296 EXPORT_SYMBOL(lprocfs_rd_pinger_recov
);
1298 int lprocfs_wr_pinger_recov(struct file
*file
, const char __user
*buffer
,
1299 size_t count
, loff_t
*off
)
1301 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1302 struct client_obd
*cli
= &obd
->u
.cli
;
1303 struct obd_import
*imp
= cli
->cl_import
;
1306 rc
= lprocfs_write_helper(buffer
, count
, &val
);
1310 if (val
!= 0 && val
!= 1)
1313 rc
= lprocfs_climp_check(obd
);
1317 spin_lock(&imp
->imp_lock
);
1318 imp
->imp_no_pinger_recover
= !val
;
1319 spin_unlock(&imp
->imp_lock
);
1320 up_read(&obd
->u
.cli
.cl_sem
);
1325 EXPORT_SYMBOL(lprocfs_wr_pinger_recov
);