4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 #define DEBUG_SUBSYSTEM S_CLASS
34 #include "../include/obd_support.h"
35 #include "../include/obd.h"
36 #include "../include/lprocfs_status.h"
37 #include "../include/lustre/lustre_idl.h"
38 #include "../include/lustre_net.h"
39 #include "../include/obd_class.h"
40 #include "ptlrpc_internal.h"
42 static struct ll_rpc_opcode
{
45 } ll_rpc_opcode_table
[LUSTRE_MAX_OPCODES
] = {
46 { OST_REPLY
, "ost_reply" },
47 { OST_GETATTR
, "ost_getattr" },
48 { OST_SETATTR
, "ost_setattr" },
49 { OST_READ
, "ost_read" },
50 { OST_WRITE
, "ost_write" },
51 { OST_CREATE
, "ost_create" },
52 { OST_DESTROY
, "ost_destroy" },
53 { OST_GET_INFO
, "ost_get_info" },
54 { OST_CONNECT
, "ost_connect" },
55 { OST_DISCONNECT
, "ost_disconnect" },
56 { OST_PUNCH
, "ost_punch" },
57 { OST_OPEN
, "ost_open" },
58 { OST_CLOSE
, "ost_close" },
59 { OST_STATFS
, "ost_statfs" },
60 { 14, NULL
}, /* formerly OST_SAN_READ */
61 { 15, NULL
}, /* formerly OST_SAN_WRITE */
62 { OST_SYNC
, "ost_sync" },
63 { OST_SET_INFO
, "ost_set_info" },
64 { OST_QUOTACHECK
, "ost_quotacheck" },
65 { OST_QUOTACTL
, "ost_quotactl" },
66 { OST_QUOTA_ADJUST_QUNIT
, "ost_quota_adjust_qunit" },
67 { MDS_GETATTR
, "mds_getattr" },
68 { MDS_GETATTR_NAME
, "mds_getattr_lock" },
69 { MDS_CLOSE
, "mds_close" },
70 { MDS_REINT
, "mds_reint" },
71 { MDS_READPAGE
, "mds_readpage" },
72 { MDS_CONNECT
, "mds_connect" },
73 { MDS_DISCONNECT
, "mds_disconnect" },
74 { MDS_GETSTATUS
, "mds_getstatus" },
75 { MDS_STATFS
, "mds_statfs" },
76 { MDS_PIN
, "mds_pin" },
77 { MDS_UNPIN
, "mds_unpin" },
78 { MDS_SYNC
, "mds_sync" },
79 { MDS_DONE_WRITING
, "mds_done_writing" },
80 { MDS_SET_INFO
, "mds_set_info" },
81 { MDS_QUOTACHECK
, "mds_quotacheck" },
82 { MDS_QUOTACTL
, "mds_quotactl" },
83 { MDS_GETXATTR
, "mds_getxattr" },
84 { MDS_SETXATTR
, "mds_setxattr" },
85 { MDS_WRITEPAGE
, "mds_writepage" },
86 { MDS_IS_SUBDIR
, "mds_is_subdir" },
87 { MDS_GET_INFO
, "mds_get_info" },
88 { MDS_HSM_STATE_GET
, "mds_hsm_state_get" },
89 { MDS_HSM_STATE_SET
, "mds_hsm_state_set" },
90 { MDS_HSM_ACTION
, "mds_hsm_action" },
91 { MDS_HSM_PROGRESS
, "mds_hsm_progress" },
92 { MDS_HSM_REQUEST
, "mds_hsm_request" },
93 { MDS_HSM_CT_REGISTER
, "mds_hsm_ct_register" },
94 { MDS_HSM_CT_UNREGISTER
, "mds_hsm_ct_unregister" },
95 { MDS_SWAP_LAYOUTS
, "mds_swap_layouts" },
96 { LDLM_ENQUEUE
, "ldlm_enqueue" },
97 { LDLM_CONVERT
, "ldlm_convert" },
98 { LDLM_CANCEL
, "ldlm_cancel" },
99 { LDLM_BL_CALLBACK
, "ldlm_bl_callback" },
100 { LDLM_CP_CALLBACK
, "ldlm_cp_callback" },
101 { LDLM_GL_CALLBACK
, "ldlm_gl_callback" },
102 { LDLM_SET_INFO
, "ldlm_set_info" },
103 { MGS_CONNECT
, "mgs_connect" },
104 { MGS_DISCONNECT
, "mgs_disconnect" },
105 { MGS_EXCEPTION
, "mgs_exception" },
106 { MGS_TARGET_REG
, "mgs_target_reg" },
107 { MGS_TARGET_DEL
, "mgs_target_del" },
108 { MGS_SET_INFO
, "mgs_set_info" },
109 { MGS_CONFIG_READ
, "mgs_config_read" },
110 { OBD_PING
, "obd_ping" },
111 { OBD_LOG_CANCEL
, "llog_cancel" },
112 { OBD_QC_CALLBACK
, "obd_quota_callback" },
113 { OBD_IDX_READ
, "dt_index_read" },
114 { LLOG_ORIGIN_HANDLE_CREATE
, "llog_origin_handle_open" },
115 { LLOG_ORIGIN_HANDLE_NEXT_BLOCK
, "llog_origin_handle_next_block" },
116 { LLOG_ORIGIN_HANDLE_READ_HEADER
, "llog_origin_handle_read_header" },
117 { LLOG_ORIGIN_HANDLE_WRITE_REC
, "llog_origin_handle_write_rec" },
118 { LLOG_ORIGIN_HANDLE_CLOSE
, "llog_origin_handle_close" },
119 { LLOG_ORIGIN_CONNECT
, "llog_origin_connect" },
120 { LLOG_CATINFO
, "llog_catinfo" },
121 { LLOG_ORIGIN_HANDLE_PREV_BLOCK
, "llog_origin_handle_prev_block" },
122 { LLOG_ORIGIN_HANDLE_DESTROY
, "llog_origin_handle_destroy" },
123 { QUOTA_DQACQ
, "quota_acquire" },
124 { QUOTA_DQREL
, "quota_release" },
125 { SEQ_QUERY
, "seq_query" },
126 { SEC_CTX_INIT
, "sec_ctx_init" },
127 { SEC_CTX_INIT_CONT
, "sec_ctx_init_cont" },
128 { SEC_CTX_FINI
, "sec_ctx_fini" },
129 { FLD_QUERY
, "fld_query" },
130 { FLD_READ
, "fld_read" },
133 static struct ll_eopcode
{
136 } ll_eopcode_table
[EXTRA_LAST_OPC
] = {
137 { LDLM_GLIMPSE_ENQUEUE
, "ldlm_glimpse_enqueue" },
138 { LDLM_PLAIN_ENQUEUE
, "ldlm_plain_enqueue" },
139 { LDLM_EXTENT_ENQUEUE
, "ldlm_extent_enqueue" },
140 { LDLM_FLOCK_ENQUEUE
, "ldlm_flock_enqueue" },
141 { LDLM_IBITS_ENQUEUE
, "ldlm_ibits_enqueue" },
142 { MDS_REINT_SETATTR
, "mds_reint_setattr" },
143 { MDS_REINT_CREATE
, "mds_reint_create" },
144 { MDS_REINT_LINK
, "mds_reint_link" },
145 { MDS_REINT_UNLINK
, "mds_reint_unlink" },
146 { MDS_REINT_RENAME
, "mds_reint_rename" },
147 { MDS_REINT_OPEN
, "mds_reint_open" },
148 { MDS_REINT_SETXATTR
, "mds_reint_setxattr" },
149 { BRW_READ_BYTES
, "read_bytes" },
150 { BRW_WRITE_BYTES
, "write_bytes" },
153 const char *ll_opcode2str(__u32 opcode
)
155 /* When one of the assertions below fail, chances are that:
156 * 1) A new opcode was added in include/lustre/lustre_idl.h,
157 * but is missing from the table above.
158 * or 2) The opcode space was renumbered or rearranged,
159 * and the opcode_offset() function in
160 * ptlrpc_internal.h needs to be modified.
162 __u32 offset
= opcode_offset(opcode
);
164 LASSERTF(offset
< LUSTRE_MAX_OPCODES
,
165 "offset %u >= LUSTRE_MAX_OPCODES %u\n",
166 offset
, LUSTRE_MAX_OPCODES
);
167 LASSERTF(ll_rpc_opcode_table
[offset
].opcode
== opcode
,
168 "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
169 offset
, ll_rpc_opcode_table
[offset
].opcode
, opcode
);
170 return ll_rpc_opcode_table
[offset
].opname
;
173 static const char *ll_eopcode2str(__u32 opcode
)
175 LASSERT(ll_eopcode_table
[opcode
].opcode
== opcode
);
176 return ll_eopcode_table
[opcode
].opname
;
180 ptlrpc_ldebugfs_register(struct dentry
*root
, char *dir
,
182 struct dentry
**debugfs_root_ret
,
183 struct lprocfs_stats
**stats_ret
)
185 struct dentry
*svc_debugfs_entry
;
186 struct lprocfs_stats
*svc_stats
;
188 unsigned int svc_counter_config
= LPROCFS_CNTR_AVGMINMAX
|
191 LASSERT(!*debugfs_root_ret
);
192 LASSERT(!*stats_ret
);
194 svc_stats
= lprocfs_alloc_stats(EXTRA_MAX_OPCODES
+LUSTRE_MAX_OPCODES
,
200 svc_debugfs_entry
= ldebugfs_register(dir
, root
, NULL
, NULL
);
201 if (IS_ERR(svc_debugfs_entry
)) {
202 lprocfs_free_stats(&svc_stats
);
206 svc_debugfs_entry
= root
;
209 lprocfs_counter_init(svc_stats
, PTLRPC_REQWAIT_CNTR
,
210 svc_counter_config
, "req_waittime", "usec");
211 lprocfs_counter_init(svc_stats
, PTLRPC_REQQDEPTH_CNTR
,
212 svc_counter_config
, "req_qdepth", "reqs");
213 lprocfs_counter_init(svc_stats
, PTLRPC_REQACTIVE_CNTR
,
214 svc_counter_config
, "req_active", "reqs");
215 lprocfs_counter_init(svc_stats
, PTLRPC_TIMEOUT
,
216 svc_counter_config
, "req_timeout", "sec");
217 lprocfs_counter_init(svc_stats
, PTLRPC_REQBUF_AVAIL_CNTR
,
218 svc_counter_config
, "reqbuf_avail", "bufs");
219 for (i
= 0; i
< EXTRA_LAST_OPC
; i
++) {
223 case BRW_WRITE_BYTES
:
231 lprocfs_counter_init(svc_stats
, PTLRPC_LAST_CNTR
+ i
,
233 ll_eopcode2str(i
), units
);
235 for (i
= 0; i
< LUSTRE_MAX_OPCODES
; i
++) {
236 __u32 opcode
= ll_rpc_opcode_table
[i
].opcode
;
238 lprocfs_counter_init(svc_stats
,
239 EXTRA_MAX_OPCODES
+ i
, svc_counter_config
,
240 ll_opcode2str(opcode
), "usec");
243 rc
= ldebugfs_register_stats(svc_debugfs_entry
, name
, svc_stats
);
246 ldebugfs_remove(&svc_debugfs_entry
);
247 lprocfs_free_stats(&svc_stats
);
250 *debugfs_root_ret
= svc_debugfs_entry
;
251 *stats_ret
= svc_stats
;
256 ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file
*m
, void *v
)
258 struct ptlrpc_service
*svc
= m
->private;
259 struct ptlrpc_service_part
*svcpt
;
263 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
264 total
+= svcpt
->scp_hist_nrqbds
;
266 seq_printf(m
, "%d\n", total
);
270 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len
);
273 ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file
*m
, void *n
)
275 struct ptlrpc_service
*svc
= m
->private;
276 struct ptlrpc_service_part
*svcpt
;
280 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
281 total
+= svc
->srv_hist_nrqbds_cpt_max
;
283 seq_printf(m
, "%d\n", total
);
288 ptlrpc_lprocfs_req_history_max_seq_write(struct file
*file
,
289 const char __user
*buffer
,
290 size_t count
, loff_t
*off
)
292 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
297 rc
= lprocfs_write_helper(buffer
, count
, &val
);
304 /* This sanity check is more of an insanity check; we can still
305 * hose a kernel by allowing the request history to grow too
308 bufpages
= (svc
->srv_buf_size
+ PAGE_SIZE
- 1) >> PAGE_SHIFT
;
309 if (val
> totalram_pages
/ (2 * bufpages
))
312 spin_lock(&svc
->srv_lock
);
315 svc
->srv_hist_nrqbds_cpt_max
= 0;
317 svc
->srv_hist_nrqbds_cpt_max
= max(1, (val
/ svc
->srv_ncpts
));
319 spin_unlock(&svc
->srv_lock
);
324 LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max
);
326 static ssize_t
threads_min_show(struct kobject
*kobj
, struct attribute
*attr
,
329 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
332 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
);
335 static ssize_t
threads_min_store(struct kobject
*kobj
, struct attribute
*attr
,
336 const char *buffer
, size_t count
)
338 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
341 int rc
= kstrtoul(buffer
, 10, &val
);
346 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
349 spin_lock(&svc
->srv_lock
);
350 if (val
> svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
) {
351 spin_unlock(&svc
->srv_lock
);
355 svc
->srv_nthrs_cpt_init
= val
/ svc
->srv_ncpts
;
357 spin_unlock(&svc
->srv_lock
);
361 LUSTRE_RW_ATTR(threads_min
);
363 static ssize_t
threads_started_show(struct kobject
*kobj
,
364 struct attribute
*attr
,
367 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
369 struct ptlrpc_service_part
*svcpt
;
373 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
374 total
+= svcpt
->scp_nthrs_running
;
376 return sprintf(buf
, "%d\n", total
);
378 LUSTRE_RO_ATTR(threads_started
);
380 static ssize_t
threads_max_show(struct kobject
*kobj
, struct attribute
*attr
,
383 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
386 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
);
389 static ssize_t
threads_max_store(struct kobject
*kobj
, struct attribute
*attr
,
390 const char *buffer
, size_t count
)
392 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
395 int rc
= kstrtoul(buffer
, 10, &val
);
400 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
403 spin_lock(&svc
->srv_lock
);
404 if (val
< svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
) {
405 spin_unlock(&svc
->srv_lock
);
409 svc
->srv_nthrs_cpt_limit
= val
/ svc
->srv_ncpts
;
411 spin_unlock(&svc
->srv_lock
);
415 LUSTRE_RW_ATTR(threads_max
);
423 * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
425 * \param[in] state The policy state
427 static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state
)
432 case NRS_POL_STATE_INVALID
:
434 case NRS_POL_STATE_STOPPED
:
436 case NRS_POL_STATE_STOPPING
:
438 case NRS_POL_STATE_STARTING
:
440 case NRS_POL_STATE_STARTED
:
446 * Obtains status information for \a policy.
448 * Information is copied in \a info.
450 * \param[in] policy The policy
451 * \param[out] info Holds returned status information
453 static void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy
*policy
,
454 struct ptlrpc_nrs_pol_info
*info
)
456 assert_spin_locked(&policy
->pol_nrs
->nrs_lock
);
458 memcpy(info
->pi_name
, policy
->pol_desc
->pd_name
, NRS_POL_NAME_MAX
);
460 info
->pi_fallback
= !!(policy
->pol_flags
& PTLRPC_NRS_FL_FALLBACK
);
461 info
->pi_state
= policy
->pol_state
;
463 * XXX: These are accessed without holding
464 * ptlrpc_service_part::scp_req_lock.
466 info
->pi_req_queued
= policy
->pol_req_queued
;
467 info
->pi_req_started
= policy
->pol_req_started
;
471 * Reads and prints policy status information for all policies of a PTLRPC
474 static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file
*m
, void *n
)
476 struct ptlrpc_service
*svc
= m
->private;
477 struct ptlrpc_service_part
*svcpt
;
478 struct ptlrpc_nrs
*nrs
;
479 struct ptlrpc_nrs_policy
*policy
;
480 struct ptlrpc_nrs_pol_info
*infos
;
481 struct ptlrpc_nrs_pol_info tmp
;
483 unsigned pol_idx
= 0;
489 * Serialize NRS core lprocfs operations with policy registration/
492 mutex_lock(&nrs_core
.nrs_mutex
);
495 * Use the first service partition's regular NRS head in order to obtain
496 * the number of policies registered with NRS heads of this service. All
497 * service partitions will have the same number of policies.
499 nrs
= nrs_svcpt2nrs(svc
->srv_parts
[0], false);
501 spin_lock(&nrs
->nrs_lock
);
502 num_pols
= svc
->srv_parts
[0]->scp_nrs_reg
.nrs_num_pols
;
503 spin_unlock(&nrs
->nrs_lock
);
505 infos
= kcalloc(num_pols
, sizeof(*infos
), GFP_NOFS
);
512 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
513 nrs
= nrs_svcpt2nrs(svcpt
, hp
);
514 spin_lock(&nrs
->nrs_lock
);
518 list_for_each_entry(policy
, &nrs
->nrs_policy_list
, pol_list
) {
519 LASSERT(pol_idx
< num_pols
);
521 nrs_policy_get_info_locked(policy
, &tmp
);
523 * Copy values when handling the first service
527 memcpy(infos
[pol_idx
].pi_name
, tmp
.pi_name
,
529 memcpy(&infos
[pol_idx
].pi_state
, &tmp
.pi_state
,
530 sizeof(tmp
.pi_state
));
531 infos
[pol_idx
].pi_fallback
= tmp
.pi_fallback
;
533 * For the rest of the service partitions
534 * sanity-check the values we get.
537 LASSERT(strncmp(infos
[pol_idx
].pi_name
,
539 NRS_POL_NAME_MAX
) == 0);
541 * Not asserting ptlrpc_nrs_pol_info::pi_state,
542 * because it may be different between
543 * instances of the same policy in different
544 * service partitions.
546 LASSERT(infos
[pol_idx
].pi_fallback
==
550 infos
[pol_idx
].pi_req_queued
+= tmp
.pi_req_queued
;
551 infos
[pol_idx
].pi_req_started
+= tmp
.pi_req_started
;
555 spin_unlock(&nrs
->nrs_lock
);
559 * Policy status information output is in YAML format.
575 * high_priority_requests:
588 seq_printf(m
, "%s\n",
589 !hp
? "\nregular_requests:" : "high_priority_requests:");
591 for (pol_idx
= 0; pol_idx
< num_pols
; pol_idx
++) {
592 seq_printf(m
, " - name: %s\n"
596 " active: %-20d\n\n",
597 infos
[pol_idx
].pi_name
,
598 nrs_state2str(infos
[pol_idx
].pi_state
),
599 infos
[pol_idx
].pi_fallback
? "yes" : "no",
600 (int)infos
[pol_idx
].pi_req_queued
,
601 (int)infos
[pol_idx
].pi_req_started
);
604 if (!hp
&& nrs_svc_has_hp(svc
)) {
605 memset(infos
, 0, num_pols
* sizeof(*infos
));
608 * Redo the processing for the service's HP NRS heads' policies.
616 mutex_unlock(&nrs_core
.nrs_mutex
);
/**
 * Upper bound on the size of a command written to the NRS policy file: the
 * maximum policy name size plus the length of the longest optional suffix,
 * " reg" (the sizeof() term drops that literal's terminating NUL).
 */
#define LPROCFS_NRS_WR_MAX_CMD	((sizeof(" reg") - 1) + NRS_POL_NAME_MAX)
628 * Starts and stops a given policy on a PTLRPC service.
630 * Commands consist of the policy name, followed by an optional [reg|hp] token;
631 * if the optional token is omitted, the operation is performed on both the
632 * regular and high-priority (if the service has one) NRS head.
634 static ssize_t
ptlrpc_lprocfs_nrs_seq_write(struct file
*file
,
635 const char __user
*buffer
,
636 size_t count
, loff_t
*off
)
638 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
639 enum ptlrpc_nrs_queue_type queue
= PTLRPC_NRS_QUEUE_BOTH
;
641 char *cmd_copy
= NULL
;
645 if (count
>= LPROCFS_NRS_WR_MAX_CMD
)
648 cmd
= kzalloc(LPROCFS_NRS_WR_MAX_CMD
, GFP_NOFS
);
652 * strsep() modifies its argument, so keep a copy
656 if (copy_from_user(cmd
, buffer
, count
)) {
663 token
= strsep(&cmd
, " ");
665 if (strlen(token
) > NRS_POL_NAME_MAX
- 1) {
671 * No [reg|hp] token has been specified
677 * The second token is either NULL, or an optional [reg|hp] string
679 if (strcmp(cmd
, "reg") == 0) {
680 queue
= PTLRPC_NRS_QUEUE_REG
;
681 } else if (strcmp(cmd
, "hp") == 0) {
682 queue
= PTLRPC_NRS_QUEUE_HP
;
690 if (queue
== PTLRPC_NRS_QUEUE_HP
&& !nrs_svc_has_hp(svc
)) {
693 } else if (queue
== PTLRPC_NRS_QUEUE_BOTH
&& !nrs_svc_has_hp(svc
)) {
694 queue
= PTLRPC_NRS_QUEUE_REG
;
698 * Serialize NRS core lprocfs operations with policy registration/
701 mutex_lock(&nrs_core
.nrs_mutex
);
703 rc
= ptlrpc_nrs_policy_control(svc
, queue
, token
, PTLRPC_NRS_CTL_START
,
706 mutex_unlock(&nrs_core
.nrs_mutex
);
710 return rc
< 0 ? rc
: count
;
713 LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs
);
717 struct ptlrpc_srh_iterator
{
720 struct ptlrpc_request
*srhi_req
;
724 ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part
*svcpt
,
725 struct ptlrpc_srh_iterator
*srhi
,
729 struct ptlrpc_request
*req
;
731 if (srhi
->srhi_req
&& srhi
->srhi_seq
> svcpt
->scp_hist_seq_culled
&&
732 srhi
->srhi_seq
<= seq
) {
733 /* If srhi_req was set previously, hasn't been culled and
734 * we're searching for a seq on or after it (i.e. more
735 * recent), search from it onwards.
736 * Since the service history is LRU (i.e. culled reqs will
737 * be near the head), we shouldn't have to do long
740 LASSERTF(srhi
->srhi_seq
== srhi
->srhi_req
->rq_history_seq
,
741 "%s:%d: seek seq %llu, request seq %llu\n",
742 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
743 srhi
->srhi_seq
, srhi
->srhi_req
->rq_history_seq
);
744 LASSERTF(!list_empty(&svcpt
->scp_hist_reqs
),
745 "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n",
746 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
747 seq
, srhi
->srhi_seq
, svcpt
->scp_hist_seq_culled
);
748 e
= &srhi
->srhi_req
->rq_history_list
;
750 /* search from start */
751 e
= svcpt
->scp_hist_reqs
.next
;
754 while (e
!= &svcpt
->scp_hist_reqs
) {
755 req
= list_entry(e
, struct ptlrpc_request
, rq_history_list
);
757 if (req
->rq_history_seq
>= seq
) {
758 srhi
->srhi_seq
= req
->rq_history_seq
;
759 srhi
->srhi_req
= req
;
/*
 * The ptlrpc history sequence is used as the "position" of the seq_file. In
 * some cases seq_read() increments "position" to indicate reading the next
 * element. However, the low bits of the history sequence are reserved for
 * the CPT id (see the comments before ptlrpc_req_add_history()), which means
 * seq_read() might change the CPT id of the history sequence and never
 * finish reading the requests on a CPT. To make it work, we have to shift
 * the CPT id to the high bits and the timestamp to the low bits, so that
 * seq_read() only increases the timestamp, which correctly indicates the
 * next position.
 */
/*
 * Extract the CPT id from a seq_file position: the top srv_cpt_bits bits of
 * the 64-bit position. Evaluates to 0 when the service uses no CPT bits.
 */
#define PTLRPC_REQ_POS2CPT(svc, pos)				\
	((svc)->srv_cpt_bits != 0 ?				\
	 (__u64)(pos) >> (64 - (svc)->srv_cpt_bits) : 0)
/*
 * Build the lowest seq_file position that belongs to a CPT: the CPT id is
 * placed in the top srv_cpt_bits bits of a 64-bit value and every lower bit
 * is zero. Evaluates to 0 when the service uses no CPT bits.
 *
 * The id is widened to __u64 before shifting. Callers pass a plain int, and
 * shifting an int by (64 - srv_cpt_bits) >= 32 positions is undefined.
 */
#define PTLRPC_REQ_CPT2POS(svc, cpt)				\
	((svc)->srv_cpt_bits == 0 ? 0 :				\
	 (__u64)(cpt) << (64 - (svc)->srv_cpt_bits))
/*
 * Convert a history sequence to a seq_file position by rotating it right by
 * srv_cpt_bits: the low srv_cpt_bits bits move to the top and the remaining
 * bits move down. The sequence is returned unchanged when srv_cpt_bits is 0.
 */
#define PTLRPC_REQ_SEQ2POS(svc, seq)				\
	((svc)->srv_cpt_bits != 0 ?				\
	 (((seq) << (64 - (svc)->srv_cpt_bits)) |		\
	  ((seq) >> (svc)->srv_cpt_bits)) :			\
	 (seq))
/*
 * Convert a seq_file position back to a history sequence by rotating the
 * 64-bit position left by srv_cpt_bits: the top srv_cpt_bits bits return to
 * the bottom. The position is returned unchanged when srv_cpt_bits is 0.
 */
#define PTLRPC_REQ_POS2SEQ(svc, pos)				\
	((svc)->srv_cpt_bits != 0 ?				\
	 (((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)) |	\
	  ((__u64)(pos) << (svc)->srv_cpt_bits)) :		\
	 (pos))
802 ptlrpc_lprocfs_svc_req_history_start(struct seq_file
*s
, loff_t
*pos
)
804 struct ptlrpc_service
*svc
= s
->private;
805 struct ptlrpc_service_part
*svcpt
;
806 struct ptlrpc_srh_iterator
*srhi
;
811 if (sizeof(loff_t
) != sizeof(__u64
)) { /* can't support */
812 CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n",
813 (int)sizeof(loff_t
));
817 srhi
= kzalloc(sizeof(*srhi
), GFP_NOFS
);
822 srhi
->srhi_req
= NULL
;
824 cpt
= PTLRPC_REQ_POS2CPT(svc
, *pos
);
826 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
827 if (i
< cpt
) /* skip */
829 if (i
> cpt
) /* make up the lowest position for this CPT */
830 *pos
= PTLRPC_REQ_CPT2POS(svc
, i
);
832 spin_lock(&svcpt
->scp_lock
);
833 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
,
834 PTLRPC_REQ_POS2SEQ(svc
, *pos
));
835 spin_unlock(&svcpt
->scp_lock
);
837 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
848 ptlrpc_lprocfs_svc_req_history_stop(struct seq_file
*s
, void *iter
)
850 struct ptlrpc_srh_iterator
*srhi
= iter
;
856 ptlrpc_lprocfs_svc_req_history_next(struct seq_file
*s
,
857 void *iter
, loff_t
*pos
)
859 struct ptlrpc_service
*svc
= s
->private;
860 struct ptlrpc_srh_iterator
*srhi
= iter
;
861 struct ptlrpc_service_part
*svcpt
;
866 for (i
= srhi
->srhi_idx
; i
< svc
->srv_ncpts
; i
++) {
867 svcpt
= svc
->srv_parts
[i
];
869 if (i
> srhi
->srhi_idx
) { /* reset iterator for a new CPT */
870 srhi
->srhi_req
= NULL
;
873 } else { /* the next sequence */
874 seq
= srhi
->srhi_seq
+ (1 << svc
->srv_cpt_bits
);
877 spin_lock(&svcpt
->scp_lock
);
878 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, seq
);
879 spin_unlock(&svcpt
->scp_lock
);
881 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
891 static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file
*s
, void *iter
)
893 struct ptlrpc_service
*svc
= s
->private;
894 struct ptlrpc_srh_iterator
*srhi
= iter
;
895 struct ptlrpc_service_part
*svcpt
;
896 struct ptlrpc_request
*req
;
899 LASSERT(srhi
->srhi_idx
< svc
->srv_ncpts
);
901 svcpt
= svc
->srv_parts
[srhi
->srhi_idx
];
903 spin_lock(&svcpt
->scp_lock
);
905 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, srhi
->srhi_seq
);
908 char nidstr
[LNET_NIDSTR_SIZE
];
910 req
= srhi
->srhi_req
;
912 libcfs_nid2str_r(req
->rq_self
, nidstr
, sizeof(nidstr
));
913 /* Print common req fields.
914 * CAVEAT EMPTOR: we're racing with the service handler
915 * here. The request could contain any old crap, so you
916 * must be just as careful as the service's request
917 * parser. Currently I only print stuff here I know is OK
918 * to look at coz it was set up in request_in_callback()!!!
920 seq_printf(s
, "%lld:%s:%s:x%llu:%d:%s:%lld:%lds(%+lds) ",
921 req
->rq_history_seq
, nidstr
,
922 libcfs_id2str(req
->rq_peer
), req
->rq_xid
,
923 req
->rq_reqlen
, ptlrpc_rqphase2str(req
),
924 (s64
)req
->rq_arrival_time
.tv_sec
,
925 (long)(req
->rq_sent
- req
->rq_arrival_time
.tv_sec
),
926 (long)(req
->rq_sent
- req
->rq_deadline
));
927 if (!svc
->srv_ops
.so_req_printer
)
930 svc
->srv_ops
.so_req_printer(s
, srhi
->srhi_req
);
933 spin_unlock(&svcpt
->scp_lock
);
938 ptlrpc_lprocfs_svc_req_history_open(struct inode
*inode
, struct file
*file
)
940 static struct seq_operations sops
= {
941 .start
= ptlrpc_lprocfs_svc_req_history_start
,
942 .stop
= ptlrpc_lprocfs_svc_req_history_stop
,
943 .next
= ptlrpc_lprocfs_svc_req_history_next
,
944 .show
= ptlrpc_lprocfs_svc_req_history_show
,
946 struct seq_file
*seqf
;
949 rc
= seq_open(file
, &sops
);
953 seqf
= file
->private_data
;
954 seqf
->private = inode
->i_private
;
958 /* See also lprocfs_rd_timeouts */
959 static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file
*m
, void *n
)
961 struct ptlrpc_service
*svc
= m
->private;
962 struct ptlrpc_service_part
*svcpt
;
970 seq_printf(m
, "adaptive timeouts off, using obd_timeout %u\n",
975 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
976 cur
= at_get(&svcpt
->scp_at_estimate
);
977 worst
= svcpt
->scp_at_estimate
.at_worst_ever
;
978 worstt
= svcpt
->scp_at_estimate
.at_worst_time
;
979 s2dhms(&ts
, ktime_get_real_seconds() - worstt
);
981 seq_printf(m
, "%10s : cur %3u worst %3u (at %lld, "
982 DHMS_FMT
" ago) ", "service",
983 cur
, worst
, (s64
)worstt
, DHMS_VARS(&ts
));
985 lprocfs_at_hist_helper(m
, &svcpt
->scp_at_estimate
);
991 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts
);
993 static ssize_t
high_priority_ratio_show(struct kobject
*kobj
,
994 struct attribute
*attr
,
997 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
999 return sprintf(buf
, "%d\n", svc
->srv_hpreq_ratio
);
1002 static ssize_t
high_priority_ratio_store(struct kobject
*kobj
,
1003 struct attribute
*attr
,
1007 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1012 rc
= kstrtoint(buffer
, 10, &val
);
1019 spin_lock(&svc
->srv_lock
);
1020 svc
->srv_hpreq_ratio
= val
;
1021 spin_unlock(&svc
->srv_lock
);
1025 LUSTRE_RW_ATTR(high_priority_ratio
);
1027 static struct attribute
*ptlrpc_svc_attrs
[] = {
1028 &lustre_attr_threads_min
.attr
,
1029 &lustre_attr_threads_started
.attr
,
1030 &lustre_attr_threads_max
.attr
,
1031 &lustre_attr_high_priority_ratio
.attr
,
1035 static void ptlrpc_sysfs_svc_release(struct kobject
*kobj
)
1037 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1040 complete(&svc
->srv_kobj_unregister
);
1043 static struct kobj_type ptlrpc_svc_ktype
= {
1044 .default_attrs
= ptlrpc_svc_attrs
,
1045 .sysfs_ops
= &lustre_sysfs_ops
,
1046 .release
= ptlrpc_sysfs_svc_release
,
1049 void ptlrpc_sysfs_unregister_service(struct ptlrpc_service
*svc
)
1051 /* Let's see if we had a chance at initialization first */
1052 if (svc
->srv_kobj
.kset
) {
1053 kobject_put(&svc
->srv_kobj
);
1054 wait_for_completion(&svc
->srv_kobj_unregister
);
1058 int ptlrpc_sysfs_register_service(struct kset
*parent
,
1059 struct ptlrpc_service
*svc
)
1063 svc
->srv_kobj
.kset
= parent
;
1064 init_completion(&svc
->srv_kobj_unregister
);
1065 rc
= kobject_init_and_add(&svc
->srv_kobj
, &ptlrpc_svc_ktype
, NULL
,
1066 "%s", svc
->srv_name
);
1071 void ptlrpc_ldebugfs_register_service(struct dentry
*entry
,
1072 struct ptlrpc_service
*svc
)
1074 struct lprocfs_vars lproc_vars
[] = {
1075 {.name
= "req_buffer_history_len",
1076 .fops
= &ptlrpc_lprocfs_req_history_len_fops
,
1078 {.name
= "req_buffer_history_max",
1079 .fops
= &ptlrpc_lprocfs_req_history_max_fops
,
1081 {.name
= "timeouts",
1082 .fops
= &ptlrpc_lprocfs_timeouts_fops
,
1084 {.name
= "nrs_policies",
1085 .fops
= &ptlrpc_lprocfs_nrs_fops
,
1089 static const struct file_operations req_history_fops
= {
1090 .owner
= THIS_MODULE
,
1091 .open
= ptlrpc_lprocfs_svc_req_history_open
,
1093 .llseek
= seq_lseek
,
1094 .release
= lprocfs_seq_release
,
1099 ptlrpc_ldebugfs_register(entry
, svc
->srv_name
,
1100 "stats", &svc
->srv_debugfs_entry
,
1103 if (IS_ERR_OR_NULL(svc
->srv_debugfs_entry
))
1106 ldebugfs_add_vars(svc
->srv_debugfs_entry
, lproc_vars
, NULL
);
1108 rc
= ldebugfs_seq_create(svc
->srv_debugfs_entry
, "req_history",
1109 0400, &req_history_fops
, svc
);
1111 CWARN("Error adding the req_history file\n");
1114 void ptlrpc_lprocfs_register_obd(struct obd_device
*obddev
)
1116 ptlrpc_ldebugfs_register(obddev
->obd_debugfs_entry
, NULL
, "stats",
1117 &obddev
->obd_svc_debugfs_entry
,
1118 &obddev
->obd_svc_stats
);
1120 EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd
);
1122 void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request
*req
, long amount
)
1124 struct lprocfs_stats
*svc_stats
;
1125 __u32 op
= lustre_msg_get_opc(req
->rq_reqmsg
);
1126 int opc
= opcode_offset(op
);
1128 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1129 if (!svc_stats
|| opc
<= 0)
1131 LASSERT(opc
< LUSTRE_MAX_OPCODES
);
1132 if (!(op
== LDLM_ENQUEUE
|| op
== MDS_REINT
))
1133 lprocfs_counter_add(svc_stats
, opc
+ EXTRA_MAX_OPCODES
, amount
);
1136 void ptlrpc_lprocfs_brw(struct ptlrpc_request
*req
, int bytes
)
1138 struct lprocfs_stats
*svc_stats
;
1141 if (!req
->rq_import
)
1143 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1146 idx
= lustre_msg_get_opc(req
->rq_reqmsg
);
1149 idx
= BRW_READ_BYTES
+ PTLRPC_LAST_CNTR
;
1152 idx
= BRW_WRITE_BYTES
+ PTLRPC_LAST_CNTR
;
1155 LASSERTF(0, "unsupported opcode %u\n", idx
);
1159 lprocfs_counter_add(svc_stats
, idx
, bytes
);
1161 EXPORT_SYMBOL(ptlrpc_lprocfs_brw
);
1163 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service
*svc
)
1165 if (!IS_ERR_OR_NULL(svc
->srv_debugfs_entry
))
1166 ldebugfs_remove(&svc
->srv_debugfs_entry
);
1169 lprocfs_free_stats(&svc
->srv_stats
);
1172 void ptlrpc_lprocfs_unregister_obd(struct obd_device
*obd
)
1174 if (!IS_ERR_OR_NULL(obd
->obd_svc_debugfs_entry
))
1175 ldebugfs_remove(&obd
->obd_svc_debugfs_entry
);
1177 if (obd
->obd_svc_stats
)
1178 lprocfs_free_stats(&obd
->obd_svc_stats
);
1180 EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd
);
1184 int lprocfs_wr_ping(struct file
*file
, const char __user
*buffer
,
1185 size_t count
, loff_t
*off
)
1187 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1188 struct ptlrpc_request
*req
;
1191 rc
= lprocfs_climp_check(obd
);
1195 req
= ptlrpc_prep_ping(obd
->u
.cli
.cl_import
);
1196 up_read(&obd
->u
.cli
.cl_sem
);
1200 req
->rq_send_state
= LUSTRE_IMP_FULL
;
1202 rc
= ptlrpc_queue_wait(req
);
1204 ptlrpc_req_finished(req
);
1209 EXPORT_SYMBOL(lprocfs_wr_ping
);
1211 /* Write the connection UUID to this file to attempt to connect to that node.
1212 * The connection UUID is a node's primary NID. For example,
1213 * "echo connection=192.168.0.1@tcp0::instance > .../import".
1215 int lprocfs_wr_import(struct file
*file
, const char __user
*buffer
,
1216 size_t count
, loff_t
*off
)
1218 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1219 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1224 const char prefix
[] = "connection=";
1225 const int prefix_len
= sizeof(prefix
) - 1;
1227 if (count
> PAGE_SIZE
- 1 || count
<= prefix_len
)
1230 kbuf
= kzalloc(count
+ 1, GFP_NOFS
);
1234 if (copy_from_user(kbuf
, buffer
, count
)) {
1241 /* only support connection=uuid::instance now */
1242 if (strncmp(prefix
, kbuf
, prefix_len
) != 0) {
1247 uuid
= kbuf
+ prefix_len
;
1248 ptr
= strstr(uuid
, "::");
1255 ptr
+= strlen("::");
1256 inst
= simple_strtoul(ptr
, &endptr
, 10);
1258 CERROR("config: wrong instance # %s\n", ptr
);
1259 } else if (inst
!= imp
->imp_connect_data
.ocd_instance
) {
1260 CDEBUG(D_INFO
, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n",
1261 imp
->imp_obd
->obd_name
,
1262 imp
->imp_connect_data
.ocd_instance
, inst
);
1265 CDEBUG(D_INFO
, "IR: %s has already been connecting to new target(%u)\n",
1266 imp
->imp_obd
->obd_name
, inst
);
1271 ptlrpc_recover_import(imp
, uuid
, 1);
1277 EXPORT_SYMBOL(lprocfs_wr_import
);
1279 int lprocfs_rd_pinger_recov(struct seq_file
*m
, void *n
)
1281 struct obd_device
*obd
= m
->private;
1282 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1285 rc
= lprocfs_climp_check(obd
);
1289 seq_printf(m
, "%d\n", !imp
->imp_no_pinger_recover
);
1290 up_read(&obd
->u
.cli
.cl_sem
);
1294 EXPORT_SYMBOL(lprocfs_rd_pinger_recov
);
1296 int lprocfs_wr_pinger_recov(struct file
*file
, const char __user
*buffer
,
1297 size_t count
, loff_t
*off
)
1299 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1300 struct client_obd
*cli
= &obd
->u
.cli
;
1301 struct obd_import
*imp
= cli
->cl_import
;
1304 rc
= lprocfs_write_helper(buffer
, count
, &val
);
1308 if (val
!= 0 && val
!= 1)
1311 rc
= lprocfs_climp_check(obd
);
1315 spin_lock(&imp
->imp_lock
);
1316 imp
->imp_no_pinger_recover
= !val
;
1317 spin_unlock(&imp
->imp_lock
);
1318 up_read(&obd
->u
.cli
.cl_sem
);
1322 EXPORT_SYMBOL(lprocfs_wr_pinger_recov
);