4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 #define DEBUG_SUBSYSTEM S_CLASS
38 #include "../include/obd_support.h"
39 #include "../include/obd.h"
40 #include "../include/lprocfs_status.h"
41 #include "../include/lustre/lustre_idl.h"
42 #include "../include/lustre_net.h"
43 #include "../include/obd_class.h"
44 #include "ptlrpc_internal.h"
46 static struct ll_rpc_opcode
{
49 } ll_rpc_opcode_table
[LUSTRE_MAX_OPCODES
] = {
50 { OST_REPLY
, "ost_reply" },
51 { OST_GETATTR
, "ost_getattr" },
52 { OST_SETATTR
, "ost_setattr" },
53 { OST_READ
, "ost_read" },
54 { OST_WRITE
, "ost_write" },
55 { OST_CREATE
, "ost_create" },
56 { OST_DESTROY
, "ost_destroy" },
57 { OST_GET_INFO
, "ost_get_info" },
58 { OST_CONNECT
, "ost_connect" },
59 { OST_DISCONNECT
, "ost_disconnect" },
60 { OST_PUNCH
, "ost_punch" },
61 { OST_OPEN
, "ost_open" },
62 { OST_CLOSE
, "ost_close" },
63 { OST_STATFS
, "ost_statfs" },
64 { 14, NULL
}, /* formerly OST_SAN_READ */
65 { 15, NULL
}, /* formerly OST_SAN_WRITE */
66 { OST_SYNC
, "ost_sync" },
67 { OST_SET_INFO
, "ost_set_info" },
68 { OST_QUOTACHECK
, "ost_quotacheck" },
69 { OST_QUOTACTL
, "ost_quotactl" },
70 { OST_QUOTA_ADJUST_QUNIT
, "ost_quota_adjust_qunit" },
71 { MDS_GETATTR
, "mds_getattr" },
72 { MDS_GETATTR_NAME
, "mds_getattr_lock" },
73 { MDS_CLOSE
, "mds_close" },
74 { MDS_REINT
, "mds_reint" },
75 { MDS_READPAGE
, "mds_readpage" },
76 { MDS_CONNECT
, "mds_connect" },
77 { MDS_DISCONNECT
, "mds_disconnect" },
78 { MDS_GETSTATUS
, "mds_getstatus" },
79 { MDS_STATFS
, "mds_statfs" },
80 { MDS_PIN
, "mds_pin" },
81 { MDS_UNPIN
, "mds_unpin" },
82 { MDS_SYNC
, "mds_sync" },
83 { MDS_DONE_WRITING
, "mds_done_writing" },
84 { MDS_SET_INFO
, "mds_set_info" },
85 { MDS_QUOTACHECK
, "mds_quotacheck" },
86 { MDS_QUOTACTL
, "mds_quotactl" },
87 { MDS_GETXATTR
, "mds_getxattr" },
88 { MDS_SETXATTR
, "mds_setxattr" },
89 { MDS_WRITEPAGE
, "mds_writepage" },
90 { MDS_IS_SUBDIR
, "mds_is_subdir" },
91 { MDS_GET_INFO
, "mds_get_info" },
92 { MDS_HSM_STATE_GET
, "mds_hsm_state_get" },
93 { MDS_HSM_STATE_SET
, "mds_hsm_state_set" },
94 { MDS_HSM_ACTION
, "mds_hsm_action" },
95 { MDS_HSM_PROGRESS
, "mds_hsm_progress" },
96 { MDS_HSM_REQUEST
, "mds_hsm_request" },
97 { MDS_HSM_CT_REGISTER
, "mds_hsm_ct_register" },
98 { MDS_HSM_CT_UNREGISTER
, "mds_hsm_ct_unregister" },
99 { MDS_SWAP_LAYOUTS
, "mds_swap_layouts" },
100 { LDLM_ENQUEUE
, "ldlm_enqueue" },
101 { LDLM_CONVERT
, "ldlm_convert" },
102 { LDLM_CANCEL
, "ldlm_cancel" },
103 { LDLM_BL_CALLBACK
, "ldlm_bl_callback" },
104 { LDLM_CP_CALLBACK
, "ldlm_cp_callback" },
105 { LDLM_GL_CALLBACK
, "ldlm_gl_callback" },
106 { LDLM_SET_INFO
, "ldlm_set_info" },
107 { MGS_CONNECT
, "mgs_connect" },
108 { MGS_DISCONNECT
, "mgs_disconnect" },
109 { MGS_EXCEPTION
, "mgs_exception" },
110 { MGS_TARGET_REG
, "mgs_target_reg" },
111 { MGS_TARGET_DEL
, "mgs_target_del" },
112 { MGS_SET_INFO
, "mgs_set_info" },
113 { MGS_CONFIG_READ
, "mgs_config_read" },
114 { OBD_PING
, "obd_ping" },
115 { OBD_LOG_CANCEL
, "llog_cancel" },
116 { OBD_QC_CALLBACK
, "obd_quota_callback" },
117 { OBD_IDX_READ
, "dt_index_read" },
118 { LLOG_ORIGIN_HANDLE_CREATE
, "llog_origin_handle_open" },
119 { LLOG_ORIGIN_HANDLE_NEXT_BLOCK
, "llog_origin_handle_next_block" },
120 { LLOG_ORIGIN_HANDLE_READ_HEADER
, "llog_origin_handle_read_header" },
121 { LLOG_ORIGIN_HANDLE_WRITE_REC
, "llog_origin_handle_write_rec" },
122 { LLOG_ORIGIN_HANDLE_CLOSE
, "llog_origin_handle_close" },
123 { LLOG_ORIGIN_CONNECT
, "llog_origin_connect" },
124 { LLOG_CATINFO
, "llog_catinfo" },
125 { LLOG_ORIGIN_HANDLE_PREV_BLOCK
, "llog_origin_handle_prev_block" },
126 { LLOG_ORIGIN_HANDLE_DESTROY
, "llog_origin_handle_destroy" },
127 { QUOTA_DQACQ
, "quota_acquire" },
128 { QUOTA_DQREL
, "quota_release" },
129 { SEQ_QUERY
, "seq_query" },
130 { SEC_CTX_INIT
, "sec_ctx_init" },
131 { SEC_CTX_INIT_CONT
, "sec_ctx_init_cont" },
132 { SEC_CTX_FINI
, "sec_ctx_fini" },
133 { FLD_QUERY
, "fld_query" },
134 { UPDATE_OBJ
, "update_obj" },
137 static struct ll_eopcode
{
140 } ll_eopcode_table
[EXTRA_LAST_OPC
] = {
141 { LDLM_GLIMPSE_ENQUEUE
, "ldlm_glimpse_enqueue" },
142 { LDLM_PLAIN_ENQUEUE
, "ldlm_plain_enqueue" },
143 { LDLM_EXTENT_ENQUEUE
, "ldlm_extent_enqueue" },
144 { LDLM_FLOCK_ENQUEUE
, "ldlm_flock_enqueue" },
145 { LDLM_IBITS_ENQUEUE
, "ldlm_ibits_enqueue" },
146 { MDS_REINT_SETATTR
, "mds_reint_setattr" },
147 { MDS_REINT_CREATE
, "mds_reint_create" },
148 { MDS_REINT_LINK
, "mds_reint_link" },
149 { MDS_REINT_UNLINK
, "mds_reint_unlink" },
150 { MDS_REINT_RENAME
, "mds_reint_rename" },
151 { MDS_REINT_OPEN
, "mds_reint_open" },
152 { MDS_REINT_SETXATTR
, "mds_reint_setxattr" },
153 { BRW_READ_BYTES
, "read_bytes" },
154 { BRW_WRITE_BYTES
, "write_bytes" },
157 const char *ll_opcode2str(__u32 opcode
)
159 /* When one of the assertions below fail, chances are that:
160 * 1) A new opcode was added in include/lustre/lustre_idl.h,
161 * but is missing from the table above.
162 * or 2) The opcode space was renumbered or rearranged,
163 * and the opcode_offset() function in
164 * ptlrpc_internal.h needs to be modified.
166 __u32 offset
= opcode_offset(opcode
);
168 LASSERTF(offset
< LUSTRE_MAX_OPCODES
,
169 "offset %u >= LUSTRE_MAX_OPCODES %u\n",
170 offset
, LUSTRE_MAX_OPCODES
);
171 LASSERTF(ll_rpc_opcode_table
[offset
].opcode
== opcode
,
172 "ll_rpc_opcode_table[%u].opcode %u != opcode %u\n",
173 offset
, ll_rpc_opcode_table
[offset
].opcode
, opcode
);
174 return ll_rpc_opcode_table
[offset
].opname
;
177 static const char *ll_eopcode2str(__u32 opcode
)
179 LASSERT(ll_eopcode_table
[opcode
].opcode
== opcode
);
180 return ll_eopcode_table
[opcode
].opname
;
184 ptlrpc_ldebugfs_register(struct dentry
*root
, char *dir
,
186 struct dentry
**debugfs_root_ret
,
187 struct lprocfs_stats
**stats_ret
)
189 struct dentry
*svc_debugfs_entry
;
190 struct lprocfs_stats
*svc_stats
;
192 unsigned int svc_counter_config
= LPROCFS_CNTR_AVGMINMAX
|
195 LASSERT(*debugfs_root_ret
== NULL
);
196 LASSERT(*stats_ret
== NULL
);
198 svc_stats
= lprocfs_alloc_stats(EXTRA_MAX_OPCODES
+LUSTRE_MAX_OPCODES
,
200 if (svc_stats
== NULL
)
204 svc_debugfs_entry
= ldebugfs_register(dir
, root
, NULL
, NULL
);
205 if (IS_ERR(svc_debugfs_entry
)) {
206 lprocfs_free_stats(&svc_stats
);
210 svc_debugfs_entry
= root
;
213 lprocfs_counter_init(svc_stats
, PTLRPC_REQWAIT_CNTR
,
214 svc_counter_config
, "req_waittime", "usec");
215 lprocfs_counter_init(svc_stats
, PTLRPC_REQQDEPTH_CNTR
,
216 svc_counter_config
, "req_qdepth", "reqs");
217 lprocfs_counter_init(svc_stats
, PTLRPC_REQACTIVE_CNTR
,
218 svc_counter_config
, "req_active", "reqs");
219 lprocfs_counter_init(svc_stats
, PTLRPC_TIMEOUT
,
220 svc_counter_config
, "req_timeout", "sec");
221 lprocfs_counter_init(svc_stats
, PTLRPC_REQBUF_AVAIL_CNTR
,
222 svc_counter_config
, "reqbuf_avail", "bufs");
223 for (i
= 0; i
< EXTRA_LAST_OPC
; i
++) {
227 case BRW_WRITE_BYTES
:
235 lprocfs_counter_init(svc_stats
, PTLRPC_LAST_CNTR
+ i
,
237 ll_eopcode2str(i
), units
);
239 for (i
= 0; i
< LUSTRE_MAX_OPCODES
; i
++) {
240 __u32 opcode
= ll_rpc_opcode_table
[i
].opcode
;
242 lprocfs_counter_init(svc_stats
,
243 EXTRA_MAX_OPCODES
+ i
, svc_counter_config
,
244 ll_opcode2str(opcode
), "usec");
247 rc
= ldebugfs_register_stats(svc_debugfs_entry
, name
, svc_stats
);
250 ldebugfs_remove(&svc_debugfs_entry
);
251 lprocfs_free_stats(&svc_stats
);
254 *debugfs_root_ret
= svc_debugfs_entry
;
255 *stats_ret
= svc_stats
;
260 ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file
*m
, void *v
)
262 struct ptlrpc_service
*svc
= m
->private;
263 struct ptlrpc_service_part
*svcpt
;
267 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
268 total
+= svcpt
->scp_hist_nrqbds
;
270 seq_printf(m
, "%d\n", total
);
274 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len
);
277 ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file
*m
, void *n
)
279 struct ptlrpc_service
*svc
= m
->private;
280 struct ptlrpc_service_part
*svcpt
;
284 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
285 total
+= svc
->srv_hist_nrqbds_cpt_max
;
287 seq_printf(m
, "%d\n", total
);
292 ptlrpc_lprocfs_req_history_max_seq_write(struct file
*file
,
293 const char __user
*buffer
,
294 size_t count
, loff_t
*off
)
296 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
301 rc
= lprocfs_write_helper(buffer
, count
, &val
);
308 /* This sanity check is more of an insanity check; we can still
309 * hose a kernel by allowing the request history to grow too
311 bufpages
= (svc
->srv_buf_size
+ PAGE_CACHE_SIZE
- 1) >> PAGE_CACHE_SHIFT
;
312 if (val
> totalram_pages
/ (2 * bufpages
))
315 spin_lock(&svc
->srv_lock
);
318 svc
->srv_hist_nrqbds_cpt_max
= 0;
320 svc
->srv_hist_nrqbds_cpt_max
= max(1, (val
/ svc
->srv_ncpts
));
322 spin_unlock(&svc
->srv_lock
);
327 LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max
);
329 static ssize_t
threads_min_show(struct kobject
*kobj
, struct attribute
*attr
,
332 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
335 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
);
338 static ssize_t
threads_min_store(struct kobject
*kobj
, struct attribute
*attr
,
339 const char *buffer
, size_t count
)
341 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
344 int rc
= kstrtoul(buffer
, 10, &val
);
349 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
352 spin_lock(&svc
->srv_lock
);
353 if (val
> svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
) {
354 spin_unlock(&svc
->srv_lock
);
358 svc
->srv_nthrs_cpt_init
= val
/ svc
->srv_ncpts
;
360 spin_unlock(&svc
->srv_lock
);
364 LUSTRE_RW_ATTR(threads_min
);
366 static ssize_t
threads_started_show(struct kobject
*kobj
,
367 struct attribute
*attr
,
370 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
372 struct ptlrpc_service_part
*svcpt
;
376 ptlrpc_service_for_each_part(svcpt
, i
, svc
)
377 total
+= svcpt
->scp_nthrs_running
;
379 return sprintf(buf
, "%d\n", total
);
381 LUSTRE_RO_ATTR(threads_started
);
383 static ssize_t
threads_max_show(struct kobject
*kobj
, struct attribute
*attr
,
386 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
389 return sprintf(buf
, "%d\n", svc
->srv_nthrs_cpt_limit
* svc
->srv_ncpts
);
392 static ssize_t
threads_max_store(struct kobject
*kobj
, struct attribute
*attr
,
393 const char *buffer
, size_t count
)
395 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
398 int rc
= kstrtoul(buffer
, 10, &val
);
403 if (val
/ svc
->srv_ncpts
< PTLRPC_NTHRS_INIT
)
406 spin_lock(&svc
->srv_lock
);
407 if (val
< svc
->srv_nthrs_cpt_init
* svc
->srv_ncpts
) {
408 spin_unlock(&svc
->srv_lock
);
412 svc
->srv_nthrs_cpt_limit
= val
/ svc
->srv_ncpts
;
414 spin_unlock(&svc
->srv_lock
);
418 LUSTRE_RW_ATTR(threads_max
);
426 * Translates \e ptlrpc_nrs_pol_state values to human-readable strings.
428 * \param[in] state The policy state
430 static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state
)
435 case NRS_POL_STATE_INVALID
:
437 case NRS_POL_STATE_STOPPED
:
439 case NRS_POL_STATE_STOPPING
:
441 case NRS_POL_STATE_STARTING
:
443 case NRS_POL_STATE_STARTED
:
449 * Obtains status information for \a policy.
451 * Information is copied in \a info.
453 * \param[in] policy The policy
454 * \param[out] info Holds returned status information
456 static void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy
*policy
,
457 struct ptlrpc_nrs_pol_info
*info
)
459 LASSERT(policy
!= NULL
);
460 LASSERT(info
!= NULL
);
461 assert_spin_locked(&policy
->pol_nrs
->nrs_lock
);
463 memcpy(info
->pi_name
, policy
->pol_desc
->pd_name
, NRS_POL_NAME_MAX
);
465 info
->pi_fallback
= !!(policy
->pol_flags
& PTLRPC_NRS_FL_FALLBACK
);
466 info
->pi_state
= policy
->pol_state
;
468 * XXX: These are accessed without holding
469 * ptlrpc_service_part::scp_req_lock.
471 info
->pi_req_queued
= policy
->pol_req_queued
;
472 info
->pi_req_started
= policy
->pol_req_started
;
476 * Reads and prints policy status information for all policies of a PTLRPC
479 static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file
*m
, void *n
)
481 struct ptlrpc_service
*svc
= m
->private;
482 struct ptlrpc_service_part
*svcpt
;
483 struct ptlrpc_nrs
*nrs
;
484 struct ptlrpc_nrs_policy
*policy
;
485 struct ptlrpc_nrs_pol_info
*infos
;
486 struct ptlrpc_nrs_pol_info tmp
;
488 unsigned pol_idx
= 0;
494 * Serialize NRS core lprocfs operations with policy registration/
497 mutex_lock(&nrs_core
.nrs_mutex
);
500 * Use the first service partition's regular NRS head in order to obtain
501 * the number of policies registered with NRS heads of this service. All
502 * service partitions will have the same number of policies.
504 nrs
= nrs_svcpt2nrs(svc
->srv_parts
[0], false);
506 spin_lock(&nrs
->nrs_lock
);
507 num_pols
= svc
->srv_parts
[0]->scp_nrs_reg
.nrs_num_pols
;
508 spin_unlock(&nrs
->nrs_lock
);
510 infos
= kcalloc(num_pols
, sizeof(*infos
), GFP_NOFS
);
517 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
518 nrs
= nrs_svcpt2nrs(svcpt
, hp
);
519 spin_lock(&nrs
->nrs_lock
);
523 list_for_each_entry(policy
, &nrs
->nrs_policy_list
,
525 LASSERT(pol_idx
< num_pols
);
527 nrs_policy_get_info_locked(policy
, &tmp
);
529 * Copy values when handling the first service
533 memcpy(infos
[pol_idx
].pi_name
, tmp
.pi_name
,
535 memcpy(&infos
[pol_idx
].pi_state
, &tmp
.pi_state
,
536 sizeof(tmp
.pi_state
));
537 infos
[pol_idx
].pi_fallback
= tmp
.pi_fallback
;
539 * For the rest of the service partitions
540 * sanity-check the values we get.
543 LASSERT(strncmp(infos
[pol_idx
].pi_name
,
545 NRS_POL_NAME_MAX
) == 0);
547 * Not asserting ptlrpc_nrs_pol_info::pi_state,
548 * because it may be different between
549 * instances of the same policy in different
550 * service partitions.
552 LASSERT(infos
[pol_idx
].pi_fallback
==
556 infos
[pol_idx
].pi_req_queued
+= tmp
.pi_req_queued
;
557 infos
[pol_idx
].pi_req_started
+= tmp
.pi_req_started
;
561 spin_unlock(&nrs
->nrs_lock
);
565 * Policy status information output is in YAML format.
581 * high_priority_requests:
594 seq_printf(m
, "%s\n",
595 !hp
? "\nregular_requests:" : "high_priority_requests:");
597 for (pol_idx
= 0; pol_idx
< num_pols
; pol_idx
++) {
598 seq_printf(m
, " - name: %s\n"
602 " active: %-20d\n\n",
603 infos
[pol_idx
].pi_name
,
604 nrs_state2str(infos
[pol_idx
].pi_state
),
605 infos
[pol_idx
].pi_fallback
? "yes" : "no",
606 (int)infos
[pol_idx
].pi_req_queued
,
607 (int)infos
[pol_idx
].pi_req_started
);
610 if (!hp
&& nrs_svc_has_hp(svc
)) {
611 memset(infos
, 0, num_pols
* sizeof(*infos
));
614 * Redo the processing for the service's HP NRS heads' policies.
622 mutex_unlock(&nrs_core
.nrs_mutex
);
/**
 * The longest valid command string is the maximum policy name size plus the
 * length of the " reg" substring (sizeof() also counts the terminating NUL,
 * hence the "- 1").
 */
#define LPROCFS_NRS_WR_MAX_CMD	(sizeof(" reg") - 1 + NRS_POL_NAME_MAX)
634 * Starts and stops a given policy on a PTLRPC service.
636 * Commands consist of the policy name, followed by an optional [reg|hp] token;
637 * if the optional token is omitted, the operation is performed on both the
638 * regular and high-priority (if the service has one) NRS head.
640 static ssize_t
ptlrpc_lprocfs_nrs_seq_write(struct file
*file
,
641 const char __user
*buffer
,
642 size_t count
, loff_t
*off
)
644 struct ptlrpc_service
*svc
= ((struct seq_file
*)file
->private_data
)->private;
645 enum ptlrpc_nrs_queue_type queue
= PTLRPC_NRS_QUEUE_BOTH
;
647 char *cmd_copy
= NULL
;
651 if (count
>= LPROCFS_NRS_WR_MAX_CMD
)
654 cmd
= kzalloc(LPROCFS_NRS_WR_MAX_CMD
, GFP_NOFS
);
658 * strsep() modifies its argument, so keep a copy
662 if (copy_from_user(cmd
, buffer
, count
)) {
669 token
= strsep(&cmd
, " ");
671 if (strlen(token
) > NRS_POL_NAME_MAX
- 1) {
677 * No [reg|hp] token has been specified
683 * The second token is either NULL, or an optional [reg|hp] string
685 if (strcmp(cmd
, "reg") == 0)
686 queue
= PTLRPC_NRS_QUEUE_REG
;
687 else if (strcmp(cmd
, "hp") == 0)
688 queue
= PTLRPC_NRS_QUEUE_HP
;
696 if (queue
== PTLRPC_NRS_QUEUE_HP
&& !nrs_svc_has_hp(svc
)) {
699 } else if (queue
== PTLRPC_NRS_QUEUE_BOTH
&& !nrs_svc_has_hp(svc
))
700 queue
= PTLRPC_NRS_QUEUE_REG
;
703 * Serialize NRS core lprocfs operations with policy registration/
706 mutex_lock(&nrs_core
.nrs_mutex
);
708 rc
= ptlrpc_nrs_policy_control(svc
, queue
, token
, PTLRPC_NRS_CTL_START
,
711 mutex_unlock(&nrs_core
.nrs_mutex
);
715 return rc
< 0 ? rc
: count
;
718 LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs
);
722 struct ptlrpc_srh_iterator
{
725 struct ptlrpc_request
*srhi_req
;
729 ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part
*svcpt
,
730 struct ptlrpc_srh_iterator
*srhi
,
734 struct ptlrpc_request
*req
;
736 if (srhi
->srhi_req
!= NULL
&&
737 srhi
->srhi_seq
> svcpt
->scp_hist_seq_culled
&&
738 srhi
->srhi_seq
<= seq
) {
739 /* If srhi_req was set previously, hasn't been culled and
740 * we're searching for a seq on or after it (i.e. more
741 * recent), search from it onwards.
742 * Since the service history is LRU (i.e. culled reqs will
743 * be near the head), we shouldn't have to do long
745 LASSERTF(srhi
->srhi_seq
== srhi
->srhi_req
->rq_history_seq
,
746 "%s:%d: seek seq %llu, request seq %llu\n",
747 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
748 srhi
->srhi_seq
, srhi
->srhi_req
->rq_history_seq
);
749 LASSERTF(!list_empty(&svcpt
->scp_hist_reqs
),
750 "%s:%d: seek offset %llu, request seq %llu, last culled %llu\n",
751 svcpt
->scp_service
->srv_name
, svcpt
->scp_cpt
,
752 seq
, srhi
->srhi_seq
, svcpt
->scp_hist_seq_culled
);
753 e
= &srhi
->srhi_req
->rq_history_list
;
755 /* search from start */
756 e
= svcpt
->scp_hist_reqs
.next
;
759 while (e
!= &svcpt
->scp_hist_reqs
) {
760 req
= list_entry(e
, struct ptlrpc_request
, rq_history_list
);
762 if (req
->rq_history_seq
>= seq
) {
763 srhi
->srhi_seq
= req
->rq_history_seq
;
764 srhi
->srhi_req
= req
;
/*
 * The ptlrpc history sequence is used as the "position" of the seq_file.
 * In some cases seq_read() increases the "position" to indicate reading the
 * next element. However, the low bits of the history sequence are reserved
 * for the CPT id (see the comments before ptlrpc_req_add_history), which
 * means seq_read() might change the CPT id of the history sequence and never
 * finish reading the requests on a CPT. To make it work, we have to shift
 * the CPT id to the high bits and the timestamp to the low bits, so that
 * seq_read() only increases the timestamp, which correctly indicates the
 * next position.
 */
/* convert seq_file pos to cpt: the CPT id is the top srv_cpt_bits bits */
#define PTLRPC_REQ_POS2CPT(svc, pos)			\
	(((svc)->srv_cpt_bits == 0) ? 0 :		\
	 ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits)))
/*
 * make up seq_file pos from cpt: place the CPT id in the top srv_cpt_bits
 * bits of a 64-bit position.
 *
 * The CPT id is widened to __u64 before shifting: callers pass an int, and
 * shifting an int by (64 - srv_cpt_bits) >= 32 bits is undefined behaviour.
 */
#define PTLRPC_REQ_CPT2POS(svc, cpt)			\
	((svc)->srv_cpt_bits == 0 ? 0 :			\
	 (__u64)(cpt) << (64 - (svc)->srv_cpt_bits))
/*
 * convert sequence to position: the low srv_cpt_bits bits of the sequence
 * are moved to the top of the position and the remaining bits are shifted
 * down (a rotate right, assuming a 64-bit seq)
 */
#define PTLRPC_REQ_SEQ2POS(svc, seq)			\
	(((svc)->srv_cpt_bits == 0) ? (seq) :		\
	 (((seq) >> (svc)->srv_cpt_bits) |		\
	  ((seq) << (64 - (svc)->srv_cpt_bits))))
/*
 * convert position to sequence: the top srv_cpt_bits bits of the position
 * are moved back to the bottom of the sequence (a 64-bit rotate left)
 */
#define PTLRPC_REQ_POS2SEQ(svc, pos)			\
	(((svc)->srv_cpt_bits == 0) ? (pos) :		\
	 (((__u64)(pos) << (svc)->srv_cpt_bits) |	\
	  ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits))))
807 ptlrpc_lprocfs_svc_req_history_start(struct seq_file
*s
, loff_t
*pos
)
809 struct ptlrpc_service
*svc
= s
->private;
810 struct ptlrpc_service_part
*svcpt
;
811 struct ptlrpc_srh_iterator
*srhi
;
816 if (sizeof(loff_t
) != sizeof(__u64
)) { /* can't support */
817 CWARN("Failed to read request history because size of loff_t %d can't match size of u64\n",
818 (int)sizeof(loff_t
));
822 srhi
= kzalloc(sizeof(*srhi
), GFP_NOFS
);
827 srhi
->srhi_req
= NULL
;
829 cpt
= PTLRPC_REQ_POS2CPT(svc
, *pos
);
831 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
832 if (i
< cpt
) /* skip */
834 if (i
> cpt
) /* make up the lowest position for this CPT */
835 *pos
= PTLRPC_REQ_CPT2POS(svc
, i
);
837 spin_lock(&svcpt
->scp_lock
);
838 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
,
839 PTLRPC_REQ_POS2SEQ(svc
, *pos
));
840 spin_unlock(&svcpt
->scp_lock
);
842 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
853 ptlrpc_lprocfs_svc_req_history_stop(struct seq_file
*s
, void *iter
)
855 struct ptlrpc_srh_iterator
*srhi
= iter
;
861 ptlrpc_lprocfs_svc_req_history_next(struct seq_file
*s
,
862 void *iter
, loff_t
*pos
)
864 struct ptlrpc_service
*svc
= s
->private;
865 struct ptlrpc_srh_iterator
*srhi
= iter
;
866 struct ptlrpc_service_part
*svcpt
;
871 for (i
= srhi
->srhi_idx
; i
< svc
->srv_ncpts
; i
++) {
872 svcpt
= svc
->srv_parts
[i
];
874 if (i
> srhi
->srhi_idx
) { /* reset iterator for a new CPT */
875 srhi
->srhi_req
= NULL
;
876 seq
= srhi
->srhi_seq
= 0;
877 } else { /* the next sequence */
878 seq
= srhi
->srhi_seq
+ (1 << svc
->srv_cpt_bits
);
881 spin_lock(&svcpt
->scp_lock
);
882 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, seq
);
883 spin_unlock(&svcpt
->scp_lock
);
885 *pos
= PTLRPC_REQ_SEQ2POS(svc
, srhi
->srhi_seq
);
895 static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file
*s
, void *iter
)
897 struct ptlrpc_service
*svc
= s
->private;
898 struct ptlrpc_srh_iterator
*srhi
= iter
;
899 struct ptlrpc_service_part
*svcpt
;
900 struct ptlrpc_request
*req
;
903 LASSERT(srhi
->srhi_idx
< svc
->srv_ncpts
);
905 svcpt
= svc
->srv_parts
[srhi
->srhi_idx
];
907 spin_lock(&svcpt
->scp_lock
);
909 rc
= ptlrpc_lprocfs_svc_req_history_seek(svcpt
, srhi
, srhi
->srhi_seq
);
912 char nidstr
[LNET_NIDSTR_SIZE
];
914 req
= srhi
->srhi_req
;
916 libcfs_nid2str_r(req
->rq_self
, nidstr
, sizeof(nidstr
));
917 /* Print common req fields.
918 * CAVEAT EMPTOR: we're racing with the service handler
919 * here. The request could contain any old crap, so you
920 * must be just as careful as the service's request
921 * parser. Currently I only print stuff here I know is OK
922 * to look at coz it was set up in request_in_callback()!!! */
923 seq_printf(s
, "%lld:%s:%s:x%llu:%d:%s:%lld:%lds(%+lds) ",
924 req
->rq_history_seq
, nidstr
,
925 libcfs_id2str(req
->rq_peer
), req
->rq_xid
,
926 req
->rq_reqlen
, ptlrpc_rqphase2str(req
),
927 (s64
)req
->rq_arrival_time
.tv_sec
,
928 (long)(req
->rq_sent
- req
->rq_arrival_time
.tv_sec
),
929 (long)(req
->rq_sent
- req
->rq_deadline
));
930 if (svc
->srv_ops
.so_req_printer
== NULL
)
933 svc
->srv_ops
.so_req_printer(s
, srhi
->srhi_req
);
936 spin_unlock(&svcpt
->scp_lock
);
941 ptlrpc_lprocfs_svc_req_history_open(struct inode
*inode
, struct file
*file
)
943 static struct seq_operations sops
= {
944 .start
= ptlrpc_lprocfs_svc_req_history_start
,
945 .stop
= ptlrpc_lprocfs_svc_req_history_stop
,
946 .next
= ptlrpc_lprocfs_svc_req_history_next
,
947 .show
= ptlrpc_lprocfs_svc_req_history_show
,
949 struct seq_file
*seqf
;
952 rc
= seq_open(file
, &sops
);
956 seqf
= file
->private_data
;
957 seqf
->private = inode
->i_private
;
961 /* See also lprocfs_rd_timeouts */
962 static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file
*m
, void *n
)
964 struct ptlrpc_service
*svc
= m
->private;
965 struct ptlrpc_service_part
*svcpt
;
973 seq_printf(m
, "adaptive timeouts off, using obd_timeout %u\n",
978 ptlrpc_service_for_each_part(svcpt
, i
, svc
) {
979 cur
= at_get(&svcpt
->scp_at_estimate
);
980 worst
= svcpt
->scp_at_estimate
.at_worst_ever
;
981 worstt
= svcpt
->scp_at_estimate
.at_worst_time
;
982 s2dhms(&ts
, ktime_get_real_seconds() - worstt
);
984 seq_printf(m
, "%10s : cur %3u worst %3u (at %lld, "
985 DHMS_FMT
" ago) ", "service",
986 cur
, worst
, (s64
)worstt
, DHMS_VARS(&ts
));
988 lprocfs_at_hist_helper(m
, &svcpt
->scp_at_estimate
);
994 LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts
);
996 static ssize_t
high_priority_ratio_show(struct kobject
*kobj
,
997 struct attribute
*attr
,
1000 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1002 return sprintf(buf
, "%d\n", svc
->srv_hpreq_ratio
);
1005 static ssize_t
high_priority_ratio_store(struct kobject
*kobj
,
1006 struct attribute
*attr
,
1010 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1015 rc
= kstrtoint(buffer
, 10, &val
);
1022 spin_lock(&svc
->srv_lock
);
1023 svc
->srv_hpreq_ratio
= val
;
1024 spin_unlock(&svc
->srv_lock
);
1028 LUSTRE_RW_ATTR(high_priority_ratio
);
1030 static struct attribute
*ptlrpc_svc_attrs
[] = {
1031 &lustre_attr_threads_min
.attr
,
1032 &lustre_attr_threads_started
.attr
,
1033 &lustre_attr_threads_max
.attr
,
1034 &lustre_attr_high_priority_ratio
.attr
,
1038 static void ptlrpc_sysfs_svc_release(struct kobject
*kobj
)
1040 struct ptlrpc_service
*svc
= container_of(kobj
, struct ptlrpc_service
,
1043 complete(&svc
->srv_kobj_unregister
);
1046 static struct kobj_type ptlrpc_svc_ktype
= {
1047 .default_attrs
= ptlrpc_svc_attrs
,
1048 .sysfs_ops
= &lustre_sysfs_ops
,
1049 .release
= ptlrpc_sysfs_svc_release
,
1052 void ptlrpc_sysfs_unregister_service(struct ptlrpc_service
*svc
)
1054 /* Let's see if we had a chance at initialization first */
1055 if (svc
->srv_kobj
.kset
) {
1056 kobject_put(&svc
->srv_kobj
);
1057 wait_for_completion(&svc
->srv_kobj_unregister
);
1061 int ptlrpc_sysfs_register_service(struct kset
*parent
,
1062 struct ptlrpc_service
*svc
)
1066 svc
->srv_kobj
.kset
= parent
;
1067 init_completion(&svc
->srv_kobj_unregister
);
1068 rc
= kobject_init_and_add(&svc
->srv_kobj
, &ptlrpc_svc_ktype
, NULL
,
1069 "%s", svc
->srv_name
);
1074 void ptlrpc_ldebugfs_register_service(struct dentry
*entry
,
1075 struct ptlrpc_service
*svc
)
1077 struct lprocfs_vars lproc_vars
[] = {
1078 {.name
= "req_buffer_history_len",
1079 .fops
= &ptlrpc_lprocfs_req_history_len_fops
,
1081 {.name
= "req_buffer_history_max",
1082 .fops
= &ptlrpc_lprocfs_req_history_max_fops
,
1084 {.name
= "timeouts",
1085 .fops
= &ptlrpc_lprocfs_timeouts_fops
,
1087 {.name
= "nrs_policies",
1088 .fops
= &ptlrpc_lprocfs_nrs_fops
,
1092 static const struct file_operations req_history_fops
= {
1093 .owner
= THIS_MODULE
,
1094 .open
= ptlrpc_lprocfs_svc_req_history_open
,
1096 .llseek
= seq_lseek
,
1097 .release
= lprocfs_seq_release
,
1102 ptlrpc_ldebugfs_register(entry
, svc
->srv_name
,
1103 "stats", &svc
->srv_debugfs_entry
,
1106 if (svc
->srv_debugfs_entry
== NULL
)
1109 ldebugfs_add_vars(svc
->srv_debugfs_entry
, lproc_vars
, NULL
);
1111 rc
= ldebugfs_seq_create(svc
->srv_debugfs_entry
, "req_history",
1112 0400, &req_history_fops
, svc
);
1114 CWARN("Error adding the req_history file\n");
1117 void ptlrpc_lprocfs_register_obd(struct obd_device
*obddev
)
1119 ptlrpc_ldebugfs_register(obddev
->obd_debugfs_entry
, NULL
, "stats",
1120 &obddev
->obd_svc_debugfs_entry
,
1121 &obddev
->obd_svc_stats
);
1123 EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd
);
1125 void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request
*req
, long amount
)
1127 struct lprocfs_stats
*svc_stats
;
1128 __u32 op
= lustre_msg_get_opc(req
->rq_reqmsg
);
1129 int opc
= opcode_offset(op
);
1131 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1132 if (svc_stats
== NULL
|| opc
<= 0)
1134 LASSERT(opc
< LUSTRE_MAX_OPCODES
);
1135 if (!(op
== LDLM_ENQUEUE
|| op
== MDS_REINT
))
1136 lprocfs_counter_add(svc_stats
, opc
+ EXTRA_MAX_OPCODES
, amount
);
1139 void ptlrpc_lprocfs_brw(struct ptlrpc_request
*req
, int bytes
)
1141 struct lprocfs_stats
*svc_stats
;
1144 if (!req
->rq_import
)
1146 svc_stats
= req
->rq_import
->imp_obd
->obd_svc_stats
;
1149 idx
= lustre_msg_get_opc(req
->rq_reqmsg
);
1152 idx
= BRW_READ_BYTES
+ PTLRPC_LAST_CNTR
;
1155 idx
= BRW_WRITE_BYTES
+ PTLRPC_LAST_CNTR
;
1158 LASSERTF(0, "unsupported opcode %u\n", idx
);
1162 lprocfs_counter_add(svc_stats
, idx
, bytes
);
1165 EXPORT_SYMBOL(ptlrpc_lprocfs_brw
);
1167 void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service
*svc
)
1169 if (svc
->srv_debugfs_entry
!= NULL
)
1170 ldebugfs_remove(&svc
->srv_debugfs_entry
);
1173 lprocfs_free_stats(&svc
->srv_stats
);
1176 void ptlrpc_lprocfs_unregister_obd(struct obd_device
*obd
)
1178 if (!IS_ERR_OR_NULL(obd
->obd_svc_debugfs_entry
))
1179 ldebugfs_remove(&obd
->obd_svc_debugfs_entry
);
1181 if (obd
->obd_svc_stats
)
1182 lprocfs_free_stats(&obd
->obd_svc_stats
);
1184 EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd
);
1188 int lprocfs_wr_ping(struct file
*file
, const char __user
*buffer
,
1189 size_t count
, loff_t
*off
)
1191 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1192 struct ptlrpc_request
*req
;
1195 rc
= lprocfs_climp_check(obd
);
1199 req
= ptlrpc_prep_ping(obd
->u
.cli
.cl_import
);
1200 LPROCFS_CLIMP_EXIT(obd
);
1204 req
->rq_send_state
= LUSTRE_IMP_FULL
;
1206 rc
= ptlrpc_queue_wait(req
);
1208 ptlrpc_req_finished(req
);
1213 EXPORT_SYMBOL(lprocfs_wr_ping
);
1215 /* Write the connection UUID to this file to attempt to connect to that node.
1216 * The connection UUID is a node's primary NID. For example,
1217 * "echo connection=192.168.0.1@tcp0::instance > .../import".
1219 int lprocfs_wr_import(struct file
*file
, const char __user
*buffer
,
1220 size_t count
, loff_t
*off
)
1222 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1223 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1228 const char prefix
[] = "connection=";
1229 const int prefix_len
= sizeof(prefix
) - 1;
1231 if (count
> PAGE_CACHE_SIZE
- 1 || count
<= prefix_len
)
1234 kbuf
= kzalloc(count
+ 1, GFP_NOFS
);
1238 if (copy_from_user(kbuf
, buffer
, count
)) {
1245 /* only support connection=uuid::instance now */
1246 if (strncmp(prefix
, kbuf
, prefix_len
) != 0) {
1251 uuid
= kbuf
+ prefix_len
;
1252 ptr
= strstr(uuid
, "::");
1259 ptr
+= strlen("::");
1260 inst
= simple_strtoul(ptr
, &endptr
, 10);
1262 CERROR("config: wrong instance # %s\n", ptr
);
1263 } else if (inst
!= imp
->imp_connect_data
.ocd_instance
) {
1264 CDEBUG(D_INFO
, "IR: %s is connecting to an obsoleted target(%u/%u), reconnecting...\n",
1265 imp
->imp_obd
->obd_name
,
1266 imp
->imp_connect_data
.ocd_instance
, inst
);
1269 CDEBUG(D_INFO
, "IR: %s has already been connecting to new target(%u)\n",
1270 imp
->imp_obd
->obd_name
, inst
);
1275 ptlrpc_recover_import(imp
, uuid
, 1);
1281 EXPORT_SYMBOL(lprocfs_wr_import
);
1283 int lprocfs_rd_pinger_recov(struct seq_file
*m
, void *n
)
1285 struct obd_device
*obd
= m
->private;
1286 struct obd_import
*imp
= obd
->u
.cli
.cl_import
;
1289 rc
= lprocfs_climp_check(obd
);
1293 seq_printf(m
, "%d\n", !imp
->imp_no_pinger_recover
);
1294 LPROCFS_CLIMP_EXIT(obd
);
1298 EXPORT_SYMBOL(lprocfs_rd_pinger_recov
);
1300 int lprocfs_wr_pinger_recov(struct file
*file
, const char __user
*buffer
,
1301 size_t count
, loff_t
*off
)
1303 struct obd_device
*obd
= ((struct seq_file
*)file
->private_data
)->private;
1304 struct client_obd
*cli
= &obd
->u
.cli
;
1305 struct obd_import
*imp
= cli
->cl_import
;
1308 rc
= lprocfs_write_helper(buffer
, count
, &val
);
1312 if (val
!= 0 && val
!= 1)
1315 rc
= lprocfs_climp_check(obd
);
1319 spin_lock(&imp
->imp_lock
);
1320 imp
->imp_no_pinger_recover
= !val
;
1321 spin_unlock(&imp
->imp_lock
);
1322 LPROCFS_CLIMP_EXIT(obd
);
1327 EXPORT_SYMBOL(lprocfs_wr_pinger_recov
);