drivers/staging/lustre/lustre/obdclass/lu_object.c
1/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
6a5b99a4 18 * http://www.gnu.org/licenses/gpl-2.0.html
d7e09d03 19 *
20 * GPL HEADER END
21 */
22/*
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
1dc563a6 26 * Copyright (c) 2011, 2015, Intel Corporation.
27 */
28/*
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
31 *
32 * lustre/obdclass/lu_object.c
33 *
34 * Lustre Object.
35 * These are the only exported functions; they provide some generic
36 * infrastructure for managing object devices.
37 *
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
39 */
40
41#define DEBUG_SUBSYSTEM S_CLASS
42
9fdaf8c0 43#include "../../include/linux/libcfs/libcfs.h"
44
45# include <linux/module.h>
46
47/* hash_long() */
9fdaf8c0 48#include "../../include/linux/libcfs/libcfs_hash.h"
49#include "../include/obd_class.h"
50#include "../include/obd_support.h"
51#include "../include/lustre_disk.h"
52#include "../include/lustre_fid.h"
53#include "../include/lu_object.h"
26f98e82 54#include "../include/cl_object.h"
610f7377 55#include "../include/lu_ref.h"
56#include <linux/list.h>
57
58enum {
59 LU_CACHE_PERCENT_MAX = 50,
60 LU_CACHE_PERCENT_DEFAULT = 20
61};
62
63#define LU_CACHE_NR_MAX_ADJUST 128
64#define LU_CACHE_NR_UNLIMITED -1
65#define LU_CACHE_NR_DEFAULT LU_CACHE_NR_UNLIMITED
66#define LU_CACHE_NR_LDISKFS_LIMIT LU_CACHE_NR_UNLIMITED
67#define LU_CACHE_NR_ZFS_LIMIT 256
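/*
 * Note: the cache size limit configured through lu_cache_nr (below) is
 * enforced lazily: lu_object_limit() calls lu_site_purge() once the object
 * hash grows past the configured number of objects, and does nothing when
 * lu_cache_nr is LU_CACHE_NR_UNLIMITED.
 */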
68
69#define LU_SITE_BITS_MIN 12
70#define LU_SITE_BITS_MAX 24
71/**
72 * total of 256 buckets; we don't want too many buckets because they:
73 * - consume too much memory
74 * - lead to unbalanced LRU lists
75 */
76#define LU_SITE_BKT_BITS 8
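/*
 * A small worked example (based on how lu_site_init() calls
 * cfs_hash_create(name, bits, bits, bits - LU_SITE_BKT_BITS, ...)):
 * whatever hash order lu_htable_order() picks, the bucket count comes out
 * as 1 << LU_SITE_BKT_BITS = 256, and each bucket covers 1 << (bits - 8)
 * hash slots while carrying its own LRU list and wait queue.
 */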
77
78static unsigned int lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
79module_param(lu_cache_percent, int, 0644);
80MODULE_PARM_DESC(lu_cache_percent, "Percentage of memory to be used as lu_object cache");
81
82static long lu_cache_nr = LU_CACHE_NR_DEFAULT;
83module_param(lu_cache_nr, long, 0644);
84MODULE_PARM_DESC(lu_cache_nr, "Maximum number of objects in lu_object cache");
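/*
 * Both knobs are ordinary module parameters, so (with hypothetical values)
 * they can be set at load time, e.g.
 * "modprobe obdclass lu_cache_percent=30 lu_cache_nr=65536", or adjusted
 * later through /sys/module/obdclass/parameters/ thanks to the 0644
 * permissions above.
 */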
85
d7e09d03 86static void lu_object_free(const struct lu_env *env, struct lu_object *o);
a0b8803a 87static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx);
88
89/**
90 * Decrease reference counter on object. If last reference is freed, return
91 * object to the cache, unless lu_object_is_dying(o) holds. In the latter
92 * case, free object immediately.
93 */
94void lu_object_put(const struct lu_env *env, struct lu_object *o)
95{
96 struct lu_site_bkt_data *bkt;
97 struct lu_object_header *top;
98 struct lu_site *site;
99 struct lu_object *orig;
6ea510c1 100 struct cfs_hash_bd bd;
101 const struct lu_fid *fid;
102
103 top = o->lo_header;
104 site = o->lo_dev->ld_site;
105 orig = o;
106
107 /*
108 * Till we have full fids-on-OST implemented, anonymous objects
109 * are possible in OSP. Such an object isn't listed in the site,
110 * so we should not remove it from the site.
111 */
112 fid = lu_object_fid(o);
113 if (fid_is_zero(fid)) {
cce3c2da 114 LASSERT(!top->loh_hash.next && !top->loh_hash.pprev);
115 LASSERT(list_empty(&top->loh_lru));
116 if (!atomic_dec_and_test(&top->loh_ref))
117 return;
118 list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) {
cce3c2da 119 if (o->lo_ops->loo_object_release)
120 o->lo_ops->loo_object_release(env, o);
121 }
122 lu_object_free(env, orig);
123 return;
124 }
125
126 cfs_hash_bd_get(site->ls_obj_hash, &top->loh_fid, &bd);
127 bkt = cfs_hash_bd_extra_get(site->ls_obj_hash, &bd);
128
129 if (!cfs_hash_bd_dec_and_lock(site->ls_obj_hash, &bd, &top->loh_ref)) {
130 if (lu_object_is_dying(top)) {
131 /*
132 * somebody may be waiting for this, currently only
133 * used for cl_object, see cl_object_put_last().
134 */
135 wake_up_all(&bkt->lsb_marche_funebre);
136 }
137 return;
138 }
139
140 /*
141 * When last reference is released, iterate over object
142 * layers, and notify them that object is no longer busy.
143 */
144 list_for_each_entry_reverse(o, &top->loh_layers, lo_linkage) {
cce3c2da 145 if (o->lo_ops->loo_object_release)
146 o->lo_ops->loo_object_release(env, o);
147 }
148
149 if (!lu_object_is_dying(top)) {
150 LASSERT(list_empty(&top->loh_lru));
151 list_add_tail(&top->loh_lru, &bkt->lsb_lru);
6e580ab5 152 bkt->lsb_lru_len++;
153 lprocfs_counter_incr(site->ls_stats, LU_SS_LRU_LEN);
154 CDEBUG(D_INODE, "Add %p to site lru. hash: %p, bkt: %p, lru_len: %ld\n",
155 o, site->ls_obj_hash, bkt, bkt->lsb_lru_len);
156 cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
157 return;
158 }
159
160 /*
161 * If object is dying (will not be cached), then remove it
162 * from hash table and LRU.
163 *
164 * This is done with hash table and LRU lists locked. As the only
165 * way to acquire first reference to previously unreferenced
166 * object is through hash-table lookup (lu_object_find()),
167 * or LRU scanning (lu_site_purge()), which are done under hash-table
168 * and LRU lock, no race with concurrent object lookup is possible
169 * and we can safely destroy object below.
170 */
171 if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags))
172 cfs_hash_bd_del_locked(site->ls_obj_hash, &bd, &top->loh_hash);
173 cfs_hash_bd_unlock(site->ls_obj_hash, &bd, 1);
174 /*
175 * Object was already removed from hash and lru above, can
176 * kill it.
177 */
178 lu_object_free(env, orig);
179}
180EXPORT_SYMBOL(lu_object_put);
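/*
 * A minimal usage sketch (not taken from a real caller): the reference
 * obtained from lu_object_find_at() is dropped with lu_object_put() once
 * the caller is done with the object:
 *
 *	o = lu_object_find_at(env, dev, fid, conf);
 *	if (!IS_ERR(o)) {
 *		... use the object ...
 *		lu_object_put(env, o);
 *	}
 */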
181
182/**
183 * Kill the object and take it out of LRU cache.
184 * Currently used by client code for layout change.
185 */
186void lu_object_unhash(const struct lu_env *env, struct lu_object *o)
187{
188 struct lu_object_header *top;
189
190 top = o->lo_header;
191 set_bit(LU_OBJECT_HEARD_BANSHEE, &top->loh_flags);
192 if (!test_and_set_bit(LU_OBJECT_UNHASHED, &top->loh_flags)) {
193 struct lu_site *site = o->lo_dev->ld_site;
194 struct cfs_hash *obj_hash = site->ls_obj_hash;
6ea510c1 195 struct cfs_hash_bd bd;
196
197 cfs_hash_bd_get_and_lock(obj_hash, &top->loh_fid, &bd, 1);
198 if (!list_empty(&top->loh_lru)) {
199 struct lu_site_bkt_data *bkt;
200
a0b8803a 201 list_del_init(&top->loh_lru);
202 bkt = cfs_hash_bd_extra_get(obj_hash, &bd);
203 bkt->lsb_lru_len--;
a0b8803a 204 lprocfs_counter_decr(site->ls_stats, LU_SS_LRU_LEN);
6e580ab5 205 }
206 cfs_hash_bd_del_locked(obj_hash, &bd, &top->loh_hash);
207 cfs_hash_bd_unlock(obj_hash, &bd, 1);
208 }
209}
210EXPORT_SYMBOL(lu_object_unhash);
211
212/**
213 * Allocate new object.
214 *
215 * This follows object creation protocol, described in the comment within
216 * struct lu_device_operations definition.
217 */
218static struct lu_object *lu_object_alloc(const struct lu_env *env,
219 struct lu_device *dev,
220 const struct lu_fid *f,
221 const struct lu_object_conf *conf)
222{
223 struct lu_object *scan;
224 struct lu_object *top;
225 struct list_head *layers;
226 unsigned int init_mask = 0;
227 unsigned int init_flag;
228 int clean;
229 int result;
230
231 /*
232 * Create top-level object slice. This will also create
233 * lu_object_header.
234 */
235 top = dev->ld_ops->ldo_object_alloc(env, NULL, dev);
cce3c2da 236 if (!top)
0a3bdb00 237 return ERR_PTR(-ENOMEM);
d7e09d03 238 if (IS_ERR(top))
0a3bdb00 239 return top;
240 /*
241 * This is the only place where object fid is assigned. It's constant
242 * after this point.
243 */
244 top->lo_header->loh_fid = *f;
245 layers = &top->lo_header->loh_layers;
7cd875d2 246
247 do {
248 /*
249 * Call ->loo_object_init() repeatedly, until no more new
250 * object slices are created.
251 */
252 clean = 1;
7cd875d2 253 init_flag = 1;
d7e09d03 254 list_for_each_entry(scan, layers, lo_linkage) {
255 if (init_mask & init_flag)
256 goto next;
257 clean = 0;
258 scan->lo_header = top->lo_header;
259 result = scan->lo_ops->loo_object_init(env, scan, conf);
260 if (result != 0) {
261 lu_object_free(env, top);
0a3bdb00 262 return ERR_PTR(result);
d7e09d03 263 }
264 init_mask |= init_flag;
265next:
266 init_flag <<= 1;
267 }
268 } while (!clean);
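	/*
	 * At this point every slice on loh_layers has had ->loo_object_init()
	 * called exactly once: init_mask records, one bit per layer, which
	 * slices were already initialized on earlier passes of the loop above.
	 */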
269
270 list_for_each_entry_reverse(scan, layers, lo_linkage) {
cce3c2da 271 if (scan->lo_ops->loo_object_start) {
272 result = scan->lo_ops->loo_object_start(env, scan);
273 if (result != 0) {
274 lu_object_free(env, top);
0a3bdb00 275 return ERR_PTR(result);
276 }
277 }
278 }
279
280 lprocfs_counter_incr(dev->ld_site->ls_stats, LU_SS_CREATED);
0a3bdb00 281 return top;
282}
283
284/**
285 * Free an object.
286 */
287static void lu_object_free(const struct lu_env *env, struct lu_object *o)
288{
289 struct lu_site_bkt_data *bkt;
290 struct lu_site *site;
291 struct lu_object *scan;
292 struct list_head *layers;
293 struct list_head splice;
294
295 site = o->lo_dev->ld_site;
296 layers = &o->lo_header->loh_layers;
297 bkt = lu_site_bkt_from_fid(site, &o->lo_header->loh_fid);
298 /*
299 * First call ->loo_object_delete() method to release all resources.
300 */
301 list_for_each_entry_reverse(scan, layers, lo_linkage) {
cce3c2da 302 if (scan->lo_ops->loo_object_delete)
303 scan->lo_ops->loo_object_delete(env, scan);
304 }
305
306 /*
307 * Then, splice object layers into stand-alone list, and call
308 * ->loo_object_free() on all layers to free memory. Splice is
309 * necessary, because lu_object_header is freed together with the
310 * top-level slice.
311 */
312 INIT_LIST_HEAD(&splice);
313 list_splice_init(layers, &splice);
314 while (!list_empty(&splice)) {
315 /*
316 * Free layers in bottom-to-top order, so that object header
317 * lives as long as possible and ->loo_object_free() methods
318 * can look at its contents.
319 */
320 o = container_of0(splice.prev, struct lu_object, lo_linkage);
321 list_del_init(&o->lo_linkage);
322 o->lo_ops->loo_object_free(env, o);
323 }
324
325 if (waitqueue_active(&bkt->lsb_marche_funebre))
326 wake_up_all(&bkt->lsb_marche_funebre);
327}
328
329/**
330 * Free \a nr objects from the cold end of the site LRU list.
331 */
332int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr)
333{
334 struct lu_object_header *h;
335 struct lu_object_header *temp;
336 struct lu_site_bkt_data *bkt;
337 struct cfs_hash_bd bd;
338 struct cfs_hash_bd bd2;
339 struct list_head dispose;
340 int did_sth;
341 int start;
342 int count;
343 int bnr;
344 int i;
345
346 if (OBD_FAIL_CHECK(OBD_FAIL_OBD_NO_LRU))
0a3bdb00 347 return 0;
348
349 INIT_LIST_HEAD(&dispose);
350 /*
351 * Under LRU list lock, scan LRU list and move unreferenced objects to
352 * the dispose list, removing them from LRU and hash table.
353 */
354 start = s->ls_purge_start;
355 bnr = (nr == ~0) ? -1 : nr / CFS_HASH_NBKT(s->ls_obj_hash) + 1;
356 again:
357 did_sth = 0;
358 cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
359 if (i < start)
360 continue;
361 count = bnr;
362 cfs_hash_bd_lock(s->ls_obj_hash, &bd, 1);
363 bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
364
365 list_for_each_entry_safe(h, temp, &bkt->lsb_lru, loh_lru) {
366 LASSERT(atomic_read(&h->loh_ref) == 0);
367
368 cfs_hash_bd_get(s->ls_obj_hash, &h->loh_fid, &bd2);
369 LASSERT(bd.bd_bucket == bd2.bd_bucket);
370
371 cfs_hash_bd_del_locked(s->ls_obj_hash,
372 &bd2, &h->loh_hash);
373 list_move(&h->loh_lru, &dispose);
6e580ab5 374 bkt->lsb_lru_len--;
a0b8803a 375 lprocfs_counter_decr(s->ls_stats, LU_SS_LRU_LEN);
376 if (did_sth == 0)
377 did_sth = 1;
378
379 if (nr != ~0 && --nr == 0)
380 break;
381
382 if (count > 0 && --count == 0)
383 break;
384 }
385 cfs_hash_bd_unlock(s->ls_obj_hash, &bd, 1);
386 cond_resched();
387 /*
388 * Free everything on the dispose list. This is safe against
389 * races due to the reasons described in lu_object_put().
390 */
391 while (!list_empty(&dispose)) {
392 h = container_of0(dispose.next,
393 struct lu_object_header, loh_lru);
394 list_del_init(&h->loh_lru);
395 lu_object_free(env, lu_object_top(h));
396 lprocfs_counter_incr(s->ls_stats, LU_SS_LRU_PURGED);
397 }
398
399 if (nr == 0)
400 break;
401 }
402
403 if (nr != 0 && did_sth && start != 0) {
404 start = 0; /* restart from the first bucket */
405 goto again;
406 }
407 /* race on s->ls_purge_start, but nobody cares */
408 s->ls_purge_start = i % CFS_HASH_NBKT(s->ls_obj_hash);
409
410 return nr;
411}
412EXPORT_SYMBOL(lu_site_purge);
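/*
 * Passing ~0 as \a nr asks for everything on the LRU to be freed; this is
 * how lu_stack_fini() below empties the cache before tearing the device
 * stack down.
 */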
413
414/*
415 * Object printing.
416 *
417 * Code below has to jump through certain loops to output object description
418 * into libcfs_debug_msg-based log. The problem is that lu_object_print()
419 * composes object description from strings that are parts of _lines_ of
420 * output (i.e., strings that are not terminated by newline). This doesn't fit
421 * very well into libcfs_debug_msg() interface that assumes that each message
422 * supplied to it is a self-contained output line.
423 *
424 * To work around this, strings are collected in a temporary buffer
425 * (implemented as a value of lu_cdebug_key key), until terminating newline
426 * character is detected.
427 *
428 */
429
430enum {
431 /**
432 * Maximal line size.
433 *
434 * XXX overflow is not handled correctly.
435 */
436 LU_CDEBUG_LINE = 512
437};
438
439struct lu_cdebug_data {
440 /**
441 * Temporary buffer.
442 */
443 char lck_area[LU_CDEBUG_LINE];
444};
445
446/* context key constructor/destructor: lu_global_key_init, lu_global_key_fini */
447LU_KEY_INIT_FINI(lu_global, struct lu_cdebug_data);
448
449/**
450 * Key, holding temporary buffer. This key is registered very early by
451 * lu_global_init().
452 */
05311893 453static struct lu_context_key lu_global_key = {
d7e09d03 454 .lct_tags = LCT_MD_THREAD | LCT_DT_THREAD |
aa4e3c8a 455 LCT_MG_THREAD | LCT_CL_THREAD | LCT_LOCAL,
456 .lct_init = lu_global_key_init,
457 .lct_fini = lu_global_key_fini
458};
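/*
 * The pattern above is the usual one for context keys in this file:
 * LU_KEY_INIT_FINI() generates lu_global_key_init()/lu_global_key_fini()
 * for the value type, the key is registered from lu_global_init(), and the
 * per-context value is then fetched with
 * lu_context_key_get(&env->le_ctx, &lu_global_key), as lu_cdebug_printer()
 * does below.
 */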
459
460/**
461 * Printer function emitting messages through libcfs_debug_msg().
462 */
463int lu_cdebug_printer(const struct lu_env *env,
464 void *cookie, const char *format, ...)
465{
466 struct libcfs_debug_msg_data *msgdata = cookie;
467 struct lu_cdebug_data *key;
468 int used;
469 int complete;
470 va_list args;
471
472 va_start(args, format);
473
474 key = lu_context_key_get(&env->le_ctx, &lu_global_key);
475
476 used = strlen(key->lck_area);
477 complete = format[strlen(format) - 1] == '\n';
478 /*
479 * Append new chunk to the buffer.
480 */
481 vsnprintf(key->lck_area + used,
482 ARRAY_SIZE(key->lck_area) - used, format, args);
483 if (complete) {
484 if (cfs_cdebug_show(msgdata->msg_mask, msgdata->msg_subsys))
19b2056f 485 libcfs_debug_msg(msgdata, "%s\n", key->lck_area);
486 key->lck_area[0] = 0;
487 }
488 va_end(args);
489 return 0;
490}
491EXPORT_SYMBOL(lu_cdebug_printer);
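/*
 * The \a cookie argument is expected to be a struct libcfs_debug_msg_data,
 * typically prepared by the caller's debug macros; partial messages (those
 * not ending in '\n') accumulate in the per-context lck_area buffer until a
 * terminating newline arrives.
 */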
492
493/**
494 * Print object header.
495 */
496void lu_object_header_print(const struct lu_env *env, void *cookie,
497 lu_printer_t printer,
498 const struct lu_object_header *hdr)
499{
500 (*printer)(env, cookie, "header@%p[%#lx, %d, "DFID"%s%s%s]",
501 hdr, hdr->loh_flags, atomic_read(&hdr->loh_ref),
502 PFID(&hdr->loh_fid),
503 hlist_unhashed(&hdr->loh_hash) ? "" : " hash",
504 list_empty((struct list_head *)&hdr->loh_lru) ? \
505 "" : " lru",
506 hdr->loh_attr & LOHA_EXISTS ? " exist":"");
507}
508EXPORT_SYMBOL(lu_object_header_print);
509
510/**
511 * Print human readable representation of the \a o to the \a printer.
512 */
513void lu_object_print(const struct lu_env *env, void *cookie,
514 lu_printer_t printer, const struct lu_object *o)
515{
516 static const char ruler[] = "........................................";
517 struct lu_object_header *top;
7cd875d2 518 int depth = 4;
519
520 top = o->lo_header;
521 lu_object_header_print(env, cookie, printer, top);
7cd875d2 522 (*printer)(env, cookie, "{\n");
d7e09d03 523
7cd875d2 524 list_for_each_entry(o, &top->loh_layers, lo_linkage) {
525 /*
526 * print `.' \a depth times followed by type name and address
527 */
528 (*printer)(env, cookie, "%*.*s%s@%p", depth, depth, ruler,
529 o->lo_dev->ld_type->ldt_name, o);
7cd875d2 530
cce3c2da 531 if (o->lo_ops->loo_object_print)
532 (*o->lo_ops->loo_object_print)(env, cookie, printer, o);
533
534 (*printer)(env, cookie, "\n");
535 }
7cd875d2 536
537 (*printer)(env, cookie, "} header@%p\n", top);
538}
539EXPORT_SYMBOL(lu_object_print);
540
d7e09d03 541static struct lu_object *htable_lookup(struct lu_site *s,
6ea510c1 542 struct cfs_hash_bd *bd,
543 const struct lu_fid *f,
544 wait_queue_t *waiter,
545 __u64 *version)
546{
547 struct lu_site_bkt_data *bkt;
548 struct lu_object_header *h;
549 struct hlist_node *hnode;
550 __u64 ver = cfs_hash_bd_version_get(bd);
551
552 if (*version == ver)
70b749d4 553 return ERR_PTR(-ENOENT);
554
555 *version = ver;
556 bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
557 /* cfs_hash_bd_peek_locked is somewhat of an "internal" function
558 * of cfs_hash, it doesn't add refcount on object.
559 */
d7e09d03 560 hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
cce3c2da 561 if (!hnode) {
d7e09d03 562 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
70b749d4 563 return ERR_PTR(-ENOENT);
564 }
565
566 h = container_of0(hnode, struct lu_object_header, loh_hash);
567 if (likely(!lu_object_is_dying(h))) {
568 cfs_hash_get(s->ls_obj_hash, hnode);
569 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
6e580ab5 570 if (!list_empty(&h->loh_lru)) {
a0b8803a 571 list_del_init(&h->loh_lru);
6e580ab5 572 bkt->lsb_lru_len--;
a0b8803a 573 lprocfs_counter_decr(s->ls_stats, LU_SS_LRU_LEN);
6e580ab5 574 }
575 return lu_object_top(h);
576 }
577
578 /*
579 * Lookup found an object being destroyed; this object cannot be
580 * returned (to assure that references to dying objects are eventually
581 * drained), and moreover, lookup has to wait until object is freed.
582 */
583
9e795d35 584 init_waitqueue_entry(waiter, current);
585 add_wait_queue(&bkt->lsb_marche_funebre, waiter);
586 set_current_state(TASK_UNINTERRUPTIBLE);
587 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_DEATH_RACE);
588 return ERR_PTR(-EAGAIN);
589}
590
591/**
592 * Search cache for an object with the fid \a f. If such object is found,
593 * return it. Otherwise, create new object, insert it into cache and return
594 * it. In any case, additional reference is acquired on the returned object.
595 */
596static struct lu_object *lu_object_find(const struct lu_env *env,
597 struct lu_device *dev,
598 const struct lu_fid *f,
599 const struct lu_object_conf *conf)
600{
601 return lu_object_find_at(env, dev->ld_site->ls_top_dev, f, conf);
602}
d7e09d03 603
604/*
605 * Limit the lu_object cache to a maximum of lu_cache_nr objects. Because
606 * the calculation for the number of objects to reclaim is not covered by
607 * a lock, the maximum number of objects is capped by LU_CACHE_NR_MAX_ADJUST.
608 * This ensures that many concurrent threads will not accidentally purge
609 * the entire cache.
610 */
611static void lu_object_limit(const struct lu_env *env, struct lu_device *dev)
612{
613 __u64 size, nr;
614
615 if (lu_cache_nr == LU_CACHE_NR_UNLIMITED)
616 return;
617
618 size = cfs_hash_size_get(dev->ld_site->ls_obj_hash);
619 nr = (__u64)lu_cache_nr;
620 if (size > nr)
621 lu_site_purge(env, dev->ld_site,
622 min_t(__u64, size - nr, LU_CACHE_NR_MAX_ADJUST));
623}
624
625static struct lu_object *lu_object_new(const struct lu_env *env,
626 struct lu_device *dev,
627 const struct lu_fid *f,
628 const struct lu_object_conf *conf)
629{
630 struct lu_object *o;
6da6eabe 631 struct cfs_hash *hs;
6ea510c1 632 struct cfs_hash_bd bd;
633
634 o = lu_object_alloc(env, dev, f, conf);
7f44cb0b 635 if (IS_ERR(o))
636 return o;
637
638 hs = dev->ld_site->ls_obj_hash;
639 cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
d7e09d03 640 cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
d7e09d03 641 cfs_hash_bd_unlock(hs, &bd, 1);
642
643 lu_object_limit(env, dev);
644
645 return o;
646}
647
648/**
649 * Core logic of lu_object_find*() functions.
650 */
651static struct lu_object *lu_object_find_try(const struct lu_env *env,
652 struct lu_device *dev,
653 const struct lu_fid *f,
654 const struct lu_object_conf *conf,
655 wait_queue_t *waiter)
656{
657 struct lu_object *o;
658 struct lu_object *shadow;
659 struct lu_site *s;
6da6eabe 660 struct cfs_hash *hs;
6ea510c1 661 struct cfs_hash_bd bd;
662 __u64 version = 0;
663
664 /*
665 * This uses standard index maintenance protocol:
666 *
667 * - search index under lock, and return object if found;
668 * - otherwise, unlock index, allocate new object;
669 * - lock index and search again;
670 * - if nothing is found (usual case), insert newly created
671 * object into index;
672 * - otherwise (race: other thread inserted object), free
673 * object just allocated.
674 * - unlock index;
675 * - return object.
676 *
677 * For "LOC_F_NEW" case, we are sure the object is new established.
678 * It is unnecessary to perform lookup-alloc-lookup-insert, instead,
679 * just alloc and insert directly.
680 *
681 * If dying object is found during index search, add @waiter to the
682 * site wait-queue and return ERR_PTR(-EAGAIN).
683 */
cce3c2da 684 if (conf && conf->loc_flags & LOC_F_NEW)
685 return lu_object_new(env, dev, f, conf);
686
687 s = dev->ld_site;
688 hs = s->ls_obj_hash;
689 cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
690 o = htable_lookup(s, &bd, f, waiter, &version);
691 cfs_hash_bd_unlock(hs, &bd, 1);
70b749d4 692 if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
693 return o;
694
695 /*
696 * Allocate new object. This may result in rather complicated
697 * operations, including fld queries, inode loading, etc.
698 */
699 o = lu_object_alloc(env, dev, f, conf);
7f44cb0b 700 if (IS_ERR(o))
701 return o;
702
703 LASSERT(lu_fid_eq(lu_object_fid(o), f));
704
705 cfs_hash_bd_lock(hs, &bd, 1);
706
707 shadow = htable_lookup(s, &bd, f, waiter, &version);
208bf770 708 if (likely(PTR_ERR(shadow) == -ENOENT)) {
d7e09d03 709 cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
d7e09d03 710 cfs_hash_bd_unlock(hs, &bd, 1);
711
712 lu_object_limit(env, dev);
713
714 return o;
715 }
716
717 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_RACE);
718 cfs_hash_bd_unlock(hs, &bd, 1);
719 lu_object_free(env, o);
720 return shadow;
721}
722
723/**
724 * Much like lu_object_find(), but top level device of object is specifically
725 * \a dev rather than top level device of the site. This interface allows
726 * objects of different "stacking" to be created within the same site.
727 */
728struct lu_object *lu_object_find_at(const struct lu_env *env,
729 struct lu_device *dev,
730 const struct lu_fid *f,
731 const struct lu_object_conf *conf)
732{
733 struct lu_site_bkt_data *bkt;
734 struct lu_object *obj;
735 wait_queue_t wait;
736
737 while (1) {
738 obj = lu_object_find_try(env, dev, f, conf, &wait);
739 if (obj != ERR_PTR(-EAGAIN))
740 return obj;
741 /*
742 * lu_object_find_try() already added waiter into the
743 * wait queue.
744 */
b3669a7f 745 schedule();
746 bkt = lu_site_bkt_from_fid(dev->ld_site, (void *)f);
747 remove_wait_queue(&bkt->lsb_marche_funebre, &wait);
748 }
749}
750EXPORT_SYMBOL(lu_object_find_at);
751
752/**
753 * Find object with given fid, and return its slice belonging to given device.
754 */
755struct lu_object *lu_object_find_slice(const struct lu_env *env,
756 struct lu_device *dev,
757 const struct lu_fid *f,
758 const struct lu_object_conf *conf)
759{
760 struct lu_object *top;
761 struct lu_object *obj;
762
763 top = lu_object_find(env, dev, f, conf);
764 if (!IS_ERR(top)) {
765 obj = lu_object_locate(top->lo_header, dev->ld_type);
cce3c2da 766 if (!obj)
d7e09d03 767 lu_object_put(env, top);
da5ecb4d 768 } else {
d7e09d03 769 obj = top;
da5ecb4d 770 }
771 return obj;
772}
773EXPORT_SYMBOL(lu_object_find_slice);
774
775/**
776 * Global list of all device types.
777 */
778static LIST_HEAD(lu_device_types);
779
780int lu_device_type_init(struct lu_device_type *ldt)
781{
782 int result = 0;
783
a8610297 784 atomic_set(&ldt->ldt_device_nr, 0);
785 INIT_LIST_HEAD(&ldt->ldt_linkage);
786 if (ldt->ldt_ops->ldto_init)
787 result = ldt->ldt_ops->ldto_init(ldt);
788
789 if (!result) {
790 spin_lock(&obd_types_lock);
d7e09d03 791 list_add(&ldt->ldt_linkage, &lu_device_types);
792 spin_unlock(&obd_types_lock);
793 }
794
795 return result;
796}
797EXPORT_SYMBOL(lu_device_type_init);
798
799void lu_device_type_fini(struct lu_device_type *ldt)
800{
a8610297 801 spin_lock(&obd_types_lock);
d7e09d03 802 list_del_init(&ldt->ldt_linkage);
a8610297 803 spin_unlock(&obd_types_lock);
804 if (ldt->ldt_ops->ldto_fini)
805 ldt->ldt_ops->ldto_fini(ldt);
806}
807EXPORT_SYMBOL(lu_device_type_fini);
808
809/**
810 * Global list of all sites on this node
811 */
812static LIST_HEAD(lu_sites);
813static DEFINE_MUTEX(lu_sites_guard);
814
815/**
816 * Global environment used by site shrinker.
817 */
818static struct lu_env lu_shrink_env;
819
820struct lu_site_print_arg {
821 struct lu_env *lsp_env;
822 void *lsp_cookie;
823 lu_printer_t lsp_printer;
824};
825
826static int
6da6eabe 827lu_site_obj_print(struct cfs_hash *hs, struct cfs_hash_bd *bd,
828 struct hlist_node *hnode, void *data)
829{
830 struct lu_site_print_arg *arg = (struct lu_site_print_arg *)data;
831 struct lu_object_header *h;
832
833 h = hlist_entry(hnode, struct lu_object_header, loh_hash);
834 if (!list_empty(&h->loh_layers)) {
835 const struct lu_object *o;
836
837 o = lu_object_top(h);
838 lu_object_print(arg->lsp_env, arg->lsp_cookie,
839 arg->lsp_printer, o);
840 } else {
841 lu_object_header_print(arg->lsp_env, arg->lsp_cookie,
842 arg->lsp_printer, h);
843 }
844 return 0;
845}
846
847/**
848 * Print all objects in \a s.
849 */
850void lu_site_print(const struct lu_env *env, struct lu_site *s, void *cookie,
851 lu_printer_t printer)
852{
853 struct lu_site_print_arg arg = {
854 .lsp_env = (struct lu_env *)env,
855 .lsp_cookie = cookie,
856 .lsp_printer = printer,
857 };
858
859 cfs_hash_for_each(s->ls_obj_hash, lu_site_obj_print, &arg);
860}
861EXPORT_SYMBOL(lu_site_print);
862
863/**
864 * Return desired hash table order.
865 */
be191af9 866static int lu_htable_order(struct lu_device *top)
d7e09d03 867{
be191af9 868 unsigned long bits_max = LU_SITE_BITS_MAX;
869 unsigned long cache_size;
870 int bits;
871
872 /*
873 * Calculate hash table size, assuming that we want reasonable
874 * performance when 20% of total memory is occupied by cache of
875 * lu_objects.
876 *
877 * Size of lu_object is (arbitrary) taken as 1K (together with inode).
878 */
4f6cc9ab 879 cache_size = totalram_pages;
880
881#if BITS_PER_LONG == 32
882 /* limit hashtable size for lowmem systems to low RAM */
883 if (cache_size > 1 << (30 - PAGE_SHIFT))
884 cache_size = 1 << (30 - PAGE_SHIFT) * 3 / 4;
885#endif
886
887 /* clear off unreasonable cache setting. */
888 if (lu_cache_percent == 0 || lu_cache_percent > LU_CACHE_PERCENT_MAX) {
2d00bd17 889 CWARN("obdclass: invalid lu_cache_percent: %u, it must be in the range of (0, %u]. Will use default value: %u.\n",
890 lu_cache_percent, LU_CACHE_PERCENT_MAX,
891 LU_CACHE_PERCENT_DEFAULT);
892
893 lu_cache_percent = LU_CACHE_PERCENT_DEFAULT;
894 }
895 cache_size = cache_size / 100 * lu_cache_percent *
09cbfeaf 896 (PAGE_SIZE / 1024);
897
898 for (bits = 1; (1 << bits) < cache_size; ++bits) {
899 ;
900 }
be191af9 901 return clamp_t(typeof(bits), bits, LU_SITE_BITS_MIN, bits_max);
902}
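/*
 * Rough example of the sizing above (assumed numbers, 4 KiB pages): with
 * 16 GiB of RAM, totalram_pages is 4194304; 20% of that, times 4 (1 KiB
 * objects per page), gives roughly 3.3 million objects, so the loop picks
 * bits = 22, which falls inside the [LU_SITE_BITS_MIN, LU_SITE_BITS_MAX]
 * clamp of [12, 24].
 */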
903
6da6eabe 904static unsigned lu_obj_hop_hash(struct cfs_hash *hs,
905 const void *key, unsigned mask)
906{
907 struct lu_fid *fid = (struct lu_fid *)key;
908 __u32 hash;
909
910 hash = fid_flatten32(fid);
911 hash += (hash >> 4) + (hash << 12); /* mixing oid and seq */
72c0824a 912 hash = hash_long(hash, hs->hs_bkt_bits);
913
914 /* give me another random factor */
72c0824a 915 hash -= hash_long((unsigned long)hs, fid_oid(fid) % 11 + 3);
916
917 hash <<= hs->hs_cur_bits - hs->hs_bkt_bits;
918 hash |= (fid_seq(fid) + fid_oid(fid)) & (CFS_HASH_NBKT(hs) - 1);
919
920 return hash & mask;
921}
922
923static void *lu_obj_hop_object(struct hlist_node *hnode)
924{
925 return hlist_entry(hnode, struct lu_object_header, loh_hash);
926}
927
928static void *lu_obj_hop_key(struct hlist_node *hnode)
929{
930 struct lu_object_header *h;
931
932 h = hlist_entry(hnode, struct lu_object_header, loh_hash);
933 return &h->loh_fid;
934}
935
936static int lu_obj_hop_keycmp(const void *key, struct hlist_node *hnode)
937{
938 struct lu_object_header *h;
939
940 h = hlist_entry(hnode, struct lu_object_header, loh_hash);
941 return lu_fid_eq(&h->loh_fid, (struct lu_fid *)key);
942}
943
6da6eabe 944static void lu_obj_hop_get(struct cfs_hash *hs, struct hlist_node *hnode)
945{
946 struct lu_object_header *h;
947
948 h = hlist_entry(hnode, struct lu_object_header, loh_hash);
6e580ab5 949 atomic_inc(&h->loh_ref);
950}
951
6da6eabe 952static void lu_obj_hop_put_locked(struct cfs_hash *hs, struct hlist_node *hnode)
953{
954 LBUG(); /* we should never call it */
955}
956
fb7a0201 957static struct cfs_hash_ops lu_site_hash_ops = {
d7e09d03 958 .hs_hash = lu_obj_hop_hash,
db9fc06b 959 .hs_key = lu_obj_hop_key,
960 .hs_keycmp = lu_obj_hop_keycmp,
961 .hs_object = lu_obj_hop_object,
db9fc06b 962 .hs_get = lu_obj_hop_get,
963 .hs_put_locked = lu_obj_hop_put_locked,
964};
965
5913ef5e 966static void lu_dev_add_linkage(struct lu_site *s, struct lu_device *d)
967{
968 spin_lock(&s->ls_ld_lock);
969 if (list_empty(&d->ld_linkage))
970 list_add(&d->ld_linkage, &s->ls_ld_linkage);
971 spin_unlock(&s->ls_ld_lock);
972}
d7e09d03 973
974/**
975 * Initialize site \a s, with \a d as the top level device.
976 */
977int lu_site_init(struct lu_site *s, struct lu_device *top)
978{
979 struct lu_site_bkt_data *bkt;
6ea510c1 980 struct cfs_hash_bd bd;
981 unsigned long bits;
982 unsigned long i;
d7e09d03 983 char name[16];
d7e09d03 984
ec83e611 985 memset(s, 0, sizeof(*s));
d7e09d03 986 snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name);
be191af9 987 for (bits = lu_htable_order(top); bits >= LU_SITE_BITS_MIN; bits--) {
988 s->ls_obj_hash = cfs_hash_create(name, bits, bits,
989 bits - LU_SITE_BKT_BITS,
990 sizeof(*bkt), 0, 0,
991 &lu_site_hash_ops,
992 CFS_HASH_SPIN_BKTLOCK |
993 CFS_HASH_NO_ITEMREF |
994 CFS_HASH_DEPTH |
995 CFS_HASH_ASSERT_EMPTY |
996 CFS_HASH_COUNTER);
cce3c2da 997 if (s->ls_obj_hash)
998 break;
999 }
1000
cce3c2da 1001 if (!s->ls_obj_hash) {
be191af9 1002 CERROR("failed to create lu_site hash with bits: %lu\n", bits);
1003 return -ENOMEM;
1004 }
1005
1006 cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) {
1007 bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, &bd);
1008 INIT_LIST_HEAD(&bkt->lsb_lru);
1009 init_waitqueue_head(&bkt->lsb_marche_funebre);
1010 }
1011
1012 s->ls_stats = lprocfs_alloc_stats(LU_SS_LAST_STAT, 0);
cce3c2da 1013 if (!s->ls_stats) {
1014 cfs_hash_putref(s->ls_obj_hash);
1015 s->ls_obj_hash = NULL;
1016 return -ENOMEM;
1017 }
1018
1019 lprocfs_counter_init(s->ls_stats, LU_SS_CREATED,
1020 0, "created", "created");
1021 lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_HIT,
1022 0, "cache_hit", "cache_hit");
1023 lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_MISS,
1024 0, "cache_miss", "cache_miss");
1025 lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_RACE,
1026 0, "cache_race", "cache_race");
1027 lprocfs_counter_init(s->ls_stats, LU_SS_CACHE_DEATH_RACE,
1028 0, "cache_death_race", "cache_death_race");
1029 lprocfs_counter_init(s->ls_stats, LU_SS_LRU_PURGED,
1030 0, "lru_purged", "lru_purged");
1031 /*
1032 * Unlike other counters, lru_len can be decremented so
1033 * need lc_sum instead of just lc_count
1034 */
1035 lprocfs_counter_init(s->ls_stats, LU_SS_LRU_LEN,
1036 LPROCFS_CNTR_AVGMINMAX, "lru_len", "lru_len");
1037
1038 INIT_LIST_HEAD(&s->ls_linkage);
1039 s->ls_top_dev = top;
1040 top->ld_site = s;
1041 lu_device_get(top);
1042 lu_ref_add(&top->ld_reference, "site-top", s);
1043
1044 INIT_LIST_HEAD(&s->ls_ld_linkage);
1045 spin_lock_init(&s->ls_ld_lock);
1046
1047 lu_dev_add_linkage(s, top);
1048
0a3bdb00 1049 return 0;
1050}
1051EXPORT_SYMBOL(lu_site_init);
1052
1053/**
1054 * Finalize \a s and release its resources.
1055 */
1056void lu_site_fini(struct lu_site *s)
1057{
1058 mutex_lock(&lu_sites_guard);
1059 list_del_init(&s->ls_linkage);
1060 mutex_unlock(&lu_sites_guard);
1061
cce3c2da 1062 if (s->ls_obj_hash) {
1063 cfs_hash_putref(s->ls_obj_hash);
1064 s->ls_obj_hash = NULL;
1065 }
1066
cce3c2da 1067 if (s->ls_top_dev) {
1068 s->ls_top_dev->ld_site = NULL;
1069 lu_ref_del(&s->ls_top_dev->ld_reference, "site-top", s);
1070 lu_device_put(s->ls_top_dev);
1071 s->ls_top_dev = NULL;
1072 }
1073
cce3c2da 1074 if (s->ls_stats)
1075 lprocfs_free_stats(&s->ls_stats);
1076}
1077EXPORT_SYMBOL(lu_site_fini);
1078
1079/**
1080 * Called when initialization of stack for this site is completed.
1081 */
1082int lu_site_init_finish(struct lu_site *s)
1083{
1084 int result;
50ffcb7e 1085
1086 mutex_lock(&lu_sites_guard);
1087 result = lu_context_refill(&lu_shrink_env.le_ctx);
1088 if (result == 0)
1089 list_add(&s->ls_linkage, &lu_sites);
1090 mutex_unlock(&lu_sites_guard);
1091 return result;
1092}
1093EXPORT_SYMBOL(lu_site_init_finish);
1094
1095/**
1096 * Acquire additional reference on device \a d
1097 */
1098void lu_device_get(struct lu_device *d)
1099{
1100 atomic_inc(&d->ld_ref);
1101}
1102EXPORT_SYMBOL(lu_device_get);
1103
1104/**
1105 * Release reference on device \a d.
1106 */
1107void lu_device_put(struct lu_device *d)
1108{
1109 LASSERT(atomic_read(&d->ld_ref) > 0);
1110 atomic_dec(&d->ld_ref);
1111}
1112EXPORT_SYMBOL(lu_device_put);
1113
1114/**
1115 * Initialize device \a d of type \a t.
1116 */
1117int lu_device_init(struct lu_device *d, struct lu_device_type *t)
1118{
1119 if (atomic_inc_return(&t->ldt_device_nr) == 1 &&
1120 t->ldt_ops->ldto_start)
d7e09d03 1121 t->ldt_ops->ldto_start(t);
a8610297 1122
ec83e611 1123 memset(d, 0, sizeof(*d));
1124 atomic_set(&d->ld_ref, 0);
1125 d->ld_type = t;
1126 lu_ref_init(&d->ld_reference);
1127 INIT_LIST_HEAD(&d->ld_linkage);
1128 return 0;
1129}
1130EXPORT_SYMBOL(lu_device_init);
1131
1132/**
1133 * Finalize device \a d.
1134 */
1135void lu_device_fini(struct lu_device *d)
1136{
a8610297 1137 struct lu_device_type *t = d->ld_type;
d7e09d03 1138
cce3c2da 1139 if (d->ld_obd) {
1140 d->ld_obd->obd_lu_dev = NULL;
1141 d->ld_obd = NULL;
1142 }
1143
1144 lu_ref_fini(&d->ld_reference);
1145 LASSERTF(atomic_read(&d->ld_ref) == 0,
1146 "Refcount is %u\n", atomic_read(&d->ld_ref));
1147 LASSERT(atomic_read(&t->ldt_device_nr) > 0);
1148
1149 if (atomic_dec_and_test(&t->ldt_device_nr) &&
1150 t->ldt_ops->ldto_stop)
1151 t->ldt_ops->ldto_stop(t);
1152}
1153EXPORT_SYMBOL(lu_device_fini);
1154
1155/**
1156 * Initialize object \a o that is part of compound object \a h and was created
1157 * by device \a d.
1158 */
1159int lu_object_init(struct lu_object *o, struct lu_object_header *h,
1160 struct lu_device *d)
d7e09d03 1161{
631abc6e 1162 memset(o, 0, sizeof(*o));
d7e09d03 1163 o->lo_header = h;
631abc6e 1164 o->lo_dev = d;
d7e09d03 1165 lu_device_get(d);
631abc6e 1166 lu_ref_add_at(&d->ld_reference, &o->lo_dev_ref, "lu_object", o);
d7e09d03 1167 INIT_LIST_HEAD(&o->lo_linkage);
631abc6e 1168
1169 return 0;
1170}
1171EXPORT_SYMBOL(lu_object_init);
1172
1173/**
1174 * Finalize object and release its resources.
1175 */
1176void lu_object_fini(struct lu_object *o)
1177{
1178 struct lu_device *dev = o->lo_dev;
1179
1180 LASSERT(list_empty(&o->lo_linkage));
1181
cce3c2da 1182 if (dev) {
1183 lu_ref_del_at(&dev->ld_reference, &o->lo_dev_ref,
1184 "lu_object", o);
1185 lu_device_put(dev);
1186 o->lo_dev = NULL;
1187 }
1188}
1189EXPORT_SYMBOL(lu_object_fini);
1190
1191/**
1192 * Add object \a o as first layer of compound object \a h
1193 *
1194 * This is typically called by the ->ldo_object_alloc() method of top-level
1195 * device.
1196 */
1197void lu_object_add_top(struct lu_object_header *h, struct lu_object *o)
1198{
1199 list_move(&o->lo_linkage, &h->loh_layers);
1200}
1201EXPORT_SYMBOL(lu_object_add_top);
1202
1203/**
1204 * Add object \a o as a layer of compound object, going after \a before.
1205 *
1206 * This is typically called by the ->ldo_object_alloc() method of \a
1207 * before->lo_dev.
1208 */
1209void lu_object_add(struct lu_object *before, struct lu_object *o)
1210{
1211 list_move(&o->lo_linkage, &before->lo_linkage);
1212}
1213EXPORT_SYMBOL(lu_object_add);
1214
1215/**
1216 * Initialize compound object.
1217 */
1218int lu_object_header_init(struct lu_object_header *h)
1219{
ec83e611 1220 memset(h, 0, sizeof(*h));
1221 atomic_set(&h->loh_ref, 1);
1222 INIT_HLIST_NODE(&h->loh_hash);
1223 INIT_LIST_HEAD(&h->loh_lru);
1224 INIT_LIST_HEAD(&h->loh_layers);
1225 lu_ref_init(&h->loh_reference);
1226 return 0;
1227}
1228EXPORT_SYMBOL(lu_object_header_init);
1229
1230/**
1231 * Finalize compound object.
1232 */
1233void lu_object_header_fini(struct lu_object_header *h)
1234{
1235 LASSERT(list_empty(&h->loh_layers));
1236 LASSERT(list_empty(&h->loh_lru));
1237 LASSERT(hlist_unhashed(&h->loh_hash));
1238 lu_ref_fini(&h->loh_reference);
1239}
1240EXPORT_SYMBOL(lu_object_header_fini);
1241
1242/**
1243 * Given a compound object, find its slice, corresponding to the device type
1244 * \a dtype.
1245 */
1246struct lu_object *lu_object_locate(struct lu_object_header *h,
1247 const struct lu_device_type *dtype)
1248{
1249 struct lu_object *o;
1250
1251 list_for_each_entry(o, &h->loh_layers, lo_linkage) {
1252 if (o->lo_dev->ld_type == dtype)
1253 return o;
1254 }
1255 return NULL;
1256}
1257EXPORT_SYMBOL(lu_object_locate);
1258
1259/**
1260 * Finalize and free devices in the device stack.
1261 *
1262 * Finalize device stack by purging object cache, and calling
1263 * lu_device_type_operations::ldto_device_fini() and
1264 * lu_device_type_operations::ldto_device_free() on all devices in the stack.
1265 */
1266void lu_stack_fini(const struct lu_env *env, struct lu_device *top)
1267{
1268 struct lu_site *site = top->ld_site;
1269 struct lu_device *scan;
1270 struct lu_device *next;
1271
1272 lu_site_purge(env, site, ~0);
cce3c2da 1273 for (scan = top; scan; scan = next) {
1274 next = scan->ld_type->ldt_ops->ldto_device_fini(env, scan);
1275 lu_ref_del(&scan->ld_reference, "lu-stack", &lu_site_init);
1276 lu_device_put(scan);
1277 }
1278
1279 /* purge again. */
1280 lu_site_purge(env, site, ~0);
1281
cce3c2da 1282 for (scan = top; scan; scan = next) {
1283 const struct lu_device_type *ldt = scan->ld_type;
1284 struct obd_type *type;
1285
1286 next = ldt->ldt_ops->ldto_device_free(env, scan);
1287 type = ldt->ldt_obd_type;
cce3c2da 1288 if (type) {
1289 type->typ_refcnt--;
1290 class_put_type(type);
1291 }
1292 }
1293}
1294EXPORT_SYMBOL(lu_stack_fini);
1295
1296enum {
1297 /**
1298 * Maximal number of tld slots.
1299 */
1300 LU_CONTEXT_KEY_NR = 40
1301};
1302
1303static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, };
1304
1305static DEFINE_SPINLOCK(lu_keys_guard);
1306
1307/**
1308 * Global counter incremented whenever key is registered, unregistered,
1309 * revived or quiesced. This is used to avoid unnecessary calls to
1310 * lu_context_refill(). No locking is provided, as initialization and shutdown
1311 * are supposed to be externally serialized.
1312 */
225f597c 1313static unsigned key_set_version;
1314
1315/**
1316 * Register new key.
1317 */
1318int lu_context_key_register(struct lu_context_key *key)
1319{
1320 int result;
1321 int i;
1322
1323 LASSERT(key->lct_init);
1324 LASSERT(key->lct_fini);
d7e09d03 1325 LASSERT(key->lct_tags != 0);
1326
1327 result = -ENFILE;
1328 spin_lock(&lu_keys_guard);
1329 for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
cce3c2da 1330 if (!lu_keys[i]) {
1331 key->lct_index = i;
1332 atomic_set(&key->lct_used, 1);
1333 lu_keys[i] = key;
1334 lu_ref_init(&key->lct_reference);
1335 result = 0;
1336 ++key_set_version;
1337 break;
1338 }
1339 }
1340 spin_unlock(&lu_keys_guard);
1341 return result;
1342}
1343EXPORT_SYMBOL(lu_context_key_register);
1344
1345static void key_fini(struct lu_context *ctx, int index)
1346{
cce3c2da 1347 if (ctx->lc_value && ctx->lc_value[index]) {
1348 struct lu_context_key *key;
1349
1350 key = lu_keys[index];
1351 LASSERT(atomic_read(&key->lct_used) > 1);
1352
1353 key->lct_fini(ctx, key, ctx->lc_value[index]);
1354 lu_ref_del(&key->lct_reference, "ctx", ctx);
1355 atomic_dec(&key->lct_used);
1356
d7e09d03 1357 if ((ctx->lc_tags & LCT_NOREF) == 0) {
4a1a01ea 1358#ifdef CONFIG_MODULE_UNLOAD
d7e09d03 1359 LINVRNT(module_refcount(key->lct_owner) > 0);
4a1a01ea 1360#endif
1361 module_put(key->lct_owner);
1362 }
1363 ctx->lc_value[index] = NULL;
1364 }
1365}
1366
1367/**
1368 * Deregister key.
1369 */
1370void lu_context_key_degister(struct lu_context_key *key)
1371{
1372 LASSERT(atomic_read(&key->lct_used) >= 1);
1373 LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
1374
1375 lu_context_key_quiesce(key);
1376
1377 ++key_set_version;
1378 spin_lock(&lu_keys_guard);
1379 key_fini(&lu_shrink_env.le_ctx, key->lct_index);
1380 if (lu_keys[key->lct_index]) {
1381 lu_keys[key->lct_index] = NULL;
1382 lu_ref_fini(&key->lct_reference);
1383 }
1384 spin_unlock(&lu_keys_guard);
1385
1386 LASSERTF(atomic_read(&key->lct_used) == 1,
1387 "key has instances: %d\n",
1388 atomic_read(&key->lct_used));
1389}
1390EXPORT_SYMBOL(lu_context_key_degister);
1391
1392/**
1393 * Register a number of keys. This has to be called after all keys have been
1394 * initialized by a call to LU_CONTEXT_KEY_INIT().
1395 */
1396int lu_context_key_register_many(struct lu_context_key *k, ...)
1397{
1398 struct lu_context_key *key = k;
1399 va_list args;
1400 int result;
1401
1402 va_start(args, k);
1403 do {
1404 result = lu_context_key_register(key);
1405 if (result)
1406 break;
1407 key = va_arg(args, struct lu_context_key *);
cce3c2da 1408 } while (key);
1409 va_end(args);
1410
1411 if (result != 0) {
1412 va_start(args, k);
1413 while (k != key) {
1414 lu_context_key_degister(k);
1415 k = va_arg(args, struct lu_context_key *);
1416 }
1417 va_end(args);
1418 }
1419
1420 return result;
1421}
1422EXPORT_SYMBOL(lu_context_key_register_many);
1423
1424/**
1425 * De-register a number of keys. This is a dual to
1426 * lu_context_key_register_many().
1427 */
1428void lu_context_key_degister_many(struct lu_context_key *k, ...)
1429{
1430 va_list args;
1431
1432 va_start(args, k);
1433 do {
1434 lu_context_key_degister(k);
1435 k = va_arg(args, struct lu_context_key*);
cce3c2da 1436 } while (k);
1437 va_end(args);
1438}
1439EXPORT_SYMBOL(lu_context_key_degister_many);
1440
1441/**
1442 * Revive a number of keys.
1443 */
1444void lu_context_key_revive_many(struct lu_context_key *k, ...)
1445{
1446 va_list args;
1447
1448 va_start(args, k);
1449 do {
1450 lu_context_key_revive(k);
1451 k = va_arg(args, struct lu_context_key*);
cce3c2da 1452 } while (k);
1453 va_end(args);
1454}
1455EXPORT_SYMBOL(lu_context_key_revive_many);
1456
1457/**
1458 * Quiescent a number of keys.
1459 */
1460void lu_context_key_quiesce_many(struct lu_context_key *k, ...)
1461{
1462 va_list args;
1463
1464 va_start(args, k);
1465 do {
1466 lu_context_key_quiesce(k);
1467 k = va_arg(args, struct lu_context_key*);
cce3c2da 1468 } while (k);
1469 va_end(args);
1470}
1471EXPORT_SYMBOL(lu_context_key_quiesce_many);
1472
1473/**
1474 * Return value associated with key \a key in context \a ctx.
1475 */
1476void *lu_context_key_get(const struct lu_context *ctx,
1477 const struct lu_context_key *key)
1478{
1479 LINVRNT(ctx->lc_state == LCS_ENTERED);
1480 LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
1481 LASSERT(lu_keys[key->lct_index] == key);
1482 return ctx->lc_value[key->lct_index];
1483}
1484EXPORT_SYMBOL(lu_context_key_get);
1485
1486/**
1487 * List of remembered contexts. XXX document me.
1488 */
1489static LIST_HEAD(lu_context_remembered);
1490
1491/**
1492 * Destroy \a key in all remembered contexts. This is used to destroy key
1493 * values in "shared" contexts (like service threads), when a module owning
1494 * the key is about to be unloaded.
1495 */
1496void lu_context_key_quiesce(struct lu_context_key *key)
1497{
1498 struct lu_context *ctx;
1499
1500 if (!(key->lct_tags & LCT_QUIESCENT)) {
1501 /*
1502 * XXX layering violation.
1503 */
26f98e82 1504 cl_env_cache_purge(~0);
1505 key->lct_tags |= LCT_QUIESCENT;
1506 /*
1507 * XXX memory barrier has to go here.
1508 */
1509 spin_lock(&lu_keys_guard);
926d6fb2 1510 list_for_each_entry(ctx, &lu_context_remembered, lc_remember)
1511 key_fini(ctx, key->lct_index);
1512 spin_unlock(&lu_keys_guard);
1513 ++key_set_version;
1514 }
1515}
1516EXPORT_SYMBOL(lu_context_key_quiesce);
1517
1518void lu_context_key_revive(struct lu_context_key *key)
1519{
1520 key->lct_tags &= ~LCT_QUIESCENT;
1521 ++key_set_version;
1522}
1523EXPORT_SYMBOL(lu_context_key_revive);
1524
1525static void keys_fini(struct lu_context *ctx)
1526{
1527 int i;
1528
cce3c2da 1529 if (!ctx->lc_value)
1530 return;
1531
1532 for (i = 0; i < ARRAY_SIZE(lu_keys); ++i)
1533 key_fini(ctx, i);
1534
d7279044 1535 kfree(ctx->lc_value);
1536 ctx->lc_value = NULL;
1537}
1538
1539static int keys_fill(struct lu_context *ctx)
1540{
1541 int i;
1542
cce3c2da 1543 LINVRNT(ctx->lc_value);
1544 for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
1545 struct lu_context_key *key;
1546
1547 key = lu_keys[i];
cce3c2da 1548 if (!ctx->lc_value[i] && key &&
1549 (key->lct_tags & ctx->lc_tags) &&
1550 /*
1551 * Don't create values for a LCT_QUIESCENT key, as this
1552 * will pin module owning a key.
1553 */
1554 !(key->lct_tags & LCT_QUIESCENT)) {
1555 void *value;
1556
cce3c2da 1557 LINVRNT(key->lct_init);
1558 LINVRNT(key->lct_index == i);
1559
1560 value = key->lct_init(ctx, key);
7f44cb0b 1561 if (IS_ERR(value))
1562 return PTR_ERR(value);
1563
1564 if (!(ctx->lc_tags & LCT_NOREF))
1565 try_module_get(key->lct_owner);
1566 lu_ref_add_atomic(&key->lct_reference, "ctx", ctx);
1567 atomic_inc(&key->lct_used);
1568 /*
1569 * This is the only place in the code, where an
1570 * element of ctx->lc_value[] array is set to non-NULL
1571 * value.
1572 */
1573 ctx->lc_value[i] = value;
cce3c2da 1574 if (key->lct_exit)
1575 ctx->lc_tags |= LCT_HAS_EXIT;
1576 }
1577 ctx->lc_version = key_set_version;
1578 }
1579 return 0;
1580}
1581
1582static int keys_init(struct lu_context *ctx)
1583{
1584 ctx->lc_value = kcalloc(ARRAY_SIZE(lu_keys), sizeof(ctx->lc_value[0]),
1585 GFP_NOFS);
cce3c2da 1586 if (likely(ctx->lc_value))
1587 return keys_fill(ctx);
1588
1589 return -ENOMEM;
1590}
1591
1592/**
1593 * Initialize context data-structure. Create values for all keys.
1594 */
1595int lu_context_init(struct lu_context *ctx, __u32 tags)
1596{
1597 int rc;
1598
ec83e611 1599 memset(ctx, 0, sizeof(*ctx));
1600 ctx->lc_state = LCS_INITIALIZED;
1601 ctx->lc_tags = tags;
1602 if (tags & LCT_REMEMBER) {
1603 spin_lock(&lu_keys_guard);
1604 list_add(&ctx->lc_remember, &lu_context_remembered);
1605 spin_unlock(&lu_keys_guard);
1606 } else {
1607 INIT_LIST_HEAD(&ctx->lc_remember);
1608 }
1609
1610 rc = keys_init(ctx);
1611 if (rc != 0)
1612 lu_context_fini(ctx);
1613
1614 return rc;
1615}
1616EXPORT_SYMBOL(lu_context_init);
1617
1618/**
1619 * Finalize context data-structure. Destroy key values.
1620 */
1621void lu_context_fini(struct lu_context *ctx)
1622{
1623 LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT);
1624 ctx->lc_state = LCS_FINALIZED;
1625
1626 if ((ctx->lc_tags & LCT_REMEMBER) == 0) {
1627 LASSERT(list_empty(&ctx->lc_remember));
1628 keys_fini(ctx);
1629
1630 } else { /* could race with key degister */
1631 spin_lock(&lu_keys_guard);
1632 keys_fini(ctx);
1633 list_del_init(&ctx->lc_remember);
1634 spin_unlock(&lu_keys_guard);
1635 }
1636}
1637EXPORT_SYMBOL(lu_context_fini);
1638
1639/**
1640 * Called before entering context.
1641 */
1642void lu_context_enter(struct lu_context *ctx)
1643{
1644 LINVRNT(ctx->lc_state == LCS_INITIALIZED || ctx->lc_state == LCS_LEFT);
1645 ctx->lc_state = LCS_ENTERED;
1646}
1647EXPORT_SYMBOL(lu_context_enter);
1648
1649/**
1650 * Called after exiting from \a ctx
1651 */
1652void lu_context_exit(struct lu_context *ctx)
1653{
1654 int i;
1655
1656 LINVRNT(ctx->lc_state == LCS_ENTERED);
1657 ctx->lc_state = LCS_LEFT;
cce3c2da 1658 if (ctx->lc_tags & LCT_HAS_EXIT && ctx->lc_value) {
d7e09d03 1659 for (i = 0; i < ARRAY_SIZE(lu_keys); ++i) {
cce3c2da 1660 if (ctx->lc_value[i]) {
1661 struct lu_context_key *key;
1662
1663 key = lu_keys[i];
cce3c2da 1664 if (key->lct_exit)
1665 key->lct_exit(ctx,
1666 key, ctx->lc_value[i]);
1667 }
1668 }
1669 }
1670}
1671EXPORT_SYMBOL(lu_context_exit);
1672
1673/**
1674 * Allocate for context all missing keys that were registered after context
1675 * creation. key_set_version is only changed in rare cases when modules
1676 * are loaded and removed.
1677 */
1678int lu_context_refill(struct lu_context *ctx)
1679{
1680 return likely(ctx->lc_version == key_set_version) ? 0 : keys_fill(ctx);
1681}
1682EXPORT_SYMBOL(lu_context_refill);
1683
1684/**
1685 * lu_ctx_tags/lu_ses_tags will be updated if there are new types of
1686 * obd being added. Currently, this is only used on client side, specifically
1687 * for the echo device client. For other stacks (like ptlrpc threads), contexts are
1688 * predefined when the lu_device type is registered, during the module probe
1689 * phase.
1690 */
1691__u32 lu_context_tags_default;
1692__u32 lu_session_tags_default;
d7e09d03 1693
1694int lu_env_init(struct lu_env *env, __u32 tags)
1695{
1696 int result;
1697
1698 env->le_ses = NULL;
1699 result = lu_context_init(&env->le_ctx, tags);
1700 if (likely(result == 0))
1701 lu_context_enter(&env->le_ctx);
1702 return result;
1703}
1704EXPORT_SYMBOL(lu_env_init);
1705
1706void lu_env_fini(struct lu_env *env)
1707{
1708 lu_context_exit(&env->le_ctx);
1709 lu_context_fini(&env->le_ctx);
1710 env->le_ses = NULL;
1711}
1712EXPORT_SYMBOL(lu_env_fini);
1713
1714int lu_env_refill(struct lu_env *env)
1715{
1716 int result;
1717
1718 result = lu_context_refill(&env->le_ctx);
cce3c2da 1719 if (result == 0 && env->le_ses)
1720 result = lu_context_refill(env->le_ses);
1721 return result;
1722}
1723EXPORT_SYMBOL(lu_env_refill);
1724
2de5855c 1725struct lu_site_stats {
1726 unsigned lss_populated;
1727 unsigned lss_max_search;
1728 unsigned lss_total;
1729 unsigned lss_busy;
2de5855c 1730};
d7e09d03 1731
6da6eabe 1732static void lu_site_stats_get(struct cfs_hash *hs,
2de5855c 1733 struct lu_site_stats *stats, int populated)
d7e09d03 1734{
6ea510c1 1735 struct cfs_hash_bd bd;
1736 int i;
1737
1738 cfs_hash_for_each_bucket(hs, &bd, i) {
1739 struct lu_site_bkt_data *bkt = cfs_hash_bd_extra_get(hs, &bd);
1740 struct hlist_head *hhead;
1741
1742 cfs_hash_bd_lock(hs, &bd, 1);
1743 stats->lss_busy +=
1744 cfs_hash_bd_count_get(&bd) - bkt->lsb_lru_len;
1745 stats->lss_total += cfs_hash_bd_count_get(&bd);
1746 stats->lss_max_search = max((int)stats->lss_max_search,
1747 cfs_hash_bd_depmax_get(&bd));
1748 if (!populated) {
1749 cfs_hash_bd_unlock(hs, &bd, 1);
1750 continue;
1751 }
1752
1753 cfs_hash_bd_for_each_hlist(hs, &bd, hhead) {
1754 if (!hlist_empty(hhead))
1755 stats->lss_populated++;
1756 }
1757 cfs_hash_bd_unlock(hs, &bd, 1);
1758 }
1759}
1760
d7e09d03 1761/*
1762 * lu_cache_shrink_count returns the number of cached objects that are
1763 * candidates to be freed by shrink_slab(). A counter, which tracks
1764 * the number of items in the site's lru, is maintained in the per cpu
1765 * stats of each site. The counter is incremented when an object is added
1766 * to a site's lru and decremented when one is removed. The number of
1767 * free-able objects is the sum of all per cpu counters for all sites.
d7e09d03 1768 *
1769 * Using a per cpu counter is a compromise solution to concurrent access:
1770 * lu_object_put() can update the counter without locking the site and
1771 * lu_cache_shrink_count can sum the counters without locking each
1772 * ls_obj_hash bucket.
d7e09d03 1773 */
1774static unsigned long lu_cache_shrink_count(struct shrinker *sk,
1775 struct shrink_control *sc)
d7e09d03 1776{
1777 struct lu_site *s;
1778 struct lu_site *tmp;
fe92a055 1779 unsigned long cached = 0;
d7e09d03 1780
1781 if (!(sc->gfp_mask & __GFP_FS))
1782 return 0;
1783
1784 mutex_lock(&lu_sites_guard);
1785 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
a0b8803a 1786 cached += ls_stats_read(s->ls_stats, LU_SS_LRU_LEN);
d7e09d03 1787 }
1788 mutex_unlock(&lu_sites_guard);
1789
1790 cached = (cached / 100) * sysctl_vfs_cache_pressure;
1791 CDEBUG(D_INODE, "%ld objects cached, cache pressure %d\n",
1792 cached, sysctl_vfs_cache_pressure);
1793
1794 return cached;
1795}
1796
1797static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
1798 struct shrink_control *sc)
1799{
1800 struct lu_site *s;
1801 struct lu_site *tmp;
1802 unsigned long remain = sc->nr_to_scan, freed = 0;
1803 LIST_HEAD(splice);
1804
1805 if (!(sc->gfp_mask & __GFP_FS))
1806 /* We must not take the lu_sites_guard lock when
1807 * __GFP_FS is *not* set because of the deadlock
1808 * possibility detailed above. Additionally,
1809 * since we cannot determine the number of
1810 * objects in the cache without taking this
1811 * lock, we're in a particularly tough spot. As
1812 * a result, we'll just lie and say our cache is
1813 * empty. This _should_ be ok, as we can't
1814 * reclaim objects when __GFP_FS is *not* set
1815 * anyways.
1816 */
1817 return SHRINK_STOP;
1818
1819 mutex_lock(&lu_sites_guard);
1820 list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
1821 freed = lu_site_purge(&lu_shrink_env, s, remain);
1822 remain -= freed;
1823 /*
1824 * Move just shrunk site to the tail of site list to
1825 * assure shrinking fairness.
1826 */
1827 list_move_tail(&s->ls_linkage, &splice);
1828 }
1829 list_splice(&splice, lu_sites.prev);
1830 mutex_unlock(&lu_sites_guard);
1831
1832 return sc->nr_to_scan - remain;
1833}
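/*
 * Together these two callbacks implement the split shrinker interface:
 * lu_cache_shrink_count() only reports how many LRU objects could be freed
 * (from the LU_SS_LRU_LEN counters), while lu_cache_shrink_scan() does the
 * actual work by purging up to sc->nr_to_scan objects across all sites.
 */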
1834
1835/**
1836 * Debugging printer function using printk().
1837 */
1838static struct shrinker lu_site_shrinker = {
1839 .count_objects = lu_cache_shrink_count,
1840 .scan_objects = lu_cache_shrink_scan,
1841 .seeks = DEFAULT_SEEKS,
1842};
1843
1844/**
1845 * Initialization of global lu_* data.
1846 */
1847int lu_global_init(void)
1848{
1849 int result;
1850
1851 CDEBUG(D_INFO, "Lustre LU module (%p).\n", &lu_keys);
1852
1853 result = lu_ref_global_init();
1854 if (result != 0)
1855 return result;
1856
1857 LU_CONTEXT_KEY_INIT(&lu_global_key);
1858 result = lu_context_key_register(&lu_global_key);
1859 if (result != 0)
1860 return result;
1861
1862 /*
1863 * At this level, we don't know what tags are needed, so allocate them
1864 * conservatively. This should not be too bad, because this
1865 * environment is global.
1866 */
1867 mutex_lock(&lu_sites_guard);
1868 result = lu_env_init(&lu_shrink_env, LCT_SHRINKER);
1869 mutex_unlock(&lu_sites_guard);
1870 if (result != 0)
1871 return result;
1872
1873 /*
1874 * seeks estimation: 3 seeks to read a record from oi, one to read
1875 * inode, one for ea. Unfortunately setting this high value results in
1876 * lu_object/inode cache consuming all the memory.
1877 */
fe92a055 1878 register_shrinker(&lu_site_shrinker);
1879
1880 return result;
1881}
1882
1883/**
1884 * Dual to lu_global_init().
1885 */
1886void lu_global_fini(void)
1887{
fe92a055 1888 unregister_shrinker(&lu_site_shrinker);
1889 lu_context_key_degister(&lu_global_key);
1890
1891 /*
1892 * Tear shrinker environment down _after_ de-registering
1893 * lu_global_key, because the latter has a value in the former.
1894 */
1895 mutex_lock(&lu_sites_guard);
1896 lu_env_fini(&lu_shrink_env);
1897 mutex_unlock(&lu_sites_guard);
1898
1899 lu_ref_global_fini();
1900}
1901
1902static __u32 ls_stats_read(struct lprocfs_stats *stats, int idx)
1903{
1904 struct lprocfs_counter ret;
1905
1906 lprocfs_stats_collect(stats, idx, &ret);
1907 if (idx == LU_SS_LRU_LEN)
1908 /*
1909 * protect against counter on cpu A being decremented
1910 * before counter is incremented on cpu B; unlikely
1911 */
1912 return (__u32)((ret.lc_sum > 0) ? ret.lc_sum : 0);
1913
d7e09d03 1914 return (__u32)ret.lc_count;
1915}
1916
1917/**
1918 * Output site statistical counters into a buffer. Suitable for
1919 * lprocfs_rd_*()-style functions.
1920 */
73bb1da6 1921int lu_site_stats_print(const struct lu_site *s, struct seq_file *m)
d7e09d03 1922{
2de5855c 1923 struct lu_site_stats stats;
1924
1925 memset(&stats, 0, sizeof(stats));
1926 lu_site_stats_get(s->ls_obj_hash, &stats, 1);
1927
a0b8803a 1928 seq_printf(m, "%d/%d %d/%d %d %d %d %d %d %d %d %d\n",
1929 stats.lss_busy,
1930 stats.lss_total,
1931 stats.lss_populated,
1932 CFS_HASH_NHLIST(s->ls_obj_hash),
1933 stats.lss_max_search,
1934 ls_stats_read(s->ls_stats, LU_SS_CREATED),
1935 ls_stats_read(s->ls_stats, LU_SS_CACHE_HIT),
1936 ls_stats_read(s->ls_stats, LU_SS_CACHE_MISS),
1937 ls_stats_read(s->ls_stats, LU_SS_CACHE_RACE),
1938 ls_stats_read(s->ls_stats, LU_SS_CACHE_DEATH_RACE),
1939 ls_stats_read(s->ls_stats, LU_SS_LRU_PURGED),
1940 ls_stats_read(s->ls_stats, LU_SS_LRU_LEN));
8faeebdf 1941 return 0;
1942}
1943EXPORT_SYMBOL(lu_site_stats_print);
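/*
 * The columns printed above are, in order: busy/total objects,
 * populated/total hash chains, maximum search depth, then the created,
 * cache_hit, cache_miss, cache_race, cache_death_race, lru_purged and
 * lru_len counters.
 */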
1944
1945/**
1946 * Helper function to initialize a number of kmem slab caches at once.
1947 */
1948int lu_kmem_init(struct lu_kmem_descr *caches)
1949{
1950 int result;
1951 struct lu_kmem_descr *iter = caches;
1952
cce3c2da 1953 for (result = 0; iter->ckd_cache; ++iter) {
1954 *iter->ckd_cache = kmem_cache_create(iter->ckd_name,
1955 iter->ckd_size,
1956 0, 0, NULL);
cce3c2da 1957 if (!*iter->ckd_cache) {
1958 result = -ENOMEM;
1959 /* free all previously allocated caches */
1960 lu_kmem_fini(caches);
1961 break;
1962 }
1963 }
1964 return result;
1965}
1966EXPORT_SYMBOL(lu_kmem_init);
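/*
 * Callers pass an array of lu_kmem_descr terminated by an entry whose
 * ckd_cache is NULL; a hypothetical descriptor table would look like:
 *
 *	static struct lu_kmem_descr foo_caches[] = {
 *		{ .ckd_cache = &foo_kmem, .ckd_name = "foo_kmem",
 *		  .ckd_size = sizeof(struct foo) },
 *		{ .ckd_cache = NULL }
 *	};
 *
 * so that lu_kmem_init(foo_caches) / lu_kmem_fini(foo_caches) create and
 * destroy all of them in one call.
 */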
1967
1968/**
1969 * Helper function to finalize a number of kmem slab cached at once. Dual to
1970 * lu_kmem_init().
1971 */
1972void lu_kmem_fini(struct lu_kmem_descr *caches)
1973{
cce3c2da 1974 for (; caches->ckd_cache; ++caches) {
1975 kmem_cache_destroy(*caches->ckd_cache);
1976 *caches->ckd_cache = NULL;
1977 }
1978}
1979EXPORT_SYMBOL(lu_kmem_fini);