staging/lustre/lov: Adjust NULL comparison codestyle
[deliverable/linux.git] / drivers / staging / lustre / lustre / llite / dir.c
CommitLineData
d7e09d03
PT
1/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19 *
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
22 * have any questions.
23 *
24 * GPL HEADER END
25 */
26/*
27 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
29 *
1dc563a6 30 * Copyright (c) 2011, 2015, Intel Corporation.
d7e09d03
PT
31 */
32/*
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
35 *
36 * lustre/llite/dir.c
37 *
38 * Directory code for lustre client.
39 */
40
41#include <linux/fs.h>
42#include <linux/pagemap.h>
43#include <linux/mm.h>
31982482 44#include <linux/uaccess.h>
995c8b4a 45#include <linux/buffer_head.h> /* for wait_on_buffer */
d7e09d03 46#include <linux/pagevec.h>
2870cd10 47#include <linux/prefetch.h>
d7e09d03
PT
48
49#define DEBUG_SUBSYSTEM S_LLITE
50
67a235f5
GKH
51#include "../include/obd_support.h"
52#include "../include/obd_class.h"
53#include "../include/lustre_lib.h"
54#include "../include/lustre/lustre_idl.h"
55#include "../include/lustre_lite.h"
56#include "../include/lustre_dlm.h"
57#include "../include/lustre_fid.h"
e2780478 58#include "../include/lustre_kernelcomm.h"
d7e09d03
PT
59#include "llite_internal.h"
60
61/*
62 * (new) readdir implementation overview.
63 *
64 * Original lustre readdir implementation cached exact copy of raw directory
65 * pages on the client. These pages were indexed in client page cache by
66 * logical offset in the directory file. This design, while very simple and
67 * intuitive had some inherent problems:
68 *
69 * . it implies that byte offset to the directory entry serves as a
70 * telldir(3)/seekdir(3) cookie, but that offset is not stable: in
71 * ext3/htree directory entries may move due to splits, and more
72 * importantly,
73 *
74 * . it is incompatible with the design of split directories for cmd3,
75 * that assumes that names are distributed across nodes based on their
76 * hash, and so readdir should be done in hash order.
77 *
78 * New readdir implementation does readdir in hash order, and uses hash of a
79 * file name as a telldir/seekdir cookie. This led to number of complications:
80 *
81 * . hash is not unique, so it cannot be used to index cached directory
82 * pages on the client (note, that it requires a whole pageful of hash
83 * collided entries to cause two pages to have identical hashes);
84 *
85 * . hash is not unique, so it cannot, strictly speaking, be used as an
86 * entry cookie. ext3/htree has the same problem and lustre implementation
87 * mimics their solution: seekdir(hash) positions directory at the first
88 * entry with the given hash.
89 *
90 * Client side.
91 *
92 * 0. caching
93 *
94 * Client caches directory pages using hash of the first entry as an index. As
95 * noted above hash is not unique, so this solution doesn't work as is:
96 * special processing is needed for "page hash chains" (i.e., sequences of
97 * pages filled with entries all having the same hash value).
98 *
99 * First, such chains have to be detected. To this end, server returns to the
100 * client the hash of the first entry on the page next to one returned. When
101 * client detects that this hash is the same as hash of the first entry on the
102 * returned page, page hash collision has to be handled. Pages in the
103 * hash chain, except first one, are termed "overflow pages".
104 *
105 * Solution to index uniqueness problem is to not cache overflow
106 * pages. Instead, when page hash collision is detected, all overflow pages
107 * from emerging chain are immediately requested from the server and placed in
108 * a special data structure (struct ll_dir_chain). This data structure is used
109 * by ll_readdir() to process entries from overflow pages. When readdir
110 * invocation finishes, overflow pages are discarded. If page hash collision
111 * chain weren't completely processed, next call to readdir will again detect
112 * page hash collision, again read overflow pages in, process next portion of
113 * entries and again discard the pages. This is not as wasteful as it looks,
114 * because, given reasonable hash, page hash collisions are extremely rare.
115 *
116 * 1. directory positioning
117 *
118 * When seekdir(hash) is called, original
119 *
120 *
121 *
122 *
123 *
124 *
125 *
126 *
127 * Server.
128 *
129 * identification of and access to overflow pages
130 *
131 * page format
132 *
133 * Page in MDS_READPAGE RPC is packed in LU_PAGE_SIZE, and each page contains
134 * a header lu_dirpage which describes the start/end hash, and whether this
135 * page is empty (contains no dir entry) or hash collide with next page.
136 * After client receives reply, several pages will be integrated into dir page
137 * in PAGE_CACHE_SIZE (if PAGE_CACHE_SIZE greater than LU_PAGE_SIZE), and the
138 * lu_dirpage for this integrated page will be adjusted. See
139 * lmv_adjust_dirpages().
140 *
141 */
142
143/* returns the page unlocked, but with a reference */
144static int ll_dir_filler(void *_hash, struct page *page0)
145{
146 struct inode *inode = page0->mapping->host;
147 int hash64 = ll_i2sbi(inode)->ll_flags & LL_SBI_64BIT_HASH;
148 struct obd_export *exp = ll_i2sbi(inode)->ll_md_exp;
149 struct ptlrpc_request *request;
150 struct mdt_body *body;
151 struct md_op_data *op_data;
152 __u64 hash = *((__u64 *)_hash);
153 struct page **page_pool;
154 struct page *page;
155 struct lu_dirpage *dp;
156 int max_pages = ll_i2sbi(inode)->ll_md_brw_size >> PAGE_CACHE_SHIFT;
157 int nrdpgs = 0; /* number of pages read actually */
158 int npages;
159 int i;
160 int rc;
d7e09d03 161
b0f5aad5 162 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) hash %llu\n",
d7e09d03
PT
163 inode->i_ino, inode->i_generation, inode, hash);
164
165 LASSERT(max_pages > 0 && max_pages <= MD_MAX_BRW_PAGES);
166
0fa3b9d3 167 page_pool = kcalloc(max_pages, sizeof(page), GFP_NOFS);
496a51bd 168 if (page_pool) {
d7e09d03
PT
169 page_pool[0] = page0;
170 } else {
171 page_pool = &page0;
172 max_pages = 1;
173 }
174 for (npages = 1; npages < max_pages; npages++) {
175 page = page_cache_alloc_cold(inode->i_mapping);
176 if (!page)
177 break;
178 page_pool[npages] = page;
179 }
180
181 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
182 LUSTRE_OPC_ANY, NULL);
183 op_data->op_npages = npages;
184 op_data->op_offset = hash;
185 rc = md_readpage(exp, op_data, page_pool, &request);
186 ll_finish_md_op_data(op_data);
c67587a7
LS
187 if (rc < 0) {
188 /* page0 is special, which was added into page cache early */
189 delete_from_page_cache(page0);
190 } else if (rc == 0) {
d7e09d03
PT
191 body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY);
192 /* Checked by mdc_readpage() */
193 LASSERT(body != NULL);
194
195 if (body->valid & OBD_MD_FLSIZE)
196 cl_isize_write(inode, body->size);
197
198 nrdpgs = (request->rq_bulk->bd_nob_transferred+PAGE_CACHE_SIZE-1)
199 >> PAGE_CACHE_SHIFT;
200 SetPageUptodate(page0);
201 }
202 unlock_page(page0);
203 ptlrpc_req_finished(request);
204
205 CDEBUG(D_VFSTRACE, "read %d/%d pages\n", nrdpgs, npages);
206
d7e09d03
PT
207 for (i = 1; i < npages; i++) {
208 unsigned long offset;
209 int ret;
210
211 page = page_pool[i];
212
213 if (rc < 0 || i >= nrdpgs) {
214 page_cache_release(page);
215 continue;
216 }
217
218 SetPageUptodate(page);
219
220 dp = kmap(page);
221 hash = le64_to_cpu(dp->ldp_hash_start);
222 kunmap(page);
223
224 offset = hash_x_index(hash, hash64);
225
226 prefetchw(&page->flags);
227 ret = add_to_page_cache_lru(page, inode->i_mapping, offset,
063d99b4 228 GFP_NOFS);
d7e09d03
PT
229 if (ret == 0) {
230 unlock_page(page);
d7e09d03 231 } else {
2d00bd17
JP
232 CDEBUG(D_VFSTRACE, "page %lu add to page cache failed: %d\n",
233 offset, ret);
d7e09d03
PT
234 }
235 page_cache_release(page);
236 }
d7e09d03
PT
237
238 if (page_pool != &page0)
97903a26 239 kfree(page_pool);
d7e09d03
PT
240 return rc;
241}
242
d7e09d03
PT
243void ll_release_page(struct page *page, int remove)
244{
245 kunmap(page);
246 if (remove) {
247 lock_page(page);
248 if (likely(page->mapping != NULL))
249 truncate_complete_page(page->mapping, page);
250 unlock_page(page);
251 }
252 page_cache_release(page);
253}
254
255/*
256 * Find, kmap and return page that contains given hash.
257 */
258static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
259 __u64 *start, __u64 *end)
260{
261 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
262 struct address_space *mapping = dir->i_mapping;
263 /*
264 * Complement of hash is used as an index so that
265 * radix_tree_gang_lookup() can be used to find a page with starting
266 * hash _smaller_ than one we are looking for.
267 */
268 unsigned long offset = hash_x_index(*hash, hash64);
269 struct page *page;
270 int found;
271
12afe493 272 spin_lock_irq(&mapping->tree_lock);
d7e09d03
PT
273 found = radix_tree_gang_lookup(&mapping->page_tree,
274 (void **)&page, offset, 1);
437dfb20 275 if (found > 0 && !radix_tree_exceptional_entry(page)) {
d7e09d03
PT
276 struct lu_dirpage *dp;
277
278 page_cache_get(page);
12afe493 279 spin_unlock_irq(&mapping->tree_lock);
d7e09d03
PT
280 /*
281 * In contrast to find_lock_page() we are sure that directory
282 * page cannot be truncated (while DLM lock is held) and,
283 * hence, can avoid restart.
284 *
285 * In fact, page cannot be locked here at all, because
286 * ll_dir_filler() does synchronous io.
287 */
288 wait_on_page_locked(page);
289 if (PageUptodate(page)) {
290 dp = kmap(page);
291 if (BITS_PER_LONG == 32 && hash64) {
292 *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
293 *end = le64_to_cpu(dp->ldp_hash_end) >> 32;
294 *hash = *hash >> 32;
295 } else {
296 *start = le64_to_cpu(dp->ldp_hash_start);
297 *end = le64_to_cpu(dp->ldp_hash_end);
298 }
55f5a824
GKH
299 LASSERTF(*start <= *hash, "start = %#llx,end = %#llx,hash = %#llx\n",
300 *start, *end, *hash);
b0f5aad5 301 CDEBUG(D_VFSTRACE, "page %lu [%llu %llu], hash %llu\n",
d7e09d03
PT
302 offset, *start, *end, *hash);
303 if (*hash > *end) {
304 ll_release_page(page, 0);
305 page = NULL;
306 } else if (*end != *start && *hash == *end) {
307 /*
308 * upon hash collision, remove this page,
309 * otherwise put page reference, and
310 * ll_get_dir_page() will issue RPC to fetch
311 * the page we want.
312 */
313 ll_release_page(page,
314 le32_to_cpu(dp->ldp_flags) & LDF_COLLIDE);
315 page = NULL;
316 }
317 } else {
318 page_cache_release(page);
319 page = ERR_PTR(-EIO);
320 }
321
322 } else {
12afe493 323 spin_unlock_irq(&mapping->tree_lock);
d7e09d03
PT
324 page = NULL;
325 }
326 return page;
327}
328
329struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
330 struct ll_dir_chain *chain)
331{
332 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
333 struct address_space *mapping = dir->i_mapping;
334 struct lustre_handle lockh;
335 struct lu_dirpage *dp;
336 struct page *page;
337 ldlm_mode_t mode;
338 int rc;
339 __u64 start = 0;
340 __u64 end = 0;
341 __u64 lhash = hash;
342 struct ll_inode_info *lli = ll_i2info(dir);
343 int hash64 = ll_i2sbi(dir)->ll_flags & LL_SBI_64BIT_HASH;
344
345 mode = LCK_PR;
346 rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
347 ll_inode2fid(dir), LDLM_IBITS, &policy, mode, &lockh);
348 if (!rc) {
f2145eae
BK
349 struct ldlm_enqueue_info einfo = {
350 .ei_type = LDLM_IBITS,
351 .ei_mode = mode,
352 .ei_cb_bl = ll_md_blocking_ast,
353 .ei_cb_cp = ldlm_completion_ast,
354 };
d7e09d03
PT
355 struct lookup_intent it = { .it_op = IT_READDIR };
356 struct ptlrpc_request *request;
357 struct md_op_data *op_data;
358
588de43a 359 op_data = ll_prep_md_op_data(NULL, dir, dir, NULL, 0, 0,
d7e09d03
PT
360 LUSTRE_OPC_ANY, NULL);
361 if (IS_ERR(op_data))
362 return (void *)op_data;
363
364 rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, &einfo, &it,
365 op_data, &lockh, NULL, 0, NULL, 0);
366
367 ll_finish_md_op_data(op_data);
368
369 request = (struct ptlrpc_request *)it.d.lustre.it_data;
370 if (request)
371 ptlrpc_req_finished(request);
372 if (rc < 0) {
b0f5aad5 373 CERROR("lock enqueue: "DFID" at %llu: rc %d\n",
d7e09d03
PT
374 PFID(ll_inode2fid(dir)), hash, rc);
375 return ERR_PTR(rc);
376 }
377
378 CDEBUG(D_INODE, "setting lr_lvb_inode to inode %p (%lu/%u)\n",
379 dir, dir->i_ino, dir->i_generation);
380 md_set_lock_data(ll_i2sbi(dir)->ll_md_exp,
381 &it.d.lustre.it_lock_handle, dir, NULL);
382 } else {
383 /* for cross-ref object, l_ast_data of the lock may not be set,
384 * we reset it here */
385 md_set_lock_data(ll_i2sbi(dir)->ll_md_exp, &lockh.cookie,
386 dir, NULL);
387 }
388 ldlm_lock_dump_handle(D_OTHER, &lockh);
389
390 mutex_lock(&lli->lli_readdir_mutex);
391 page = ll_dir_page_locate(dir, &lhash, &start, &end);
392 if (IS_ERR(page)) {
b0f5aad5 393 CERROR("dir page locate: "DFID" at %llu: rc %ld\n",
d7e09d03 394 PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page));
34e1f2bb 395 goto out_unlock;
d7e09d03
PT
396 } else if (page != NULL) {
397 /*
398 * XXX nikita: not entirely correct handling of a corner case:
399 * suppose hash chain of entries with hash value HASH crosses
400 * border between pages P0 and P1. First both P0 and P1 are
401 * cached, seekdir() is called for some entry from the P0 part
402 * of the chain. Later P0 goes out of cache. telldir(HASH)
403 * happens and finds P1, as it starts with matching hash
404 * value. Remaining entries from P0 part of the chain are
405 * skipped. (Is that really a bug?)
406 *
407 * Possible solutions: 0. don't cache P1 is such case, handle
408 * it as an "overflow" page. 1. invalidate all pages at
409 * once. 2. use HASH|1 as an index for P1.
410 */
34e1f2bb 411 goto hash_collision;
d7e09d03
PT
412 }
413
414 page = read_cache_page(mapping, hash_x_index(hash, hash64),
415 ll_dir_filler, &lhash);
416 if (IS_ERR(page)) {
b0f5aad5 417 CERROR("read cache page: "DFID" at %llu: rc %ld\n",
d7e09d03 418 PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
34e1f2bb 419 goto out_unlock;
d7e09d03
PT
420 }
421
422 wait_on_page_locked(page);
423 (void)kmap(page);
424 if (!PageUptodate(page)) {
b0f5aad5 425 CERROR("page not updated: "DFID" at %llu: rc %d\n",
d7e09d03
PT
426 PFID(ll_inode2fid(dir)), hash, -5);
427 goto fail;
428 }
429 if (!PageChecked(page))
c6ef5b91
SB
430 /* XXX: check page format later */
431 SetPageChecked(page);
d7e09d03 432 if (PageError(page)) {
b0f5aad5 433 CERROR("page error: "DFID" at %llu: rc %d\n",
d7e09d03
PT
434 PFID(ll_inode2fid(dir)), hash, -5);
435 goto fail;
436 }
437hash_collision:
438 dp = page_address(page);
439 if (BITS_PER_LONG == 32 && hash64) {
440 start = le64_to_cpu(dp->ldp_hash_start) >> 32;
441 end = le64_to_cpu(dp->ldp_hash_end) >> 32;
442 lhash = hash >> 32;
443 } else {
444 start = le64_to_cpu(dp->ldp_hash_start);
445 end = le64_to_cpu(dp->ldp_hash_end);
446 lhash = hash;
447 }
448 if (end == start) {
449 LASSERT(start == lhash);
b0f5aad5 450 CWARN("Page-wide hash collision: %llu\n", end);
d7e09d03 451 if (BITS_PER_LONG == 32 && hash64)
b0f5aad5 452 CWARN("Real page-wide hash collision at [%llu %llu] with hash %llu\n",
d7e09d03
PT
453 le64_to_cpu(dp->ldp_hash_start),
454 le64_to_cpu(dp->ldp_hash_end), hash);
455 /*
456 * Fetch whole overflow chain...
457 *
458 * XXX not yet.
459 */
460 goto fail;
461 }
462out_unlock:
463 mutex_unlock(&lli->lli_readdir_mutex);
464 ldlm_lock_decref(&lockh, mode);
465 return page;
466
467fail:
468 ll_release_page(page, 1);
469 page = ERR_PTR(-EIO);
470 goto out_unlock;
471}
472
0b09d381 473int ll_dir_read(struct inode *inode, struct dir_context *ctx)
d7e09d03
PT
474{
475 struct ll_inode_info *info = ll_i2info(inode);
476 struct ll_sb_info *sbi = ll_i2sbi(inode);
0b09d381 477 __u64 pos = ctx->pos;
d7e09d03
PT
478 int api32 = ll_need_32bit_api(sbi);
479 int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
480 struct page *page;
481 struct ll_dir_chain chain;
482 int done = 0;
483 int rc = 0;
d7e09d03
PT
484
485 ll_dir_chain_init(&chain);
486
487 page = ll_get_dir_page(inode, pos, &chain);
488
489 while (rc == 0 && !done) {
490 struct lu_dirpage *dp;
491 struct lu_dirent *ent;
492
493 if (!IS_ERR(page)) {
494 /*
495 * If page is empty (end of directory is reached),
496 * use this value.
497 */
498 __u64 hash = MDS_DIR_END_OFF;
499 __u64 next;
500
501 dp = page_address(page);
502 for (ent = lu_dirent_start(dp); ent != NULL && !done;
503 ent = lu_dirent_next(ent)) {
504 __u16 type;
505 int namelen;
506 struct lu_fid fid;
507 __u64 lhash;
508 __u64 ino;
509
510 /*
511 * XXX: implement correct swabbing here.
512 */
513
514 hash = le64_to_cpu(ent->lde_hash);
515 if (hash < pos)
516 /*
517 * Skip until we find target hash
518 * value.
519 */
520 continue;
521
522 namelen = le16_to_cpu(ent->lde_namelen);
523 if (namelen == 0)
524 /*
525 * Skip dummy record.
526 */
527 continue;
528
529 if (api32 && hash64)
530 lhash = hash >> 32;
531 else
532 lhash = hash;
533 fid_le_to_cpu(&fid, &ent->lde_fid);
534 ino = cl_fid_build_ino(&fid, api32);
535 type = ll_dirent_type_get(ent);
0b09d381 536 ctx->pos = lhash;
d7e09d03
PT
537 /* For 'll_nfs_get_name_filldir()', it will try
538 * to access the 'ent' through its 'lde_name',
0b09d381
PT
539 * so the parameter 'name' for 'ctx->actor()'
540 * must be part of the 'ent'.
541 */
542 done = !dir_emit(ctx, ent->lde_name,
543 namelen, ino, type);
d7e09d03
PT
544 }
545 next = le64_to_cpu(dp->ldp_hash_end);
546 if (!done) {
547 pos = next;
548 if (pos == MDS_DIR_END_OFF) {
549 /*
550 * End of directory reached.
551 */
552 done = 1;
553 ll_release_page(page, 0);
554 } else if (1 /* chain is exhausted*/) {
555 /*
556 * Normal case: continue to the next
557 * page.
558 */
559 ll_release_page(page,
560 le32_to_cpu(dp->ldp_flags) &
561 LDF_COLLIDE);
562 next = pos;
563 page = ll_get_dir_page(inode, pos,
564 &chain);
565 } else {
566 /*
567 * go into overflow page.
568 */
569 LASSERT(le32_to_cpu(dp->ldp_flags) &
570 LDF_COLLIDE);
571 ll_release_page(page, 1);
572 }
573 } else {
574 pos = hash;
575 ll_release_page(page, 0);
576 }
577 } else {
578 rc = PTR_ERR(page);
579 CERROR("error reading dir "DFID" at %lu: rc %d\n",
580 PFID(&info->lli_fid), (unsigned long)pos, rc);
581 }
582 }
583
0b09d381 584 ctx->pos = pos;
d7e09d03 585 ll_dir_chain_fini(&chain);
0a3bdb00 586 return rc;
d7e09d03
PT
587}
588
0b09d381 589static int ll_readdir(struct file *filp, struct dir_context *ctx)
d7e09d03 590{
2a8a3597 591 struct inode *inode = file_inode(filp);
d7e09d03
PT
592 struct ll_file_data *lfd = LUSTRE_FPRIVATE(filp);
593 struct ll_sb_info *sbi = ll_i2sbi(inode);
d7e09d03
PT
594 int hash64 = sbi->ll_flags & LL_SBI_64BIT_HASH;
595 int api32 = ll_need_32bit_api(sbi);
596 int rc;
d7e09d03 597
2d00bd17
JP
598 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) pos %lu/%llu 32bit_api %d\n",
599 inode->i_ino, inode->i_generation,
0b09d381 600 inode, (unsigned long)lfd->lfd_pos, i_size_read(inode), api32);
d7e09d03 601
34e1f2bb 602 if (lfd->lfd_pos == MDS_DIR_END_OFF) {
d7e09d03
PT
603 /*
604 * end-of-file.
605 */
34e1f2bb
JL
606 rc = 0;
607 goto out;
608 }
d7e09d03 609
0b09d381
PT
610 ctx->pos = lfd->lfd_pos;
611 rc = ll_dir_read(inode, ctx);
612 lfd->lfd_pos = ctx->pos;
613 if (ctx->pos == MDS_DIR_END_OFF) {
d7e09d03 614 if (api32)
0b09d381 615 ctx->pos = LL_DIR_END_OFF_32BIT;
d7e09d03 616 else
0b09d381 617 ctx->pos = LL_DIR_END_OFF;
d7e09d03
PT
618 } else {
619 if (api32 && hash64)
0b09d381 620 ctx->pos >>= 32;
d7e09d03
PT
621 }
622 filp->f_version = inode->i_version;
d7e09d03
PT
623
624out:
625 if (!rc)
626 ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1);
627
0a3bdb00 628 return rc;
d7e09d03
PT
629}
630
2d95f10e 631static int ll_send_mgc_param(struct obd_export *mgc, char *string)
d7e09d03
PT
632{
633 struct mgs_send_param *msp;
634 int rc = 0;
635
496a51bd 636 msp = kzalloc(sizeof(*msp), GFP_NOFS);
d7e09d03
PT
637 if (!msp)
638 return -ENOMEM;
639
9563fe8a 640 strlcpy(msp->mgs_param, string, sizeof(msp->mgs_param));
d7e09d03
PT
641 rc = obd_set_info_async(NULL, mgc, sizeof(KEY_SET_INFO), KEY_SET_INFO,
642 sizeof(struct mgs_send_param), msp, NULL);
643 if (rc)
644 CERROR("Failed to set parameter: %d\n", rc);
97903a26 645 kfree(msp);
d7e09d03
PT
646
647 return rc;
648}
649
920b4f2e
LC
650static int ll_dir_setdirstripe(struct inode *dir, struct lmv_user_md *lump,
651 char *filename)
d7e09d03
PT
652{
653 struct ptlrpc_request *request = NULL;
654 struct md_op_data *op_data;
655 struct ll_sb_info *sbi = ll_i2sbi(dir);
656 int mode;
657 int err;
658
1f6eaf83 659 mode = (~current_umask() & 0755) | S_IFDIR;
d7e09d03
PT
660 op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
661 strlen(filename), mode, LUSTRE_OPC_MKDIR,
662 lump);
34e1f2bb
JL
663 if (IS_ERR(op_data)) {
664 err = PTR_ERR(op_data);
665 goto err_exit;
666 }
d7e09d03
PT
667
668 op_data->op_cli_flags |= CLI_SET_MEA;
669 err = md_create(sbi->ll_md_exp, op_data, lump, sizeof(*lump), mode,
4b1a25f0
PT
670 from_kuid(&init_user_ns, current_fsuid()),
671 from_kgid(&init_user_ns, current_fsgid()),
d7e09d03
PT
672 cfs_curproc_cap_pack(), 0, &request);
673 ll_finish_md_op_data(op_data);
674 if (err)
34e1f2bb 675 goto err_exit;
d7e09d03
PT
676err_exit:
677 ptlrpc_req_finished(request);
678 return err;
679}
680
681int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump,
682 int set_default)
683{
684 struct ll_sb_info *sbi = ll_i2sbi(inode);
685 struct md_op_data *op_data;
686 struct ptlrpc_request *req = NULL;
687 int rc = 0;
688 struct lustre_sb_info *lsi = s2lsi(inode->i_sb);
689 struct obd_device *mgc = lsi->lsi_mgc;
690 int lum_size;
d7e09d03
PT
691
692 if (lump != NULL) {
693 /*
694 * This is coming from userspace, so should be in
695 * local endian. But the MDS would like it in little
696 * endian, so we swab it before we send it.
697 */
698 switch (lump->lmm_magic) {
699 case LOV_USER_MAGIC_V1: {
700 if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
701 lustre_swab_lov_user_md_v1(lump);
702 lum_size = sizeof(struct lov_user_md_v1);
703 break;
704 }
705 case LOV_USER_MAGIC_V3: {
706 if (lump->lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
707 lustre_swab_lov_user_md_v3(
708 (struct lov_user_md_v3 *)lump);
709 lum_size = sizeof(struct lov_user_md_v3);
710 break;
711 }
712 default: {
2d00bd17
JP
713 CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x nor %#08x\n",
714 lump->lmm_magic, LOV_USER_MAGIC_V1,
715 LOV_USER_MAGIC_V3);
0a3bdb00 716 return -EINVAL;
d7e09d03
PT
717 }
718 }
719 } else {
720 lum_size = sizeof(struct lov_user_md_v1);
721 }
722
723 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
724 LUSTRE_OPC_ANY, NULL);
725 if (IS_ERR(op_data))
0a3bdb00 726 return PTR_ERR(op_data);
d7e09d03
PT
727
728 if (lump != NULL && lump->lmm_magic == cpu_to_le32(LMV_USER_MAGIC))
729 op_data->op_cli_flags |= CLI_SET_MEA;
730
731 /* swabbing is done in lov_setstripe() on server side */
732 rc = md_setattr(sbi->ll_md_exp, op_data, lump, lum_size,
733 NULL, 0, &req, NULL);
734 ll_finish_md_op_data(op_data);
735 ptlrpc_req_finished(req);
736 if (rc) {
737 if (rc != -EPERM && rc != -EACCES)
738 CERROR("mdc_setattr fails: rc = %d\n", rc);
739 }
740
741 /* In the following we use the fact that LOV_USER_MAGIC_V1 and
742 LOV_USER_MAGIC_V3 have the same initial fields so we do not
bef31c78 743 need to make the distinction between the 2 versions */
d7e09d03
PT
744 if (set_default && mgc->u.cli.cl_mgc_mgsexp) {
745 char *param = NULL;
746 char *buf;
747
496a51bd 748 param = kzalloc(MGS_PARAM_MAXLEN, GFP_NOFS);
57876fd8
JL
749 if (!param)
750 return -ENOMEM;
d7e09d03
PT
751
752 buf = param;
753 /* Get fsname and assume devname to be -MDT0000. */
754 ll_get_fsname(inode->i_sb, buf, MTI_NAME_MAXLEN);
755 strcat(buf, "-MDT0000.lov");
756 buf += strlen(buf);
757
758 /* Set root stripesize */
759 sprintf(buf, ".stripesize=%u",
760 lump ? le32_to_cpu(lump->lmm_stripe_size) : 0);
761 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
762 if (rc)
34e1f2bb 763 goto end;
d7e09d03
PT
764
765 /* Set root stripecount */
766 sprintf(buf, ".stripecount=%hd",
767 lump ? le16_to_cpu(lump->lmm_stripe_count) : 0);
768 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
769 if (rc)
34e1f2bb 770 goto end;
d7e09d03
PT
771
772 /* Set root stripeoffset */
773 sprintf(buf, ".stripeoffset=%hd",
774 lump ? le16_to_cpu(lump->lmm_stripe_offset) :
775 (typeof(lump->lmm_stripe_offset))(-1));
776 rc = ll_send_mgc_param(mgc->u.cli.cl_mgc_mgsexp, param);
777
778end:
57876fd8 779 kfree(param);
d7e09d03 780 }
0a3bdb00 781 return rc;
d7e09d03
PT
782}
783
784int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmmp,
785 int *lmm_size, struct ptlrpc_request **request)
786{
787 struct ll_sb_info *sbi = ll_i2sbi(inode);
788 struct mdt_body *body;
789 struct lov_mds_md *lmm = NULL;
790 struct ptlrpc_request *req = NULL;
791 int rc, lmmsize;
792 struct md_op_data *op_data;
793
44779340 794 rc = ll_get_default_mdsize(sbi, &lmmsize);
d7e09d03 795 if (rc)
0a3bdb00 796 return rc;
d7e09d03
PT
797
798 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
799 0, lmmsize, LUSTRE_OPC_ANY,
800 NULL);
801 if (IS_ERR(op_data))
0a3bdb00 802 return PTR_ERR(op_data);
d7e09d03
PT
803
804 op_data->op_valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
805 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
806 ll_finish_md_op_data(op_data);
807 if (rc < 0) {
2d00bd17
JP
808 CDEBUG(D_INFO, "md_getattr failed on inode %lu/%u: rc %d\n",
809 inode->i_ino,
d7e09d03 810 inode->i_generation, rc);
34e1f2bb 811 goto out;
d7e09d03
PT
812 }
813
814 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
815 LASSERT(body != NULL);
816
817 lmmsize = body->eadatasize;
818
819 if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
820 lmmsize == 0) {
34e1f2bb
JL
821 rc = -ENODATA;
822 goto out;
d7e09d03
PT
823 }
824
825 lmm = req_capsule_server_sized_get(&req->rq_pill,
826 &RMF_MDT_MD, lmmsize);
827 LASSERT(lmm != NULL);
828
829 /*
830 * This is coming from the MDS, so is probably in
831 * little endian. We convert it to host endian before
832 * passing it to userspace.
833 */
834 /* We don't swab objects for directories */
835 switch (le32_to_cpu(lmm->lmm_magic)) {
836 case LOV_MAGIC_V1:
1f6eaf83 837 if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
d7e09d03
PT
838 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
839 break;
840 case LOV_MAGIC_V3:
1f6eaf83 841 if (cpu_to_le32(LOV_MAGIC) != LOV_MAGIC)
d7e09d03
PT
842 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
843 break;
844 default:
845 CERROR("unknown magic: %lX\n", (unsigned long)lmm->lmm_magic);
846 rc = -EPROTO;
847 }
848out:
849 *lmmp = lmm;
850 *lmm_size = lmmsize;
851 *request = req;
852 return rc;
853}
854
855/*
856 * Get MDT index for the inode.
857 */
858int ll_get_mdt_idx(struct inode *inode)
859{
860 struct ll_sb_info *sbi = ll_i2sbi(inode);
861 struct md_op_data *op_data;
862 int rc, mdtidx;
d7e09d03
PT
863
864 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0,
865 0, LUSTRE_OPC_ANY, NULL);
866 if (IS_ERR(op_data))
0a3bdb00 867 return PTR_ERR(op_data);
d7e09d03
PT
868
869 op_data->op_flags |= MF_GET_MDT_IDX;
870 rc = md_getattr(sbi->ll_md_exp, op_data, NULL);
871 mdtidx = op_data->op_mds;
872 ll_finish_md_op_data(op_data);
873 if (rc < 0) {
874 CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
0a3bdb00 875 return rc;
d7e09d03
PT
876 }
877 return mdtidx;
878}
879
880/**
881 * Generic handler to do any pre-copy work.
882 *
883 * It send a first hsm_progress (with extent length == 0) to coordinator as a
884 * first information for it that real work has started.
885 *
886 * Moreover, for a ARCHIVE request, it will sample the file data version and
887 * store it in \a copy.
888 *
889 * \return 0 on success.
890 */
891static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy)
892{
893 struct ll_sb_info *sbi = ll_s2sbi(sb);
894 struct hsm_progress_kernel hpk;
895 int rc;
d7e09d03
PT
896
897 /* Forge a hsm_progress based on data from copy. */
898 hpk.hpk_fid = copy->hc_hai.hai_fid;
899 hpk.hpk_cookie = copy->hc_hai.hai_cookie;
900 hpk.hpk_extent.offset = copy->hc_hai.hai_extent.offset;
901 hpk.hpk_extent.length = 0;
902 hpk.hpk_flags = 0;
903 hpk.hpk_errval = 0;
904 hpk.hpk_data_version = 0;
905
d7e09d03
PT
906 /* For archive request, we need to read the current file version. */
907 if (copy->hc_hai.hai_action == HSMA_ARCHIVE) {
908 struct inode *inode;
909 __u64 data_version = 0;
910
911 /* Get inode for this fid */
912 inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
913 if (IS_ERR(inode)) {
914 hpk.hpk_flags |= HP_FLAG_RETRY;
915 /* hpk_errval is >= 0 */
916 hpk.hpk_errval = -PTR_ERR(inode);
34e1f2bb
JL
917 rc = PTR_ERR(inode);
918 goto progress;
d7e09d03
PT
919 }
920
921 /* Read current file data version */
922 rc = ll_data_version(inode, &data_version, 1);
923 iput(inode);
924 if (rc != 0) {
925 CDEBUG(D_HSM, "Could not read file data version of "
55f5a824 926 DFID" (rc = %d). Archive request (%#llx) could not be done.\n",
d7e09d03
PT
927 PFID(&copy->hc_hai.hai_fid), rc,
928 copy->hc_hai.hai_cookie);
929 hpk.hpk_flags |= HP_FLAG_RETRY;
930 /* hpk_errval must be >= 0 */
931 hpk.hpk_errval = -rc;
34e1f2bb 932 goto progress;
d7e09d03
PT
933 }
934
935 /* Store it the hsm_copy for later copytool use.
936 * Always modified even if no lsm. */
937 copy->hc_data_version = data_version;
938 }
939
940progress:
941 rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
942 &hpk, NULL);
943
0a3bdb00 944 return rc;
d7e09d03
PT
945}
946
/**
 * Generic handler to do any post-copy work.
 *
 * It will send the last hsm_progress update to coordinator to inform it
 * that copy is finished and whether it was successful or not.
 *
 * Moreover,
 * - for ARCHIVE request, it will sample the file data version and compare it
 *   with the version saved in ll_ioc_copy_start(). If they do not match, copy
 *   will be considered as failed.
 * - for RESTORE request, it will sample the file data version and send it to
 *   coordinator which is useful if the file was imported as 'released'.
 *
 * \param sb	superblock of the mounted Lustre client
 * \param copy	HSM copy descriptor received from the copytool
 *
 * \return 0 on success, negative errno from the progress RPC otherwise.
 */
static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy)
{
	struct ll_sb_info *sbi = ll_s2sbi(sb);
	struct hsm_progress_kernel hpk;
	int rc;

	/* If you modify the logic here, also check llapi_hsm_copy_end(). */
	/* Take care: copy->hc_hai.hai_action, len, gid and data are not
	 * initialized if copy_end was called with copy == NULL.
	 */

	/* Forge a hsm_progress based on data from copy. */
	hpk.hpk_fid = copy->hc_hai.hai_fid;
	hpk.hpk_cookie = copy->hc_hai.hai_cookie;
	hpk.hpk_extent = copy->hc_hai.hai_extent;
	hpk.hpk_flags = copy->hc_flags | HP_FLAG_COMPLETED;
	hpk.hpk_errval = copy->hc_errval;
	hpk.hpk_data_version = 0;

	/* For archive request, we need to check the file data was not changed.
	 *
	 * For restore request, we need to send the file data version, this is
	 * useful when the file was created using hsm_import.
	 *
	 * Only done when the copy itself reported success (hc_errval == 0).
	 */
	if (((copy->hc_hai.hai_action == HSMA_ARCHIVE) ||
	     (copy->hc_hai.hai_action == HSMA_RESTORE)) &&
	    (copy->hc_errval == 0)) {
		struct inode *inode;
		__u64 data_version = 0;

		/* Get lsm for this fid */
		inode = search_inode_for_lustre(sb, &copy->hc_hai.hai_fid);
		if (IS_ERR(inode)) {
			/* Inode not found: ask the coordinator to retry. */
			hpk.hpk_flags |= HP_FLAG_RETRY;
			/* hpk_errval must be >= 0 */
			hpk.hpk_errval = -PTR_ERR(inode);
			rc = PTR_ERR(inode);
			goto progress;
		}

		rc = ll_data_version(inode, &data_version,
				     copy->hc_hai.hai_action == HSMA_ARCHIVE);
		iput(inode);
		if (rc) {
			CDEBUG(D_HSM, "Could not read file data version. Request could not be confirmed.\n");
			/* Keep the copytool's own error if it set one;
			 * otherwise report ours (negated: hpk_errval >= 0). */
			if (hpk.hpk_errval == 0)
				hpk.hpk_errval = -rc;
			goto progress;
		}

		/* Store it the hsm_copy for later copytool use.
		 * Always modified even if no lsm. */
		hpk.hpk_data_version = data_version;

		/* File could have been stripped during archiving, so we need
		 * to check anyway. */
		if ((copy->hc_hai.hai_action == HSMA_ARCHIVE) &&
		    (copy->hc_data_version != data_version)) {
			CDEBUG(D_HSM, "File data version mismatched. File content was changed during archiving. "
			       DFID", start:%#llx current:%#llx\n",
			       PFID(&copy->hc_hai.hai_fid),
			       copy->hc_data_version, data_version);
			/* File was changed, send error to cdt. Do not ask for
			 * retry because if a file is modified frequently,
			 * the cdt will loop on retried archive requests.
			 * The policy engine will ask for a new archive later
			 * when the file will not be modified for some tunable
			 * time */
			/* we do not notify caller */
			hpk.hpk_flags &= ~HP_FLAG_RETRY;
			/* hpk_errval must be >= 0 */
			hpk.hpk_errval = EBUSY;
		}

	}

progress:
	/* Always report completion to the coordinator, success or not. */
	rc = obd_iocontrol(LL_IOC_HSM_PROGRESS, sbi->ll_md_exp, sizeof(hpk),
			   &hpk, NULL);

	return rc;
}
1044
b0337d6c
JH
1045static int copy_and_ioctl(int cmd, struct obd_export *exp,
1046 const void __user *data, size_t size)
d7e09d03 1047{
b0337d6c 1048 void *copy;
d7e09d03
PT
1049 int rc;
1050
496a51bd
JL
1051 copy = kzalloc(size, GFP_NOFS);
1052 if (!copy)
d7e09d03 1053 return -ENOMEM;
b0337d6c
JH
1054
1055 if (copy_from_user(copy, data, size)) {
1056 rc = -EFAULT;
1057 goto out;
d7e09d03 1058 }
b0337d6c
JH
1059
1060 rc = obd_iocontrol(cmd, exp, size, copy, NULL);
1061out:
97903a26 1062 kfree(copy);
b0337d6c 1063
d7e09d03
PT
1064 return rc;
1065}
1066
/**
 * Handle LL_IOC_QUOTACTL: permission-check the quota request and route it to
 * the MDT and/or OST export(s).
 *
 * \param sbi	client superblock info
 * \param qctl	quota control request; updated in place with the results
 *
 * \return 0 on success, negative errno otherwise.
 */
static int quotactl_ioctl(struct ll_sb_info *sbi, struct if_quotactl *qctl)
{
	int cmd = qctl->qc_cmd;
	int type = qctl->qc_type;
	int id = qctl->qc_id;
	int valid = qctl->qc_valid;
	int rc = 0;

	/* Permission checks: administrative commands need CAP_SYS_ADMIN and
	 * are refused on remote clients. */
	switch (cmd) {
	case LUSTRE_Q_INVALIDATE:
	case LUSTRE_Q_FINVALIDATE:
	case Q_QUOTAON:
	case Q_QUOTAOFF:
	case Q_SETQUOTA:
	case Q_SETINFO:
		if (!capable(CFS_CAP_SYS_ADMIN) ||
		    sbi->ll_flags & LL_SBI_RMT_CLIENT)
			return -EPERM;
		break;
	case Q_GETQUOTA:
		/* A user may query its own quota (euid match for USRQUOTA,
		 * egroup membership for GRPQUOTA); anything else needs
		 * CAP_SYS_ADMIN on a non-remote client. */
		if (((type == USRQUOTA &&
		      !uid_eq(current_euid(), make_kuid(&init_user_ns, id))) ||
		     (type == GRPQUOTA &&
		      !in_egroup_p(make_kgid(&init_user_ns, id)))) &&
		    (!capable(CFS_CAP_SYS_ADMIN) ||
		     sbi->ll_flags & LL_SBI_RMT_CLIENT))
			return -EPERM;
		break;
	case Q_GETINFO:
		break;
	default:
		CERROR("unsupported quotactl op: %#x\n", cmd);
		return -ENOTTY;
	}

	if (valid != QC_GENERAL) {
		/* Targeted query: a specific MDT/OST index or UUID. */
		if (sbi->ll_flags & LL_SBI_RMT_CLIENT)
			return -EOPNOTSUPP;

		/* Rewrite the command to its per-target ("O") variant for the
		 * duration of the RPC; restored below. */
		if (cmd == Q_GETINFO)
			qctl->qc_cmd = Q_GETOINFO;
		else if (cmd == Q_GETQUOTA)
			qctl->qc_cmd = Q_GETOQUOTA;
		else
			return -EINVAL;

		switch (valid) {
		case QC_MDTIDX:
			rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
					   sizeof(*qctl), qctl, NULL);
			break;
		case QC_OSTIDX:
			rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_dt_exp,
					   sizeof(*qctl), qctl, NULL);
			break;
		case QC_UUID:
			/* UUID may name either an MDT or an OST: try the MDT
			 * export first, fall back to the OST export. */
			rc = obd_iocontrol(OBD_IOC_QUOTACTL, sbi->ll_md_exp,
					   sizeof(*qctl), qctl, NULL);
			if (rc == -EAGAIN)
				rc = obd_iocontrol(OBD_IOC_QUOTACTL,
						   sbi->ll_dt_exp,
						   sizeof(*qctl), qctl, NULL);
			break;
		default:
			rc = -EINVAL;
			break;
		}

		if (rc)
			return rc;

		/* Restore the caller-visible command code. */
		qctl->qc_cmd = cmd;
	} else {
		struct obd_quotactl *oqctl;

		oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
		if (!oqctl)
			return -ENOMEM;

		QCTL_COPY(oqctl, qctl);
		rc = obd_quotactl(sbi->ll_md_exp, oqctl);
		if (rc) {
			/* Failed Q_QUOTAON: roll back so the MDT is not left
			 * half-enabled (unless it was already on: -EALREADY). */
			if (rc != -EALREADY && cmd == Q_QUOTAON) {
				oqctl->qc_cmd = Q_QUOTAOFF;
				obd_quotactl(sbi->ll_md_exp, oqctl);
			}
			kfree(oqctl);
			return rc;
		}
		/* If QIF_SPACE is not set, client should collect the
		 * space usage from OSSs by itself */
		if (cmd == Q_GETQUOTA &&
		    !(oqctl->qc_dqblk.dqb_valid & QIF_SPACE) &&
		    !oqctl->qc_dqblk.dqb_curspace) {
			struct obd_quotactl *oqctl_tmp;

			oqctl_tmp = kzalloc(sizeof(*oqctl_tmp), GFP_NOFS);
			if (!oqctl_tmp) {
				rc = -ENOMEM;
				goto out;
			}

			oqctl_tmp->qc_cmd = Q_GETOQUOTA;
			oqctl_tmp->qc_id = oqctl->qc_id;
			oqctl_tmp->qc_type = oqctl->qc_type;

			/* collect space usage from OSTs */
			oqctl_tmp->qc_dqblk.dqb_curspace = 0;
			rc = obd_quotactl(sbi->ll_dt_exp, oqctl_tmp);
			if (!rc || rc == -EREMOTEIO) {
				oqctl->qc_dqblk.dqb_curspace =
					oqctl_tmp->qc_dqblk.dqb_curspace;
				oqctl->qc_dqblk.dqb_valid |= QIF_SPACE;
			}

			/* collect space & inode usage from MDTs */
			oqctl_tmp->qc_dqblk.dqb_curspace = 0;
			oqctl_tmp->qc_dqblk.dqb_curinodes = 0;
			rc = obd_quotactl(sbi->ll_md_exp, oqctl_tmp);
			if (!rc || rc == -EREMOTEIO) {
				oqctl->qc_dqblk.dqb_curspace +=
					oqctl_tmp->qc_dqblk.dqb_curspace;
				oqctl->qc_dqblk.dqb_curinodes =
					oqctl_tmp->qc_dqblk.dqb_curinodes;
				oqctl->qc_dqblk.dqb_valid |= QIF_INODES;
			} else {
				/* MDT usage unavailable: invalidate the OST
				 * space figure as well. */
				oqctl->qc_dqblk.dqb_valid &= ~QIF_SPACE;
			}

			kfree(oqctl_tmp);
		}
out:
		/* Copy results (possibly partial on -ENOMEM above) back to
		 * the caller's structure. */
		QCTL_COPY(qctl, oqctl);
		kfree(oqctl);
	}

	return rc;
}
1205
a7503434
OD
1206/* This function tries to get a single name component,
1207 * to send to the server. No actual path traversal involved,
1208 * so we limit to NAME_MAX */
1209static char *ll_getname(const char __user *filename)
d7e09d03
PT
1210{
1211 int ret = 0, len;
a7503434 1212 char *tmp;
d7e09d03 1213
a7503434 1214 tmp = kzalloc(NAME_MAX + 1, GFP_KERNEL);
d7e09d03
PT
1215 if (!tmp)
1216 return ERR_PTR(-ENOMEM);
1217
a7503434
OD
1218 len = strncpy_from_user(tmp, filename, NAME_MAX + 1);
1219 if (len < 0)
1220 ret = len;
1221 else if (len == 0)
d7e09d03 1222 ret = -ENOENT;
a7503434 1223 else if (len > NAME_MAX && tmp[NAME_MAX] != 0)
d7e09d03
PT
1224 ret = -ENAMETOOLONG;
1225
1226 if (ret) {
a7503434 1227 kfree(tmp);
d7e09d03
PT
1228 tmp = ERR_PTR(ret);
1229 }
1230 return tmp;
1231}
1232
/* Release a name buffer obtained from ll_getname(). */
#define ll_putname(filename) kfree(filename)
d7e09d03
PT
1234
1235static long ll_dir_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1236{
2a8a3597 1237 struct inode *inode = file_inode(file);
d7e09d03
PT
1238 struct ll_sb_info *sbi = ll_i2sbi(inode);
1239 struct obd_ioctl_data *data;
1240 int rc = 0;
d7e09d03
PT
1241
1242 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), cmd=%#x\n",
1243 inode->i_ino, inode->i_generation, inode, cmd);
1244
1245 /* asm-ppc{,64} declares TCGETS, et. al. as type 't' not 'T' */
1246 if (_IOC_TYPE(cmd) == 'T' || _IOC_TYPE(cmd) == 't') /* tty ioctls */
1247 return -ENOTTY;
1248
1249 ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_IOCTL, 1);
a58a38ac 1250 switch (cmd) {
d7e09d03
PT
1251 case FSFILT_IOC_GETFLAGS:
1252 case FSFILT_IOC_SETFLAGS:
0a3bdb00 1253 return ll_iocontrol(inode, file, cmd, arg);
d7e09d03
PT
1254 case FSFILT_IOC_GETVERSION_OLD:
1255 case FSFILT_IOC_GETVERSION:
af00f6c5 1256 return put_user(inode->i_generation, (int __user *)arg);
d7e09d03
PT
1257 /* We need to special case any other ioctls we want to handle,
1258 * to send them to the MDS/OST as appropriate and to properly
1259 * network encode the arg field.
1260 case FSFILT_IOC_SETVERSION_OLD:
1261 case FSFILT_IOC_SETVERSION:
1262 */
1263 case LL_IOC_GET_MDTIDX: {
1264 int mdtidx;
1265
1266 mdtidx = ll_get_mdt_idx(inode);
1267 if (mdtidx < 0)
0a3bdb00 1268 return mdtidx;
d7e09d03 1269
af00f6c5 1270 if (put_user((int)mdtidx, (int __user *)arg))
0a3bdb00 1271 return -EFAULT;
d7e09d03
PT
1272
1273 return 0;
1274 }
1275 case IOC_MDC_LOOKUP: {
1276 struct ptlrpc_request *request = NULL;
1277 int namelen, len = 0;
1278 char *buf = NULL;
1279 char *filename;
1280 struct md_op_data *op_data;
1281
e3e8ff41 1282 rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
d7e09d03 1283 if (rc)
0a3bdb00 1284 return rc;
d7e09d03
PT
1285 data = (void *)buf;
1286
1287 filename = data->ioc_inlbuf1;
1288 namelen = strlen(filename);
1289
1290 if (namelen < 1) {
1291 CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
34e1f2bb
JL
1292 rc = -EINVAL;
1293 goto out_free;
d7e09d03
PT
1294 }
1295
1296 op_data = ll_prep_md_op_data(NULL, inode, NULL, filename, namelen,
1297 0, LUSTRE_OPC_ANY, NULL);
34e1f2bb
JL
1298 if (IS_ERR(op_data)) {
1299 rc = PTR_ERR(op_data);
1300 goto out_free;
1301 }
d7e09d03
PT
1302
1303 op_data->op_valid = OBD_MD_FLID;
1304 rc = md_getattr_name(sbi->ll_md_exp, op_data, &request);
1305 ll_finish_md_op_data(op_data);
1306 if (rc < 0) {
1307 CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
34e1f2bb 1308 goto out_free;
d7e09d03
PT
1309 }
1310 ptlrpc_req_finished(request);
d7e09d03
PT
1311out_free:
1312 obd_ioctl_freedata(buf, len);
1313 return rc;
1314 }
1315 case LL_IOC_LMV_SETSTRIPE: {
1316 struct lmv_user_md *lum;
1317 char *buf = NULL;
1318 char *filename;
1319 int namelen = 0;
1320 int lumlen = 0;
1321 int len;
1322 int rc;
1323
e3e8ff41 1324 rc = obd_ioctl_getdata(&buf, &len, (void __user *)arg);
d7e09d03 1325 if (rc)
0a3bdb00 1326 return rc;
d7e09d03
PT
1327
1328 data = (void *)buf;
1329 if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf2 == NULL ||
34e1f2bb
JL
1330 data->ioc_inllen1 == 0 || data->ioc_inllen2 == 0) {
1331 rc = -EINVAL;
1332 goto lmv_out_free;
1333 }
d7e09d03
PT
1334
1335 filename = data->ioc_inlbuf1;
1336 namelen = data->ioc_inllen1;
1337
1338 if (namelen < 1) {
1339 CDEBUG(D_INFO, "IOC_MDC_LOOKUP missing filename\n");
34e1f2bb
JL
1340 rc = -EINVAL;
1341 goto lmv_out_free;
d7e09d03
PT
1342 }
1343 lum = (struct lmv_user_md *)data->ioc_inlbuf2;
1344 lumlen = data->ioc_inllen2;
1345
1346 if (lum->lum_magic != LMV_USER_MAGIC ||
1347 lumlen != sizeof(*lum)) {
1348 CERROR("%s: wrong lum magic %x or size %d: rc = %d\n",
1349 filename, lum->lum_magic, lumlen, -EFAULT);
34e1f2bb
JL
1350 rc = -EINVAL;
1351 goto lmv_out_free;
d7e09d03
PT
1352 }
1353
1354 /**
1355 * ll_dir_setdirstripe will be used to set dir stripe
1356 * mdc_create--->mdt_reint_create (with dirstripe)
1357 */
1358 rc = ll_dir_setdirstripe(inode, lum, filename);
1359lmv_out_free:
1360 obd_ioctl_freedata(buf, len);
0a3bdb00 1361 return rc;
d7e09d03
PT
1362
1363 }
1364 case LL_IOC_LOV_SETSTRIPE: {
1365 struct lov_user_md_v3 lumv3;
1366 struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
af00f6c5
OD
1367 struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
1368 struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
d7e09d03
PT
1369
1370 int set_default = 0;
1371
1372 LASSERT(sizeof(lumv3) == sizeof(*lumv3p));
1373 LASSERT(sizeof(lumv3.lmm_objects[0]) ==
1374 sizeof(lumv3p->lmm_objects[0]));
1375 /* first try with v1 which is smaller than v3 */
1376 if (copy_from_user(lumv1, lumv1p, sizeof(*lumv1)))
0a3bdb00 1377 return -EFAULT;
d7e09d03 1378
557732ad 1379 if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
d7e09d03 1380 if (copy_from_user(&lumv3, lumv3p, sizeof(lumv3)))
0a3bdb00 1381 return -EFAULT;
d7e09d03
PT
1382 }
1383
f76c23da 1384 if (is_root_inode(inode))
d7e09d03
PT
1385 set_default = 1;
1386
1387 /* in v1 and v3 cases lumv1 points to data */
1388 rc = ll_dir_setstripe(inode, lumv1, set_default);
1389
0a3bdb00 1390 return rc;
d7e09d03
PT
1391 }
1392 case LL_IOC_LMV_GETSTRIPE: {
af00f6c5 1393 struct lmv_user_md __user *lump = (void __user *)arg;
d7e09d03
PT
1394 struct lmv_user_md lum;
1395 struct lmv_user_md *tmp;
1396 int lum_size;
1397 int rc = 0;
1398 int mdtindex;
1399
1400 if (copy_from_user(&lum, lump, sizeof(struct lmv_user_md)))
0a3bdb00 1401 return -EFAULT;
d7e09d03
PT
1402
1403 if (lum.lum_magic != LMV_MAGIC_V1)
0a3bdb00 1404 return -EINVAL;
d7e09d03
PT
1405
1406 lum_size = lmv_user_md_size(1, LMV_MAGIC_V1);
496a51bd
JL
1407 tmp = kzalloc(lum_size, GFP_NOFS);
1408 if (!tmp) {
34e1f2bb
JL
1409 rc = -ENOMEM;
1410 goto free_lmv;
1411 }
d7e09d03 1412
7f46528c 1413 *tmp = lum;
d7e09d03
PT
1414 tmp->lum_type = LMV_STRIPE_TYPE;
1415 tmp->lum_stripe_count = 1;
1416 mdtindex = ll_get_mdt_idx(inode);
34e1f2bb
JL
1417 if (mdtindex < 0) {
1418 rc = -ENOMEM;
1419 goto free_lmv;
1420 }
d7e09d03
PT
1421
1422 tmp->lum_stripe_offset = mdtindex;
1423 tmp->lum_objects[0].lum_mds = mdtindex;
1424 memcpy(&tmp->lum_objects[0].lum_fid, ll_inode2fid(inode),
1425 sizeof(struct lu_fid));
af00f6c5 1426 if (copy_to_user((void __user *)arg, tmp, lum_size)) {
34e1f2bb
JL
1427 rc = -EFAULT;
1428 goto free_lmv;
1429 }
d7e09d03 1430free_lmv:
fd5e2fd0 1431 kfree(tmp);
0a3bdb00 1432 return rc;
d7e09d03 1433 }
d7e09d03 1434 case LL_IOC_LOV_SWAP_LAYOUTS:
0a3bdb00 1435 return -EPERM;
d7e09d03 1436 case LL_IOC_OBD_STATFS:
4c6243ec 1437 return ll_obd_statfs(inode, (void __user *)arg);
d7e09d03
PT
1438 case LL_IOC_LOV_GETSTRIPE:
1439 case LL_IOC_MDC_GETINFO:
1440 case IOC_MDC_GETFILEINFO:
1441 case IOC_MDC_GETFILESTRIPE: {
1442 struct ptlrpc_request *request = NULL;
af00f6c5 1443 struct lov_user_md __user *lump;
d7e09d03
PT
1444 struct lov_mds_md *lmm = NULL;
1445 struct mdt_body *body;
1446 char *filename = NULL;
1447 int lmmsize;
1448
1449 if (cmd == IOC_MDC_GETFILEINFO ||
1450 cmd == IOC_MDC_GETFILESTRIPE) {
d47bb83b 1451 filename = ll_getname((const char __user *)arg);
d7e09d03 1452 if (IS_ERR(filename))
0a3bdb00 1453 return PTR_ERR(filename);
d7e09d03
PT
1454
1455 rc = ll_lov_getstripe_ea_info(inode, filename, &lmm,
1456 &lmmsize, &request);
1457 } else {
1458 rc = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
1459 }
1460
1461 if (request) {
1462 body = req_capsule_server_get(&request->rq_pill,
1463 &RMF_MDT_BODY);
1464 LASSERT(body != NULL);
1465 } else {
34e1f2bb 1466 goto out_req;
d7e09d03
PT
1467 }
1468
1469 if (rc < 0) {
1470 if (rc == -ENODATA && (cmd == IOC_MDC_GETFILEINFO ||
34e1f2bb
JL
1471 cmd == LL_IOC_MDC_GETINFO)) {
1472 rc = 0;
1473 goto skip_lmm;
4d1d413a 1474 } else
34e1f2bb 1475 goto out_req;
d7e09d03
PT
1476 }
1477
1478 if (cmd == IOC_MDC_GETFILESTRIPE ||
1479 cmd == LL_IOC_LOV_GETSTRIPE) {
af00f6c5 1480 lump = (struct lov_user_md __user *)arg;
d7e09d03 1481 } else {
af00f6c5 1482 struct lov_user_mds_data __user *lmdp;
79792190 1483
af00f6c5 1484 lmdp = (struct lov_user_mds_data __user *)arg;
d7e09d03
PT
1485 lump = &lmdp->lmd_lmm;
1486 }
1487 if (copy_to_user(lump, lmm, lmmsize)) {
34e1f2bb
JL
1488 if (copy_to_user(lump, lmm, sizeof(*lump))) {
1489 rc = -EFAULT;
1490 goto out_req;
1491 }
d7e09d03
PT
1492 rc = -EOVERFLOW;
1493 }
eb73f514 1494skip_lmm:
d7e09d03 1495 if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
af00f6c5 1496 struct lov_user_mds_data __user *lmdp;
d7e09d03
PT
1497 lstat_t st = { 0 };
1498
1499 st.st_dev = inode->i_sb->s_dev;
1500 st.st_mode = body->mode;
1501 st.st_nlink = body->nlink;
1502 st.st_uid = body->uid;
1503 st.st_gid = body->gid;
1504 st.st_rdev = body->rdev;
1505 st.st_size = body->size;
1506 st.st_blksize = PAGE_CACHE_SIZE;
1507 st.st_blocks = body->blocks;
1508 st.st_atime = body->atime;
1509 st.st_mtime = body->mtime;
1510 st.st_ctime = body->ctime;
1511 st.st_ino = inode->i_ino;
1512
af00f6c5 1513 lmdp = (struct lov_user_mds_data __user *)arg;
34e1f2bb
JL
1514 if (copy_to_user(&lmdp->lmd_st, &st, sizeof(st))) {
1515 rc = -EFAULT;
1516 goto out_req;
1517 }
d7e09d03
PT
1518 }
1519
eb73f514 1520out_req:
d7e09d03
PT
1521 ptlrpc_req_finished(request);
1522 if (filename)
1523 ll_putname(filename);
1524 return rc;
1525 }
1526 case IOC_LOV_GETINFO: {
af00f6c5 1527 struct lov_user_mds_data __user *lumd;
d7e09d03 1528 struct lov_stripe_md *lsm;
af00f6c5 1529 struct lov_user_md __user *lum;
d7e09d03
PT
1530 struct lov_mds_md *lmm;
1531 int lmmsize;
1532 lstat_t st;
1533
af00f6c5 1534 lumd = (struct lov_user_mds_data __user *)arg;
d7e09d03
PT
1535 lum = &lumd->lmd_lmm;
1536
1537 rc = ll_get_max_mdsize(sbi, &lmmsize);
1538 if (rc)
0a3bdb00 1539 return rc;
d7e09d03 1540
e958f49b 1541 lmm = libcfs_kvzalloc(lmmsize, GFP_NOFS);
73863d83 1542 if (lmm == NULL)
0a3bdb00 1543 return -ENOMEM;
34e1f2bb
JL
1544 if (copy_from_user(lmm, lum, lmmsize)) {
1545 rc = -EFAULT;
1546 goto free_lmm;
1547 }
d7e09d03
PT
1548
1549 switch (lmm->lmm_magic) {
1550 case LOV_USER_MAGIC_V1:
1f6eaf83 1551 if (cpu_to_le32(LOV_USER_MAGIC_V1) == LOV_USER_MAGIC_V1)
d7e09d03
PT
1552 break;
1553 /* swab objects first so that stripes num will be sane */
1554 lustre_swab_lov_user_md_objects(
1555 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
1556 ((struct lov_user_md_v1 *)lmm)->lmm_stripe_count);
1557 lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
1558 break;
1559 case LOV_USER_MAGIC_V3:
1f6eaf83 1560 if (cpu_to_le32(LOV_USER_MAGIC_V3) == LOV_USER_MAGIC_V3)
d7e09d03
PT
1561 break;
1562 /* swab objects first so that stripes num will be sane */
1563 lustre_swab_lov_user_md_objects(
1564 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
1565 ((struct lov_user_md_v3 *)lmm)->lmm_stripe_count);
1566 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
1567 break;
1568 default:
34e1f2bb
JL
1569 rc = -EINVAL;
1570 goto free_lmm;
d7e09d03
PT
1571 }
1572
1573 rc = obd_unpackmd(sbi->ll_dt_exp, &lsm, lmm, lmmsize);
34e1f2bb
JL
1574 if (rc < 0) {
1575 rc = -ENOMEM;
1576 goto free_lmm;
1577 }
d7e09d03
PT
1578
1579 /* Perform glimpse_size operation. */
1580 memset(&st, 0, sizeof(st));
1581
1582 rc = ll_glimpse_ioctl(sbi, lsm, &st);
1583 if (rc)
34e1f2bb 1584 goto free_lsm;
d7e09d03 1585
34e1f2bb
JL
1586 if (copy_to_user(&lumd->lmd_st, &st, sizeof(st))) {
1587 rc = -EFAULT;
1588 goto free_lsm;
1589 }
d7e09d03 1590
eb73f514 1591free_lsm:
d7e09d03 1592 obd_free_memmd(sbi->ll_dt_exp, &lsm);
eb73f514 1593free_lmm:
e958f49b 1594 kvfree(lmm);
d7e09d03
PT
1595 return rc;
1596 }
1597 case OBD_IOC_LLOG_CATINFO: {
0a3bdb00 1598 return -EOPNOTSUPP;
d7e09d03
PT
1599 }
1600 case OBD_IOC_QUOTACHECK: {
1601 struct obd_quotactl *oqctl;
1602 int error = 0;
1603
2eb90a75 1604 if (!capable(CFS_CAP_SYS_ADMIN) ||
d7e09d03 1605 sbi->ll_flags & LL_SBI_RMT_CLIENT)
0a3bdb00 1606 return -EPERM;
d7e09d03 1607
496a51bd 1608 oqctl = kzalloc(sizeof(*oqctl), GFP_NOFS);
d7e09d03 1609 if (!oqctl)
0a3bdb00 1610 return -ENOMEM;
d7e09d03
PT
1611 oqctl->qc_type = arg;
1612 rc = obd_quotacheck(sbi->ll_md_exp, oqctl);
1613 if (rc < 0) {
1614 CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc);
1615 error = rc;
1616 }
1617
1618 rc = obd_quotacheck(sbi->ll_dt_exp, oqctl);
1619 if (rc < 0)
1620 CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc);
1621
97903a26 1622 kfree(oqctl);
d7e09d03
PT
1623 return error ?: rc;
1624 }
1625 case OBD_IOC_POLL_QUOTACHECK: {
1626 struct if_quotacheck *check;
1627
2eb90a75 1628 if (!capable(CFS_CAP_SYS_ADMIN) ||
d7e09d03 1629 sbi->ll_flags & LL_SBI_RMT_CLIENT)
0a3bdb00 1630 return -EPERM;
d7e09d03 1631
496a51bd 1632 check = kzalloc(sizeof(*check), GFP_NOFS);
d7e09d03 1633 if (!check)
0a3bdb00 1634 return -ENOMEM;
d7e09d03
PT
1635
1636 rc = obd_iocontrol(cmd, sbi->ll_md_exp, 0, (void *)check,
1637 NULL);
1638 if (rc) {
1639 CDEBUG(D_QUOTA, "mdc ioctl %d failed: %d\n", cmd, rc);
af00f6c5
OD
1640 if (copy_to_user((void __user *)arg, check,
1641 sizeof(*check)))
d7e09d03 1642 CDEBUG(D_QUOTA, "copy_to_user failed\n");
34e1f2bb 1643 goto out_poll;
d7e09d03
PT
1644 }
1645
1646 rc = obd_iocontrol(cmd, sbi->ll_dt_exp, 0, (void *)check,
1647 NULL);
1648 if (rc) {
1649 CDEBUG(D_QUOTA, "osc ioctl %d failed: %d\n", cmd, rc);
af00f6c5
OD
1650 if (copy_to_user((void __user *)arg, check,
1651 sizeof(*check)))
d7e09d03 1652 CDEBUG(D_QUOTA, "copy_to_user failed\n");
34e1f2bb 1653 goto out_poll;
d7e09d03 1654 }
eb73f514 1655out_poll:
97903a26 1656 kfree(check);
0a3bdb00 1657 return rc;
d7e09d03 1658 }
d7e09d03
PT
1659 case LL_IOC_QUOTACTL: {
1660 struct if_quotactl *qctl;
1661
496a51bd 1662 qctl = kzalloc(sizeof(*qctl), GFP_NOFS);
d7e09d03 1663 if (!qctl)
0a3bdb00 1664 return -ENOMEM;
d7e09d03 1665
af00f6c5 1666 if (copy_from_user(qctl, (void __user *)arg, sizeof(*qctl))) {
34e1f2bb
JL
1667 rc = -EFAULT;
1668 goto out_quotactl;
1669 }
d7e09d03
PT
1670
1671 rc = quotactl_ioctl(sbi, qctl);
1672
af00f6c5
OD
1673 if (rc == 0 && copy_to_user((void __user *)arg, qctl,
1674 sizeof(*qctl)))
d7e09d03
PT
1675 rc = -EFAULT;
1676
eb73f514 1677out_quotactl:
97903a26 1678 kfree(qctl);
0a3bdb00 1679 return rc;
d7e09d03
PT
1680 }
1681 case OBD_IOC_GETDTNAME:
1682 case OBD_IOC_GETMDNAME:
0a3bdb00 1683 return ll_get_obd_name(inode, cmd, arg);
d7e09d03 1684 case LL_IOC_FLUSHCTX:
0a3bdb00 1685 return ll_flush_ctx(inode);
d7e09d03
PT
1686#ifdef CONFIG_FS_POSIX_ACL
1687 case LL_IOC_RMTACL: {
f76c23da 1688 if (sbi->ll_flags & LL_SBI_RMT_CLIENT && is_root_inode(inode)) {
d7e09d03
PT
1689 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1690
1691 LASSERT(fd != NULL);
1692 rc = rct_add(&sbi->ll_rct, current_pid(), arg);
1693 if (!rc)
1694 fd->fd_flags |= LL_FILE_RMTACL;
0a3bdb00 1695 return rc;
d7e09d03 1696 } else
0a3bdb00 1697 return 0;
d7e09d03
PT
1698 }
1699#endif
1700 case LL_IOC_GETOBDCOUNT: {
1701 int count, vallen;
1702 struct obd_export *exp;
1703
af00f6c5 1704 if (copy_from_user(&count, (int __user *)arg, sizeof(int)))
0a3bdb00 1705 return -EFAULT;
d7e09d03
PT
1706
1707 /* get ost count when count is zero, get mdt count otherwise */
1708 exp = count ? sbi->ll_md_exp : sbi->ll_dt_exp;
1709 vallen = sizeof(count);
1710 rc = obd_get_info(NULL, exp, sizeof(KEY_TGT_COUNT),
1711 KEY_TGT_COUNT, &vallen, &count, NULL);
1712 if (rc) {
1713 CERROR("get target count failed: %d\n", rc);
0a3bdb00 1714 return rc;
d7e09d03
PT
1715 }
1716
af00f6c5 1717 if (copy_to_user((int __user *)arg, &count, sizeof(int)))
0a3bdb00 1718 return -EFAULT;
d7e09d03 1719
0a3bdb00 1720 return 0;
d7e09d03
PT
1721 }
1722 case LL_IOC_PATH2FID:
af00f6c5
OD
1723 if (copy_to_user((void __user *)arg, ll_inode2fid(inode),
1724 sizeof(struct lu_fid)))
0a3bdb00
GKH
1725 return -EFAULT;
1726 return 0;
d7e09d03 1727 case LL_IOC_GET_CONNECT_FLAGS: {
e09bee34
OD
1728 return obd_iocontrol(cmd, sbi->ll_md_exp, 0, NULL,
1729 (void __user *)arg);
d7e09d03
PT
1730 }
1731 case OBD_IOC_CHANGELOG_SEND:
1732 case OBD_IOC_CHANGELOG_CLEAR:
bbaa9c10
NY
1733 if (!capable(CFS_CAP_SYS_ADMIN))
1734 return -EPERM;
1735
af00f6c5 1736 rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
d7e09d03 1737 sizeof(struct ioc_changelog));
0a3bdb00 1738 return rc;
d7e09d03 1739 case OBD_IOC_FID2PATH:
7ec89fa5 1740 return ll_fid2path(inode, (void __user *)arg);
d7e09d03
PT
1741 case LL_IOC_HSM_REQUEST: {
1742 struct hsm_user_request *hur;
6b2eb32e 1743 ssize_t totalsize;
d7e09d03 1744
af00f6c5 1745 hur = memdup_user((void __user *)arg, sizeof(*hur));
4a07594e
AH
1746 if (IS_ERR(hur))
1747 return PTR_ERR(hur);
d7e09d03
PT
1748
1749 /* Compute the whole struct size */
1750 totalsize = hur_len(hur);
97903a26 1751 kfree(hur);
6b2eb32e
NC
1752 if (totalsize < 0)
1753 return -E2BIG;
e55c4476
JN
1754
1755 /* Final size will be more than double totalsize */
1756 if (totalsize >= MDS_MAXREQSIZE / 3)
1757 return -E2BIG;
1758
e958f49b 1759 hur = libcfs_kvzalloc(totalsize, GFP_NOFS);
d7e09d03 1760 if (hur == NULL)
0a3bdb00 1761 return -ENOMEM;
d7e09d03
PT
1762
1763 /* Copy the whole struct */
af00f6c5 1764 if (copy_from_user(hur, (void __user *)arg, totalsize)) {
e958f49b 1765 kvfree(hur);
0a3bdb00 1766 return -EFAULT;
d7e09d03
PT
1767 }
1768
48d23e61
JX
1769 if (hur->hur_request.hr_action == HUA_RELEASE) {
1770 const struct lu_fid *fid;
1771 struct inode *f;
1772 int i;
1773
1774 for (i = 0; i < hur->hur_request.hr_itemcount; i++) {
1775 fid = &hur->hur_user_item[i].hui_fid;
1776 f = search_inode_for_lustre(inode->i_sb, fid);
1777 if (IS_ERR(f)) {
1778 rc = PTR_ERR(f);
1779 break;
1780 }
1781
1782 rc = ll_hsm_release(f);
1783 iput(f);
1784 if (rc != 0)
1785 break;
1786 }
1787 } else {
1788 rc = obd_iocontrol(cmd, ll_i2mdexp(inode), totalsize,
1789 hur, NULL);
1790 }
d7e09d03 1791
e958f49b 1792 kvfree(hur);
d7e09d03 1793
0a3bdb00 1794 return rc;
d7e09d03
PT
1795 }
1796 case LL_IOC_HSM_PROGRESS: {
1797 struct hsm_progress_kernel hpk;
1798 struct hsm_progress hp;
1799
af00f6c5 1800 if (copy_from_user(&hp, (void __user *)arg, sizeof(hp)))
0a3bdb00 1801 return -EFAULT;
d7e09d03
PT
1802
1803 hpk.hpk_fid = hp.hp_fid;
1804 hpk.hpk_cookie = hp.hp_cookie;
1805 hpk.hpk_extent = hp.hp_extent;
1806 hpk.hpk_flags = hp.hp_flags;
1807 hpk.hpk_errval = hp.hp_errval;
1808 hpk.hpk_data_version = 0;
1809
1810 /* File may not exist in Lustre; all progress
1811 * reported to Lustre root */
1812 rc = obd_iocontrol(cmd, sbi->ll_md_exp, sizeof(hpk), &hpk,
1813 NULL);
0a3bdb00 1814 return rc;
d7e09d03
PT
1815 }
1816 case LL_IOC_HSM_CT_START:
af00f6c5 1817 rc = copy_and_ioctl(cmd, sbi->ll_md_exp, (void __user *)arg,
d7e09d03 1818 sizeof(struct lustre_kernelcomm));
0a3bdb00 1819 return rc;
d7e09d03
PT
1820
1821 case LL_IOC_HSM_COPY_START: {
1822 struct hsm_copy *copy;
1823 int rc;
1824
af00f6c5 1825 copy = memdup_user((char __user *)arg, sizeof(*copy));
4a07594e
AH
1826 if (IS_ERR(copy))
1827 return PTR_ERR(copy);
d7e09d03
PT
1828
1829 rc = ll_ioc_copy_start(inode->i_sb, copy);
af00f6c5 1830 if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
d7e09d03
PT
1831 rc = -EFAULT;
1832
97903a26 1833 kfree(copy);
0a3bdb00 1834 return rc;
d7e09d03
PT
1835 }
1836 case LL_IOC_HSM_COPY_END: {
1837 struct hsm_copy *copy;
1838 int rc;
1839
af00f6c5 1840 copy = memdup_user((char __user *)arg, sizeof(*copy));
4a07594e
AH
1841 if (IS_ERR(copy))
1842 return PTR_ERR(copy);
d7e09d03
PT
1843
1844 rc = ll_ioc_copy_end(inode->i_sb, copy);
af00f6c5 1845 if (copy_to_user((char __user *)arg, copy, sizeof(*copy)))
d7e09d03
PT
1846 rc = -EFAULT;
1847
97903a26 1848 kfree(copy);
0a3bdb00 1849 return rc;
d7e09d03
PT
1850 }
1851 default:
e09bee34
OD
1852 return obd_iocontrol(cmd, sbi->ll_dt_exp, 0, NULL,
1853 (void __user *)arg);
d7e09d03
PT
1854 }
1855}
1856
1857static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
1858{
1859 struct inode *inode = file->f_mapping->host;
1860 struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
1861 struct ll_sb_info *sbi = ll_i2sbi(inode);
1862 int api32 = ll_need_32bit_api(sbi);
1863 loff_t ret = -EINVAL;
d7e09d03 1864
5955102c 1865 inode_lock(inode);
d7e09d03 1866 switch (origin) {
e17f5594 1867 case SEEK_SET:
1868 break;
1869 case SEEK_CUR:
1870 offset += file->f_pos;
1871 break;
1872 case SEEK_END:
1873 if (offset > 0)
34e1f2bb 1874 goto out;
e17f5594 1875 if (api32)
1876 offset += LL_DIR_END_OFF_32BIT;
1877 else
1878 offset += LL_DIR_END_OFF;
1879 break;
1880 default:
1881 goto out;
d7e09d03
PT
1882 }
1883
1884 if (offset >= 0 &&
1885 ((api32 && offset <= LL_DIR_END_OFF_32BIT) ||
1886 (!api32 && offset <= LL_DIR_END_OFF))) {
1887 if (offset != file->f_pos) {
1888 if ((api32 && offset == LL_DIR_END_OFF_32BIT) ||
1889 (!api32 && offset == LL_DIR_END_OFF))
1890 fd->lfd_pos = MDS_DIR_END_OFF;
1891 else if (api32 && sbi->ll_flags & LL_SBI_64BIT_HASH)
1892 fd->lfd_pos = offset << 32;
1893 else
1894 fd->lfd_pos = offset;
1895 file->f_pos = offset;
1896 file->f_version = 0;
1897 }
1898 ret = offset;
1899 }
34e1f2bb 1900 goto out;
d7e09d03
PT
1901
1902out:
5955102c 1903 inode_unlock(inode);
d7e09d03
PT
1904 return ret;
1905}
1906
/* Directory open: delegates to the common file open path. */
static int ll_dir_open(struct inode *inode, struct file *file)
{
	return ll_file_open(inode, file);
}
1911
/* Directory release: delegates to the common file release path. */
static int ll_dir_release(struct inode *inode, struct file *file)
{
	return ll_file_release(inode, file);
}
1916
/* VFS file operations for Lustre directories. */
const struct file_operations ll_dir_operations = {
	.llseek		= ll_dir_seek,
	.open		= ll_dir_open,
	.release	= ll_dir_release,
	.read		= generic_read_dir,
	.iterate	= ll_readdir,
	.unlocked_ioctl	= ll_dir_ioctl,
	.fsync		= ll_fsync,
};
This page took 0.555082 seconds and 5 git commands to generate.