/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/lustre/llite/rw26.c
 *
 * Lustre Lite I/O page cache routines for the 2.5/2.6 kernel version
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/unistd.h>
#include <linux/uaccess.h>

#include <linux/migrate.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>

#define DEBUG_SUBSYSTEM S_LLITE

#include "../include/lustre_lite.h"
#include "llite_internal.h"
#include "../include/linux/lustre_compat25.h"

/**
 * Implements Linux VM address_space::invalidatepage() method. This method is
 * called when the page is truncated from a file, either as a result of an
 * explicit truncate, or when the inode is removed from memory (as a result of
 * final iput(), umount, or memory pressure induced icache shrinking).
 *
 * [0, offset] bytes of the page remain valid (this is for the case of a
 * not-page-aligned truncate). Lustre leaves the partially truncated page in
 * the cache, relying on struct inode::i_size to limit further accesses.
 */
static void ll_invalidatepage(struct page *vmpage, unsigned int offset,
			      unsigned int length)
{
	struct inode *inode;
	struct lu_env *env;
	struct cl_page *page;
	struct cl_object *obj;

	int refcheck;

	LASSERT(PageLocked(vmpage));
	LASSERT(!PageWriteback(vmpage));

	/*
	 * It is safe to not check anything in invalidatepage/releasepage
	 * below because they are run with the page locked and all our io
	 * is happening with locked pages too.
	 */
	if (offset == 0 && length == PAGE_SIZE) {
		env = cl_env_get(&refcheck);
		if (!IS_ERR(env)) {
			inode = vmpage->mapping->host;
			obj = ll_i2info(inode)->lli_clob;
			if (obj) {
				page = cl_vmpage_page(vmpage, obj);
				if (page) {
					cl_page_delete(env, page);
					cl_page_put(env, page);
				}
			} else {
				LASSERT(vmpage->private == 0);
			}
			cl_env_put(env, &refcheck);
		}
	}
}

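/*
 * Editorial note: ll_releasepage() below follows the usual ->releasepage()
 * contract: return 1 when the cl_page was detached and the VM may free the
 * page, 0 when the page must be kept (dirty, under writeback, or referenced
 * beyond the three expected counts).
 */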
static int ll_releasepage(struct page *vmpage, gfp_t gfp_mask)
{
	struct lu_env *env;
	void *cookie;
	struct cl_object *obj;
	struct cl_page *page;
	struct address_space *mapping;
	int result = 0;

	LASSERT(PageLocked(vmpage));
	if (PageWriteback(vmpage) || PageDirty(vmpage))
		return 0;

	mapping = vmpage->mapping;
	if (!mapping)
		return 1;

	obj = ll_i2info(mapping->host)->lli_clob;
	if (!obj)
		return 1;

	/* 1 for caller, 1 for cl_page and 1 for page cache */
	if (page_count(vmpage) > 3)
		return 0;

	page = cl_vmpage_page(vmpage, obj);
	if (!page)
		return 1;

	cookie = cl_env_reenter();
	env = cl_env_percpu_get();
	LASSERT(!IS_ERR(env));

	if (!cl_page_in_use(page)) {
		result = 1;
		cl_page_delete(env, page);
	}

	/* To use the percpu env array, the call path cannot be rescheduled;
	 * otherwise the percpu array will be messed up if ll_releasepage()
	 * is called again on the same CPU.
	 *
	 * If this page holds the last refc of the cl_object, the following
	 * call path may cause a reschedule:
	 *   cl_page_put -> cl_page_free -> cl_object_put ->
	 *     lu_object_put -> lu_object_free -> lov_delete_raid0.
	 *
	 * However, the kernel can't get rid of this inode until all pages
	 * have been cleaned up. Now that we hold the page lock here, it's
	 * pretty safe that we won't get into the object delete path.
	 */
	LASSERT(cl_object_refc(obj) > 1);
	cl_page_put(env, page);

	cl_env_percpu_put(env);
	cl_env_reexit(cookie);
	return result;
}

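/*
 * Editorial note: per the "arbitrary limit" comment in ll_get_user_pages()
 * below, this cap keeps the page-count arithmetic
 * (user_addr + size + PAGE_SIZE - 1) well away from unsigned long overflow
 * on 32-bit kernels; 2 GiB is an arbitrary but comfortable bound.
 */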
#define MAX_DIRECTIO_SIZE (2 * 1024 * 1024 * 1024UL)

static inline int ll_get_user_pages(int rw, unsigned long user_addr,
				    size_t size, struct page ***pages,
				    int *max_pages)
{
	int result = -ENOMEM;

	/* set an arbitrary limit to prevent arithmetic overflow */
	if (size > MAX_DIRECTIO_SIZE) {
		*pages = NULL;
		return -EFBIG;
	}

	*max_pages = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	*max_pages -= user_addr >> PAGE_SHIFT;

	*pages = libcfs_kvzalloc(*max_pages * sizeof(**pages), GFP_NOFS);
	if (*pages) {
		result = get_user_pages_fast(user_addr, *max_pages,
					     (rw == READ), *pages);
		if (unlikely(result <= 0))
			kvfree(*pages);
	}

	return result;
}

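/*
 * Editorial sketch (not code from this file): a buffer pinned with
 * ll_get_user_pages() is expected to be torn down with ll_free_user_pages(),
 * dirtying the pages when user memory was the destination of a READ.  Note
 * the direct I/O path below actually pins pages via
 * iov_iter_get_pages_alloc() and only reuses the free side:
 *
 *	int n = ll_get_user_pages(rw, user_addr, size, &pages, &max_pages);
 *	if (n > 0) {
 *		... transfer n pages ...
 *		ll_free_user_pages(pages, n, rw == READ);
 *	}
 */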
/* ll_free_user_pages - tear down page struct array
 * @pages: array of page struct pointers underlying target buffer
 * @npages: number of valid entries in @pages
 * @do_dirty: mark each page dirty (via set_page_dirty_lock()) before
 *	      dropping its reference
 */
static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
{
	int i;

	for (i = 0; i < npages; i++) {
		if (do_dirty)
			set_page_dirty_lock(pages[i]);
		put_page(pages[i]);
	}
	kvfree(pages);
}

ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
			   int rw, struct inode *inode,
			   struct ll_dio_pages *pv)
{
	struct cl_page *clp;
	struct cl_2queue *queue;
	struct cl_object *obj = io->ci_obj;
	int i;
	ssize_t rc = 0;
	loff_t file_offset = pv->ldp_start_offset;
	long size = pv->ldp_size;
	int page_count = pv->ldp_nr;
	struct page **pages = pv->ldp_pages;
	long page_size = cl_page_size(obj);
	bool do_io;
	int io_pages = 0;

	queue = &io->ci_queue;
	cl_2queue_init(queue);
	for (i = 0; i < page_count; i++) {
		if (pv->ldp_offsets)
			file_offset = pv->ldp_offsets[i];

		LASSERT(!(file_offset & (page_size - 1)));
		clp = cl_page_find(env, obj, cl_index(obj, file_offset),
				   pv->ldp_pages[i], CPT_TRANSIENT);
		if (IS_ERR(clp)) {
			rc = PTR_ERR(clp);
			break;
		}

		rc = cl_page_own(env, io, clp);
		if (rc) {
			LASSERT(clp->cp_state == CPS_FREEING);
			cl_page_put(env, clp);
			break;
		}

		do_io = true;

		/* check the page type: if the page is a host page, then
		 * copy the data directly
		 */
		if (clp->cp_type == CPT_CACHEABLE) {
			struct page *vmpage = cl_page_vmpage(clp);
			struct page *src_page;
			struct page *dst_page;
			void *src;
			void *dst;

			src_page = (rw == WRITE) ? pages[i] : vmpage;
			dst_page = (rw == WRITE) ? vmpage : pages[i];

			src = kmap_atomic(src_page);
			dst = kmap_atomic(dst_page);
			memcpy(dst, src, min(page_size, size));
			kunmap_atomic(dst);
			kunmap_atomic(src);

			/* make sure the page will be added to the transfer
			 * by cl_io_submit()->...->vvp_page_prep_write().
			 */
			if (rw == WRITE)
				set_page_dirty(vmpage);

			if (rw == READ) {
				/* do not issue the page for read, since it
				 * may reread a ra page which does NOT have
				 * the uptodate bit set.
				 */
				cl_page_disown(env, io, clp);
				do_io = false;
			}
		}

		if (likely(do_io)) {
			/*
			 * Add the page to the incoming page list of 2-queue.
			 */
			cl_page_list_add(&queue->c2_qin, clp);

			/*
			 * Set page clip to tell transfer formation engine
			 * that page has to be sent even if it is beyond KMS.
			 */
			cl_page_clip(env, clp, 0, min(size, page_size));

			++io_pages;
		}

		/* drop the reference count for cl_page_find */
		cl_page_put(env, clp);
		size -= page_size;
		file_offset += page_size;
	}

	if (rc == 0 && io_pages) {
		rc = cl_io_submit_sync(env, io,
				       rw == READ ? CRT_READ : CRT_WRITE,
				       queue, 0);
	}
	if (rc == 0)
		rc = pv->ldp_size;

	cl_2queue_discard(env, io, queue);
	cl_2queue_disown(env, io, queue);
	cl_2queue_fini(env, queue);
	return rc;
}
EXPORT_SYMBOL(ll_direct_rw_pages);

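/*
 * Editorial note: the wrapper below drives ll_direct_rw_pages() over a
 * contiguous file span: ldp_offsets is left NULL, so the loop above derives
 * each page's file offset from ldp_start_offset rather than a per-page
 * offset array.
 */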
static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
				   int rw, struct inode *inode,
				   struct address_space *mapping,
				   size_t size, loff_t file_offset,
				   struct page **pages, int page_count)
{
	struct ll_dio_pages pvec = {
		.ldp_pages = pages,
		.ldp_nr = page_count,
		.ldp_size = size,
		.ldp_offsets = NULL,
		.ldp_start_offset = file_offset
	};

	return ll_direct_rw_pages(env, io, rw, inode, &pvec);
}

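/*
 * Editorial worked example for the figures quoted below (assuming
 * sizeof(struct brw_page) == 24 bytes on a 64-bit build): a 128 KiB kmalloc
 * fits 131072 / 24 = 5461 brw_page entries, covering 5461 * 4 KiB, i.e.
 * roughly the quoted 22MB before rounding down to a DT_MAX_BRW_SIZE
 * multiple; a 4 MiB kmalloc scales the same way to roughly 682MB.
 */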
/* This is the maximum size of a single O_DIRECT request, based on the
 * kmalloc limit. We need to fit all of the brw_page structs, each one
 * representing PAGE_SIZE worth of user data, into a single buffer, and
 * then truncate this to be a full-sized RPC. For 4kB PAGE_SIZE this is
 * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc.
 */
#define MAX_DIO_SIZE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \
		       PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))

static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter)
{
	struct lu_env *env;
	struct cl_io *io;
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct vvp_object *obj = cl_inode2vvp(inode);
	loff_t file_offset = iocb->ki_pos;
	ssize_t count = iov_iter_count(iter);
	ssize_t tot_bytes = 0, result = 0;
	struct ll_inode_info *lli = ll_i2info(inode);
	long size = MAX_DIO_SIZE;
	int refcheck;

	if (!lli->lli_has_smd)
		return -EBADF;

	/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
	if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
		return -EINVAL;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), offset=%lld=%llx, pages %zd (max %lu)\n",
	       PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
	       file_offset, file_offset, count >> PAGE_SHIFT,
	       MAX_DIO_SIZE >> PAGE_SHIFT);

	/* Check that all user buffers are aligned as well */
	if (iov_iter_alignment(iter) & ~PAGE_MASK)
		return -EINVAL;

	env = cl_env_get(&refcheck);
	LASSERT(!IS_ERR(env));
	io = vvp_env_io(env)->vui_cl.cis_io;
	LASSERT(io);

	/* 0. Need locking between buffered and direct access, and to guard
	 *    against size changes by concurrent truncates and writes.
	 * 1. Need inode mutex to operate transient pages.
	 */
	if (iov_iter_rw(iter) == READ)
		inode_lock(inode);

	LASSERT(obj->vob_transient_pages == 0);
	while (iov_iter_count(iter)) {
		struct page **pages;
		size_t offs;

		count = min_t(size_t, iov_iter_count(iter), size);
		if (iov_iter_rw(iter) == READ) {
			if (file_offset >= i_size_read(inode))
				break;
			if (file_offset + count > i_size_read(inode))
				count = i_size_read(inode) - file_offset;
		}

		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
		if (likely(result > 0)) {
			int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

			result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter),
						     inode, file->f_mapping,
						     result, file_offset, pages,
						     n);
			ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ);
		}
		if (unlikely(result <= 0)) {
			/* If we can't allocate a large enough buffer
			 * for the request, shrink it to a smaller
			 * PAGE_SIZE multiple and try again.
			 * We should always be able to kmalloc for a
			 * page worth of page pointers = 4MB on i386.
			 */
			if (result == -ENOMEM &&
			    size > (PAGE_SIZE / sizeof(*pages)) *
			    PAGE_SIZE) {
				size = ((((size / 2) - 1) |
					 ~PAGE_MASK) + 1) &
					PAGE_MASK;
				CDEBUG(D_VFSTRACE, "DIO size now %lu\n",
				       size);
				continue;
			}

			goto out;
		}
		iov_iter_advance(iter, result);
		tot_bytes += result;
		file_offset += result;
	}
out:
	LASSERT(obj->vob_transient_pages == 0);
	if (iov_iter_rw(iter) == READ)
		inode_unlock(inode);

	if (tot_bytes > 0) {
		struct vvp_io *vio = vvp_env_io(env);

		/* no commit async for direct IO */
		vio->u.write.vui_written += tot_bytes;
	}

	cl_env_put(env, &refcheck);
	return tot_bytes ? tot_bytes : result;
}

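/*
 * Editorial note on the return convention above: once some bytes have been
 * transferred, a later error in the loop is swallowed and the partial byte
 * count is returned; only when nothing was transferred at all does the
 * error code itself propagate to the caller.
 */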
/**
 * Prepare a partially written-to page for a write.
 */
static int ll_prepare_partial_page(const struct lu_env *env, struct cl_io *io,
				   struct cl_page *pg)
{
	struct cl_attr *attr = vvp_env_thread_attr(env);
	struct cl_object *obj = io->ci_obj;
	struct vvp_page *vpg = cl_object_page_slice(obj, pg);
	loff_t offset = cl_offset(obj, vvp_index(vpg));
	int result;

	cl_object_attr_lock(obj);
	result = cl_object_attr_get(env, obj, attr);
	cl_object_attr_unlock(obj);
	if (result == 0) {
		/*
		 * If we are writing to a new page, no need to read old data.
		 * The extent locking will have updated the KMS, and for our
		 * purposes here we can treat it like i_size.
		 */
		if (attr->cat_kms <= offset) {
			char *kaddr = kmap_atomic(vpg->vpg_page);

			memset(kaddr, 0, cl_page_size(obj));
			kunmap_atomic(kaddr);
		} else if (vpg->vpg_defer_uptodate) {
			vpg->vpg_ra_used = 1;
		} else {
			result = ll_page_sync_io(env, io, pg, CRT_READ);
		}
	}
	return result;
}

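/*
 * Editorial sketch of how the two address_space hooks below cooperate under
 * generic_perform_write() (not code from this file):
 *
 *	ll_write_begin(file, mapping, pos, len, flags, &vmpage, &fsdata);
 *	  -> returns with vmpage locked and the cl_page assumed, stashed in
 *	     lcc->lcc_page and handed back through *fsdata;
 *	copy of user data into vmpage (done by the VFS);
 *	ll_write_end(file, mapping, pos, len, copied, vmpage, fsdata);
 *	  -> queues the page on vui_queue, or commits immediately when the
 *	     queue holds a full RPC, the page was already dirty, nothing was
 *	     copied, or the file is O_SYNC/IS_SYNC().
 */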
static int ll_write_begin(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned flags,
			  struct page **pagep, void **fsdata)
{
	struct ll_cl_context *lcc;
	const struct lu_env *env;
	struct cl_io *io;
	struct cl_page *page;
	struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *vmpage = NULL;
	unsigned int from = pos & (PAGE_SIZE - 1);
	unsigned int to = from + len;
	int result = 0;

	CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);

	lcc = ll_cl_find(file);
	if (!lcc) {
		result = -EIO;
		goto out;
	}

	env = lcc->lcc_env;
	io = lcc->lcc_io;

	/* To avoid deadlock, try to lock the page first. */
	vmpage = grab_cache_page_nowait(mapping, index);
	if (unlikely(!vmpage || PageDirty(vmpage) || PageWriteback(vmpage))) {
		struct vvp_io *vio = vvp_env_io(env);
		struct cl_page_list *plist = &vio->u.write.vui_queue;

		/* if the page is already in dirty cache, we have to commit
		 * the pages right now; otherwise, it may cause deadlock
		 * because it holds the page lock of a dirty page and requests
		 * more grants. It's okay for the dirty page to be the first
		 * one in the commit page list, though.
		 */
		if (vmpage && plist->pl_nr > 0) {
			unlock_page(vmpage);
			put_page(vmpage);
			vmpage = NULL;
		}

		/* commit pages and then wait for page lock */
		result = vvp_io_write_commit(env, io);
		if (result < 0)
			goto out;

		if (!vmpage) {
			vmpage = grab_cache_page_write_begin(mapping, index,
							     flags);
			if (!vmpage) {
				result = -ENOMEM;
				goto out;
			}
		}
	}

	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
	if (IS_ERR(page)) {
		result = PTR_ERR(page);
		goto out;
	}

	lcc->lcc_page = page;
	lu_ref_add(&page->cp_reference, "cl_io", io);

	cl_page_assume(env, io, page);
	if (!PageUptodate(vmpage)) {
		/*
		 * We're completely overwriting an existing page,
		 * so _don't_ set it up to date until commit_write
		 */
		if (from == 0 && to == PAGE_SIZE) {
			CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
			POISON_PAGE(vmpage, 0x11);
		} else {
			/* TODO: can be optimized at OSC layer to check if it
			 * is a lockless IO. In that case, it's not necessary
			 * to read the data.
			 */
			result = ll_prepare_partial_page(env, io, page);
			if (result == 0)
				SetPageUptodate(vmpage);
		}
	}
	if (result < 0)
		cl_page_unassume(env, io, page);
out:
	if (result < 0) {
		if (vmpage) {
			unlock_page(vmpage);
			put_page(vmpage);
		}
	} else {
		*pagep = vmpage;
		*fsdata = lcc;
	}
	return result;
}

static int ll_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *vmpage, void *fsdata)
{
	struct ll_cl_context *lcc = fsdata;
	const struct lu_env *env;
	struct cl_io *io;
	struct vvp_io *vio;
	struct cl_page *page;
	unsigned from = pos & (PAGE_SIZE - 1);
	bool unplug = false;
	int result = 0;

	put_page(vmpage);

	env = lcc->lcc_env;
	page = lcc->lcc_page;
	io = lcc->lcc_io;
	vio = vvp_env_io(env);

	LASSERT(cl_page_is_owned(page, io));
	if (copied > 0) {
		struct cl_page_list *plist = &vio->u.write.vui_queue;

		lcc->lcc_page = NULL; /* page will be queued */

		/* Add it into write queue */
		cl_page_list_add(plist, page);
		if (plist->pl_nr == 1) /* first page */
			vio->u.write.vui_from = from;
		else
			LASSERT(from == 0);
		vio->u.write.vui_to = from + copied;

		/*
		 * To avoid the deadlock in balance_dirty_pages() where this
		 * dirty page may be written back in the same thread, commit
		 * the queue now.
		 */
		if (PageDirty(vmpage))
			unplug = true;

		/* We may have one full RPC, commit it soon */
		if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
			unplug = true;

		CL_PAGE_DEBUG(D_VFSTRACE, env, page,
			      "queued page: %d.\n", plist->pl_nr);
	} else {
		cl_page_disown(env, io, page);

		lcc->lcc_page = NULL;
		lu_ref_del(&page->cp_reference, "cl_io", io);
		cl_page_put(env, page);

		/* page list is not contiguous now, commit it now */
		unplug = true;
	}

	if (unplug ||
	    file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
		result = vvp_io_write_commit(env, io);

	return result >= 0 ? copied : result;
}

#ifdef CONFIG_MIGRATION
static int ll_migratepage(struct address_space *mapping,
			  struct page *newpage, struct page *page,
			  enum migrate_mode mode)
{
	/* Always fail page migration until we have a proper implementation */
	return -EIO;
}
#endif

const struct address_space_operations ll_aops = {
	.readpage = ll_readpage,
	.direct_IO = ll_direct_IO_26,
	.writepage = ll_writepage,
	.writepages = ll_writepages,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.write_begin = ll_write_begin,
	.write_end = ll_write_end,
	.invalidatepage = ll_invalidatepage,
	.releasepage = (void *)ll_releasepage,
#ifdef CONFIG_MIGRATION
	.migratepage = ll_migratepage,
#endif
};