Commit | Line | Data |
---|---|---|
0bd49f94 RK |
1 | /* |
2 | * page.c - buffer/page management specific to NILFS | |
3 | * | |
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | * | |
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | |
21 | * Seiji Kihara <kihara@osrg.net>. | |
22 | */ | |
23 | ||
24 | #include <linux/pagemap.h> | |
25 | #include <linux/writeback.h> | |
26 | #include <linux/swap.h> | |
27 | #include <linux/bitops.h> | |
28 | #include <linux/page-flags.h> | |
29 | #include <linux/list.h> | |
30 | #include <linux/highmem.h> | |
31 | #include <linux/pagevec.h> | |
32 | #include "nilfs.h" | |
33 | #include "page.h" | |
34 | #include "mdt.h" | |
35 | ||
36 | ||
/*
 * Buffer state bits that are carried over from the source buffer to the
 * destination buffer when buffers or pages are copied in this file.
 */
#define NILFS_BUFFER_INHERENT_BITS					\
	((1UL << BH_Uptodate) |						\
	 (1UL << BH_Mapped) |						\
	 (1UL << BH_NILFS_Node) |					\
	 (1UL << BH_NILFS_Volatile) |					\
	 (1UL << BH_NILFS_Allocated))
40 | ||
41 | static struct buffer_head * | |
42 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, | |
43 | int blkbits, unsigned long b_state) | |
44 | ||
45 | { | |
46 | unsigned long first_block; | |
47 | struct buffer_head *bh; | |
48 | ||
49 | if (!page_has_buffers(page)) | |
50 | create_empty_buffers(page, 1 << blkbits, b_state); | |
51 | ||
52 | first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); | |
53 | bh = nilfs_page_get_nth_block(page, block - first_block); | |
54 | ||
55 | touch_buffer(bh); | |
56 | wait_on_buffer(bh); | |
57 | return bh; | |
58 | } | |
59 | ||
60 | /* | |
61 | * Since the page cache of B-tree node pages or data page cache of pseudo | |
62 | * inodes does not have a valid mapping->host pointer, calling | |
63 | * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; | |
64 | * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). | |
65 | * To avoid this problem, the old style mark_buffer_dirty() is used instead. | |
66 | */ | |
67 | void nilfs_mark_buffer_dirty(struct buffer_head *bh) | |
68 | { | |
69 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | |
70 | __set_page_dirty_nobuffers(bh->b_page); | |
71 | } | |
72 | ||
73 | struct buffer_head *nilfs_grab_buffer(struct inode *inode, | |
74 | struct address_space *mapping, | |
75 | unsigned long blkoff, | |
76 | unsigned long b_state) | |
77 | { | |
78 | int blkbits = inode->i_blkbits; | |
79 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); | |
80 | struct page *page, *opage; | |
81 | struct buffer_head *bh, *obh; | |
82 | ||
83 | page = grab_cache_page(mapping, index); | |
84 | if (unlikely(!page)) | |
85 | return NULL; | |
86 | ||
87 | bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); | |
88 | if (unlikely(!bh)) { | |
89 | unlock_page(page); | |
90 | page_cache_release(page); | |
91 | return NULL; | |
92 | } | |
93 | if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { | |
94 | /* | |
95 | * Shadow page cache uses assoc_mapping to point its original | |
96 | * page cache. The following code tries the original cache | |
97 | * if the given cache is a shadow and it didn't hit. | |
98 | */ | |
99 | opage = find_lock_page(mapping->assoc_mapping, index); | |
100 | if (!opage) | |
101 | return bh; | |
102 | ||
103 | obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, | |
104 | b_state); | |
105 | if (buffer_uptodate(obh)) { | |
106 | nilfs_copy_buffer(bh, obh); | |
107 | if (buffer_dirty(obh)) { | |
108 | nilfs_mark_buffer_dirty(bh); | |
109 | if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) | |
110 | nilfs_mdt_mark_dirty(inode); | |
111 | } | |
112 | } | |
113 | brelse(obh); | |
114 | unlock_page(opage); | |
115 | page_cache_release(opage); | |
116 | } | |
117 | return bh; | |
118 | } | |
119 | ||
120 | /** | |
121 | * nilfs_forget_buffer - discard dirty state | |
122 | * @inode: owner inode of the buffer | |
123 | * @bh: buffer head of the buffer to be discarded | |
124 | */ | |
125 | void nilfs_forget_buffer(struct buffer_head *bh) | |
126 | { | |
127 | struct page *page = bh->b_page; | |
128 | ||
129 | lock_buffer(bh); | |
130 | clear_buffer_nilfs_volatile(bh); | |
84338237 RK |
131 | clear_buffer_dirty(bh); |
132 | if (nilfs_page_buffers_clean(page)) | |
0bd49f94 RK |
133 | __nilfs_clear_page_dirty(page); |
134 | ||
135 | clear_buffer_uptodate(bh); | |
136 | clear_buffer_mapped(bh); | |
137 | bh->b_blocknr = -1; | |
138 | ClearPageUptodate(page); | |
139 | ClearPageMappedToDisk(page); | |
140 | unlock_buffer(bh); | |
141 | brelse(bh); | |
142 | } | |
143 | ||
144 | /** | |
145 | * nilfs_copy_buffer -- copy buffer data and flags | |
146 | * @dbh: destination buffer | |
147 | * @sbh: source buffer | |
148 | */ | |
149 | void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) | |
150 | { | |
151 | void *kaddr0, *kaddr1; | |
152 | unsigned long bits; | |
153 | struct page *spage = sbh->b_page, *dpage = dbh->b_page; | |
154 | struct buffer_head *bh; | |
155 | ||
156 | kaddr0 = kmap_atomic(spage, KM_USER0); | |
157 | kaddr1 = kmap_atomic(dpage, KM_USER1); | |
158 | memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); | |
159 | kunmap_atomic(kaddr1, KM_USER1); | |
160 | kunmap_atomic(kaddr0, KM_USER0); | |
161 | ||
162 | dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; | |
163 | dbh->b_blocknr = sbh->b_blocknr; | |
164 | dbh->b_bdev = sbh->b_bdev; | |
165 | ||
166 | bh = dbh; | |
167 | bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); | |
168 | while ((bh = bh->b_this_page) != dbh) { | |
169 | lock_buffer(bh); | |
170 | bits &= bh->b_state; | |
171 | unlock_buffer(bh); | |
172 | } | |
173 | if (bits & (1UL << BH_Uptodate)) | |
174 | SetPageUptodate(dpage); | |
175 | else | |
176 | ClearPageUptodate(dpage); | |
177 | if (bits & (1UL << BH_Mapped)) | |
178 | SetPageMappedToDisk(dpage); | |
179 | else | |
180 | ClearPageMappedToDisk(dpage); | |
181 | } | |
182 | ||
183 | /** | |
184 | * nilfs_page_buffers_clean - check if a page has dirty buffers or not. | |
185 | * @page: page to be checked | |
186 | * | |
187 | * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. | |
188 | * Otherwise, it returns non-zero value. | |
189 | */ | |
190 | int nilfs_page_buffers_clean(struct page *page) | |
191 | { | |
192 | struct buffer_head *bh, *head; | |
193 | ||
194 | bh = head = page_buffers(page); | |
195 | do { | |
196 | if (buffer_dirty(bh)) | |
197 | return 0; | |
198 | bh = bh->b_this_page; | |
199 | } while (bh != head); | |
200 | return 1; | |
201 | } | |
202 | ||
203 | void nilfs_page_bug(struct page *page) | |
204 | { | |
205 | struct address_space *m; | |
206 | unsigned long ino = 0; | |
207 | ||
208 | if (unlikely(!page)) { | |
209 | printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); | |
210 | return; | |
211 | } | |
212 | ||
213 | m = page->mapping; | |
214 | if (m) { | |
215 | struct inode *inode = NILFS_AS_I(m); | |
216 | if (inode != NULL) | |
217 | ino = inode->i_ino; | |
218 | } | |
219 | printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " | |
220 | "mapping=%p ino=%lu\n", | |
221 | page, atomic_read(&page->_count), | |
222 | (unsigned long long)page->index, page->flags, m, ino); | |
223 | ||
224 | if (page_has_buffers(page)) { | |
225 | struct buffer_head *bh, *head; | |
226 | int i = 0; | |
227 | ||
228 | bh = head = page_buffers(page); | |
229 | do { | |
230 | printk(KERN_CRIT | |
231 | " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", | |
232 | i++, bh, atomic_read(&bh->b_count), | |
233 | (unsigned long long)bh->b_blocknr, bh->b_state); | |
234 | bh = bh->b_this_page; | |
235 | } while (bh != head); | |
236 | } | |
237 | } | |
238 | ||
239 | /** | |
240 | * nilfs_alloc_private_page - allocate a private page with buffer heads | |
241 | * | |
242 | * Return Value: On success, a pointer to the allocated page is returned. | |
243 | * On error, NULL is returned. | |
244 | */ | |
245 | struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, | |
246 | unsigned long state) | |
247 | { | |
248 | struct buffer_head *bh, *head, *tail; | |
249 | struct page *page; | |
250 | ||
251 | page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ | |
252 | if (unlikely(!page)) | |
253 | return NULL; | |
254 | ||
255 | lock_page(page); | |
256 | head = alloc_page_buffers(page, size, 0); | |
257 | if (unlikely(!head)) { | |
258 | unlock_page(page); | |
259 | __free_page(page); | |
260 | return NULL; | |
261 | } | |
262 | ||
263 | bh = head; | |
264 | do { | |
265 | bh->b_state = (1UL << BH_NILFS_Allocated) | state; | |
266 | tail = bh; | |
267 | bh->b_bdev = bdev; | |
268 | bh = bh->b_this_page; | |
269 | } while (bh); | |
270 | ||
271 | tail->b_this_page = head; | |
272 | attach_page_buffers(page, head); | |
273 | ||
274 | return page; | |
275 | } | |
276 | ||
277 | void nilfs_free_private_page(struct page *page) | |
278 | { | |
279 | BUG_ON(!PageLocked(page)); | |
280 | BUG_ON(page->mapping); | |
281 | ||
282 | if (page_has_buffers(page) && !try_to_free_buffers(page)) | |
283 | NILFS_PAGE_BUG(page, "failed to free page"); | |
284 | ||
285 | unlock_page(page); | |
286 | __free_page(page); | |
287 | } | |
288 | ||
289 | /** | |
290 | * nilfs_copy_page -- copy the page with buffers | |
291 | * @dst: destination page | |
292 | * @src: source page | |
293 | * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. | |
294 | * | |
295 | * This fuction is for both data pages and btnode pages. The dirty flag | |
296 | * should be treated by caller. The page must not be under i/o. | |
297 | * Both src and dst page must be locked | |
298 | */ | |
299 | static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) | |
300 | { | |
301 | struct buffer_head *dbh, *dbufs, *sbh, *sbufs; | |
302 | unsigned long mask = NILFS_BUFFER_INHERENT_BITS; | |
303 | ||
304 | BUG_ON(PageWriteback(dst)); | |
305 | ||
306 | sbh = sbufs = page_buffers(src); | |
307 | if (!page_has_buffers(dst)) | |
308 | create_empty_buffers(dst, sbh->b_size, 0); | |
309 | ||
310 | if (copy_dirty) | |
311 | mask |= (1UL << BH_Dirty); | |
312 | ||
313 | dbh = dbufs = page_buffers(dst); | |
314 | do { | |
315 | lock_buffer(sbh); | |
316 | lock_buffer(dbh); | |
317 | dbh->b_state = sbh->b_state & mask; | |
318 | dbh->b_blocknr = sbh->b_blocknr; | |
319 | dbh->b_bdev = sbh->b_bdev; | |
320 | sbh = sbh->b_this_page; | |
321 | dbh = dbh->b_this_page; | |
322 | } while (dbh != dbufs); | |
323 | ||
324 | copy_highpage(dst, src); | |
325 | ||
326 | if (PageUptodate(src) && !PageUptodate(dst)) | |
327 | SetPageUptodate(dst); | |
328 | else if (!PageUptodate(src) && PageUptodate(dst)) | |
329 | ClearPageUptodate(dst); | |
330 | if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) | |
331 | SetPageMappedToDisk(dst); | |
332 | else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) | |
333 | ClearPageMappedToDisk(dst); | |
334 | ||
335 | do { | |
336 | unlock_buffer(sbh); | |
337 | unlock_buffer(dbh); | |
338 | sbh = sbh->b_this_page; | |
339 | dbh = dbh->b_this_page; | |
340 | } while (dbh != dbufs); | |
341 | } | |
342 | ||
343 | int nilfs_copy_dirty_pages(struct address_space *dmap, | |
344 | struct address_space *smap) | |
345 | { | |
346 | struct pagevec pvec; | |
347 | unsigned int i; | |
348 | pgoff_t index = 0; | |
349 | int err = 0; | |
350 | ||
351 | pagevec_init(&pvec, 0); | |
352 | repeat: | |
353 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, | |
354 | PAGEVEC_SIZE)) | |
355 | return 0; | |
356 | ||
357 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
358 | struct page *page = pvec.pages[i], *dpage; | |
359 | ||
360 | lock_page(page); | |
361 | if (unlikely(!PageDirty(page))) | |
362 | NILFS_PAGE_BUG(page, "inconsistent dirty state"); | |
363 | ||
364 | dpage = grab_cache_page(dmap, page->index); | |
365 | if (unlikely(!dpage)) { | |
366 | /* No empty page is added to the page cache */ | |
367 | err = -ENOMEM; | |
368 | unlock_page(page); | |
369 | break; | |
370 | } | |
371 | if (unlikely(!page_has_buffers(page))) | |
372 | NILFS_PAGE_BUG(page, | |
373 | "found empty page in dat page cache"); | |
374 | ||
375 | nilfs_copy_page(dpage, page, 1); | |
376 | __set_page_dirty_nobuffers(dpage); | |
377 | ||
378 | unlock_page(dpage); | |
379 | page_cache_release(dpage); | |
380 | unlock_page(page); | |
381 | } | |
382 | pagevec_release(&pvec); | |
383 | cond_resched(); | |
384 | ||
385 | if (likely(!err)) | |
386 | goto repeat; | |
387 | return err; | |
388 | } | |
389 | ||
390 | /** | |
391 | * nilfs_copy_back_pages -- copy back pages to orignal cache from shadow cache | |
392 | * @dmap: destination page cache | |
393 | * @smap: source page cache | |
394 | * | |
395 | * No pages must no be added to the cache during this process. | |
396 | * This must be ensured by the caller. | |
397 | */ | |
398 | void nilfs_copy_back_pages(struct address_space *dmap, | |
399 | struct address_space *smap) | |
400 | { | |
401 | struct pagevec pvec; | |
402 | unsigned int i, n; | |
403 | pgoff_t index = 0; | |
404 | int err; | |
405 | ||
406 | pagevec_init(&pvec, 0); | |
407 | repeat: | |
408 | n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); | |
409 | if (!n) | |
410 | return; | |
411 | index = pvec.pages[n - 1]->index + 1; | |
412 | ||
413 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
414 | struct page *page = pvec.pages[i], *dpage; | |
415 | pgoff_t offset = page->index; | |
416 | ||
417 | lock_page(page); | |
418 | dpage = find_lock_page(dmap, offset); | |
419 | if (dpage) { | |
420 | /* override existing page on the destination cache */ | |
1f5abe7e | 421 | WARN_ON(PageDirty(dpage)); |
0bd49f94 RK |
422 | nilfs_copy_page(dpage, page, 0); |
423 | unlock_page(dpage); | |
424 | page_cache_release(dpage); | |
425 | } else { | |
426 | struct page *page2; | |
427 | ||
428 | /* move the page to the destination cache */ | |
429 | spin_lock_irq(&smap->tree_lock); | |
430 | page2 = radix_tree_delete(&smap->page_tree, offset); | |
1f5abe7e RK |
431 | WARN_ON(page2 != page); |
432 | ||
0bd49f94 RK |
433 | smap->nrpages--; |
434 | spin_unlock_irq(&smap->tree_lock); | |
435 | ||
436 | spin_lock_irq(&dmap->tree_lock); | |
437 | err = radix_tree_insert(&dmap->page_tree, offset, page); | |
438 | if (unlikely(err < 0)) { | |
1f5abe7e | 439 | WARN_ON(err == -EEXIST); |
0bd49f94 RK |
440 | page->mapping = NULL; |
441 | page_cache_release(page); /* for cache */ | |
442 | } else { | |
443 | page->mapping = dmap; | |
444 | dmap->nrpages++; | |
445 | if (PageDirty(page)) | |
446 | radix_tree_tag_set(&dmap->page_tree, | |
447 | offset, | |
448 | PAGECACHE_TAG_DIRTY); | |
449 | } | |
450 | spin_unlock_irq(&dmap->tree_lock); | |
451 | } | |
452 | unlock_page(page); | |
453 | } | |
454 | pagevec_release(&pvec); | |
455 | cond_resched(); | |
456 | ||
457 | goto repeat; | |
458 | } | |
459 | ||
460 | void nilfs_clear_dirty_pages(struct address_space *mapping) | |
461 | { | |
462 | struct pagevec pvec; | |
463 | unsigned int i; | |
464 | pgoff_t index = 0; | |
465 | ||
466 | pagevec_init(&pvec, 0); | |
467 | ||
468 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | |
469 | PAGEVEC_SIZE)) { | |
470 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
471 | struct page *page = pvec.pages[i]; | |
472 | struct buffer_head *bh, *head; | |
473 | ||
474 | lock_page(page); | |
475 | ClearPageUptodate(page); | |
476 | ClearPageMappedToDisk(page); | |
477 | bh = head = page_buffers(page); | |
478 | do { | |
479 | lock_buffer(bh); | |
480 | clear_buffer_dirty(bh); | |
481 | clear_buffer_nilfs_volatile(bh); | |
482 | clear_buffer_uptodate(bh); | |
483 | clear_buffer_mapped(bh); | |
484 | unlock_buffer(bh); | |
485 | bh = bh->b_this_page; | |
486 | } while (bh != head); | |
487 | ||
488 | __nilfs_clear_page_dirty(page); | |
489 | unlock_page(page); | |
490 | } | |
491 | pagevec_release(&pvec); | |
492 | cond_resched(); | |
493 | } | |
494 | } | |
495 | ||
496 | unsigned nilfs_page_count_clean_buffers(struct page *page, | |
497 | unsigned from, unsigned to) | |
498 | { | |
499 | unsigned block_start, block_end; | |
500 | struct buffer_head *bh, *head; | |
501 | unsigned nc = 0; | |
502 | ||
503 | for (bh = head = page_buffers(page), block_start = 0; | |
504 | bh != head || !block_start; | |
505 | block_start = block_end, bh = bh->b_this_page) { | |
506 | block_end = block_start + bh->b_size; | |
507 | if (block_end > from && block_start < to && !buffer_dirty(bh)) | |
508 | nc++; | |
509 | } | |
510 | return nc; | |
511 | } | |
512 | ||
513 | /* | |
514 | * NILFS2 needs clear_page_dirty() in the following two cases: | |
515 | * | |
516 | * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears | |
517 | * page dirty flags when it copies back pages from the shadow cache | |
518 | * (gcdat->{i_mapping,i_btnode_cache}) to its original cache | |
519 | * (dat->{i_mapping,i_btnode_cache}). | |
520 | * | |
521 | * 2) Some B-tree operations like insertion or deletion may dispose buffers | |
522 | * in dirty state, and this needs to cancel the dirty state of their pages. | |
523 | */ | |
524 | int __nilfs_clear_page_dirty(struct page *page) | |
525 | { | |
526 | struct address_space *mapping = page->mapping; | |
527 | ||
528 | if (mapping) { | |
529 | spin_lock_irq(&mapping->tree_lock); | |
530 | if (test_bit(PG_dirty, &page->flags)) { | |
531 | radix_tree_tag_clear(&mapping->page_tree, | |
532 | page_index(page), | |
533 | PAGECACHE_TAG_DIRTY); | |
534 | spin_unlock_irq(&mapping->tree_lock); | |
535 | return clear_page_dirty_for_io(page); | |
536 | } | |
537 | spin_unlock_irq(&mapping->tree_lock); | |
538 | return 0; | |
539 | } | |
540 | return TestClearPageDirty(page); | |
541 | } |