Commit | Line | Data |
---|---|---|
ccd979bd MF |
1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | |
3 | * | |
4 | * extent_map.c | |
5 | * | |
363041a5 | 6 | * Block/Cluster mapping functions |
ccd979bd MF |
7 | * |
8 | * Copyright (C) 2004 Oracle. All rights reserved. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU General Public | |
12 | * License, version 2, as published by the Free Software Foundation. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU General Public | |
20 | * License along with this program; if not, write to the | |
21 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
22 | * Boston, MA 02111-1307, USA. | |
23 | */ | |
24 | ||
25 | #include <linux/fs.h> | |
26 | #include <linux/init.h> | |
27 | #include <linux/types.h> | |
ccd979bd MF |
28 | |
29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP | |
30 | #include <cluster/masklog.h> | |
31 | ||
32 | #include "ocfs2.h" | |
33 | ||
363041a5 | 34 | #include "alloc.h" |
ccd979bd MF |
35 | #include "extent_map.h" |
36 | #include "inode.h" | |
37 | #include "super.h" | |
38 | ||
39 | #include "buffer_head_io.h" | |
40 | ||
83418978 MF |
41 | /* |
42 | * The extent caching implementation is intentionally trivial. | |
43 | * | |
44 | * We only cache a small number of extents stored directly on the | |
45 | * inode, so linear order operations are acceptable. If we ever want | |
46 | * to increase the size of the extent map, then these algorithms must | |
47 | * get smarter. | |
48 | */ | |
49 | ||
50 | void ocfs2_extent_map_init(struct inode *inode) | |
51 | { | |
52 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | |
53 | ||
54 | oi->ip_extent_map.em_num_items = 0; | |
55 | INIT_LIST_HEAD(&oi->ip_extent_map.em_list); | |
56 | } | |
57 | ||
58 | static void __ocfs2_extent_map_lookup(struct ocfs2_extent_map *em, | |
59 | unsigned int cpos, | |
60 | struct ocfs2_extent_map_item **ret_emi) | |
61 | { | |
62 | unsigned int range; | |
63 | struct ocfs2_extent_map_item *emi; | |
64 | ||
65 | *ret_emi = NULL; | |
66 | ||
67 | list_for_each_entry(emi, &em->em_list, ei_list) { | |
68 | range = emi->ei_cpos + emi->ei_clusters; | |
69 | ||
70 | if (cpos >= emi->ei_cpos && cpos < range) { | |
71 | list_move(&emi->ei_list, &em->em_list); | |
72 | ||
73 | *ret_emi = emi; | |
74 | break; | |
75 | } | |
76 | } | |
77 | } | |
78 | ||
79 | static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos, | |
80 | unsigned int *phys, unsigned int *len, | |
81 | unsigned int *flags) | |
82 | { | |
83 | unsigned int coff; | |
84 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | |
85 | struct ocfs2_extent_map_item *emi; | |
86 | ||
87 | spin_lock(&oi->ip_lock); | |
88 | ||
89 | __ocfs2_extent_map_lookup(&oi->ip_extent_map, cpos, &emi); | |
90 | if (emi) { | |
91 | coff = cpos - emi->ei_cpos; | |
92 | *phys = emi->ei_phys + coff; | |
93 | if (len) | |
94 | *len = emi->ei_clusters - coff; | |
95 | if (flags) | |
96 | *flags = emi->ei_flags; | |
97 | } | |
98 | ||
99 | spin_unlock(&oi->ip_lock); | |
100 | ||
101 | if (emi == NULL) | |
102 | return -ENOENT; | |
103 | ||
104 | return 0; | |
105 | } | |
106 | ||
107 | /* | |
108 | * Forget about all clusters equal to or greater than cpos. | |
109 | */ | |
110 | void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) | |
111 | { | |
800deef3 | 112 | struct ocfs2_extent_map_item *emi, *n; |
83418978 MF |
113 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
114 | struct ocfs2_extent_map *em = &oi->ip_extent_map; | |
115 | LIST_HEAD(tmp_list); | |
116 | unsigned int range; | |
117 | ||
118 | spin_lock(&oi->ip_lock); | |
800deef3 | 119 | list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { |
83418978 MF |
120 | if (emi->ei_cpos >= cpos) { |
121 | /* Full truncate of this record. */ | |
122 | list_move(&emi->ei_list, &tmp_list); | |
123 | BUG_ON(em->em_num_items == 0); | |
124 | em->em_num_items--; | |
125 | continue; | |
126 | } | |
127 | ||
128 | range = emi->ei_cpos + emi->ei_clusters; | |
129 | if (range > cpos) { | |
130 | /* Partial truncate */ | |
131 | emi->ei_clusters = cpos - emi->ei_cpos; | |
132 | } | |
133 | } | |
134 | spin_unlock(&oi->ip_lock); | |
135 | ||
800deef3 | 136 | list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { |
83418978 MF |
137 | list_del(&emi->ei_list); |
138 | kfree(emi); | |
139 | } | |
140 | } | |
141 | ||
142 | /* | |
143 | * Is any part of emi2 contained within emi1 | |
144 | */ | |
145 | static int ocfs2_ei_is_contained(struct ocfs2_extent_map_item *emi1, | |
146 | struct ocfs2_extent_map_item *emi2) | |
147 | { | |
148 | unsigned int range1, range2; | |
149 | ||
150 | /* | |
151 | * Check if logical start of emi2 is inside emi1 | |
152 | */ | |
153 | range1 = emi1->ei_cpos + emi1->ei_clusters; | |
154 | if (emi2->ei_cpos >= emi1->ei_cpos && emi2->ei_cpos < range1) | |
155 | return 1; | |
156 | ||
157 | /* | |
158 | * Check if logical end of emi2 is inside emi1 | |
159 | */ | |
160 | range2 = emi2->ei_cpos + emi2->ei_clusters; | |
161 | if (range2 > emi1->ei_cpos && range2 <= range1) | |
162 | return 1; | |
163 | ||
164 | return 0; | |
165 | } | |
166 | ||
167 | static void ocfs2_copy_emi_fields(struct ocfs2_extent_map_item *dest, | |
168 | struct ocfs2_extent_map_item *src) | |
169 | { | |
170 | dest->ei_cpos = src->ei_cpos; | |
171 | dest->ei_phys = src->ei_phys; | |
172 | dest->ei_clusters = src->ei_clusters; | |
173 | dest->ei_flags = src->ei_flags; | |
174 | } | |
175 | ||
176 | /* | |
177 | * Try to merge emi with ins. Returns 1 if merge succeeds, zero | |
178 | * otherwise. | |
179 | */ | |
180 | static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi, | |
181 | struct ocfs2_extent_map_item *ins) | |
182 | { | |
183 | /* | |
184 | * Handle contiguousness | |
185 | */ | |
186 | if (ins->ei_phys == (emi->ei_phys + emi->ei_clusters) && | |
187 | ins->ei_cpos == (emi->ei_cpos + emi->ei_clusters) && | |
188 | ins->ei_flags == emi->ei_flags) { | |
189 | emi->ei_clusters += ins->ei_clusters; | |
190 | return 1; | |
191 | } else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys && | |
192 | (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys && | |
193 | ins->ei_flags == emi->ei_flags) { | |
194 | emi->ei_phys = ins->ei_phys; | |
195 | emi->ei_cpos = ins->ei_cpos; | |
196 | emi->ei_clusters += ins->ei_clusters; | |
197 | return 1; | |
198 | } | |
199 | ||
200 | /* | |
201 | * Overlapping extents - this shouldn't happen unless we've | |
202 | * split an extent to change it's flags. That is exceedingly | |
203 | * rare, so there's no sense in trying to optimize it yet. | |
204 | */ | |
205 | if (ocfs2_ei_is_contained(emi, ins) || | |
206 | ocfs2_ei_is_contained(ins, emi)) { | |
207 | ocfs2_copy_emi_fields(emi, ins); | |
208 | return 1; | |
209 | } | |
210 | ||
211 | /* No merge was possible. */ | |
212 | return 0; | |
213 | } | |
214 | ||
215 | /* | |
216 | * In order to reduce complexity on the caller, this insert function | |
217 | * is intentionally liberal in what it will accept. | |
218 | * | |
219 | * The only rule is that the truncate call *must* be used whenever | |
220 | * records have been deleted. This avoids inserting overlapping | |
221 | * records with different physical mappings. | |
222 | */ | |
223 | void ocfs2_extent_map_insert_rec(struct inode *inode, | |
224 | struct ocfs2_extent_rec *rec) | |
225 | { | |
226 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | |
227 | struct ocfs2_extent_map *em = &oi->ip_extent_map; | |
228 | struct ocfs2_extent_map_item *emi, *new_emi = NULL; | |
229 | struct ocfs2_extent_map_item ins; | |
230 | ||
231 | ins.ei_cpos = le32_to_cpu(rec->e_cpos); | |
232 | ins.ei_phys = ocfs2_blocks_to_clusters(inode->i_sb, | |
233 | le64_to_cpu(rec->e_blkno)); | |
234 | ins.ei_clusters = le16_to_cpu(rec->e_leaf_clusters); | |
235 | ins.ei_flags = rec->e_flags; | |
236 | ||
237 | search: | |
238 | spin_lock(&oi->ip_lock); | |
239 | ||
240 | list_for_each_entry(emi, &em->em_list, ei_list) { | |
241 | if (ocfs2_try_to_merge_extent_map(emi, &ins)) { | |
242 | list_move(&emi->ei_list, &em->em_list); | |
243 | spin_unlock(&oi->ip_lock); | |
244 | goto out; | |
245 | } | |
246 | } | |
247 | ||
248 | /* | |
249 | * No item could be merged. | |
250 | * | |
251 | * Either allocate and add a new item, or overwrite the last recently | |
252 | * inserted. | |
253 | */ | |
254 | ||
255 | if (em->em_num_items < OCFS2_MAX_EXTENT_MAP_ITEMS) { | |
256 | if (new_emi == NULL) { | |
257 | spin_unlock(&oi->ip_lock); | |
258 | ||
259 | new_emi = kmalloc(sizeof(*new_emi), GFP_NOFS); | |
260 | if (new_emi == NULL) | |
261 | goto out; | |
262 | ||
263 | goto search; | |
264 | } | |
265 | ||
266 | ocfs2_copy_emi_fields(new_emi, &ins); | |
267 | list_add(&new_emi->ei_list, &em->em_list); | |
268 | em->em_num_items++; | |
269 | new_emi = NULL; | |
270 | } else { | |
271 | BUG_ON(list_empty(&em->em_list) || em->em_num_items == 0); | |
272 | emi = list_entry(em->em_list.prev, | |
273 | struct ocfs2_extent_map_item, ei_list); | |
274 | list_move(&emi->ei_list, &em->em_list); | |
275 | ocfs2_copy_emi_fields(emi, &ins); | |
276 | } | |
277 | ||
278 | spin_unlock(&oi->ip_lock); | |
279 | ||
280 | out: | |
281 | if (new_emi) | |
282 | kfree(new_emi); | |
283 | } | |
284 | ||
4f902c37 MF |
285 | /* |
286 | * Return the 1st index within el which contains an extent start | |
287 | * larger than v_cluster. | |
288 | */ | |
289 | static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el, | |
290 | u32 v_cluster) | |
291 | { | |
292 | int i; | |
293 | struct ocfs2_extent_rec *rec; | |
294 | ||
295 | for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | |
296 | rec = &el->l_recs[i]; | |
297 | ||
298 | if (v_cluster < le32_to_cpu(rec->e_cpos)) | |
299 | break; | |
300 | } | |
301 | ||
302 | return i; | |
303 | } | |
304 | ||
305 | /* | |
306 | * Figure out the size of a hole which starts at v_cluster within the given | |
307 | * extent list. | |
308 | * | |
309 | * If there is no more allocation past v_cluster, we return the maximum | |
310 | * cluster size minus v_cluster. | |
311 | * | |
312 | * If we have in-inode extents, then el points to the dinode list and | |
313 | * eb_bh is NULL. Otherwise, eb_bh should point to the extent block | |
314 | * containing el. | |
315 | */ | |
316 | static int ocfs2_figure_hole_clusters(struct inode *inode, | |
317 | struct ocfs2_extent_list *el, | |
318 | struct buffer_head *eb_bh, | |
319 | u32 v_cluster, | |
320 | u32 *num_clusters) | |
321 | { | |
322 | int ret, i; | |
323 | struct buffer_head *next_eb_bh = NULL; | |
324 | struct ocfs2_extent_block *eb, *next_eb; | |
325 | ||
326 | i = ocfs2_search_for_hole_index(el, v_cluster); | |
327 | ||
328 | if (i == le16_to_cpu(el->l_next_free_rec) && eb_bh) { | |
329 | eb = (struct ocfs2_extent_block *)eb_bh->b_data; | |
330 | ||
331 | /* | |
332 | * Check the next leaf for any extents. | |
333 | */ | |
334 | ||
335 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) | |
336 | goto no_more_extents; | |
337 | ||
338 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | |
339 | le64_to_cpu(eb->h_next_leaf_blk), | |
340 | &next_eb_bh, OCFS2_BH_CACHED, inode); | |
341 | if (ret) { | |
342 | mlog_errno(ret); | |
343 | goto out; | |
344 | } | |
345 | next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; | |
346 | ||
347 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) { | |
348 | ret = -EROFS; | |
349 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb); | |
350 | goto out; | |
351 | } | |
352 | ||
353 | el = &next_eb->h_list; | |
354 | ||
355 | i = ocfs2_search_for_hole_index(el, v_cluster); | |
356 | } | |
357 | ||
358 | no_more_extents: | |
359 | if (i == le16_to_cpu(el->l_next_free_rec)) { | |
360 | /* | |
361 | * We're at the end of our existing allocation. Just | |
362 | * return the maximum number of clusters we could | |
363 | * possibly allocate. | |
364 | */ | |
365 | *num_clusters = UINT_MAX - v_cluster; | |
366 | } else { | |
367 | *num_clusters = le32_to_cpu(el->l_recs[i].e_cpos) - v_cluster; | |
368 | } | |
369 | ||
370 | ret = 0; | |
371 | out: | |
372 | brelse(next_eb_bh); | |
373 | return ret; | |
374 | } | |
375 | ||
9517bac6 | 376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, |
49cb8d2d MF |
377 | u32 *p_cluster, u32 *num_clusters, |
378 | unsigned int *extent_flags) | |
ccd979bd | 379 | { |
363041a5 | 380 | int ret, i; |
49cb8d2d | 381 | unsigned int flags = 0; |
363041a5 MF |
382 | struct buffer_head *di_bh = NULL; |
383 | struct buffer_head *eb_bh = NULL; | |
ccd979bd | 384 | struct ocfs2_dinode *di; |
363041a5 | 385 | struct ocfs2_extent_block *eb; |
ccd979bd | 386 | struct ocfs2_extent_list *el; |
363041a5 MF |
387 | struct ocfs2_extent_rec *rec; |
388 | u32 coff; | |
ccd979bd | 389 | |
6798d35a MF |
390 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
391 | ret = -ERANGE; | |
392 | mlog_errno(ret); | |
393 | goto out; | |
394 | } | |
395 | ||
83418978 MF |
396 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, |
397 | num_clusters, extent_flags); | |
398 | if (ret == 0) | |
399 | goto out; | |
400 | ||
363041a5 MF |
401 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, |
402 | &di_bh, OCFS2_BH_CACHED, inode); | |
ccd979bd MF |
403 | if (ret) { |
404 | mlog_errno(ret); | |
363041a5 | 405 | goto out; |
ccd979bd MF |
406 | } |
407 | ||
363041a5 MF |
408 | di = (struct ocfs2_dinode *) di_bh->b_data; |
409 | el = &di->id2.i_list; | |
ccd979bd | 410 | |
363041a5 MF |
411 | if (el->l_tree_depth) { |
412 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | |
413 | if (ret) { | |
414 | mlog_errno(ret); | |
415 | goto out; | |
416 | } | |
ccd979bd | 417 | |
363041a5 MF |
418 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
419 | el = &eb->h_list; | |
e48edee2 MF |
420 | |
421 | if (el->l_tree_depth) { | |
422 | ocfs2_error(inode->i_sb, | |
423 | "Inode %lu has non zero tree depth in " | |
424 | "leaf block %llu\n", inode->i_ino, | |
425 | (unsigned long long)eb_bh->b_blocknr); | |
426 | ret = -EROFS; | |
427 | goto out; | |
428 | } | |
a43db30c | 429 | } |
ccd979bd | 430 | |
363041a5 MF |
431 | i = ocfs2_search_extent_list(el, v_cluster); |
432 | if (i == -1) { | |
a43db30c | 433 | /* |
363041a5 | 434 | * A hole was found. Return some canned values that |
4f902c37 MF |
435 | * callers can key on. If asked for, num_clusters will |
436 | * be populated with the size of the hole. | |
a43db30c | 437 | */ |
363041a5 | 438 | *p_cluster = 0; |
4f902c37 MF |
439 | if (num_clusters) { |
440 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, | |
441 | v_cluster, | |
442 | num_clusters); | |
443 | if (ret) { | |
444 | mlog_errno(ret); | |
445 | goto out; | |
446 | } | |
447 | } | |
363041a5 MF |
448 | } else { |
449 | rec = &el->l_recs[i]; | |
ccd979bd | 450 | |
363041a5 | 451 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); |
ccd979bd | 452 | |
363041a5 MF |
453 | if (!rec->e_blkno) { |
454 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | |
455 | "record (%u, %u, 0)", inode->i_ino, | |
456 | le32_to_cpu(rec->e_cpos), | |
e48edee2 | 457 | ocfs2_rec_clusters(el, rec)); |
363041a5 MF |
458 | ret = -EROFS; |
459 | goto out; | |
ccd979bd MF |
460 | } |
461 | ||
363041a5 | 462 | coff = v_cluster - le32_to_cpu(rec->e_cpos); |
ccd979bd | 463 | |
363041a5 MF |
464 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, |
465 | le64_to_cpu(rec->e_blkno)); | |
466 | *p_cluster = *p_cluster + coff; | |
ccd979bd | 467 | |
363041a5 | 468 | if (num_clusters) |
e48edee2 | 469 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; |
49cb8d2d MF |
470 | |
471 | flags = rec->e_flags; | |
83418978 MF |
472 | |
473 | ocfs2_extent_map_insert_rec(inode, rec); | |
ccd979bd MF |
474 | } |
475 | ||
49cb8d2d MF |
476 | if (extent_flags) |
477 | *extent_flags = flags; | |
478 | ||
363041a5 MF |
479 | out: |
480 | brelse(di_bh); | |
481 | brelse(eb_bh); | |
ccd979bd MF |
482 | return ret; |
483 | } | |
484 | ||
ccd979bd | 485 | /* |
363041a5 MF |
486 | * This expects alloc_sem to be held. The allocation cannot change at |
487 | * all while the map is in the process of being updated. | |
ccd979bd | 488 | */ |
363041a5 | 489 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
4f902c37 | 490 | u64 *ret_count, unsigned int *extent_flags) |
ccd979bd MF |
491 | { |
492 | int ret; | |
ccd979bd | 493 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); |
363041a5 MF |
494 | u32 cpos, num_clusters, p_cluster; |
495 | u64 boff = 0; | |
ccd979bd MF |
496 | |
497 | cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno); | |
ccd979bd | 498 | |
49cb8d2d MF |
499 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, |
500 | extent_flags); | |
ccd979bd MF |
501 | if (ret) { |
502 | mlog_errno(ret); | |
363041a5 | 503 | goto out; |
ccd979bd MF |
504 | } |
505 | ||
363041a5 MF |
506 | /* |
507 | * p_cluster == 0 indicates a hole. | |
508 | */ | |
509 | if (p_cluster) { | |
510 | boff = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | |
ccd979bd | 511 | boff += (v_blkno & (u64)(bpc - 1)); |
ccd979bd MF |
512 | } |
513 | ||
363041a5 | 514 | *p_blkno = boff; |
ccd979bd | 515 | |
363041a5 MF |
516 | if (ret_count) { |
517 | *ret_count = ocfs2_clusters_to_blocks(inode->i_sb, num_clusters); | |
518 | *ret_count -= v_blkno & (u64)(bpc - 1); | |
ccd979bd | 519 | } |
ccd979bd | 520 | |
363041a5 MF |
521 | out: |
522 | return ret; | |
ccd979bd | 523 | } |