Commit | Line | Data |
---|---|---|
b14f8ab2 BH |
1 | /* |
2 | * Copyright (C) 2005, 2006 | |
27d2e149 | 3 | * Avishay Traeger (avishay@gmail.com) |
b14f8ab2 BH |
4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | |
6 | * | |
7 | * This file is part of exofs. | |
8 | * | |
9 | * exofs is free software; you can redistribute it and/or modify | |
10 | * it under the terms of the GNU General Public License as published by | |
11 | * the Free Software Foundation. Since it is based on ext2, and the only | |
12 | * valid version of GPL for the Linux kernel is version 2, the only valid | |
13 | * version of GPL for exofs is version 2. | |
14 | * | |
15 | * exofs is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | * GNU General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU General Public License | |
21 | * along with exofs; if not, write to the Free Software | |
22 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
23 | */ | |
24 | ||
5a0e3ad6 | 25 | #include <linux/slab.h> |
5d952b83 | 26 | #include <asm/div64.h> |
b14f8ab2 | 27 | |
8ff660ab | 28 | #include <scsi/osd_ore.h> |
b14f8ab2 | 29 | |
/* Error messages are always printed, prefixed with "ore: ". */
#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)

#ifdef CONFIG_EXOFS_DEBUG
/* Debug build: prefix every message with the calling function and line. */
#define ORE_DBGMSG(fmt, a...) \
	printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
/* Non-debug build: the printk sits under "if (0)" so it is never executed,
 * yet the arguments are still seen by the compiler.
 */
#define ORE_DBGMSG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif

/* u64 has problems with printk this will cast it to unsigned long long.
 * The expansion is fully parenthesized so that it stays a single operand
 * wherever it is used (e.g. under sizeof or next to a postfix operator).
 */
#define _LLU(x) ((unsigned long long)(x))

/* Second-level (very verbose) debug messages; compiled out by default. */
#define ORE_DBGMSG2(M...) do {} while (0)
/* #define ORE_DBGMSG2 ORE_DBGMSG */
45 | ||
cf283ade BH |
46 | MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); |
47 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); | |
48 | MODULE_LICENSE("GPL"); | |
49 | ||
b916c5cd BH |
50 | static void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, |
51 | struct ore_striping_info *si); | |
52 | ||
8ff660ab | 53 | static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) |
9e9db456 | 54 | { |
5bf696da | 55 | return ios->oc->comps[index & ios->oc->single_comp].cred; |
9e9db456 BH |
56 | } |
57 | ||
8ff660ab | 58 | static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) |
9e9db456 | 59 | { |
5bf696da | 60 | return &ios->oc->comps[index & ios->oc->single_comp].obj; |
9e9db456 BH |
61 | } |
62 | ||
8ff660ab | 63 | static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) |
9e9db456 | 64 | { |
d866d875 | 65 | return ore_comp_dev(ios->oc, index); |
9e9db456 BH |
66 | } |
67 | ||
b916c5cd BH |
68 | static int _get_io_state(struct ore_layout *layout, |
69 | struct ore_components *oc, unsigned numdevs, | |
70 | struct ore_io_state **pios) | |
b14f8ab2 | 71 | { |
8ff660ab | 72 | struct ore_io_state *ios; |
06886a5a BH |
73 | |
74 | /*TODO: Maybe use kmem_cach per sbi of size | |
45d3abcb | 75 | * exofs_io_state_size(layout->s_numdevs) |
06886a5a | 76 | */ |
b916c5cd | 77 | ios = kzalloc(ore_io_state_size(numdevs), GFP_KERNEL); |
06886a5a | 78 | if (unlikely(!ios)) { |
8ff660ab | 79 | ORE_DBGMSG("Failed kzalloc bytes=%d\n", |
b916c5cd | 80 | ore_io_state_size(numdevs)); |
06886a5a BH |
81 | *pios = NULL; |
82 | return -ENOMEM; | |
83 | } | |
84 | ||
45d3abcb | 85 | ios->layout = layout; |
5bf696da | 86 | ios->oc = oc; |
b916c5cd BH |
87 | *pios = ios; |
88 | return 0; | |
89 | } | |
90 | ||
91 | /* Allocate an io_state for only a single group of devices | |
92 | * | |
93 | * If a user needs to call ore_read/write() this version must be used becase it | |
94 | * allocates extra stuff for striping and raid. | |
95 | * The ore might decide to only IO less then @length bytes do to alignmets | |
96 | * and constrains as follows: | |
97 | * - The IO cannot cross group boundary. | |
98 | * - In raid5/6 The end of the IO must align at end of a stripe eg. | |
99 | * (@offset + @length) % strip_size == 0. Or the complete range is within a | |
100 | * single stripe. | |
101 | * - Memory condition only permitted a shorter IO. (A user can use @length=~0 | |
102 | * And check the returned ios->length for max_io_size.) | |
103 | * | |
104 | * The caller must check returned ios->length (and/or ios->nr_pages) and | |
105 | * re-issue these pages that fall outside of ios->length | |
106 | */ | |
107 | int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, | |
108 | bool is_reading, u64 offset, u64 length, | |
109 | struct ore_io_state **pios) | |
110 | { | |
111 | struct ore_io_state *ios; | |
112 | unsigned numdevs = layout->group_width * layout->mirrors_p1; | |
113 | int ret; | |
114 | ||
115 | ret = _get_io_state(layout, oc, numdevs, pios); | |
116 | if (unlikely(ret)) | |
117 | return ret; | |
118 | ||
119 | ios = *pios; | |
e1042ba0 | 120 | ios->reading = is_reading; |
b916c5cd BH |
121 | ios->offset = offset; |
122 | ||
123 | if (length) { | |
98260754 BH |
124 | ore_calc_stripe_info(layout, offset, &ios->si); |
125 | ios->length = (length <= ios->si.group_length) ? length : | |
126 | ios->si.group_length; | |
b916c5cd BH |
127 | ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE; |
128 | } | |
e1042ba0 | 129 | |
06886a5a | 130 | return 0; |
b14f8ab2 | 131 | } |
cf283ade | 132 | EXPORT_SYMBOL(ore_get_rw_state); |
b14f8ab2 | 133 | |
b916c5cd BH |
134 | /* Allocate an io_state for all the devices in the comps array |
135 | * | |
136 | * This version of io_state allocation is used mostly by create/remove | |
137 | * and trunc where we currently need all the devices. The only wastful | |
138 | * bit is the read/write_attributes with no IO. Those sites should | |
139 | * be converted to use ore_get_rw_state() with length=0 | |
140 | */ | |
5bf696da | 141 | int ore_get_io_state(struct ore_layout *layout, struct ore_components *oc, |
b916c5cd | 142 | struct ore_io_state **pios) |
e1042ba0 | 143 | { |
b916c5cd | 144 | return _get_io_state(layout, oc, oc->numdevs, pios); |
e1042ba0 | 145 | } |
cf283ade | 146 | EXPORT_SYMBOL(ore_get_io_state); |
e1042ba0 | 147 | |
8ff660ab | 148 | void ore_put_io_state(struct ore_io_state *ios) |
b14f8ab2 | 149 | { |
06886a5a BH |
150 | if (ios) { |
151 | unsigned i; | |
b14f8ab2 | 152 | |
06886a5a | 153 | for (i = 0; i < ios->numdevs; i++) { |
8ff660ab | 154 | struct ore_per_dev_state *per_dev = &ios->per_dev[i]; |
06886a5a BH |
155 | |
156 | if (per_dev->or) | |
157 | osd_end_request(per_dev->or); | |
158 | if (per_dev->bio) | |
159 | bio_put(per_dev->bio); | |
160 | } | |
161 | ||
162 | kfree(ios); | |
b14f8ab2 | 163 | } |
06886a5a | 164 | } |
cf283ade | 165 | EXPORT_SYMBOL(ore_put_io_state); |
b14f8ab2 | 166 | |
/* ios->done callback used for synchronous execution: @p is the
 * struct completion the submitter is waiting on.
 */
static void _sync_done(struct ore_io_state *ios, void *p)
{
	struct completion *submitter = p;

	complete(submitter);
}
173 | ||
174 | static void _last_io(struct kref *kref) | |
175 | { | |
8ff660ab BH |
176 | struct ore_io_state *ios = container_of( |
177 | kref, struct ore_io_state, kref); | |
06886a5a BH |
178 | |
179 | ios->done(ios, ios->private); | |
180 | } | |
181 | ||
182 | static void _done_io(struct osd_request *or, void *p) | |
183 | { | |
8ff660ab | 184 | struct ore_io_state *ios = p; |
06886a5a BH |
185 | |
186 | kref_put(&ios->kref, _last_io); | |
187 | } | |
188 | ||
8ff660ab | 189 | static int ore_io_execute(struct ore_io_state *ios) |
06886a5a BH |
190 | { |
191 | DECLARE_COMPLETION_ONSTACK(wait); | |
192 | bool sync = (ios->done == NULL); | |
193 | int i, ret; | |
194 | ||
195 | if (sync) { | |
196 | ios->done = _sync_done; | |
197 | ios->private = &wait; | |
198 | } | |
199 | ||
200 | for (i = 0; i < ios->numdevs; i++) { | |
201 | struct osd_request *or = ios->per_dev[i].or; | |
202 | if (unlikely(!or)) | |
203 | continue; | |
204 | ||
9e9db456 | 205 | ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL); |
06886a5a | 206 | if (unlikely(ret)) { |
8ff660ab | 207 | ORE_DBGMSG("Failed to osd_finalize_request() => %d\n", |
06886a5a BH |
208 | ret); |
209 | return ret; | |
210 | } | |
211 | } | |
212 | ||
213 | kref_init(&ios->kref); | |
214 | ||
215 | for (i = 0; i < ios->numdevs; i++) { | |
216 | struct osd_request *or = ios->per_dev[i].or; | |
217 | if (unlikely(!or)) | |
218 | continue; | |
219 | ||
220 | kref_get(&ios->kref); | |
221 | osd_execute_request_async(or, _done_io, ios); | |
222 | } | |
223 | ||
224 | kref_put(&ios->kref, _last_io); | |
225 | ret = 0; | |
226 | ||
227 | if (sync) { | |
228 | wait_for_completion(&wait); | |
8ff660ab | 229 | ret = ore_check_io(ios, NULL); |
06886a5a | 230 | } |
b14f8ab2 BH |
231 | return ret; |
232 | } | |
233 | ||
22ddc556 BH |
234 | static void _clear_bio(struct bio *bio) |
235 | { | |
236 | struct bio_vec *bv; | |
237 | unsigned i; | |
238 | ||
239 | __bio_for_each_segment(bv, bio, i, 0) { | |
240 | unsigned this_count = bv->bv_len; | |
241 | ||
242 | if (likely(PAGE_SIZE == this_count)) | |
243 | clear_highpage(bv->bv_page); | |
244 | else | |
245 | zero_user(bv->bv_page, bv->bv_offset, this_count); | |
246 | } | |
247 | } | |
248 | ||
8ff660ab | 249 | int ore_check_io(struct ore_io_state *ios, u64 *resid) |
b14f8ab2 | 250 | { |
06886a5a BH |
251 | enum osd_err_priority acumulated_osd_err = 0; |
252 | int acumulated_lin_err = 0; | |
253 | int i; | |
b14f8ab2 | 254 | |
06886a5a BH |
255 | for (i = 0; i < ios->numdevs; i++) { |
256 | struct osd_sense_info osi; | |
22ddc556 BH |
257 | struct osd_request *or = ios->per_dev[i].or; |
258 | int ret; | |
259 | ||
260 | if (unlikely(!or)) | |
261 | continue; | |
06886a5a | 262 | |
22ddc556 | 263 | ret = osd_req_decode_sense(or, &osi); |
06886a5a BH |
264 | if (likely(!ret)) |
265 | continue; | |
266 | ||
22ddc556 BH |
267 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { |
268 | /* start read offset passed endof file */ | |
269 | _clear_bio(ios->per_dev[i].bio); | |
8ff660ab | 270 | ORE_DBGMSG("start read offset passed end of file " |
22ddc556 | 271 | "offset=0x%llx, length=0x%llx\n", |
5d952b83 BH |
272 | _LLU(ios->per_dev[i].offset), |
273 | _LLU(ios->per_dev[i].length)); | |
22ddc556 BH |
274 | |
275 | continue; /* we recovered */ | |
06886a5a BH |
276 | } |
277 | ||
278 | if (osi.osd_err_pri >= acumulated_osd_err) { | |
279 | acumulated_osd_err = osi.osd_err_pri; | |
280 | acumulated_lin_err = ret; | |
281 | } | |
282 | } | |
283 | ||
284 | /* TODO: raid specific residual calculations */ | |
285 | if (resid) { | |
286 | if (likely(!acumulated_lin_err)) | |
287 | *resid = 0; | |
288 | else | |
289 | *resid = ios->length; | |
290 | } | |
291 | ||
292 | return acumulated_lin_err; | |
293 | } | |
cf283ade | 294 | EXPORT_SYMBOL(ore_check_io); |
06886a5a | 295 | |
b367e78b BH |
296 | /* |
297 | * L - logical offset into the file | |
298 | * | |
50a76fd3 | 299 | * U - The number of bytes in a stripe within a group |
b367e78b BH |
300 | * |
301 | * U = stripe_unit * group_width | |
302 | * | |
50a76fd3 BH |
303 | * T - The number of bytes striped within a group of component objects |
304 | * (before advancing to the next group) | |
b367e78b | 305 | * |
50a76fd3 BH |
306 | * T = stripe_unit * group_width * group_depth |
307 | * | |
308 | * S - The number of bytes striped across all component objects | |
309 | * before the pattern repeats | |
310 | * | |
311 | * S = stripe_unit * group_width * group_depth * group_count | |
312 | * | |
313 | * M - The "major" (i.e., across all components) stripe number | |
314 | * | |
315 | * M = L / S | |
316 | * | |
317 | * G - Counts the groups from the beginning of the major stripe | |
318 | * | |
319 | * G = (L - (M * S)) / T [or (L % S) / T] | |
320 | * | |
321 | * H - The byte offset within the group | |
322 | * | |
323 | * H = (L - (M * S)) % T [or (L % S) % T] | |
324 | * | |
325 | * N - The "minor" (i.e., across the group) stripe number | |
326 | * | |
327 | * N = H / U | |
b367e78b BH |
328 | * |
329 | * C - The component index corresponding to L | |
330 | * | |
50a76fd3 BH |
331 | * C = (H - (N * U)) / stripe_unit + G * group_width |
332 | * [or (L % U) / stripe_unit + G * group_width] | |
b367e78b BH |
333 | * |
334 | * O - The component offset corresponding to L | |
335 | * | |
50a76fd3 | 336 | * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit |
b367e78b | 337 | */ |
eb507bc1 BH |
338 | static void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, |
339 | struct ore_striping_info *si) | |
5d952b83 | 340 | { |
16f75bb3 BH |
341 | u32 stripe_unit = layout->stripe_unit; |
342 | u32 group_width = layout->group_width; | |
343 | u64 group_depth = layout->group_depth; | |
50a76fd3 | 344 | |
b367e78b | 345 | u32 U = stripe_unit * group_width; |
50a76fd3 | 346 | u64 T = U * group_depth; |
16f75bb3 | 347 | u64 S = T * layout->group_count; |
50a76fd3 BH |
348 | u64 M = div64_u64(file_offset, S); |
349 | ||
350 | /* | |
351 | G = (L - (M * S)) / T | |
352 | H = (L - (M * S)) % T | |
353 | */ | |
354 | u64 LmodS = file_offset - M * S; | |
355 | u32 G = div64_u64(LmodS, T); | |
356 | u64 H = LmodS - G * T; | |
357 | ||
358 | u32 N = div_u64(H, U); | |
359 | ||
360 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | |
361 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | |
16f75bb3 | 362 | si->dev *= layout->mirrors_p1; |
b367e78b | 363 | |
50a76fd3 | 364 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); |
5d952b83 | 365 | |
50a76fd3 BH |
366 | si->obj_offset = si->unit_off + (N * stripe_unit) + |
367 | (M * group_depth * stripe_unit); | |
368 | ||
369 | si->group_length = T - H; | |
16f75bb3 | 370 | si->M = M; |
5d952b83 BH |
371 | } |
372 | ||
8ff660ab BH |
373 | static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, |
374 | unsigned pgbase, struct ore_per_dev_state *per_dev, | |
86093aaf | 375 | int cur_len) |
5d952b83 | 376 | { |
86093aaf | 377 | unsigned pg = *cur_pg; |
5d952b83 | 378 | struct request_queue *q = |
9e9db456 | 379 | osd_request_queue(_ios_od(ios, per_dev->dev)); |
5d952b83 BH |
380 | |
381 | per_dev->length += cur_len; | |
382 | ||
383 | if (per_dev->bio == NULL) { | |
384 | unsigned pages_in_stripe = ios->layout->group_width * | |
385 | (ios->layout->stripe_unit / PAGE_SIZE); | |
86093aaf | 386 | unsigned bio_size = (ios->nr_pages + pages_in_stripe) / |
5d952b83 BH |
387 | ios->layout->group_width; |
388 | ||
389 | per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); | |
390 | if (unlikely(!per_dev->bio)) { | |
8ff660ab | 391 | ORE_DBGMSG("Failed to allocate BIO size=%u\n", |
5d952b83 BH |
392 | bio_size); |
393 | return -ENOMEM; | |
394 | } | |
395 | } | |
396 | ||
397 | while (cur_len > 0) { | |
86093aaf BH |
398 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); |
399 | unsigned added_len; | |
5d952b83 | 400 | |
86093aaf BH |
401 | BUG_ON(ios->nr_pages <= pg); |
402 | cur_len -= pglen; | |
5d952b83 | 403 | |
86093aaf BH |
404 | added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg], |
405 | pglen, pgbase); | |
406 | if (unlikely(pglen != added_len)) | |
5d952b83 | 407 | return -ENOMEM; |
86093aaf BH |
408 | pgbase = 0; |
409 | ++pg; | |
5d952b83 BH |
410 | } |
411 | BUG_ON(cur_len); | |
412 | ||
86093aaf | 413 | *cur_pg = pg; |
5d952b83 BH |
414 | return 0; |
415 | } | |
416 | ||
98260754 | 417 | static int _prepare_for_striping(struct ore_io_state *ios) |
5d952b83 | 418 | { |
98260754 | 419 | struct ore_striping_info *si = &ios->si; |
5d952b83 | 420 | unsigned stripe_unit = ios->layout->stripe_unit; |
b367e78b | 421 | unsigned mirrors_p1 = ios->layout->mirrors_p1; |
50a76fd3 | 422 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; |
b367e78b | 423 | unsigned dev = si->dev; |
50a76fd3 | 424 | unsigned first_dev = dev - (dev % devs_in_group); |
50a76fd3 | 425 | unsigned cur_pg = ios->pages_consumed; |
98260754 | 426 | u64 length = ios->length; |
86093aaf | 427 | int ret = 0; |
5d952b83 | 428 | |
98260754 | 429 | if (!ios->pages) { |
98260754 BH |
430 | ios->numdevs = ios->layout->mirrors_p1; |
431 | return 0; | |
432 | } | |
433 | ||
434 | BUG_ON(length > si->group_length); | |
435 | ||
5d952b83 | 436 | while (length) { |
b916c5cd BH |
437 | unsigned comp = dev - first_dev; |
438 | struct ore_per_dev_state *per_dev = &ios->per_dev[comp]; | |
b367e78b | 439 | unsigned cur_len, page_off = 0; |
5d952b83 BH |
440 | |
441 | if (!per_dev->length) { | |
b367e78b BH |
442 | per_dev->dev = dev; |
443 | if (dev < si->dev) { | |
444 | per_dev->offset = si->obj_offset + stripe_unit - | |
445 | si->unit_off; | |
446 | cur_len = stripe_unit; | |
447 | } else if (dev == si->dev) { | |
448 | per_dev->offset = si->obj_offset; | |
449 | cur_len = stripe_unit - si->unit_off; | |
450 | page_off = si->unit_off & ~PAGE_MASK; | |
451 | BUG_ON(page_off && (page_off != ios->pgbase)); | |
452 | } else { /* dev > si->dev */ | |
453 | per_dev->offset = si->obj_offset - si->unit_off; | |
454 | cur_len = stripe_unit; | |
455 | } | |
5d952b83 | 456 | } else { |
b367e78b | 457 | cur_len = stripe_unit; |
5d952b83 | 458 | } |
b367e78b BH |
459 | if (cur_len >= length) |
460 | cur_len = length; | |
5d952b83 | 461 | |
86093aaf BH |
462 | ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, |
463 | cur_len); | |
5d952b83 BH |
464 | if (unlikely(ret)) |
465 | goto out; | |
466 | ||
6e31609b BH |
467 | dev += mirrors_p1; |
468 | dev = (dev % devs_in_group) + first_dev; | |
5d952b83 BH |
469 | |
470 | length -= cur_len; | |
471 | } | |
472 | out: | |
b916c5cd | 473 | ios->numdevs = devs_in_group; |
50a76fd3 | 474 | ios->pages_consumed = cur_pg; |
5d952b83 BH |
475 | return ret; |
476 | } | |
477 | ||
8ff660ab | 478 | int ore_create(struct ore_io_state *ios) |
06886a5a BH |
479 | { |
480 | int i, ret; | |
481 | ||
5bf696da | 482 | for (i = 0; i < ios->oc->numdevs; i++) { |
06886a5a BH |
483 | struct osd_request *or; |
484 | ||
9e9db456 | 485 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); |
06886a5a | 486 | if (unlikely(!or)) { |
8ff660ab | 487 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
06886a5a BH |
488 | ret = -ENOMEM; |
489 | goto out; | |
490 | } | |
491 | ios->per_dev[i].or = or; | |
492 | ios->numdevs++; | |
493 | ||
9e9db456 | 494 | osd_req_create_object(or, _ios_obj(ios, i)); |
06886a5a | 495 | } |
8ff660ab | 496 | ret = ore_io_execute(ios); |
06886a5a BH |
497 | |
498 | out: | |
499 | return ret; | |
500 | } | |
cf283ade | 501 | EXPORT_SYMBOL(ore_create); |
06886a5a | 502 | |
8ff660ab | 503 | int ore_remove(struct ore_io_state *ios) |
06886a5a BH |
504 | { |
505 | int i, ret; | |
506 | ||
5bf696da | 507 | for (i = 0; i < ios->oc->numdevs; i++) { |
06886a5a BH |
508 | struct osd_request *or; |
509 | ||
9e9db456 | 510 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); |
06886a5a | 511 | if (unlikely(!or)) { |
8ff660ab | 512 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
06886a5a BH |
513 | ret = -ENOMEM; |
514 | goto out; | |
515 | } | |
516 | ios->per_dev[i].or = or; | |
517 | ios->numdevs++; | |
518 | ||
9e9db456 | 519 | osd_req_remove_object(or, _ios_obj(ios, i)); |
06886a5a | 520 | } |
8ff660ab | 521 | ret = ore_io_execute(ios); |
06886a5a BH |
522 | |
523 | out: | |
524 | return ret; | |
525 | } | |
cf283ade | 526 | EXPORT_SYMBOL(ore_remove); |
06886a5a | 527 | |
8ff660ab | 528 | static int _write_mirror(struct ore_io_state *ios, int cur_comp) |
06886a5a | 529 | { |
8ff660ab | 530 | struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp]; |
5d952b83 BH |
531 | unsigned dev = ios->per_dev[cur_comp].dev; |
532 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; | |
533 | int ret = 0; | |
06886a5a | 534 | |
50a76fd3 BH |
535 | if (ios->pages && !master_dev->length) |
536 | return 0; /* Just an empty slot */ | |
537 | ||
5d952b83 | 538 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
8ff660ab | 539 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
06886a5a BH |
540 | struct osd_request *or; |
541 | ||
9e9db456 | 542 | or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); |
06886a5a | 543 | if (unlikely(!or)) { |
8ff660ab | 544 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
06886a5a BH |
545 | ret = -ENOMEM; |
546 | goto out; | |
547 | } | |
5d952b83 | 548 | per_dev->or = or; |
06886a5a | 549 | |
86093aaf | 550 | if (ios->pages) { |
06886a5a BH |
551 | struct bio *bio; |
552 | ||
5d952b83 | 553 | if (per_dev != master_dev) { |
04dc1e88 | 554 | bio = bio_kmalloc(GFP_KERNEL, |
5d952b83 | 555 | master_dev->bio->bi_max_vecs); |
04dc1e88 | 556 | if (unlikely(!bio)) { |
8ff660ab | 557 | ORE_DBGMSG( |
426d3107 | 558 | "Failed to allocate BIO size=%u\n", |
5d952b83 | 559 | master_dev->bio->bi_max_vecs); |
04dc1e88 BH |
560 | ret = -ENOMEM; |
561 | goto out; | |
562 | } | |
563 | ||
5d952b83 | 564 | __bio_clone(bio, master_dev->bio); |
04dc1e88 BH |
565 | bio->bi_bdev = NULL; |
566 | bio->bi_next = NULL; | |
6851a5e5 | 567 | per_dev->offset = master_dev->offset; |
5d952b83 BH |
568 | per_dev->length = master_dev->length; |
569 | per_dev->bio = bio; | |
570 | per_dev->dev = dev; | |
04dc1e88 | 571 | } else { |
5d952b83 BH |
572 | bio = master_dev->bio; |
573 | /* FIXME: bio_set_dir() */ | |
7b6d91da | 574 | bio->bi_rw |= REQ_WRITE; |
04dc1e88 | 575 | } |
06886a5a | 576 | |
9e9db456 BH |
577 | osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, |
578 | bio, per_dev->length); | |
8ff660ab | 579 | ORE_DBGMSG("write(0x%llx) offset=0x%llx " |
34ce4e7c | 580 | "length=0x%llx dev=%d\n", |
9e9db456 BH |
581 | _LLU(_ios_obj(ios, dev)->id), |
582 | _LLU(per_dev->offset), | |
5d952b83 | 583 | _LLU(per_dev->length), dev); |
06886a5a | 584 | } else if (ios->kern_buff) { |
6851a5e5 BH |
585 | per_dev->offset = ios->si.obj_offset; |
586 | per_dev->dev = ios->si.dev + dev; | |
587 | ||
588 | /* no cross device without page array */ | |
589 | BUG_ON((ios->layout->group_width > 1) && | |
590 | (ios->si.unit_off + ios->length > | |
591 | ios->layout->stripe_unit)); | |
592 | ||
593 | ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev), | |
9e9db456 BH |
594 | per_dev->offset, |
595 | ios->kern_buff, ios->length); | |
5d952b83 BH |
596 | if (unlikely(ret)) |
597 | goto out; | |
8ff660ab | 598 | ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " |
34ce4e7c | 599 | "length=0x%llx dev=%d\n", |
9e9db456 BH |
600 | _LLU(_ios_obj(ios, dev)->id), |
601 | _LLU(per_dev->offset), | |
6851a5e5 | 602 | _LLU(ios->length), per_dev->dev); |
06886a5a | 603 | } else { |
9e9db456 | 604 | osd_req_set_attributes(or, _ios_obj(ios, dev)); |
8ff660ab | 605 | ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", |
9e9db456 BH |
606 | _LLU(_ios_obj(ios, dev)->id), |
607 | ios->out_attr_len, dev); | |
06886a5a BH |
608 | } |
609 | ||
610 | if (ios->out_attr) | |
611 | osd_req_add_set_attr_list(or, ios->out_attr, | |
612 | ios->out_attr_len); | |
613 | ||
614 | if (ios->in_attr) | |
615 | osd_req_add_get_attr_list(or, ios->in_attr, | |
616 | ios->in_attr_len); | |
b14f8ab2 | 617 | } |
06886a5a BH |
618 | |
619 | out: | |
620 | return ret; | |
621 | } | |
622 | ||
8ff660ab | 623 | int ore_write(struct ore_io_state *ios) |
5d952b83 BH |
624 | { |
625 | int i; | |
626 | int ret; | |
627 | ||
628 | ret = _prepare_for_striping(ios); | |
629 | if (unlikely(ret)) | |
630 | return ret; | |
631 | ||
632 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | |
8ff660ab | 633 | ret = _write_mirror(ios, i); |
5d952b83 BH |
634 | if (unlikely(ret)) |
635 | return ret; | |
636 | } | |
637 | ||
8ff660ab | 638 | ret = ore_io_execute(ios); |
5d952b83 BH |
639 | return ret; |
640 | } | |
cf283ade | 641 | EXPORT_SYMBOL(ore_write); |
5d952b83 | 642 | |
8ff660ab | 643 | static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) |
06886a5a | 644 | { |
46f4d973 | 645 | struct osd_request *or; |
8ff660ab | 646 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
9e9db456 BH |
647 | struct osd_obj_id *obj = _ios_obj(ios, cur_comp); |
648 | unsigned first_dev = (unsigned)obj->id; | |
06886a5a | 649 | |
50a76fd3 BH |
650 | if (ios->pages && !per_dev->length) |
651 | return 0; /* Just an empty slot */ | |
652 | ||
5d952b83 | 653 | first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; |
9e9db456 | 654 | or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); |
46f4d973 | 655 | if (unlikely(!or)) { |
8ff660ab | 656 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
46f4d973 BH |
657 | return -ENOMEM; |
658 | } | |
659 | per_dev->or = or; | |
46f4d973 | 660 | |
86093aaf | 661 | if (ios->pages) { |
9e9db456 | 662 | osd_req_read(or, obj, per_dev->offset, |
5d952b83 | 663 | per_dev->bio, per_dev->length); |
8ff660ab | 664 | ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" |
9e9db456 | 665 | " dev=%d\n", _LLU(obj->id), |
5d952b83 | 666 | _LLU(per_dev->offset), _LLU(per_dev->length), |
46f4d973 | 667 | first_dev); |
46f4d973 | 668 | } else { |
6851a5e5 BH |
669 | BUG_ON(ios->kern_buff); |
670 | ||
9e9db456 | 671 | osd_req_get_attributes(or, obj); |
8ff660ab | 672 | ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", |
9e9db456 BH |
673 | _LLU(obj->id), |
674 | ios->in_attr_len, first_dev); | |
46f4d973 | 675 | } |
46f4d973 BH |
676 | if (ios->out_attr) |
677 | osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); | |
b14f8ab2 | 678 | |
46f4d973 BH |
679 | if (ios->in_attr) |
680 | osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len); | |
b14f8ab2 | 681 | |
5d952b83 BH |
682 | return 0; |
683 | } | |
684 | ||
8ff660ab | 685 | int ore_read(struct ore_io_state *ios) |
5d952b83 BH |
686 | { |
687 | int i; | |
688 | int ret; | |
689 | ||
690 | ret = _prepare_for_striping(ios); | |
691 | if (unlikely(ret)) | |
692 | return ret; | |
693 | ||
694 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | |
8ff660ab | 695 | ret = _read_mirror(ios, i); |
5d952b83 BH |
696 | if (unlikely(ret)) |
697 | return ret; | |
698 | } | |
699 | ||
8ff660ab | 700 | ret = ore_io_execute(ios); |
5d952b83 | 701 | return ret; |
b14f8ab2 | 702 | } |
cf283ade | 703 | EXPORT_SYMBOL(ore_read); |
b14f8ab2 | 704 | |
8ff660ab | 705 | int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) |
b14f8ab2 BH |
706 | { |
707 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | |
708 | void *iter = NULL; | |
709 | int nelem; | |
710 | ||
711 | do { | |
712 | nelem = 1; | |
06886a5a BH |
713 | osd_req_decode_get_attr_list(ios->per_dev[0].or, |
714 | &cur_attr, &nelem, &iter); | |
b14f8ab2 BH |
715 | if ((cur_attr.attr_page == attr->attr_page) && |
716 | (cur_attr.attr_id == attr->attr_id)) { | |
717 | attr->len = cur_attr.len; | |
718 | attr->val_ptr = cur_attr.val_ptr; | |
719 | return 0; | |
720 | } | |
721 | } while (iter); | |
722 | ||
723 | return -EIO; | |
724 | } | |
cf283ade | 725 | EXPORT_SYMBOL(extract_attr_from_ios); |
06886a5a | 726 | |
8ff660ab | 727 | static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, |
5d952b83 BH |
728 | struct osd_attr *attr) |
729 | { | |
730 | int last_comp = cur_comp + ios->layout->mirrors_p1; | |
731 | ||
732 | for (; cur_comp < last_comp; ++cur_comp) { | |
8ff660ab | 733 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
5d952b83 BH |
734 | struct osd_request *or; |
735 | ||
9e9db456 | 736 | or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); |
5d952b83 | 737 | if (unlikely(!or)) { |
8ff660ab | 738 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
5d952b83 BH |
739 | return -ENOMEM; |
740 | } | |
741 | per_dev->or = or; | |
742 | ||
9e9db456 | 743 | osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); |
5d952b83 BH |
744 | osd_req_add_set_attr_list(or, attr, 1); |
745 | } | |
746 | ||
747 | return 0; | |
748 | } | |
749 | ||
16f75bb3 | 750 | struct _trunc_info { |
eb507bc1 | 751 | struct ore_striping_info si; |
16f75bb3 BH |
752 | u64 prev_group_obj_off; |
753 | u64 next_group_obj_off; | |
754 | ||
755 | unsigned first_group_dev; | |
756 | unsigned nex_group_dev; | |
16f75bb3 BH |
757 | }; |
758 | ||
1958c7c2 HS |
759 | static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, |
760 | struct _trunc_info *ti) | |
16f75bb3 BH |
761 | { |
762 | unsigned stripe_unit = layout->stripe_unit; | |
763 | ||
eb507bc1 | 764 | ore_calc_stripe_info(layout, file_offset, &ti->si); |
16f75bb3 BH |
765 | |
766 | ti->prev_group_obj_off = ti->si.M * stripe_unit; | |
767 | ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0; | |
768 | ||
769 | ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); | |
770 | ti->nex_group_dev = ti->first_group_dev + layout->group_width; | |
16f75bb3 BH |
771 | } |
772 | ||
5bf696da | 773 | int ore_truncate(struct ore_layout *layout, struct ore_components *oc, |
9e9db456 | 774 | u64 size) |
06886a5a | 775 | { |
8ff660ab | 776 | struct ore_io_state *ios; |
5d952b83 BH |
777 | struct exofs_trunc_attr { |
778 | struct osd_attr attr; | |
779 | __be64 newsize; | |
780 | } *size_attrs; | |
16f75bb3 | 781 | struct _trunc_info ti; |
06886a5a BH |
782 | int i, ret; |
783 | ||
5bf696da | 784 | ret = ore_get_io_state(layout, oc, &ios); |
5d952b83 BH |
785 | if (unlikely(ret)) |
786 | return ret; | |
787 | ||
16f75bb3 BH |
788 | _calc_trunk_info(ios->layout, size, &ti); |
789 | ||
b916c5cd | 790 | size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs), |
5d952b83 BH |
791 | GFP_KERNEL); |
792 | if (unlikely(!size_attrs)) { | |
793 | ret = -ENOMEM; | |
794 | goto out; | |
795 | } | |
06886a5a | 796 | |
5bf696da | 797 | ios->numdevs = ios->oc->numdevs; |
06886a5a | 798 | |
b916c5cd | 799 | for (i = 0; i < ios->numdevs; ++i) { |
5d952b83 BH |
800 | struct exofs_trunc_attr *size_attr = &size_attrs[i]; |
801 | u64 obj_size; | |
06886a5a | 802 | |
16f75bb3 BH |
803 | if (i < ti.first_group_dev) |
804 | obj_size = ti.prev_group_obj_off; | |
805 | else if (i >= ti.nex_group_dev) | |
806 | obj_size = ti.next_group_obj_off; | |
807 | else if (i < ti.si.dev) /* dev within this group */ | |
808 | obj_size = ti.si.obj_offset + | |
809 | ios->layout->stripe_unit - ti.si.unit_off; | |
810 | else if (i == ti.si.dev) | |
811 | obj_size = ti.si.obj_offset; | |
812 | else /* i > ti.dev */ | |
813 | obj_size = ti.si.obj_offset - ti.si.unit_off; | |
06886a5a | 814 | |
5d952b83 BH |
815 | size_attr->newsize = cpu_to_be64(obj_size); |
816 | size_attr->attr = g_attr_logical_length; | |
817 | size_attr->attr.val_ptr = &size_attr->newsize; | |
818 | ||
8ff660ab | 819 | ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", |
5bf696da | 820 | _LLU(oc->comps->obj.id), _LLU(obj_size), i); |
5d952b83 BH |
821 | ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, |
822 | &size_attr->attr); | |
823 | if (unlikely(ret)) | |
824 | goto out; | |
06886a5a | 825 | } |
8ff660ab | 826 | ret = ore_io_execute(ios); |
06886a5a BH |
827 | |
828 | out: | |
5d952b83 | 829 | kfree(size_attrs); |
8ff660ab | 830 | ore_put_io_state(ios); |
06886a5a BH |
831 | return ret; |
832 | } | |
cf283ade | 833 | EXPORT_SYMBOL(ore_truncate); |
85e44df4 BH |
834 | |
835 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | |
836 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | |
cf283ade | 837 | EXPORT_SYMBOL(g_attr_logical_length); |