Commit | Line | Data |
---|---|---|
b14f8ab2 BH |
1 | /* |
2 | * Copyright (C) 2005, 2006 | |
27d2e149 | 3 | * Avishay Traeger (avishay@gmail.com) |
b14f8ab2 BH |
4 | * Copyright (C) 2008, 2009 |
5 | * Boaz Harrosh <bharrosh@panasas.com> | |
6 | * | |
7 | * This file is part of exofs. | |
8 | * | |
9 | * exofs is free software; you can redistribute it and/or modify | |
10 | * it under the terms of the GNU General Public License as published by | |
11 | * the Free Software Foundation. Since it is based on ext2, and the only | |
12 | * valid version of GPL for the Linux kernel is version 2, the only valid | |
13 | * version of GPL for exofs is version 2. | |
14 | * | |
15 | * exofs is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | * GNU General Public License for more details. | |
19 | * | |
20 | * You should have received a copy of the GNU General Public License | |
21 | * along with exofs; if not, write to the Free Software | |
22 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
23 | */ | |
24 | ||
5a0e3ad6 | 25 | #include <linux/slab.h> |
5d952b83 | 26 | #include <asm/div64.h> |
b14f8ab2 | 27 | |
8ff660ab | 28 | #include <scsi/osd_ore.h> |
b14f8ab2 | 29 | |
/* Unconditional error log; every message is prefixed with "ore: ". */
#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)

/*
 * Debug log. With CONFIG_EXOFS_DEBUG the message is printed together with
 * the calling function and line number. Without it the printk() sits behind
 * "if (0)", so the arguments are still compiled (and type-checked) but
 * nothing is ever emitted.
 */
#ifdef CONFIG_EXOFS_DEBUG
#define ORE_DBGMSG(fmt, a...) \
	printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
#define ORE_DBGMSG(fmt, a...) \
	do { if (0) printk(fmt, ##a); } while (0)
#endif

/*
 * u64 has problems with printk; this casts it to unsigned long long.
 * The whole expansion is parenthesized so the macro is always a single
 * operand, whatever expression it is embedded in.
 */
#define _LLU(x) ((unsigned long long)(x))

/* Second, more verbose debug level; compiled out (arguments are dropped). */
#define ORE_DBGMSG2(M...) do {} while (0)
/* #define ORE_DBGMSG2 ORE_DBGMSG */
45 | ||
cf283ade BH |
46 | MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>"); |
47 | MODULE_DESCRIPTION("Objects Raid Engine ore.ko"); | |
48 | MODULE_LICENSE("GPL"); | |
49 | ||
b916c5cd BH |
50 | static void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, |
51 | struct ore_striping_info *si); | |
52 | ||
8ff660ab | 53 | static u8 *_ios_cred(struct ore_io_state *ios, unsigned index) |
9e9db456 | 54 | { |
5bf696da | 55 | return ios->oc->comps[index & ios->oc->single_comp].cred; |
9e9db456 BH |
56 | } |
57 | ||
8ff660ab | 58 | static struct osd_obj_id *_ios_obj(struct ore_io_state *ios, unsigned index) |
9e9db456 | 59 | { |
5bf696da | 60 | return &ios->oc->comps[index & ios->oc->single_comp].obj; |
9e9db456 BH |
61 | } |
62 | ||
8ff660ab | 63 | static struct osd_dev *_ios_od(struct ore_io_state *ios, unsigned index) |
9e9db456 | 64 | { |
d866d875 | 65 | return ore_comp_dev(ios->oc, index); |
9e9db456 BH |
66 | } |
67 | ||
b916c5cd BH |
68 | static int _get_io_state(struct ore_layout *layout, |
69 | struct ore_components *oc, unsigned numdevs, | |
70 | struct ore_io_state **pios) | |
b14f8ab2 | 71 | { |
8ff660ab | 72 | struct ore_io_state *ios; |
06886a5a BH |
73 | |
74 | /*TODO: Maybe use kmem_cach per sbi of size | |
45d3abcb | 75 | * exofs_io_state_size(layout->s_numdevs) |
06886a5a | 76 | */ |
b916c5cd | 77 | ios = kzalloc(ore_io_state_size(numdevs), GFP_KERNEL); |
06886a5a | 78 | if (unlikely(!ios)) { |
8ff660ab | 79 | ORE_DBGMSG("Failed kzalloc bytes=%d\n", |
b916c5cd | 80 | ore_io_state_size(numdevs)); |
06886a5a BH |
81 | *pios = NULL; |
82 | return -ENOMEM; | |
83 | } | |
84 | ||
45d3abcb | 85 | ios->layout = layout; |
5bf696da | 86 | ios->oc = oc; |
b916c5cd BH |
87 | *pios = ios; |
88 | return 0; | |
89 | } | |
90 | ||
91 | /* Allocate an io_state for only a single group of devices | |
92 | * | |
93 | * If a user needs to call ore_read/write() this version must be used becase it | |
94 | * allocates extra stuff for striping and raid. | |
95 | * The ore might decide to only IO less then @length bytes do to alignmets | |
96 | * and constrains as follows: | |
97 | * - The IO cannot cross group boundary. | |
98 | * - In raid5/6 The end of the IO must align at end of a stripe eg. | |
99 | * (@offset + @length) % strip_size == 0. Or the complete range is within a | |
100 | * single stripe. | |
101 | * - Memory condition only permitted a shorter IO. (A user can use @length=~0 | |
102 | * And check the returned ios->length for max_io_size.) | |
103 | * | |
104 | * The caller must check returned ios->length (and/or ios->nr_pages) and | |
105 | * re-issue these pages that fall outside of ios->length | |
106 | */ | |
107 | int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, | |
108 | bool is_reading, u64 offset, u64 length, | |
109 | struct ore_io_state **pios) | |
110 | { | |
111 | struct ore_io_state *ios; | |
112 | unsigned numdevs = layout->group_width * layout->mirrors_p1; | |
113 | int ret; | |
114 | ||
115 | ret = _get_io_state(layout, oc, numdevs, pios); | |
116 | if (unlikely(ret)) | |
117 | return ret; | |
118 | ||
119 | ios = *pios; | |
e1042ba0 | 120 | ios->reading = is_reading; |
b916c5cd BH |
121 | ios->offset = offset; |
122 | ||
123 | if (length) { | |
124 | struct ore_striping_info si; | |
125 | ||
126 | ore_calc_stripe_info(layout, offset, &si); | |
127 | ios->length = (length <= si.group_length) ? length : | |
128 | si.group_length; | |
129 | ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE; | |
130 | } | |
e1042ba0 | 131 | |
06886a5a | 132 | return 0; |
b14f8ab2 | 133 | } |
cf283ade | 134 | EXPORT_SYMBOL(ore_get_rw_state); |
b14f8ab2 | 135 | |
b916c5cd BH |
136 | /* Allocate an io_state for all the devices in the comps array |
137 | * | |
138 | * This version of io_state allocation is used mostly by create/remove | |
139 | * and trunc where we currently need all the devices. The only wastful | |
140 | * bit is the read/write_attributes with no IO. Those sites should | |
141 | * be converted to use ore_get_rw_state() with length=0 | |
142 | */ | |
5bf696da | 143 | int ore_get_io_state(struct ore_layout *layout, struct ore_components *oc, |
b916c5cd | 144 | struct ore_io_state **pios) |
e1042ba0 | 145 | { |
b916c5cd | 146 | return _get_io_state(layout, oc, oc->numdevs, pios); |
e1042ba0 | 147 | } |
cf283ade | 148 | EXPORT_SYMBOL(ore_get_io_state); |
e1042ba0 | 149 | |
8ff660ab | 150 | void ore_put_io_state(struct ore_io_state *ios) |
b14f8ab2 | 151 | { |
06886a5a BH |
152 | if (ios) { |
153 | unsigned i; | |
b14f8ab2 | 154 | |
06886a5a | 155 | for (i = 0; i < ios->numdevs; i++) { |
8ff660ab | 156 | struct ore_per_dev_state *per_dev = &ios->per_dev[i]; |
06886a5a BH |
157 | |
158 | if (per_dev->or) | |
159 | osd_end_request(per_dev->or); | |
160 | if (per_dev->bio) | |
161 | bio_put(per_dev->bio); | |
162 | } | |
163 | ||
164 | kfree(ios); | |
b14f8ab2 | 165 | } |
06886a5a | 166 | } |
cf283ade | 167 | EXPORT_SYMBOL(ore_put_io_state); |
b14f8ab2 | 168 | |
/*
 * "done" callback used for synchronous execution: @p is the completion the
 * submitter is waiting on. @ios is not used.
 */
static void _sync_done(struct ore_io_state *ios, void *p)
{
	complete((struct completion *)p);
}
175 | ||
176 | static void _last_io(struct kref *kref) | |
177 | { | |
8ff660ab BH |
178 | struct ore_io_state *ios = container_of( |
179 | kref, struct ore_io_state, kref); | |
06886a5a BH |
180 | |
181 | ios->done(ios, ios->private); | |
182 | } | |
183 | ||
184 | static void _done_io(struct osd_request *or, void *p) | |
185 | { | |
8ff660ab | 186 | struct ore_io_state *ios = p; |
06886a5a BH |
187 | |
188 | kref_put(&ios->kref, _last_io); | |
189 | } | |
190 | ||
8ff660ab | 191 | static int ore_io_execute(struct ore_io_state *ios) |
06886a5a BH |
192 | { |
193 | DECLARE_COMPLETION_ONSTACK(wait); | |
194 | bool sync = (ios->done == NULL); | |
195 | int i, ret; | |
196 | ||
197 | if (sync) { | |
198 | ios->done = _sync_done; | |
199 | ios->private = &wait; | |
200 | } | |
201 | ||
202 | for (i = 0; i < ios->numdevs; i++) { | |
203 | struct osd_request *or = ios->per_dev[i].or; | |
204 | if (unlikely(!or)) | |
205 | continue; | |
206 | ||
9e9db456 | 207 | ret = osd_finalize_request(or, 0, _ios_cred(ios, i), NULL); |
06886a5a | 208 | if (unlikely(ret)) { |
8ff660ab | 209 | ORE_DBGMSG("Failed to osd_finalize_request() => %d\n", |
06886a5a BH |
210 | ret); |
211 | return ret; | |
212 | } | |
213 | } | |
214 | ||
215 | kref_init(&ios->kref); | |
216 | ||
217 | for (i = 0; i < ios->numdevs; i++) { | |
218 | struct osd_request *or = ios->per_dev[i].or; | |
219 | if (unlikely(!or)) | |
220 | continue; | |
221 | ||
222 | kref_get(&ios->kref); | |
223 | osd_execute_request_async(or, _done_io, ios); | |
224 | } | |
225 | ||
226 | kref_put(&ios->kref, _last_io); | |
227 | ret = 0; | |
228 | ||
229 | if (sync) { | |
230 | wait_for_completion(&wait); | |
8ff660ab | 231 | ret = ore_check_io(ios, NULL); |
06886a5a | 232 | } |
b14f8ab2 BH |
233 | return ret; |
234 | } | |
235 | ||
22ddc556 BH |
236 | static void _clear_bio(struct bio *bio) |
237 | { | |
238 | struct bio_vec *bv; | |
239 | unsigned i; | |
240 | ||
241 | __bio_for_each_segment(bv, bio, i, 0) { | |
242 | unsigned this_count = bv->bv_len; | |
243 | ||
244 | if (likely(PAGE_SIZE == this_count)) | |
245 | clear_highpage(bv->bv_page); | |
246 | else | |
247 | zero_user(bv->bv_page, bv->bv_offset, this_count); | |
248 | } | |
249 | } | |
250 | ||
8ff660ab | 251 | int ore_check_io(struct ore_io_state *ios, u64 *resid) |
b14f8ab2 | 252 | { |
06886a5a BH |
253 | enum osd_err_priority acumulated_osd_err = 0; |
254 | int acumulated_lin_err = 0; | |
255 | int i; | |
b14f8ab2 | 256 | |
06886a5a BH |
257 | for (i = 0; i < ios->numdevs; i++) { |
258 | struct osd_sense_info osi; | |
22ddc556 BH |
259 | struct osd_request *or = ios->per_dev[i].or; |
260 | int ret; | |
261 | ||
262 | if (unlikely(!or)) | |
263 | continue; | |
06886a5a | 264 | |
22ddc556 | 265 | ret = osd_req_decode_sense(or, &osi); |
06886a5a BH |
266 | if (likely(!ret)) |
267 | continue; | |
268 | ||
22ddc556 BH |
269 | if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { |
270 | /* start read offset passed endof file */ | |
271 | _clear_bio(ios->per_dev[i].bio); | |
8ff660ab | 272 | ORE_DBGMSG("start read offset passed end of file " |
22ddc556 | 273 | "offset=0x%llx, length=0x%llx\n", |
5d952b83 BH |
274 | _LLU(ios->per_dev[i].offset), |
275 | _LLU(ios->per_dev[i].length)); | |
22ddc556 BH |
276 | |
277 | continue; /* we recovered */ | |
06886a5a BH |
278 | } |
279 | ||
280 | if (osi.osd_err_pri >= acumulated_osd_err) { | |
281 | acumulated_osd_err = osi.osd_err_pri; | |
282 | acumulated_lin_err = ret; | |
283 | } | |
284 | } | |
285 | ||
286 | /* TODO: raid specific residual calculations */ | |
287 | if (resid) { | |
288 | if (likely(!acumulated_lin_err)) | |
289 | *resid = 0; | |
290 | else | |
291 | *resid = ios->length; | |
292 | } | |
293 | ||
294 | return acumulated_lin_err; | |
295 | } | |
cf283ade | 296 | EXPORT_SYMBOL(ore_check_io); |
06886a5a | 297 | |
b367e78b BH |
298 | /* |
299 | * L - logical offset into the file | |
300 | * | |
50a76fd3 | 301 | * U - The number of bytes in a stripe within a group |
b367e78b BH |
302 | * |
303 | * U = stripe_unit * group_width | |
304 | * | |
50a76fd3 BH |
305 | * T - The number of bytes striped within a group of component objects |
306 | * (before advancing to the next group) | |
b367e78b | 307 | * |
50a76fd3 BH |
308 | * T = stripe_unit * group_width * group_depth |
309 | * | |
310 | * S - The number of bytes striped across all component objects | |
311 | * before the pattern repeats | |
312 | * | |
313 | * S = stripe_unit * group_width * group_depth * group_count | |
314 | * | |
315 | * M - The "major" (i.e., across all components) stripe number | |
316 | * | |
317 | * M = L / S | |
318 | * | |
319 | * G - Counts the groups from the beginning of the major stripe | |
320 | * | |
321 | * G = (L - (M * S)) / T [or (L % S) / T] | |
322 | * | |
323 | * H - The byte offset within the group | |
324 | * | |
325 | * H = (L - (M * S)) % T [or (L % S) % T] | |
326 | * | |
327 | * N - The "minor" (i.e., across the group) stripe number | |
328 | * | |
329 | * N = H / U | |
b367e78b BH |
330 | * |
331 | * C - The component index coresponding to L | |
332 | * | |
50a76fd3 BH |
333 | * C = (H - (N * U)) / stripe_unit + G * group_width |
334 | * [or (L % U) / stripe_unit + G * group_width] | |
b367e78b BH |
335 | * |
336 | * O - The component offset coresponding to L | |
337 | * | |
50a76fd3 | 338 | * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit |
b367e78b | 339 | */ |
eb507bc1 BH |
340 | static void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, |
341 | struct ore_striping_info *si) | |
5d952b83 | 342 | { |
16f75bb3 BH |
343 | u32 stripe_unit = layout->stripe_unit; |
344 | u32 group_width = layout->group_width; | |
345 | u64 group_depth = layout->group_depth; | |
50a76fd3 | 346 | |
b367e78b | 347 | u32 U = stripe_unit * group_width; |
50a76fd3 | 348 | u64 T = U * group_depth; |
16f75bb3 | 349 | u64 S = T * layout->group_count; |
50a76fd3 BH |
350 | u64 M = div64_u64(file_offset, S); |
351 | ||
352 | /* | |
353 | G = (L - (M * S)) / T | |
354 | H = (L - (M * S)) % T | |
355 | */ | |
356 | u64 LmodS = file_offset - M * S; | |
357 | u32 G = div64_u64(LmodS, T); | |
358 | u64 H = LmodS - G * T; | |
359 | ||
360 | u32 N = div_u64(H, U); | |
361 | ||
362 | /* "H - (N * U)" is just "H % U" so it's bound to u32 */ | |
363 | si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; | |
16f75bb3 | 364 | si->dev *= layout->mirrors_p1; |
b367e78b | 365 | |
50a76fd3 | 366 | div_u64_rem(file_offset, stripe_unit, &si->unit_off); |
5d952b83 | 367 | |
50a76fd3 BH |
368 | si->obj_offset = si->unit_off + (N * stripe_unit) + |
369 | (M * group_depth * stripe_unit); | |
370 | ||
371 | si->group_length = T - H; | |
16f75bb3 | 372 | si->M = M; |
5d952b83 BH |
373 | } |
374 | ||
8ff660ab BH |
375 | static int _add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, |
376 | unsigned pgbase, struct ore_per_dev_state *per_dev, | |
86093aaf | 377 | int cur_len) |
5d952b83 | 378 | { |
86093aaf | 379 | unsigned pg = *cur_pg; |
5d952b83 | 380 | struct request_queue *q = |
9e9db456 | 381 | osd_request_queue(_ios_od(ios, per_dev->dev)); |
5d952b83 BH |
382 | |
383 | per_dev->length += cur_len; | |
384 | ||
385 | if (per_dev->bio == NULL) { | |
386 | unsigned pages_in_stripe = ios->layout->group_width * | |
387 | (ios->layout->stripe_unit / PAGE_SIZE); | |
86093aaf | 388 | unsigned bio_size = (ios->nr_pages + pages_in_stripe) / |
5d952b83 BH |
389 | ios->layout->group_width; |
390 | ||
391 | per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); | |
392 | if (unlikely(!per_dev->bio)) { | |
8ff660ab | 393 | ORE_DBGMSG("Failed to allocate BIO size=%u\n", |
5d952b83 BH |
394 | bio_size); |
395 | return -ENOMEM; | |
396 | } | |
397 | } | |
398 | ||
399 | while (cur_len > 0) { | |
86093aaf BH |
400 | unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); |
401 | unsigned added_len; | |
5d952b83 | 402 | |
86093aaf BH |
403 | BUG_ON(ios->nr_pages <= pg); |
404 | cur_len -= pglen; | |
5d952b83 | 405 | |
86093aaf BH |
406 | added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg], |
407 | pglen, pgbase); | |
408 | if (unlikely(pglen != added_len)) | |
5d952b83 | 409 | return -ENOMEM; |
86093aaf BH |
410 | pgbase = 0; |
411 | ++pg; | |
5d952b83 BH |
412 | } |
413 | BUG_ON(cur_len); | |
414 | ||
86093aaf | 415 | *cur_pg = pg; |
5d952b83 BH |
416 | return 0; |
417 | } | |
418 | ||
8ff660ab | 419 | static int _prepare_one_group(struct ore_io_state *ios, u64 length, |
eb507bc1 | 420 | struct ore_striping_info *si) |
5d952b83 | 421 | { |
5d952b83 | 422 | unsigned stripe_unit = ios->layout->stripe_unit; |
b367e78b | 423 | unsigned mirrors_p1 = ios->layout->mirrors_p1; |
50a76fd3 | 424 | unsigned devs_in_group = ios->layout->group_width * mirrors_p1; |
b367e78b | 425 | unsigned dev = si->dev; |
50a76fd3 | 426 | unsigned first_dev = dev - (dev % devs_in_group); |
50a76fd3 | 427 | unsigned cur_pg = ios->pages_consumed; |
86093aaf | 428 | int ret = 0; |
5d952b83 | 429 | |
5d952b83 | 430 | while (length) { |
b916c5cd BH |
431 | unsigned comp = dev - first_dev; |
432 | struct ore_per_dev_state *per_dev = &ios->per_dev[comp]; | |
b367e78b | 433 | unsigned cur_len, page_off = 0; |
5d952b83 BH |
434 | |
435 | if (!per_dev->length) { | |
b367e78b BH |
436 | per_dev->dev = dev; |
437 | if (dev < si->dev) { | |
438 | per_dev->offset = si->obj_offset + stripe_unit - | |
439 | si->unit_off; | |
440 | cur_len = stripe_unit; | |
441 | } else if (dev == si->dev) { | |
442 | per_dev->offset = si->obj_offset; | |
443 | cur_len = stripe_unit - si->unit_off; | |
444 | page_off = si->unit_off & ~PAGE_MASK; | |
445 | BUG_ON(page_off && (page_off != ios->pgbase)); | |
446 | } else { /* dev > si->dev */ | |
447 | per_dev->offset = si->obj_offset - si->unit_off; | |
448 | cur_len = stripe_unit; | |
449 | } | |
5d952b83 | 450 | } else { |
b367e78b | 451 | cur_len = stripe_unit; |
5d952b83 | 452 | } |
b367e78b BH |
453 | if (cur_len >= length) |
454 | cur_len = length; | |
5d952b83 | 455 | |
86093aaf BH |
456 | ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, |
457 | cur_len); | |
5d952b83 BH |
458 | if (unlikely(ret)) |
459 | goto out; | |
460 | ||
6e31609b BH |
461 | dev += mirrors_p1; |
462 | dev = (dev % devs_in_group) + first_dev; | |
5d952b83 BH |
463 | |
464 | length -= cur_len; | |
465 | } | |
466 | out: | |
b916c5cd | 467 | ios->numdevs = devs_in_group; |
50a76fd3 | 468 | ios->pages_consumed = cur_pg; |
5d952b83 BH |
469 | return ret; |
470 | } | |
471 | ||
8ff660ab | 472 | static int _prepare_for_striping(struct ore_io_state *ios) |
b367e78b | 473 | { |
eb507bc1 | 474 | struct ore_striping_info si; |
b916c5cd | 475 | int ret; |
b367e78b | 476 | |
b367e78b BH |
477 | if (!ios->pages) { |
478 | if (ios->kern_buff) { | |
8ff660ab | 479 | struct ore_per_dev_state *per_dev = &ios->per_dev[0]; |
b367e78b | 480 | |
eb507bc1 | 481 | ore_calc_stripe_info(ios->layout, ios->offset, &si); |
b367e78b BH |
482 | per_dev->offset = si.obj_offset; |
483 | per_dev->dev = si.dev; | |
484 | ||
485 | /* no cross device without page array */ | |
486 | BUG_ON((ios->layout->group_width > 1) && | |
487 | (si.unit_off + ios->length > | |
488 | ios->layout->stripe_unit)); | |
489 | } | |
490 | ios->numdevs = ios->layout->mirrors_p1; | |
491 | return 0; | |
492 | } | |
493 | ||
b916c5cd | 494 | ore_calc_stripe_info(ios->layout, ios->offset, &si); |
50a76fd3 | 495 | |
b916c5cd BH |
496 | BUG_ON(ios->length > si.group_length); |
497 | ret = _prepare_one_group(ios, ios->length, &si); | |
50a76fd3 | 498 | |
50a76fd3 | 499 | return ret; |
b367e78b BH |
500 | } |
501 | ||
8ff660ab | 502 | int ore_create(struct ore_io_state *ios) |
06886a5a BH |
503 | { |
504 | int i, ret; | |
505 | ||
5bf696da | 506 | for (i = 0; i < ios->oc->numdevs; i++) { |
06886a5a BH |
507 | struct osd_request *or; |
508 | ||
9e9db456 | 509 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); |
06886a5a | 510 | if (unlikely(!or)) { |
8ff660ab | 511 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
06886a5a BH |
512 | ret = -ENOMEM; |
513 | goto out; | |
514 | } | |
515 | ios->per_dev[i].or = or; | |
516 | ios->numdevs++; | |
517 | ||
9e9db456 | 518 | osd_req_create_object(or, _ios_obj(ios, i)); |
06886a5a | 519 | } |
8ff660ab | 520 | ret = ore_io_execute(ios); |
06886a5a BH |
521 | |
522 | out: | |
523 | return ret; | |
524 | } | |
cf283ade | 525 | EXPORT_SYMBOL(ore_create); |
06886a5a | 526 | |
8ff660ab | 527 | int ore_remove(struct ore_io_state *ios) |
06886a5a BH |
528 | { |
529 | int i, ret; | |
530 | ||
5bf696da | 531 | for (i = 0; i < ios->oc->numdevs; i++) { |
06886a5a BH |
532 | struct osd_request *or; |
533 | ||
9e9db456 | 534 | or = osd_start_request(_ios_od(ios, i), GFP_KERNEL); |
06886a5a | 535 | if (unlikely(!or)) { |
8ff660ab | 536 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
06886a5a BH |
537 | ret = -ENOMEM; |
538 | goto out; | |
539 | } | |
540 | ios->per_dev[i].or = or; | |
541 | ios->numdevs++; | |
542 | ||
9e9db456 | 543 | osd_req_remove_object(or, _ios_obj(ios, i)); |
06886a5a | 544 | } |
8ff660ab | 545 | ret = ore_io_execute(ios); |
06886a5a BH |
546 | |
547 | out: | |
548 | return ret; | |
549 | } | |
cf283ade | 550 | EXPORT_SYMBOL(ore_remove); |
06886a5a | 551 | |
8ff660ab | 552 | static int _write_mirror(struct ore_io_state *ios, int cur_comp) |
06886a5a | 553 | { |
8ff660ab | 554 | struct ore_per_dev_state *master_dev = &ios->per_dev[cur_comp]; |
5d952b83 BH |
555 | unsigned dev = ios->per_dev[cur_comp].dev; |
556 | unsigned last_comp = cur_comp + ios->layout->mirrors_p1; | |
557 | int ret = 0; | |
06886a5a | 558 | |
50a76fd3 BH |
559 | if (ios->pages && !master_dev->length) |
560 | return 0; /* Just an empty slot */ | |
561 | ||
5d952b83 | 562 | for (; cur_comp < last_comp; ++cur_comp, ++dev) { |
8ff660ab | 563 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
06886a5a BH |
564 | struct osd_request *or; |
565 | ||
9e9db456 | 566 | or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL); |
06886a5a | 567 | if (unlikely(!or)) { |
8ff660ab | 568 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
06886a5a BH |
569 | ret = -ENOMEM; |
570 | goto out; | |
571 | } | |
5d952b83 BH |
572 | per_dev->or = or; |
573 | per_dev->offset = master_dev->offset; | |
06886a5a | 574 | |
86093aaf | 575 | if (ios->pages) { |
06886a5a BH |
576 | struct bio *bio; |
577 | ||
5d952b83 | 578 | if (per_dev != master_dev) { |
04dc1e88 | 579 | bio = bio_kmalloc(GFP_KERNEL, |
5d952b83 | 580 | master_dev->bio->bi_max_vecs); |
04dc1e88 | 581 | if (unlikely(!bio)) { |
8ff660ab | 582 | ORE_DBGMSG( |
426d3107 | 583 | "Failed to allocate BIO size=%u\n", |
5d952b83 | 584 | master_dev->bio->bi_max_vecs); |
04dc1e88 BH |
585 | ret = -ENOMEM; |
586 | goto out; | |
587 | } | |
588 | ||
5d952b83 | 589 | __bio_clone(bio, master_dev->bio); |
04dc1e88 BH |
590 | bio->bi_bdev = NULL; |
591 | bio->bi_next = NULL; | |
5d952b83 BH |
592 | per_dev->length = master_dev->length; |
593 | per_dev->bio = bio; | |
594 | per_dev->dev = dev; | |
04dc1e88 | 595 | } else { |
5d952b83 BH |
596 | bio = master_dev->bio; |
597 | /* FIXME: bio_set_dir() */ | |
7b6d91da | 598 | bio->bi_rw |= REQ_WRITE; |
04dc1e88 | 599 | } |
06886a5a | 600 | |
9e9db456 BH |
601 | osd_req_write(or, _ios_obj(ios, dev), per_dev->offset, |
602 | bio, per_dev->length); | |
8ff660ab | 603 | ORE_DBGMSG("write(0x%llx) offset=0x%llx " |
34ce4e7c | 604 | "length=0x%llx dev=%d\n", |
9e9db456 BH |
605 | _LLU(_ios_obj(ios, dev)->id), |
606 | _LLU(per_dev->offset), | |
5d952b83 | 607 | _LLU(per_dev->length), dev); |
06886a5a | 608 | } else if (ios->kern_buff) { |
9e9db456 BH |
609 | ret = osd_req_write_kern(or, _ios_obj(ios, dev), |
610 | per_dev->offset, | |
611 | ios->kern_buff, ios->length); | |
5d952b83 BH |
612 | if (unlikely(ret)) |
613 | goto out; | |
8ff660ab | 614 | ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx " |
34ce4e7c | 615 | "length=0x%llx dev=%d\n", |
9e9db456 BH |
616 | _LLU(_ios_obj(ios, dev)->id), |
617 | _LLU(per_dev->offset), | |
5d952b83 | 618 | _LLU(ios->length), dev); |
06886a5a | 619 | } else { |
9e9db456 | 620 | osd_req_set_attributes(or, _ios_obj(ios, dev)); |
8ff660ab | 621 | ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n", |
9e9db456 BH |
622 | _LLU(_ios_obj(ios, dev)->id), |
623 | ios->out_attr_len, dev); | |
06886a5a BH |
624 | } |
625 | ||
626 | if (ios->out_attr) | |
627 | osd_req_add_set_attr_list(or, ios->out_attr, | |
628 | ios->out_attr_len); | |
629 | ||
630 | if (ios->in_attr) | |
631 | osd_req_add_get_attr_list(or, ios->in_attr, | |
632 | ios->in_attr_len); | |
b14f8ab2 | 633 | } |
06886a5a BH |
634 | |
635 | out: | |
636 | return ret; | |
637 | } | |
638 | ||
8ff660ab | 639 | int ore_write(struct ore_io_state *ios) |
5d952b83 BH |
640 | { |
641 | int i; | |
642 | int ret; | |
643 | ||
644 | ret = _prepare_for_striping(ios); | |
645 | if (unlikely(ret)) | |
646 | return ret; | |
647 | ||
648 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | |
8ff660ab | 649 | ret = _write_mirror(ios, i); |
5d952b83 BH |
650 | if (unlikely(ret)) |
651 | return ret; | |
652 | } | |
653 | ||
8ff660ab | 654 | ret = ore_io_execute(ios); |
5d952b83 BH |
655 | return ret; |
656 | } | |
cf283ade | 657 | EXPORT_SYMBOL(ore_write); |
5d952b83 | 658 | |
8ff660ab | 659 | static int _read_mirror(struct ore_io_state *ios, unsigned cur_comp) |
06886a5a | 660 | { |
46f4d973 | 661 | struct osd_request *or; |
8ff660ab | 662 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
9e9db456 BH |
663 | struct osd_obj_id *obj = _ios_obj(ios, cur_comp); |
664 | unsigned first_dev = (unsigned)obj->id; | |
06886a5a | 665 | |
50a76fd3 BH |
666 | if (ios->pages && !per_dev->length) |
667 | return 0; /* Just an empty slot */ | |
668 | ||
5d952b83 | 669 | first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1; |
9e9db456 | 670 | or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL); |
46f4d973 | 671 | if (unlikely(!or)) { |
8ff660ab | 672 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
46f4d973 BH |
673 | return -ENOMEM; |
674 | } | |
675 | per_dev->or = or; | |
46f4d973 | 676 | |
86093aaf | 677 | if (ios->pages) { |
9e9db456 | 678 | osd_req_read(or, obj, per_dev->offset, |
5d952b83 | 679 | per_dev->bio, per_dev->length); |
8ff660ab | 680 | ORE_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx" |
9e9db456 | 681 | " dev=%d\n", _LLU(obj->id), |
5d952b83 | 682 | _LLU(per_dev->offset), _LLU(per_dev->length), |
46f4d973 BH |
683 | first_dev); |
684 | } else if (ios->kern_buff) { | |
9e9db456 | 685 | int ret = osd_req_read_kern(or, obj, per_dev->offset, |
46f4d973 | 686 | ios->kern_buff, ios->length); |
8ff660ab | 687 | ORE_DBGMSG2("read_kern(0x%llx) offset=0x%llx " |
46f4d973 | 688 | "length=0x%llx dev=%d ret=>%d\n", |
9e9db456 | 689 | _LLU(obj->id), _LLU(per_dev->offset), |
46f4d973 BH |
690 | _LLU(ios->length), first_dev, ret); |
691 | if (unlikely(ret)) | |
692 | return ret; | |
693 | } else { | |
9e9db456 | 694 | osd_req_get_attributes(or, obj); |
8ff660ab | 695 | ORE_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n", |
9e9db456 BH |
696 | _LLU(obj->id), |
697 | ios->in_attr_len, first_dev); | |
46f4d973 | 698 | } |
46f4d973 BH |
699 | if (ios->out_attr) |
700 | osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len); | |
b14f8ab2 | 701 | |
46f4d973 BH |
702 | if (ios->in_attr) |
703 | osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len); | |
b14f8ab2 | 704 | |
5d952b83 BH |
705 | return 0; |
706 | } | |
707 | ||
8ff660ab | 708 | int ore_read(struct ore_io_state *ios) |
5d952b83 BH |
709 | { |
710 | int i; | |
711 | int ret; | |
712 | ||
713 | ret = _prepare_for_striping(ios); | |
714 | if (unlikely(ret)) | |
715 | return ret; | |
716 | ||
717 | for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { | |
8ff660ab | 718 | ret = _read_mirror(ios, i); |
5d952b83 BH |
719 | if (unlikely(ret)) |
720 | return ret; | |
721 | } | |
722 | ||
8ff660ab | 723 | ret = ore_io_execute(ios); |
5d952b83 | 724 | return ret; |
b14f8ab2 | 725 | } |
cf283ade | 726 | EXPORT_SYMBOL(ore_read); |
b14f8ab2 | 727 | |
8ff660ab | 728 | int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr) |
b14f8ab2 BH |
729 | { |
730 | struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ | |
731 | void *iter = NULL; | |
732 | int nelem; | |
733 | ||
734 | do { | |
735 | nelem = 1; | |
06886a5a BH |
736 | osd_req_decode_get_attr_list(ios->per_dev[0].or, |
737 | &cur_attr, &nelem, &iter); | |
b14f8ab2 BH |
738 | if ((cur_attr.attr_page == attr->attr_page) && |
739 | (cur_attr.attr_id == attr->attr_id)) { | |
740 | attr->len = cur_attr.len; | |
741 | attr->val_ptr = cur_attr.val_ptr; | |
742 | return 0; | |
743 | } | |
744 | } while (iter); | |
745 | ||
746 | return -EIO; | |
747 | } | |
cf283ade | 748 | EXPORT_SYMBOL(extract_attr_from_ios); |
06886a5a | 749 | |
8ff660ab | 750 | static int _truncate_mirrors(struct ore_io_state *ios, unsigned cur_comp, |
5d952b83 BH |
751 | struct osd_attr *attr) |
752 | { | |
753 | int last_comp = cur_comp + ios->layout->mirrors_p1; | |
754 | ||
755 | for (; cur_comp < last_comp; ++cur_comp) { | |
8ff660ab | 756 | struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp]; |
5d952b83 BH |
757 | struct osd_request *or; |
758 | ||
9e9db456 | 759 | or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL); |
5d952b83 | 760 | if (unlikely(!or)) { |
8ff660ab | 761 | ORE_ERR("%s: osd_start_request failed\n", __func__); |
5d952b83 BH |
762 | return -ENOMEM; |
763 | } | |
764 | per_dev->or = or; | |
765 | ||
9e9db456 | 766 | osd_req_set_attributes(or, _ios_obj(ios, cur_comp)); |
5d952b83 BH |
767 | osd_req_add_set_attr_list(or, attr, 1); |
768 | } | |
769 | ||
770 | return 0; | |
771 | } | |
772 | ||
16f75bb3 | 773 | struct _trunc_info { |
eb507bc1 | 774 | struct ore_striping_info si; |
16f75bb3 BH |
775 | u64 prev_group_obj_off; |
776 | u64 next_group_obj_off; | |
777 | ||
778 | unsigned first_group_dev; | |
779 | unsigned nex_group_dev; | |
16f75bb3 BH |
780 | }; |
781 | ||
1958c7c2 HS |
782 | static void _calc_trunk_info(struct ore_layout *layout, u64 file_offset, |
783 | struct _trunc_info *ti) | |
16f75bb3 BH |
784 | { |
785 | unsigned stripe_unit = layout->stripe_unit; | |
786 | ||
eb507bc1 | 787 | ore_calc_stripe_info(layout, file_offset, &ti->si); |
16f75bb3 BH |
788 | |
789 | ti->prev_group_obj_off = ti->si.M * stripe_unit; | |
790 | ti->next_group_obj_off = ti->si.M ? (ti->si.M - 1) * stripe_unit : 0; | |
791 | ||
792 | ti->first_group_dev = ti->si.dev - (ti->si.dev % layout->group_width); | |
793 | ti->nex_group_dev = ti->first_group_dev + layout->group_width; | |
16f75bb3 BH |
794 | } |
795 | ||
5bf696da | 796 | int ore_truncate(struct ore_layout *layout, struct ore_components *oc, |
9e9db456 | 797 | u64 size) |
06886a5a | 798 | { |
8ff660ab | 799 | struct ore_io_state *ios; |
5d952b83 BH |
800 | struct exofs_trunc_attr { |
801 | struct osd_attr attr; | |
802 | __be64 newsize; | |
803 | } *size_attrs; | |
16f75bb3 | 804 | struct _trunc_info ti; |
06886a5a BH |
805 | int i, ret; |
806 | ||
5bf696da | 807 | ret = ore_get_io_state(layout, oc, &ios); |
5d952b83 BH |
808 | if (unlikely(ret)) |
809 | return ret; | |
810 | ||
16f75bb3 BH |
811 | _calc_trunk_info(ios->layout, size, &ti); |
812 | ||
b916c5cd | 813 | size_attrs = kcalloc(ios->oc->numdevs, sizeof(*size_attrs), |
5d952b83 BH |
814 | GFP_KERNEL); |
815 | if (unlikely(!size_attrs)) { | |
816 | ret = -ENOMEM; | |
817 | goto out; | |
818 | } | |
06886a5a | 819 | |
5bf696da | 820 | ios->numdevs = ios->oc->numdevs; |
06886a5a | 821 | |
b916c5cd | 822 | for (i = 0; i < ios->numdevs; ++i) { |
5d952b83 BH |
823 | struct exofs_trunc_attr *size_attr = &size_attrs[i]; |
824 | u64 obj_size; | |
06886a5a | 825 | |
16f75bb3 BH |
826 | if (i < ti.first_group_dev) |
827 | obj_size = ti.prev_group_obj_off; | |
828 | else if (i >= ti.nex_group_dev) | |
829 | obj_size = ti.next_group_obj_off; | |
830 | else if (i < ti.si.dev) /* dev within this group */ | |
831 | obj_size = ti.si.obj_offset + | |
832 | ios->layout->stripe_unit - ti.si.unit_off; | |
833 | else if (i == ti.si.dev) | |
834 | obj_size = ti.si.obj_offset; | |
835 | else /* i > ti.dev */ | |
836 | obj_size = ti.si.obj_offset - ti.si.unit_off; | |
06886a5a | 837 | |
5d952b83 BH |
838 | size_attr->newsize = cpu_to_be64(obj_size); |
839 | size_attr->attr = g_attr_logical_length; | |
840 | size_attr->attr.val_ptr = &size_attr->newsize; | |
841 | ||
8ff660ab | 842 | ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", |
5bf696da | 843 | _LLU(oc->comps->obj.id), _LLU(obj_size), i); |
5d952b83 BH |
844 | ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, |
845 | &size_attr->attr); | |
846 | if (unlikely(ret)) | |
847 | goto out; | |
06886a5a | 848 | } |
8ff660ab | 849 | ret = ore_io_execute(ios); |
06886a5a BH |
850 | |
851 | out: | |
5d952b83 | 852 | kfree(size_attrs); |
8ff660ab | 853 | ore_put_io_state(ios); |
06886a5a BH |
854 | return ret; |
855 | } | |
cf283ade | 856 | EXPORT_SYMBOL(ore_truncate); |
85e44df4 BH |
857 | |
858 | const struct osd_attr g_attr_logical_length = ATTR_DEF( | |
859 | OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); | |
cf283ade | 860 | EXPORT_SYMBOL(g_attr_logical_length); |