2 * Copyright (C) 2005, 2006
3 * Avishay Traeger (avishay@gmail.com)
4 * Copyright (C) 2008, 2009
5 * Boaz Harrosh <bharrosh@panasas.com>
7 * This file is part of exofs.
9 * exofs is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation. Since it is based on ext2, and the only
12 * valid version of GPL for the Linux kernel is version 2, the only valid
13 * version of GPL for exofs is version 2.
15 * exofs is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with exofs; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include <scsi/scsi_device.h>
26 #include <asm/div64.h>
/*
 * EXOFS_DBGMSG2 is the second, more verbose debug-print macro used in this
 * file. As defined here it expands to an empty statement, so its arguments
 * are discarded at compile time. Swap in the commented-out definition below
 * to route these messages through EXOFS_DBGMSG instead.
 */
30 #define EXOFS_DBGMSG2(M...) do {} while (0)
31 /* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
33 void exofs_make_credential(u8 cred_a
[OSD_CAP_LEN
], const struct osd_obj_id
*obj
)
35 osd_sec_init_nosec_doall_caps(cred_a
, obj
, false, true);
38 int exofs_read_kern(struct osd_dev
*od
, u8
*cred
, struct osd_obj_id
*obj
,
39 u64 offset
, void *p
, unsigned length
)
41 struct osd_request
*or = osd_start_request(od
, GFP_KERNEL
);
42 /* struct osd_sense_info osi = {.key = 0};*/
46 EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__
);
49 ret
= osd_req_read_kern(or, obj
, offset
, p
, length
);
51 EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__
);
55 ret
= osd_finalize_request(or, 0, cred
, NULL
);
57 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret
);
61 ret
= osd_execute_request(or);
63 EXOFS_DBGMSG("osd_execute_request() => %d\n", ret
);
64 /* osd_req_decode_sense(or, ret); */
71 int exofs_get_io_state(struct exofs_layout
*layout
,
72 struct exofs_io_state
**pios
)
74 struct exofs_io_state
*ios
;
76 /* TODO: Maybe use a kmem_cache per sbi of size
77 * exofs_io_state_size(layout->s_numdevs)
79 ios
= kzalloc(exofs_io_state_size(layout
->s_numdevs
), GFP_KERNEL
);
81 EXOFS_DBGMSG("Faild kzalloc bytes=%d\n",
82 exofs_io_state_size(layout
->s_numdevs
));
88 ios
->obj
.partition
= layout
->s_pid
;
93 void exofs_put_io_state(struct exofs_io_state
*ios
)
98 for (i
= 0; i
< ios
->numdevs
; i
++) {
99 struct exofs_per_dev_state
*per_dev
= &ios
->per_dev
[i
];
102 osd_end_request(per_dev
->or);
104 bio_put(per_dev
->bio
);
111 unsigned exofs_layout_od_id(struct exofs_layout
*layout
,
112 osd_id obj_no
, unsigned layout_index
)
114 /* switch (layout->lay_func) {
115 case LAYOUT_MOVING_WINDOW:
117 unsigned dev_mod
= obj_no
;
119 return (layout_index
+ dev_mod
* layout
->mirrors_p1
) %
122 case LAYOUT_FUNC_IMPLICT:
123 return layout->devs[layout_index];
127 static inline struct osd_dev
*exofs_ios_od(struct exofs_io_state
*ios
,
128 unsigned layout_index
)
130 return ios
->layout
->s_ods
[
131 exofs_layout_od_id(ios
->layout
, ios
->obj
.id
, layout_index
)];
134 static void _sync_done(struct exofs_io_state
*ios
, void *p
)
136 struct completion
*waiting
= p
;
141 static void _last_io(struct kref
*kref
)
143 struct exofs_io_state
*ios
= container_of(
144 kref
, struct exofs_io_state
, kref
);
146 ios
->done(ios
, ios
->private);
149 static void _done_io(struct osd_request
*or, void *p
)
151 struct exofs_io_state
*ios
= p
;
153 kref_put(&ios
->kref
, _last_io
);
156 static int exofs_io_execute(struct exofs_io_state
*ios
)
158 DECLARE_COMPLETION_ONSTACK(wait
);
159 bool sync
= (ios
->done
== NULL
);
163 ios
->done
= _sync_done
;
164 ios
->private = &wait
;
167 for (i
= 0; i
< ios
->numdevs
; i
++) {
168 struct osd_request
*or = ios
->per_dev
[i
].or;
172 ret
= osd_finalize_request(or, 0, ios
->cred
, NULL
);
174 EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n",
180 kref_init(&ios
->kref
);
182 for (i
= 0; i
< ios
->numdevs
; i
++) {
183 struct osd_request
*or = ios
->per_dev
[i
].or;
187 kref_get(&ios
->kref
);
188 osd_execute_request_async(or, _done_io
, ios
);
191 kref_put(&ios
->kref
, _last_io
);
195 wait_for_completion(&wait
);
196 ret
= exofs_check_io(ios
, NULL
);
201 static void _clear_bio(struct bio
*bio
)
206 __bio_for_each_segment(bv
, bio
, i
, 0) {
207 unsigned this_count
= bv
->bv_len
;
209 if (likely(PAGE_SIZE
== this_count
))
210 clear_highpage(bv
->bv_page
);
212 zero_user(bv
->bv_page
, bv
->bv_offset
, this_count
);
216 int exofs_check_io(struct exofs_io_state
*ios
, u64
*resid
)
218 enum osd_err_priority acumulated_osd_err
= 0;
219 int acumulated_lin_err
= 0;
222 for (i
= 0; i
< ios
->numdevs
; i
++) {
223 struct osd_sense_info osi
;
224 struct osd_request
*or = ios
->per_dev
[i
].or;
230 ret
= osd_req_decode_sense(or, &osi
);
234 if (OSD_ERR_PRI_CLEAR_PAGES
== osi
.osd_err_pri
) {
235 /* read start offset is past the end of file */
236 _clear_bio(ios
->per_dev
[i
].bio
);
237 EXOFS_DBGMSG("start read offset passed end of file "
238 "offset=0x%llx, length=0x%llx\n",
239 _LLU(ios
->per_dev
[i
].offset
),
240 _LLU(ios
->per_dev
[i
].length
));
242 continue; /* we recovered */
245 if (osi
.osd_err_pri
>= acumulated_osd_err
) {
246 acumulated_osd_err
= osi
.osd_err_pri
;
247 acumulated_lin_err
= ret
;
251 /* TODO: raid specific residual calculations */
253 if (likely(!acumulated_lin_err
))
256 *resid
= ios
->length
;
259 return acumulated_lin_err
;
262 /* REMOVEME: After review
263 Some quoting from the standard
265 L = logical offset into the file
266 W = number of data components in a stripe
267 S = W * stripe_unit (S is Stripe length)
268 N = L / S (N is the stripe Number)
269 C = (L-(N*S)) / stripe_unit (C is the component)
270 O = (N*stripe_unit)+(L%stripe_unit) (O is the object's offset)
273 static void _offset_dev_unit_off(struct exofs_io_state
*ios
, u64 file_offset
,
274 u64
*obj_offset
, unsigned *dev
, unsigned *unit_off
)
276 unsigned stripe_unit
= ios
->layout
->stripe_unit
;
277 unsigned stripe_length
= stripe_unit
* ios
->layout
->group_width
;
278 u64 stripe_no
= file_offset
;
279 unsigned stripe_mod
= do_div(stripe_no
, stripe_length
);
281 *unit_off
= stripe_mod
% stripe_unit
;
282 *obj_offset
= stripe_no
* stripe_unit
+ *unit_off
;
283 *dev
= stripe_mod
/ stripe_unit
* ios
->layout
->mirrors_p1
;
286 static int _add_stripe_unit(struct exofs_io_state
*ios
, unsigned *cur_bvec
,
287 struct exofs_per_dev_state
*per_dev
, int cur_len
)
289 unsigned bv
= *cur_bvec
;
290 struct request_queue
*q
=
291 osd_request_queue(exofs_ios_od(ios
, per_dev
->dev
));
293 per_dev
->length
+= cur_len
;
295 if (per_dev
->bio
== NULL
) {
296 unsigned pages_in_stripe
= ios
->layout
->group_width
*
297 (ios
->layout
->stripe_unit
/ PAGE_SIZE
);
298 unsigned bio_size
= (ios
->bio
->bi_vcnt
+ pages_in_stripe
) /
299 ios
->layout
->group_width
;
301 per_dev
->bio
= bio_kmalloc(GFP_KERNEL
, bio_size
);
302 if (unlikely(!per_dev
->bio
)) {
303 EXOFS_DBGMSG("Faild to allocate BIO size=%u\n",
309 while (cur_len
> 0) {
311 struct bio_vec
*bvec
= &ios
->bio
->bi_io_vec
[bv
];
313 BUG_ON(ios
->bio
->bi_vcnt
<= bv
);
314 cur_len
-= bvec
->bv_len
;
316 added_len
= bio_add_pc_page(q
, per_dev
->bio
, bvec
->bv_page
,
317 bvec
->bv_len
, bvec
->bv_offset
);
318 if (unlikely(bvec
->bv_len
!= added_len
))
328 static int _prepare_for_striping(struct exofs_io_state
*ios
)
330 u64 length
= ios
->length
;
331 u64 offset
= ios
->offset
;
332 unsigned stripe_unit
= ios
->layout
->stripe_unit
;
334 unsigned stripes
= 0;
335 unsigned cur_bvec
= 0;
339 if (ios
->kern_buff
) {
340 struct exofs_per_dev_state
*per_dev
= &ios
->per_dev
[0];
343 _offset_dev_unit_off(ios
, offset
, &per_dev
->offset
,
344 &per_dev
->dev
, &unit_off
);
345 /* no cross device without page array */
346 BUG_ON((ios
->layout
->group_width
> 1) &&
347 (unit_off
+ length
> stripe_unit
));
349 ios
->numdevs
= ios
->layout
->mirrors_p1
;
354 struct exofs_per_dev_state
*per_dev
= &ios
->per_dev
[comp
];
357 if (!per_dev
->length
) {
360 _offset_dev_unit_off(ios
, offset
, &per_dev
->offset
,
361 &per_dev
->dev
, &unit_off
);
363 cur_len
= min_t(u64
, stripe_unit
- unit_off
, length
);
366 cur_len
= min_t(u64
, stripe_unit
, length
);
369 ret
= _add_stripe_unit(ios
, &cur_bvec
, per_dev
, cur_len
);
373 comp
+= ios
->layout
->mirrors_p1
;
374 comp
%= ios
->layout
->s_numdevs
;
379 ios
->numdevs
= stripes
* ios
->layout
->mirrors_p1
;
383 int exofs_sbi_create(struct exofs_io_state
*ios
)
387 for (i
= 0; i
< ios
->layout
->s_numdevs
; i
++) {
388 struct osd_request
*or;
390 or = osd_start_request(exofs_ios_od(ios
, i
), GFP_KERNEL
);
392 EXOFS_ERR("%s: osd_start_request failed\n", __func__
);
396 ios
->per_dev
[i
].or = or;
399 osd_req_create_object(or, &ios
->obj
);
401 ret
= exofs_io_execute(ios
);
407 int exofs_sbi_remove(struct exofs_io_state
*ios
)
411 for (i
= 0; i
< ios
->layout
->s_numdevs
; i
++) {
412 struct osd_request
*or;
414 or = osd_start_request(exofs_ios_od(ios
, i
), GFP_KERNEL
);
416 EXOFS_ERR("%s: osd_start_request failed\n", __func__
);
420 ios
->per_dev
[i
].or = or;
423 osd_req_remove_object(or, &ios
->obj
);
425 ret
= exofs_io_execute(ios
);
431 static int _sbi_write_mirror(struct exofs_io_state
*ios
, int cur_comp
)
433 struct exofs_per_dev_state
*master_dev
= &ios
->per_dev
[cur_comp
];
434 unsigned dev
= ios
->per_dev
[cur_comp
].dev
;
435 unsigned last_comp
= cur_comp
+ ios
->layout
->mirrors_p1
;
438 for (; cur_comp
< last_comp
; ++cur_comp
, ++dev
) {
439 struct exofs_per_dev_state
*per_dev
= &ios
->per_dev
[cur_comp
];
440 struct osd_request
*or;
442 or = osd_start_request(exofs_ios_od(ios
, dev
), GFP_KERNEL
);
444 EXOFS_ERR("%s: osd_start_request failed\n", __func__
);
449 per_dev
->offset
= master_dev
->offset
;
454 if (per_dev
!= master_dev
) {
455 bio
= bio_kmalloc(GFP_KERNEL
,
456 master_dev
->bio
->bi_max_vecs
);
457 if (unlikely(!bio
)) {
459 "Faild to allocate BIO size=%u\n",
460 master_dev
->bio
->bi_max_vecs
);
465 __bio_clone(bio
, master_dev
->bio
);
468 per_dev
->length
= master_dev
->length
;
472 bio
= master_dev
->bio
;
473 /* FIXME: bio_set_dir() */
474 bio
->bi_rw
|= (1 << BIO_RW
);
477 osd_req_write(or, &ios
->obj
, per_dev
->offset
, bio
,
479 EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
480 "length=0x%llx dev=%d\n",
481 _LLU(ios
->obj
.id
), _LLU(per_dev
->offset
),
482 _LLU(per_dev
->length
), dev
);
483 } else if (ios
->kern_buff
) {
484 ret
= osd_req_write_kern(or, &ios
->obj
, per_dev
->offset
,
485 ios
->kern_buff
, ios
->length
);
488 EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
489 "length=0x%llx dev=%d\n",
490 _LLU(ios
->obj
.id
), _LLU(per_dev
->offset
),
491 _LLU(ios
->length
), dev
);
493 osd_req_set_attributes(or, &ios
->obj
);
494 EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
495 _LLU(ios
->obj
.id
), ios
->out_attr_len
, dev
);
499 osd_req_add_set_attr_list(or, ios
->out_attr
,
503 osd_req_add_get_attr_list(or, ios
->in_attr
,
511 int exofs_sbi_write(struct exofs_io_state
*ios
)
516 ret
= _prepare_for_striping(ios
);
520 for (i
= 0; i
< ios
->numdevs
; i
+= ios
->layout
->mirrors_p1
) {
521 ret
= _sbi_write_mirror(ios
, i
);
526 ret
= exofs_io_execute(ios
);
530 static int _sbi_read_mirror(struct exofs_io_state
*ios
, unsigned cur_comp
)
532 struct osd_request
*or;
533 struct exofs_per_dev_state
*per_dev
= &ios
->per_dev
[cur_comp
];
534 unsigned first_dev
= (unsigned)ios
->obj
.id
;
536 first_dev
= per_dev
->dev
+ first_dev
% ios
->layout
->mirrors_p1
;
537 or = osd_start_request(exofs_ios_od(ios
, first_dev
), GFP_KERNEL
);
539 EXOFS_ERR("%s: osd_start_request failed\n", __func__
);
545 osd_req_read(or, &ios
->obj
, per_dev
->offset
,
546 per_dev
->bio
, per_dev
->length
);
547 EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
548 " dev=%d\n", _LLU(ios
->obj
.id
),
549 _LLU(per_dev
->offset
), _LLU(per_dev
->length
),
551 } else if (ios
->kern_buff
) {
552 int ret
= osd_req_read_kern(or, &ios
->obj
, per_dev
->offset
,
553 ios
->kern_buff
, ios
->length
);
554 EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
555 "length=0x%llx dev=%d ret=>%d\n",
556 _LLU(ios
->obj
.id
), _LLU(per_dev
->offset
),
557 _LLU(ios
->length
), first_dev
, ret
);
561 osd_req_get_attributes(or, &ios
->obj
);
562 EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
563 _LLU(ios
->obj
.id
), ios
->in_attr_len
, first_dev
);
566 osd_req_add_set_attr_list(or, ios
->out_attr
, ios
->out_attr_len
);
569 osd_req_add_get_attr_list(or, ios
->in_attr
, ios
->in_attr_len
);
574 int exofs_sbi_read(struct exofs_io_state
*ios
)
579 ret
= _prepare_for_striping(ios
);
583 for (i
= 0; i
< ios
->numdevs
; i
+= ios
->layout
->mirrors_p1
) {
584 ret
= _sbi_read_mirror(ios
, i
);
589 ret
= exofs_io_execute(ios
);
593 int extract_attr_from_ios(struct exofs_io_state
*ios
, struct osd_attr
*attr
)
595 struct osd_attr cur_attr
= {.attr_page
= 0}; /* start with zeros */
601 osd_req_decode_get_attr_list(ios
->per_dev
[0].or,
602 &cur_attr
, &nelem
, &iter
);
603 if ((cur_attr
.attr_page
== attr
->attr_page
) &&
604 (cur_attr
.attr_id
== attr
->attr_id
)) {
605 attr
->len
= cur_attr
.len
;
606 attr
->val_ptr
= cur_attr
.val_ptr
;
614 static int _truncate_mirrors(struct exofs_io_state
*ios
, unsigned cur_comp
,
615 struct osd_attr
*attr
)
617 int last_comp
= cur_comp
+ ios
->layout
->mirrors_p1
;
619 for (; cur_comp
< last_comp
; ++cur_comp
) {
620 struct exofs_per_dev_state
*per_dev
= &ios
->per_dev
[cur_comp
];
621 struct osd_request
*or;
623 or = osd_start_request(exofs_ios_od(ios
, cur_comp
), GFP_KERNEL
);
625 EXOFS_ERR("%s: osd_start_request failed\n", __func__
);
630 osd_req_set_attributes(or, &ios
->obj
);
631 osd_req_add_set_attr_list(or, attr
, 1);
637 int exofs_oi_truncate(struct exofs_i_info
*oi
, u64 size
)
639 struct exofs_sb_info
*sbi
= oi
->vfs_inode
.i_sb
->s_fs_info
;
640 struct exofs_io_state
*ios
;
641 struct exofs_trunc_attr
{
642 struct osd_attr attr
;
650 ret
= exofs_get_io_state(&sbi
->layout
, &ios
);
654 size_attrs
= kcalloc(ios
->layout
->group_width
, sizeof(*size_attrs
),
656 if (unlikely(!size_attrs
)) {
661 ios
->obj
.id
= exofs_oi_objno(oi
);
662 ios
->cred
= oi
->i_cred
;
664 ios
->numdevs
= ios
->layout
->s_numdevs
;
665 _offset_dev_unit_off(ios
, size
, &this_obj_size
, &dev
, &unit_off
);
667 for (i
= 0; i
< ios
->layout
->group_width
; ++i
) {
668 struct exofs_trunc_attr
*size_attr
= &size_attrs
[i
];
672 obj_size
= this_obj_size
+
673 ios
->layout
->stripe_unit
- unit_off
;
675 obj_size
= this_obj_size
;
677 obj_size
= this_obj_size
- unit_off
;
679 size_attr
->newsize
= cpu_to_be64(obj_size
);
680 size_attr
->attr
= g_attr_logical_length
;
681 size_attr
->attr
.val_ptr
= &size_attr
->newsize
;
683 ret
= _truncate_mirrors(ios
, i
* ios
->layout
->mirrors_p1
,
688 ret
= exofs_io_execute(ios
);
692 exofs_put_io_state(ios
);