uml: fix !NO_HZ busy-loop
[deliverable/linux.git] / arch / um / drivers / ubd_kern.c
CommitLineData
6c29256c 1/*
1da177e4
LT
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
e16f5350 23#include "linux/kernel.h"
1da177e4
LT
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
1da177e4
LT
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/ctype.h"
31#include "linux/capability.h"
32#include "linux/mm.h"
33#include "linux/vmalloc.h"
34#include "linux/blkpg.h"
35#include "linux/genhd.h"
36#include "linux/spinlock.h"
d052d1be 37#include "linux/platform_device.h"
23464ffa 38#include "linux/scatterlist.h"
1da177e4
LT
39#include "asm/segment.h"
40#include "asm/uaccess.h"
41#include "asm/irq.h"
42#include "asm/types.h"
43#include "asm/tlbflush.h"
1da177e4
LT
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
1da177e4
LT
52#include "os.h"
53#include "mem.h"
54#include "mem_kern.h"
55#include "cow.h"
56
7b9014c1 57enum ubd_req { UBD_READ, UBD_WRITE };
1da177e4
LT
58
59struct io_thread_req {
62f96cb0 60 struct request *req;
91acb21f 61 enum ubd_req op;
1da177e4
LT
62 int fds[2];
63 unsigned long offsets[2];
64 unsigned long long offset;
65 unsigned long length;
66 char *buffer;
67 int sectorsize;
91acb21f
JD
68 unsigned long sector_mask;
69 unsigned long long cow_offset;
70 unsigned long bitmap_words[2];
1da177e4
LT
71 int error;
72};
73
6c29256c 74extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
1da177e4
LT
75 char **backing_file_out, int *bitmap_offset_out,
76 unsigned long *bitmap_len_out, int *data_offset_out,
77 int *create_cow_out);
78extern int create_cow_file(char *cow_file, char *backing_file,
79 struct openflags flags, int sectorsize,
80 int alignment, int *bitmap_offset_out,
81 unsigned long *bitmap_len_out,
82 int *data_offset_out);
83extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
91acb21f 84extern void do_io(struct io_thread_req *req);
1da177e4 85
/*
 * Return 1 if bit number 'bit' is set in the byte array 'data', 0 otherwise.
 * Bits are numbered from the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
96
/*
 * Set bit number 'bit' in the byte array 'data'.  Bits are numbered from
 * the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
107/*End stuff from ubd_user.h*/
108
109#define DRIVER_NAME "uml-blkdev"
110
d7fb2c38 111static DEFINE_MUTEX(ubd_lock);
1da177e4 112
1da177e4
LT
113static int ubd_open(struct inode * inode, struct file * filp);
114static int ubd_release(struct inode * inode, struct file * file);
115static int ubd_ioctl(struct inode * inode, struct file * file,
116 unsigned int cmd, unsigned long arg);
a885c8c4 117static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 118
97d88ac8 119#define MAX_DEV (16)
1da177e4 120
1da177e4
LT
121static struct block_device_operations ubd_blops = {
122 .owner = THIS_MODULE,
123 .open = ubd_open,
124 .release = ubd_release,
125 .ioctl = ubd_ioctl,
a885c8c4 126 .getgeo = ubd_getgeo,
1da177e4
LT
127};
128
1da177e4
LT
129/* Protected by ubd_lock */
130static int fake_major = MAJOR_NR;
1da177e4
LT
131static struct gendisk *ubd_gendisk[MAX_DEV];
132static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 133
1da177e4
LT
134#ifdef CONFIG_BLK_DEV_UBD_SYNC
135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
136 .cl = 1 })
137#else
138#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
139 .cl = 1 })
140#endif
1da177e4
LT
141static struct openflags global_openflags = OPEN_FLAGS;
142
/* Copy-on-write state of a ubd device. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap, vmalloc'ed by ubd_open_dev() */
	unsigned long *bitmap;
	/* size passed to vmalloc() and read_cow_bitmap() for the bitmap */
	unsigned long bitmap_len;
	/* offset of the bitmap within the COW file */
	int bitmap_offset;
	/* offset of the data within the COW file */
	int data_offset;
};
153
a0044bdf
JD
154#define MAX_SG 64
155
1da177e4 156struct ubd {
a0044bdf 157 struct list_head restart;
2a9d32f6
PBG
158 /* name (and fd, below) of the file opened for writing, either the
159 * backing or the cow file. */
1da177e4
LT
160 char *file;
161 int count;
162 int fd;
163 __u64 size;
164 struct openflags boot_openflags;
165 struct openflags openflags;
84e945e3
PBG
166 unsigned shared:1;
167 unsigned no_cow:1;
1da177e4
LT
168 struct cow cow;
169 struct platform_device pdev;
62f96cb0
JD
170 struct request_queue *queue;
171 spinlock_t lock;
a0044bdf
JD
172 struct scatterlist sg[MAX_SG];
173 struct request *request;
174 int start_sg, end_sg;
1da177e4
LT
175};
176
177#define DEFAULT_COW { \
178 .file = NULL, \
dc764e50
JD
179 .fd = -1, \
180 .bitmap = NULL, \
1da177e4 181 .bitmap_offset = 0, \
dc764e50 182 .data_offset = 0, \
1da177e4
LT
183}
184
185#define DEFAULT_UBD { \
186 .file = NULL, \
187 .count = 0, \
188 .fd = -1, \
189 .size = -1, \
190 .boot_openflags = OPEN_FLAGS, \
191 .openflags = OPEN_FLAGS, \
dc764e50 192 .no_cow = 0, \
6c29256c 193 .shared = 0, \
dc764e50 194 .cow = DEFAULT_COW, \
62f96cb0 195 .lock = SPIN_LOCK_UNLOCKED, \
a0044bdf
JD
196 .request = NULL, \
197 .start_sg = 0, \
198 .end_sg = 0, \
1da177e4
LT
199}
200
b8831a1d 201/* Protected by ubd_lock */
7d314e34 202struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
1da177e4 203
1da177e4
LT
204/* Only changed by fake_ide_setup which is a setup */
205static int fake_ide = 0;
206static struct proc_dir_entry *proc_ide_root = NULL;
207static struct proc_dir_entry *proc_ide = NULL;
208
209static void make_proc_ide(void)
210{
211 proc_ide_root = proc_mkdir("ide", NULL);
212 proc_ide = proc_mkdir("ide0", proc_ide_root);
213}
214
/*
 * read_proc handler for the fake "media" entry: the content is always
 * "disk\n".
 *
 * The full string is copied to 'page'; *start is set to page + off and the
 * number of bytes available from there (at most 'count') is returned.
 * *eof is set to 1 when fewer than 'count' bytes remain; 0 is returned
 * (with *start untouched) when nothing remains.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int remaining;

	strcpy(page, "disk\n");
	remaining = strlen("disk\n");
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
231
232static void make_ide_entries(char *dev_name)
233{
234 struct proc_dir_entry *dir, *ent;
235 char name[64];
236
237 if(proc_ide_root == NULL) make_proc_ide();
238
239 dir = proc_mkdir(dev_name, proc_ide);
240 if(!dir) return;
241
242 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
243 if(!ent) return;
1da177e4
LT
244 ent->data = NULL;
245 ent->read_proc = proc_ide_read_media;
246 ent->write_proc = NULL;
247 sprintf(name,"ide0/%s", dev_name);
248 proc_symlink(dev_name, proc_ide_root, name);
249}
250
251static int fake_ide_setup(char *str)
252{
253 fake_ide = 1;
dc764e50 254 return 1;
1da177e4
LT
255}
256
257__setup("fake_ide", fake_ide_setup);
258
259__uml_help(fake_ide_setup,
260"fake_ide\n"
261" Create ide0 entries that map onto ubd devices.\n\n"
262);
263
/*
 * Parse a unit specifier at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single lower-case letter, 'a' being 0.
 *
 * On success the unit number is returned and *ptr is advanced past it.
 * Returns -1, leaving *ptr alone, if neither form is present.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
282
d8d7c28e
PBG
283/* If *index_out == -1 at exit, the passed option was a general one;
284 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
285 * should not be freed on exit.
286 */
f28169d2 287static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 288{
7d314e34 289 struct ubd *ubd_dev;
1da177e4
LT
290 struct openflags flags = global_openflags;
291 char *backing_file;
b8831a1d 292 int n, err = 0, i;
1da177e4
LT
293
294 if(index_out) *index_out = -1;
295 n = *str;
296 if(n == '='){
297 char *end;
298 int major;
299
300 str++;
1da177e4
LT
301 if(!strcmp(str, "sync")){
302 global_openflags = of_sync(global_openflags);
b8831a1d 303 goto out1;
1da177e4 304 }
b8831a1d
JD
305
306 err = -EINVAL;
1da177e4
LT
307 major = simple_strtoul(str, &end, 0);
308 if((*end != '\0') || (end == str)){
f28169d2 309 *error_out = "Didn't parse major number";
b8831a1d 310 goto out1;
1da177e4
LT
311 }
312
f28169d2
JD
313 mutex_lock(&ubd_lock);
314 if(fake_major != MAJOR_NR){
315 *error_out = "Can't assign a fake major twice";
316 goto out1;
317 }
6c29256c 318
f28169d2 319 fake_major = major;
1da177e4
LT
320
321 printk(KERN_INFO "Setting extra ubd major number to %d\n",
322 major);
f28169d2
JD
323 err = 0;
324 out1:
325 mutex_unlock(&ubd_lock);
326 return err;
1da177e4
LT
327 }
328
329 n = parse_unit(&str);
330 if(n < 0){
f28169d2
JD
331 *error_out = "Couldn't parse device number";
332 return -EINVAL;
1da177e4
LT
333 }
334 if(n >= MAX_DEV){
f28169d2
JD
335 *error_out = "Device number out of range";
336 return 1;
1da177e4
LT
337 }
338
f28169d2 339 err = -EBUSY;
d7fb2c38 340 mutex_lock(&ubd_lock);
1da177e4 341
7d314e34
PBG
342 ubd_dev = &ubd_devs[n];
343 if(ubd_dev->file != NULL){
f28169d2 344 *error_out = "Device is already configured";
1da177e4
LT
345 goto out;
346 }
347
348 if (index_out)
349 *index_out = n;
350
f28169d2 351 err = -EINVAL;
6c29256c 352 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
353 switch (*str) {
354 case 'r':
355 flags.w = 0;
356 break;
357 case 's':
358 flags.s = 1;
359 break;
360 case 'd':
7d314e34 361 ubd_dev->no_cow = 1;
1da177e4 362 break;
6c29256c 363 case 'c':
7d314e34 364 ubd_dev->shared = 1;
6c29256c 365 break;
1da177e4
LT
366 case '=':
367 str++;
368 goto break_loop;
369 default:
f28169d2
JD
370 *error_out = "Expected '=' or flag letter "
371 "(r, s, c, or d)";
1da177e4
LT
372 goto out;
373 }
374 str++;
375 }
376
f28169d2
JD
377 if (*str == '=')
378 *error_out = "Too many flags specified";
379 else
380 *error_out = "Missing '='";
1da177e4
LT
381 goto out;
382
383break_loop:
1da177e4
LT
384 backing_file = strchr(str, ',');
385
f28169d2 386 if (backing_file == NULL)
1da177e4 387 backing_file = strchr(str, ':');
1da177e4 388
f28169d2
JD
389 if(backing_file != NULL){
390 if(ubd_dev->no_cow){
391 *error_out = "Can't specify both 'd' and a cow file";
392 goto out;
393 }
1da177e4
LT
394 else {
395 *backing_file = '\0';
396 backing_file++;
397 }
398 }
f28169d2 399 err = 0;
7d314e34
PBG
400 ubd_dev->file = str;
401 ubd_dev->cow.file = backing_file;
402 ubd_dev->boot_openflags = flags;
1da177e4 403out:
d7fb2c38 404 mutex_unlock(&ubd_lock);
f28169d2 405 return err;
1da177e4
LT
406}
407
408static int ubd_setup(char *str)
409{
f28169d2
JD
410 char *error;
411 int err;
412
413 err = ubd_setup_common(str, NULL, &error);
414 if(err)
415 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
416 "%s\n", str, error);
417 return 1;
1da177e4
LT
418}
419
420__setup("ubd", ubd_setup);
421__uml_help(ubd_setup,
422"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
423" This is used to associate a device with a file in the underlying\n"
424" filesystem. When specifying two filenames, the first one is the\n"
425" COW name and the second is the backing file name. As separator you can\n"
426" use either a ':' or a ',': the first one allows writing things like;\n"
427" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
428" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 429" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
430" a COW file or a backing file. To override this detection, add the 'd'\n"
431" flag:\n"
432" ubd0d=BackingFile\n"
433" Usually, there is a filesystem in the file, but \n"
434" that's not required. Swap devices containing swap files can be\n"
435" specified like this. Also, a file which doesn't contain a\n"
436" filesystem can have its contents read in the virtual \n"
437" machine by running 'dd' on the device. <n> must be in the range\n"
438" 0 to 7. Appending an 'r' to the number will cause that device\n"
439" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
440" an 's' will cause data to be written to disk on the host immediately.\n\n"
441);
442
443static int udb_setup(char *str)
444{
445 printk("udb%s specified on command line is almost certainly a ubd -> "
446 "udb TYPO\n", str);
dc764e50 447 return 1;
1da177e4
LT
448}
449
450__setup("udb", udb_setup);
451__uml_help(udb_setup,
452"udb\n"
0894e27e
JD
453" This option is here solely to catch ubd -> udb typos, which can be\n"
454" to impossible to catch visually unless you specifically look for\n"
455" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
456" in the boot output.\n\n"
457);
458
459static int fakehd_set = 0;
460static int fakehd(char *str)
461{
462 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
463 fakehd_set = 1;
464 return 1;
465}
466
467__setup("fakehd", fakehd);
468__uml_help(fakehd,
469"fakehd\n"
470" Change the ubd device name to \"hd\".\n\n"
471);
472
165125e1 473static void do_ubd_request(struct request_queue * q);
91acb21f
JD
474
475/* Only changed by ubd_init, which is an initcall. */
476int thread_fd = -1;
1da177e4 477
a0044bdf 478static void ubd_end_request(struct request *req, int bytes, int uptodate)
1da177e4 479{
a0044bdf
JD
480 if (!end_that_request_first(req, uptodate, bytes >> 9)) {
481 struct ubd *dev = req->rq_disk->private_data;
482 unsigned long flags;
483
484 add_disk_randomness(req->rq_disk);
485 spin_lock_irqsave(&dev->lock, flags);
486 end_that_request_last(req, uptodate);
487 spin_unlock_irqrestore(&dev->lock, flags);
1da177e4 488 }
1da177e4
LT
489}
490
33f775ee
PBG
491/* Callable only from interrupt context - otherwise you need to do
492 * spin_lock_irq()/spin_lock_irqsave() */
/*
 * Finish a request: a negative 'bytes' means failure (nothing transferred,
 * not up to date), anything else completes that many bytes successfully.
 */
static inline void ubd_finish(struct request *req, int bytes)
{
	if (bytes < 0)
		ubd_end_request(req, 0, 0);
	else
		ubd_end_request(req, bytes, 1);
}
501
a0044bdf
JD
502static LIST_HEAD(restart);
503
2fe30a34 504/* XXX - move this inside ubd_intr. */
62f96cb0 505/* Called without dev->lock held, and only in interrupt context. */
91acb21f 506static void ubd_handler(void)
1da177e4 507{
2adcec21 508 struct io_thread_req *req;
62f96cb0 509 struct request *rq;
a0044bdf
JD
510 struct ubd *ubd;
511 struct list_head *list, *next_ele;
512 unsigned long flags;
91acb21f
JD
513 int n;
514
a0044bdf 515 while(1){
a6ea4cce
JD
516 n = os_read_file(thread_fd, &req,
517 sizeof(struct io_thread_req *));
a0044bdf
JD
518 if(n != sizeof(req)){
519 if(n == -EAGAIN)
520 break;
521 printk(KERN_ERR "spurious interrupt in ubd_handler, "
522 "err = %d\n", -n);
523 return;
524 }
62f96cb0 525
2adcec21
JD
526 rq = req->req;
527 rq->nr_sectors -= req->length >> 9;
a0044bdf
JD
528 if(rq->nr_sectors == 0)
529 ubd_finish(rq, rq->hard_nr_sectors << 9);
2adcec21 530 kfree(req);
a0044bdf 531 }
62f96cb0 532 reactivate_fd(thread_fd, UBD_IRQ);
a0044bdf
JD
533
534 list_for_each_safe(list, next_ele, &restart){
535 ubd = container_of(list, struct ubd, restart);
536 list_del_init(&ubd->restart);
537 spin_lock_irqsave(&ubd->lock, flags);
538 do_ubd_request(ubd->queue);
539 spin_unlock_irqrestore(&ubd->lock, flags);
540 }
1da177e4
LT
541}
542
7bea96fd 543static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 544{
91acb21f 545 ubd_handler();
dc764e50 546 return IRQ_HANDLED;
91acb21f 547}
09ace81c 548
91acb21f
JD
549/* Only changed by ubd_init, which is an initcall. */
550static int io_pid = -1;
09ace81c 551
91acb21f
JD
552void kill_io_thread(void)
553{
6c29256c 554 if(io_pid != -1)
91acb21f 555 os_kill_process(io_pid, 1);
09ace81c 556}
1da177e4 557
91acb21f
JD
558__uml_exitcall(kill_io_thread);
559
d8d7c28e 560static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
561{
562 char *file;
563
7d314e34 564 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
dc764e50 565 return os_file_size(file, size_out);
1da177e4
LT
566}
567
5f75a4f8 568static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 569{
7d314e34
PBG
570 os_close_file(ubd_dev->fd);
571 if(ubd_dev->cow.file == NULL)
1da177e4
LT
572 return;
573
7d314e34
PBG
574 os_close_file(ubd_dev->cow.fd);
575 vfree(ubd_dev->cow.bitmap);
576 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
577}
578
7d314e34 579static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
580{
581 struct openflags flags;
582 char **back_ptr;
583 int err, create_cow, *create_ptr;
0bf16bff 584 int fd;
1da177e4 585
7d314e34 586 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 587 create_cow = 0;
7d314e34
PBG
588 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
589 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
590
591 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
592 back_ptr, &ubd_dev->cow.bitmap_offset,
593 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 594 create_ptr);
1da177e4 595
0bf16bff
PBG
596 if((fd == -ENOENT) && create_cow){
597 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
598 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
599 &ubd_dev->cow.bitmap_offset,
600 &ubd_dev->cow.bitmap_len,
601 &ubd_dev->cow.data_offset);
0bf16bff 602 if(fd >= 0){
1da177e4 603 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 604 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
605 }
606 }
607
0bf16bff 608 if(fd < 0){
7d314e34 609 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
610 -fd);
611 return fd;
1da177e4 612 }
0bf16bff 613 ubd_dev->fd = fd;
1da177e4 614
7d314e34 615 if(ubd_dev->cow.file != NULL){
f4768ffd
JD
616 blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
617
1da177e4 618 err = -ENOMEM;
da2486ba 619 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 620 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
621 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
622 goto error;
623 }
624 flush_tlb_kernel_vm();
625
7d314e34
PBG
626 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
627 ubd_dev->cow.bitmap_offset,
628 ubd_dev->cow.bitmap_len);
1da177e4
LT
629 if(err < 0)
630 goto error;
631
7d314e34 632 flags = ubd_dev->openflags;
1da177e4 633 flags.w = 0;
7d314e34 634 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 635 NULL, NULL, NULL, NULL);
1da177e4 636 if(err < 0) goto error;
7d314e34 637 ubd_dev->cow.fd = err;
1da177e4 638 }
dc764e50 639 return 0;
1da177e4 640 error:
7d314e34 641 os_close_file(ubd_dev->fd);
dc764e50 642 return err;
1da177e4
LT
643}
644
2e3f5251
JD
645static void ubd_device_release(struct device *dev)
646{
647 struct ubd *ubd_dev = dev->driver_data;
648
649 blk_cleanup_queue(ubd_dev->queue);
650 *ubd_dev = ((struct ubd) DEFAULT_UBD);
651}
652
5f75a4f8 653static int ubd_disk_register(int major, u64 size, int unit,
b8831a1d 654 struct gendisk **disk_out)
1da177e4
LT
655{
656 struct gendisk *disk;
1da177e4
LT
657
658 disk = alloc_disk(1 << UBD_SHIFT);
659 if(disk == NULL)
dc764e50 660 return -ENOMEM;
1da177e4
LT
661
662 disk->major = major;
663 disk->first_minor = unit << UBD_SHIFT;
664 disk->fops = &ubd_blops;
665 set_capacity(disk, size / 512);
ce7b0f46 666 if(major == MAJOR_NR)
1da177e4 667 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 668 else
1da177e4 669 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
670
671 /* sysfs register (not for ide fake devices) */
672 if (major == MAJOR_NR) {
7d314e34
PBG
673 ubd_devs[unit].pdev.id = unit;
674 ubd_devs[unit].pdev.name = DRIVER_NAME;
2e3f5251
JD
675 ubd_devs[unit].pdev.dev.release = ubd_device_release;
676 ubd_devs[unit].pdev.dev.driver_data = &ubd_devs[unit];
7d314e34
PBG
677 platform_device_register(&ubd_devs[unit].pdev);
678 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
1da177e4
LT
679 }
680
7d314e34 681 disk->private_data = &ubd_devs[unit];
62f96cb0 682 disk->queue = ubd_devs[unit].queue;
1da177e4
LT
683 add_disk(disk);
684
685 *disk_out = disk;
686 return 0;
687}
688
/*
 * Round n up to the next multiple of 512.  The argument is parenthesised
 * so that expressions can be passed safely, and the mask is built with ~
 * rather than by left-shifting -1, which is undefined behaviour.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
690
f28169d2 691static int ubd_add(int n, char **error_out)
1da177e4 692{
7d314e34 693 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 694 int err = 0;
1da177e4 695
7d314e34 696 if(ubd_dev->file == NULL)
ec7cf783 697 goto out;
1da177e4 698
7d314e34 699 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
700 if(err < 0){
701 *error_out = "Couldn't determine size of device's file";
80c13749 702 goto out;
f28169d2 703 }
1da177e4 704
7d314e34 705 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 706
a0044bdf 707 INIT_LIST_HEAD(&ubd_dev->restart);
4f40c055 708 sg_init_table(ubd_dev->sg, MAX_SG);
a0044bdf 709
62f96cb0
JD
710 err = -ENOMEM;
711 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
712 if (ubd_dev->queue == NULL) {
713 *error_out = "Failed to initialize device queue";
80c13749 714 goto out;
62f96cb0
JD
715 }
716 ubd_dev->queue->queuedata = ubd_dev;
717
a0044bdf 718 blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
62f96cb0
JD
719 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
720 if(err){
721 *error_out = "Failed to register device";
722 goto out_cleanup;
723 }
6c29256c 724
1da177e4 725 if(fake_major != MAJOR_NR)
5f75a4f8 726 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 727 &fake_gendisk[n]);
1da177e4
LT
728
729 /* perhaps this should also be under the "if (fake_major)" above */
730 /* using the fake_disk->disk_name and also the fakehd_set name */
731 if (fake_ide)
732 make_ide_entries(ubd_gendisk[n]->disk_name);
733
ec7cf783 734 err = 0;
ec7cf783
JD
735out:
736 return err;
62f96cb0
JD
737
738out_cleanup:
739 blk_cleanup_queue(ubd_dev->queue);
740 goto out;
1da177e4
LT
741}
742
f28169d2 743static int ubd_config(char *str, char **error_out)
1da177e4 744{
e7f6552f 745 int n, ret;
1da177e4 746
f28169d2
JD
747 /* This string is possibly broken up and stored, so it's only
748 * freed if ubd_setup_common fails, or if only general options
749 * were set.
750 */
970d6e3a 751 str = kstrdup(str, GFP_KERNEL);
e7f6552f 752 if (str == NULL) {
f28169d2
JD
753 *error_out = "Failed to allocate memory";
754 return -ENOMEM;
1da177e4 755 }
f28169d2
JD
756
757 ret = ubd_setup_common(str, &n, error_out);
758 if (ret)
e7f6552f 759 goto err_free;
f28169d2 760
e7f6552f
PBG
761 if (n == -1) {
762 ret = 0;
d8d7c28e 763 goto err_free;
1da177e4 764 }
1da177e4 765
dc764e50 766 mutex_lock(&ubd_lock);
f28169d2 767 ret = ubd_add(n, error_out);
e7f6552f 768 if (ret)
7d314e34 769 ubd_devs[n].file = NULL;
dc764e50 770 mutex_unlock(&ubd_lock);
1da177e4 771
e7f6552f 772out:
dc764e50 773 return ret;
e7f6552f
PBG
774
775err_free:
776 kfree(str);
777 goto out;
1da177e4
LT
778}
779
780static int ubd_get_config(char *name, char *str, int size, char **error_out)
781{
7d314e34 782 struct ubd *ubd_dev;
1da177e4
LT
783 int n, len = 0;
784
785 n = parse_unit(&name);
786 if((n >= MAX_DEV) || (n < 0)){
787 *error_out = "ubd_get_config : device number out of range";
dc764e50 788 return -1;
1da177e4
LT
789 }
790
7d314e34 791 ubd_dev = &ubd_devs[n];
d7fb2c38 792 mutex_lock(&ubd_lock);
1da177e4 793
7d314e34 794 if(ubd_dev->file == NULL){
1da177e4
LT
795 CONFIG_CHUNK(str, size, len, "", 1);
796 goto out;
797 }
798
7d314e34 799 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 800
7d314e34 801 if(ubd_dev->cow.file != NULL){
1da177e4 802 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 803 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
804 }
805 else CONFIG_CHUNK(str, size, len, "", 1);
806
807 out:
d7fb2c38 808 mutex_unlock(&ubd_lock);
dc764e50 809 return len;
1da177e4
LT
810}
811
29d56cfe
JD
812static int ubd_id(char **str, int *start_out, int *end_out)
813{
dc764e50 814 int n;
29d56cfe
JD
815
816 n = parse_unit(str);
dc764e50
JD
817 *start_out = 0;
818 *end_out = MAX_DEV - 1;
819 return n;
29d56cfe
JD
820}
821
f28169d2 822static int ubd_remove(int n, char **error_out)
1da177e4 823{
2e3f5251 824 struct gendisk *disk = ubd_gendisk[n];
7d314e34 825 struct ubd *ubd_dev;
29d56cfe 826 int err = -ENODEV;
1da177e4 827
d7fb2c38 828 mutex_lock(&ubd_lock);
1da177e4 829
7d314e34 830 ubd_dev = &ubd_devs[n];
1da177e4 831
7d314e34 832 if(ubd_dev->file == NULL)
29d56cfe 833 goto out;
1da177e4 834
29d56cfe
JD
835 /* you cannot remove a open disk */
836 err = -EBUSY;
7d314e34 837 if(ubd_dev->count > 0)
1da177e4
LT
838 goto out;
839
dc764e50 840 ubd_gendisk[n] = NULL;
b47d2deb
JD
841 if(disk != NULL){
842 del_gendisk(disk);
843 put_disk(disk);
844 }
1da177e4
LT
845
846 if(fake_gendisk[n] != NULL){
847 del_gendisk(fake_gendisk[n]);
848 put_disk(fake_gendisk[n]);
849 fake_gendisk[n] = NULL;
850 }
851
1da177e4 852 err = 0;
2e3f5251 853 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 854out:
d7fb2c38 855 mutex_unlock(&ubd_lock);
29d56cfe 856 return err;
1da177e4
LT
857}
858
f28169d2 859/* All these are called by mconsole in process context and without
b8831a1d 860 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 861 */
1da177e4 862static struct mc_device ubd_mc = {
84f48d4f 863 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
864 .name = "ubd",
865 .config = ubd_config,
dc764e50 866 .get_config = ubd_get_config,
29d56cfe 867 .id = ubd_id,
1da177e4
LT
868 .remove = ubd_remove,
869};
870
d8d7c28e 871static int __init ubd_mc_init(void)
1da177e4
LT
872{
873 mconsole_register_dev(&ubd_mc);
874 return 0;
875}
876
877__initcall(ubd_mc_init);
878
d8d7c28e
PBG
879static int __init ubd0_init(void)
880{
881 struct ubd *ubd_dev = &ubd_devs[0];
882
b8831a1d 883 mutex_lock(&ubd_lock);
d8d7c28e
PBG
884 if(ubd_dev->file == NULL)
885 ubd_dev->file = "root_fs";
b8831a1d
JD
886 mutex_unlock(&ubd_lock);
887
dc764e50 888 return 0;
d8d7c28e
PBG
889}
890
891__initcall(ubd0_init);
892
b8831a1d 893/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
894static struct platform_driver ubd_driver = {
895 .driver = {
896 .name = DRIVER_NAME,
897 },
1da177e4
LT
898};
899
d8d7c28e 900static int __init ubd_init(void)
1da177e4 901{
f28169d2
JD
902 char *error;
903 int i, err;
1da177e4 904
1da177e4
LT
905 if (register_blkdev(MAJOR_NR, "ubd"))
906 return -1;
907
1da177e4
LT
908 if (fake_major != MAJOR_NR) {
909 char name[sizeof("ubd_nnn\0")];
910
911 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
912 if (register_blkdev(fake_major, "ubd"))
913 return -1;
914 }
3ae5eaec 915 platform_driver_register(&ubd_driver);
dc764e50 916 mutex_lock(&ubd_lock);
f28169d2
JD
917 for (i = 0; i < MAX_DEV; i++){
918 err = ubd_add(i, &error);
919 if(err)
920 printk(KERN_ERR "Failed to initialize ubd device %d :"
921 "%s\n", i, error);
922 }
dc764e50 923 mutex_unlock(&ubd_lock);
1da177e4
LT
924 return 0;
925}
926
927late_initcall(ubd_init);
928
d8d7c28e 929static int __init ubd_driver_init(void){
91acb21f
JD
930 unsigned long stack;
931 int err;
932
933 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
934 if(global_openflags.s){
935 printk(KERN_INFO "ubd: Synchronous mode\n");
936 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
937 * enough. So use anyway the io thread. */
938 }
939 stack = alloc_stack(0, 0);
6c29256c 940 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
941 &thread_fd);
942 if(io_pid < 0){
6c29256c 943 printk(KERN_ERR
91acb21f
JD
944 "ubd : Failed to start I/O thread (errno = %d) - "
945 "falling back to synchronous I/O\n", -io_pid);
946 io_pid = -1;
dc764e50 947 return 0;
91acb21f 948 }
6c29256c 949 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
7d314e34 950 IRQF_DISABLED, "ubd", ubd_devs);
91acb21f
JD
951 if(err != 0)
952 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 953 return 0;
91acb21f
JD
954}
955
956device_initcall(ubd_driver_init);
957
1da177e4
LT
958static int ubd_open(struct inode *inode, struct file *filp)
959{
960 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 961 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
962 int err = 0;
963
7d314e34
PBG
964 if(ubd_dev->count == 0){
965 err = ubd_open_dev(ubd_dev);
1da177e4
LT
966 if(err){
967 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 968 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
969 goto out;
970 }
971 }
7d314e34
PBG
972 ubd_dev->count++;
973 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
974
975 /* This should no more be needed. And it didn't work anyway to exclude
976 * read-write remounting of filesystems.*/
7d314e34 977 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 978 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 979 err = -EROFS;
2c49be99 980 }*/
1da177e4 981 out:
dc764e50 982 return err;
1da177e4
LT
983}
984
985static int ubd_release(struct inode * inode, struct file * file)
986{
987 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 988 struct ubd *ubd_dev = disk->private_data;
1da177e4 989
7d314e34 990 if(--ubd_dev->count == 0)
5f75a4f8 991 ubd_close_dev(ubd_dev);
dc764e50 992 return 0;
1da177e4
LT
993}
994
91acb21f
JD
995static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
996 __u64 *cow_offset, unsigned long *bitmap,
997 __u64 bitmap_offset, unsigned long *bitmap_words,
998 __u64 bitmap_len)
1da177e4 999{
91acb21f
JD
1000 __u64 sector = io_offset >> 9;
1001 int i, update_bitmap = 0;
1002
1003 for(i = 0; i < length >> 9; i++){
1004 if(cow_mask != NULL)
1005 ubd_set_bit(i, (unsigned char *) cow_mask);
1006 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1007 continue;
1da177e4 1008
91acb21f
JD
1009 update_bitmap = 1;
1010 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1011 }
1012
1013 if(!update_bitmap)
1014 return;
1da177e4 1015
91acb21f 1016 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1017
91acb21f
JD
1018 /* This takes care of the case where we're exactly at the end of the
1019 * device, and *cow_offset + 1 is off the end. So, just back it up
1020 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1021 * for the original diagnosis.
1022 */
1023 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
1024 sizeof(unsigned long) - 1))
1025 (*cow_offset)--;
1026
1027 bitmap_words[0] = bitmap[*cow_offset];
1028 bitmap_words[1] = bitmap[*cow_offset + 1];
1029
1030 *cow_offset *= sizeof(unsigned long);
1031 *cow_offset += bitmap_offset;
1032}
1033
1034static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1035 __u64 bitmap_offset, __u64 bitmap_len)
1036{
1037 __u64 sector = req->offset >> 9;
1038 int i;
1039
1040 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1041 panic("Operation too long");
1042
1043 if(req->op == UBD_READ) {
1044 for(i = 0; i < req->length >> 9; i++){
1045 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1046 ubd_set_bit(i, (unsigned char *)
91acb21f 1047 &req->sector_mask);
dc764e50 1048 }
91acb21f
JD
1049 }
1050 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1051 &req->cow_offset, bitmap, bitmap_offset,
1052 req->bitmap_words, bitmap_len);
1da177e4
LT
1053}
1054
62f96cb0 1055/* Called with dev->lock held */
a0044bdf
JD
1056static void prepare_request(struct request *req, struct io_thread_req *io_req,
1057 unsigned long long offset, int page_offset,
1058 int len, struct page *page)
1da177e4
LT
1059{
1060 struct gendisk *disk = req->rq_disk;
7d314e34 1061 struct ubd *ubd_dev = disk->private_data;
91acb21f 1062
62f96cb0 1063 io_req->req = req;
a0044bdf
JD
1064 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1065 ubd_dev->fd;
7d314e34 1066 io_req->fds[1] = ubd_dev->fd;
91acb21f 1067 io_req->cow_offset = -1;
1da177e4
LT
1068 io_req->offset = offset;
1069 io_req->length = len;
1070 io_req->error = 0;
91acb21f
JD
1071 io_req->sector_mask = 0;
1072
1073 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1074 io_req->offsets[0] = 0;
7d314e34 1075 io_req->offsets[1] = ubd_dev->cow.data_offset;
a0044bdf 1076 io_req->buffer = page_address(page) + page_offset;
1da177e4
LT
1077 io_req->sectorsize = 1 << 9;
1078
7d314e34 1079 if(ubd_dev->cow.file != NULL)
a0044bdf
JD
1080 cowify_req(io_req, ubd_dev->cow.bitmap,
1081 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
91acb21f 1082
1da177e4
LT
1083}
1084
62f96cb0 1085/* Called with dev->lock held */
165125e1 1086static void do_ubd_request(struct request_queue *q)
1da177e4 1087{
2adcec21 1088 struct io_thread_req *io_req;
1da177e4 1089 struct request *req;
0a6d3a2a 1090 int n, last_sectors;
a0044bdf
JD
1091
1092 while(1){
2a9529a0 1093 struct ubd *dev = q->queuedata;
a0044bdf
JD
1094 if(dev->end_sg == 0){
1095 struct request *req = elv_next_request(q);
1096 if(req == NULL)
1097 return;
1098
1099 dev->request = req;
1100 blkdev_dequeue_request(req);
1101 dev->start_sg = 0;
1102 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1103 }
1104
1105 req = dev->request;
0a6d3a2a 1106 last_sectors = 0;
a0044bdf
JD
1107 while(dev->start_sg < dev->end_sg){
1108 struct scatterlist *sg = &dev->sg[dev->start_sg];
1109
0a6d3a2a 1110 req->sector += last_sectors;
2adcec21 1111 io_req = kmalloc(sizeof(struct io_thread_req),
990c5587 1112 GFP_ATOMIC);
2adcec21
JD
1113 if(io_req == NULL){
1114 if(list_empty(&dev->restart))
1115 list_add(&dev->restart, &restart);
1116 return;
1117 }
1118 prepare_request(req, io_req,
a0044bdf 1119 (unsigned long long) req->sector << 9,
45711f1a 1120 sg->offset, sg->length, sg_page(sg));
a0044bdf 1121
0a6d3a2a 1122 last_sectors = sg->length >> 9;
a6ea4cce
JD
1123 n = os_write_file(thread_fd, &io_req,
1124 sizeof(struct io_thread_req *));
2adcec21 1125 if(n != sizeof(struct io_thread_req *)){
a0044bdf
JD
1126 if(n != -EAGAIN)
1127 printk("write to io thread failed, "
1128 "errno = %d\n", -n);
1129 else if(list_empty(&dev->restart))
1130 list_add(&dev->restart, &restart);
1131 return;
1132 }
1133
a0044bdf 1134 dev->start_sg++;
1da177e4 1135 }
a0044bdf
JD
1136 dev->end_sg = 0;
1137 dev->request = NULL;
1da177e4
LT
1138 }
1139}
1140
a885c8c4
CH
1141static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1142{
7d314e34 1143 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1144
1145 geo->heads = 128;
1146 geo->sectors = 32;
7d314e34 1147 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1148 return 0;
1149}
1150
1da177e4
LT
1151static int ubd_ioctl(struct inode * inode, struct file * file,
1152 unsigned int cmd, unsigned long arg)
1153{
7d314e34 1154 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1da177e4
LT
1155 struct hd_driveid ubd_id = {
1156 .cyls = 0,
1157 .heads = 128,
1158 .sectors = 32,
1159 };
1160
1161 switch (cmd) {
1da177e4 1162 struct cdrom_volctrl volume;
1da177e4 1163 case HDIO_GET_IDENTITY:
7d314e34 1164 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1da177e4
LT
1165 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1166 sizeof(ubd_id)))
dc764e50
JD
1167 return -EFAULT;
1168 return 0;
b8831a1d 1169
1da177e4
LT
1170 case CDROMVOLREAD:
1171 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1172 return -EFAULT;
1da177e4
LT
1173 volume.channel0 = 255;
1174 volume.channel1 = 255;
1175 volume.channel2 = 255;
1176 volume.channel3 = 255;
1177 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1178 return -EFAULT;
1179 return 0;
1da177e4 1180 }
dc764e50 1181 return -EINVAL;
1da177e4
LT
1182}
1183
4833aff7 1184static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
1da177e4
LT
1185{
1186 struct uml_stat buf1, buf2;
1187 int err;
1188
4833aff7
PBG
1189 if(from_cmdline == NULL)
1190 return 0;
1191 if(!strcmp(from_cmdline, from_cow))
1192 return 0;
1da177e4
LT
1193
1194 err = os_stat_file(from_cmdline, &buf1);
1195 if(err < 0){
1196 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
4833aff7 1197 return 0;
1da177e4
LT
1198 }
1199 err = os_stat_file(from_cow, &buf2);
1200 if(err < 0){
1201 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
4833aff7 1202 return 1;
1da177e4
LT
1203 }
1204 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
4833aff7 1205 return 0;
1da177e4
LT
1206
1207 printk("Backing file mismatch - \"%s\" requested,\n"
1208 "\"%s\" specified in COW header of \"%s\"\n",
1209 from_cmdline, from_cow, cow);
4833aff7 1210 return 1;
1da177e4
LT
1211}
1212
1213static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1214{
1215 unsigned long modtime;
fe1db50c 1216 unsigned long long actual;
1da177e4
LT
1217 int err;
1218
1219 err = os_file_modtime(file, &modtime);
1220 if(err < 0){
1221 printk("Failed to get modification time of backing file "
1222 "\"%s\", err = %d\n", file, -err);
dc764e50 1223 return err;
1da177e4
LT
1224 }
1225
1226 err = os_file_size(file, &actual);
1227 if(err < 0){
1228 printk("Failed to get size of backing file \"%s\", "
1229 "err = %d\n", file, -err);
dc764e50 1230 return err;
1da177e4
LT
1231 }
1232
dc764e50 1233 if(actual != size){
1da177e4
LT
1234 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1235 * the typecast.*/
1236 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1237 "file\n", (unsigned long long) size, actual);
dc764e50 1238 return -EINVAL;
1da177e4
LT
1239 }
1240 if(modtime != mtime){
1241 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1242 "file\n", mtime, modtime);
dc764e50 1243 return -EINVAL;
1da177e4 1244 }
dc764e50 1245 return 0;
1da177e4
LT
1246}
1247
/*
 * Seek @fd to @offset and read @len bytes into @buf.
 *
 * Returns the negative error from os_seek_file() or os_read_file() on
 * failure.  Any non-negative read result is reported as 0; the number of
 * bytes actually read is not checked against @len.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1262
6c29256c 1263int open_ubd_file(char *file, struct openflags *openflags, int shared,
1da177e4
LT
1264 char **backing_file_out, int *bitmap_offset_out,
1265 unsigned long *bitmap_len_out, int *data_offset_out,
1266 int *create_cow_out)
1267{
1268 time_t mtime;
1269 unsigned long long size;
1270 __u32 version, align;
1271 char *backing_file;
4833aff7 1272 int fd, err, sectorsize, asked_switch, mode = 0644;
1da177e4
LT
1273
1274 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1275 if (fd < 0) {
1276 if ((fd == -ENOENT) && (create_cow_out != NULL))
1da177e4 1277 *create_cow_out = 1;
dc764e50
JD
1278 if (!openflags->w ||
1279 ((fd != -EROFS) && (fd != -EACCES)))
a374a48f 1280 return fd;
1da177e4
LT
1281 openflags->w = 0;
1282 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1283 if (fd < 0)
1284 return fd;
dc764e50 1285 }
1da177e4 1286
6c29256c
JD
1287 if(shared)
1288 printk("Not locking \"%s\" on the host\n", file);
1289 else {
1290 err = os_lock_file(fd, openflags->w);
1291 if(err < 0){
1292 printk("Failed to lock '%s', err = %d\n", file, -err);
1293 goto out_close;
1294 }
1da177e4
LT
1295 }
1296
d6e05edc 1297 /* Successful return case! */
a374a48f 1298 if(backing_file_out == NULL)
dc764e50 1299 return fd;
1da177e4
LT
1300
1301 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1302 &size, &sectorsize, &align, bitmap_offset_out);
1303 if(err && (*backing_file_out != NULL)){
1304 printk("Failed to read COW header from COW file \"%s\", "
1305 "errno = %d\n", file, -err);
1306 goto out_close;
1307 }
a374a48f 1308 if(err)
dc764e50 1309 return fd;
1da177e4 1310
4833aff7 1311 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
1da177e4 1312
4833aff7
PBG
1313 /* Allow switching only if no mismatch. */
1314 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
1da177e4
LT
1315 printk("Switching backing file to '%s'\n", *backing_file_out);
1316 err = write_cow_header(file, fd, *backing_file_out,
1317 sectorsize, align, &size);
a374a48f 1318 if (err) {
1da177e4 1319 printk("Switch failed, errno = %d\n", -err);
4833aff7 1320 goto out_close;
1da177e4 1321 }
a374a48f 1322 } else {
1da177e4
LT
1323 *backing_file_out = backing_file;
1324 err = backing_file_mismatch(*backing_file_out, size, mtime);
a374a48f
PBG
1325 if (err)
1326 goto out_close;
1da177e4
LT
1327 }
1328
1329 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1330 bitmap_len_out, data_offset_out);
1331
dc764e50 1332 return fd;
1da177e4
LT
1333 out_close:
1334 os_close_file(fd);
a374a48f 1335 return err;
1da177e4
LT
1336}
1337
1338int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1339 int sectorsize, int alignment, int *bitmap_offset_out,
1340 unsigned long *bitmap_len_out, int *data_offset_out)
1341{
1342 int err, fd;
1343
1344 flags.c = 1;
6c29256c 1345 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
1da177e4
LT
1346 if(fd < 0){
1347 err = fd;
1348 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1349 -err);
1350 goto out;
1351 }
1352
1353 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1354 bitmap_offset_out, bitmap_len_out,
1355 data_offset_out);
1356 if(!err)
dc764e50 1357 return fd;
1da177e4
LT
1358 os_close_file(fd);
1359 out:
dc764e50 1360 return err;
1da177e4
LT
1361}
1362
91acb21f 1363static int update_bitmap(struct io_thread_req *req)
1da177e4 1364{
91acb21f 1365 int n;
1da177e4 1366
91acb21f 1367 if(req->cow_offset == -1)
dc764e50 1368 return 0;
1da177e4 1369
91acb21f
JD
1370 n = os_seek_file(req->fds[1], req->cow_offset);
1371 if(n < 0){
1372 printk("do_io - bitmap lseek failed : err = %d\n", -n);
dc764e50 1373 return 1;
91acb21f 1374 }
1da177e4 1375
a6ea4cce
JD
1376 n = os_write_file(req->fds[1], &req->bitmap_words,
1377 sizeof(req->bitmap_words));
91acb21f
JD
1378 if(n != sizeof(req->bitmap_words)){
1379 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1380 req->fds[1]);
dc764e50 1381 return 1;
91acb21f 1382 }
1da177e4 1383
dc764e50 1384 return 0;
91acb21f 1385}
1da177e4 1386
91acb21f
JD
1387void do_io(struct io_thread_req *req)
1388{
1389 char *buf;
1390 unsigned long len;
1391 int n, nsectors, start, end, bit;
1392 int err;
1393 __u64 off;
1394
1395 nsectors = req->length / req->sectorsize;
1396 start = 0;
1397 do {
1398 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1399 end = start;
1400 while((end < nsectors) &&
1401 (ubd_test_bit(end, (unsigned char *)
1402 &req->sector_mask) == bit))
1403 end++;
1404
1405 off = req->offset + req->offsets[bit] +
1406 start * req->sectorsize;
1407 len = (end - start) * req->sectorsize;
1408 buf = &req->buffer[start * req->sectorsize];
1409
1410 err = os_seek_file(req->fds[bit], off);
1411 if(err < 0){
1412 printk("do_io - lseek failed : err = %d\n", -err);
1413 req->error = 1;
1414 return;
1415 }
1416 if(req->op == UBD_READ){
1417 n = 0;
1418 do {
1419 buf = &buf[n];
1420 len -= n;
a6ea4cce 1421 n = os_read_file(req->fds[bit], buf, len);
91acb21f
JD
1422 if (n < 0) {
1423 printk("do_io - read failed, err = %d "
1424 "fd = %d\n", -n, req->fds[bit]);
1425 req->error = 1;
1426 return;
1427 }
1428 } while((n < len) && (n != 0));
1429 if (n < len) memset(&buf[n], 0, len - n);
1430 } else {
a6ea4cce 1431 n = os_write_file(req->fds[bit], buf, len);
91acb21f
JD
1432 if(n != len){
1433 printk("do_io - write failed err = %d "
1434 "fd = %d\n", -n, req->fds[bit]);
1435 req->error = 1;
1436 return;
1437 }
1438 }
1439
1440 start = end;
1441 } while(start < nsectors);
1da177e4 1442
91acb21f 1443 req->error = update_bitmap(req);
1da177e4 1444}
91acb21f
JD
1445
/*
 * Set by start_io_thread.  That function is only called from ubd_init,
 * an initcall, which serializes the update.
 */
int kernel_fd = -1;

/*
 * Modified by the io thread only.  XXX: currently unused.
 */
static int io_count;
91acb21f
JD
1453
1454int io_thread(void *arg)
1455{
2adcec21 1456 struct io_thread_req *req;
91acb21f
JD
1457 int n;
1458
1459 ignore_sigwinch_sig();
1460 while(1){
a6ea4cce 1461 n = os_read_file(kernel_fd, &req,
2adcec21
JD
1462 sizeof(struct io_thread_req *));
1463 if(n != sizeof(struct io_thread_req *)){
91acb21f
JD
1464 if(n < 0)
1465 printk("io_thread - read failed, fd = %d, "
1466 "err = %d\n", kernel_fd, -n);
1467 else {
1468 printk("io_thread - short read, fd = %d, "
1469 "length = %d\n", kernel_fd, n);
1470 }
1471 continue;
1472 }
1473 io_count++;
2adcec21 1474 do_io(req);
a6ea4cce 1475 n = os_write_file(kernel_fd, &req,
2adcec21
JD
1476 sizeof(struct io_thread_req *));
1477 if(n != sizeof(struct io_thread_req *))
91acb21f
JD
1478 printk("io_thread - write failed, fd = %d, err = %d\n",
1479 kernel_fd, -n);
1480 }
91acb21f 1481
1b57e9c2
JD
1482 return 0;
1483}
This page took 0.474764 seconds and 5 git commands to generate.