#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>
#include <linux/lightnvm.h>
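
/* Per-command, per-queue and per-device bookkeeping used throughout. */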
struct nullb_cmd {
	struct list_head list;
	struct llist_node ll_list;
	struct call_single_data csd;
	struct request *rq;
	struct bio *bio;
	unsigned int tag;
	struct nullb_queue *nq;
};
struct nullb_queue {
	unsigned long *tag_map;
	wait_queue_head_t wait;
	unsigned int queue_depth;

	struct nullb_cmd *cmds;
};
struct nullb {
	struct list_head list;
	unsigned int index;
	struct request_queue *q;
	struct gendisk *disk;
	struct blk_mq_tag_set tag_set;
	unsigned int queue_depth;
	spinlock_t lock;

	struct nullb_queue *queues;
	unsigned int nr_queues;
	char disk_name[DISK_NAME_LEN];
};
static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;
struct completion_queue {
	struct llist_head list;
	struct hrtimer timer;
};

/*
 * These are per-cpu for now; they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,
};

enum {
	NULL_Q_BIO		= 0,
	NULL_Q_RQ		= 1,
	NULL_Q_MQ		= 2,
};

static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");

static int queue_mode = NULL_Q_MQ;
static int null_param_store_val(const char *str, int *val, int min, int max)
{
	int ret, new_val;

	ret = kstrtoint(str, 10, &new_val);
	if (ret)
		return -EINVAL;

	if (new_val < min || new_val > max)
		return -EINVAL;

	*val = new_val;
	return 0;
}
static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}

static const struct kernel_param_ops null_queue_mode_param_ops = {
	.set	= null_set_queue_mode,
	.get	= param_get_int,
};

device_param_cb(queue_mode, &null_queue_mode_param_ops, &queue_mode, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static bool use_lightnvm;
module_param(use_lightnvm, bool, S_IRUGO);
MODULE_PARM_DESC(use_lightnvm, "Register as a LightNVM device");
static int irqmode = NULL_IRQ_SOFTIRQ;

static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
	return null_param_store_val(str, &irqmode, NULL_IRQ_NONE,
					NULL_IRQ_TIMER);
}

static const struct kernel_param_ops null_irqmode_param_ops = {
	.set	= null_set_irqmode,
	.get	= param_get_int,
};

device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
	clear_bit_unlock(tag, nq->tag_map);

	if (waitqueue_active(&nq->wait))
		wake_up(&nq->wait);
}
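
/*
 * Tag allocation is lock-free: scan for a clear bit, then claim it with an
 * atomic test-and-set; retry if another CPU raced us, give up when full.
 */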
static unsigned int get_tag(struct nullb_queue *nq)
{
	unsigned int tag;

	do {
		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
		if (tag >= nq->queue_depth)
			return -1U;
	} while (test_and_set_bit_lock(tag, nq->tag_map));

	return tag;
}
static void free_cmd(struct nullb_cmd *cmd)
{
	put_tag(cmd->nq, cmd->tag);
}
static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	unsigned int tag;

	tag = get_tag(nq);
	if (tag != -1U) {
		cmd = &nq->cmds[tag];
		cmd->tag = tag;
		cmd->nq = nq;
		return cmd;
	}

	return NULL;
}
static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
	struct nullb_cmd *cmd;
	DEFINE_WAIT(wait);

	cmd = __alloc_cmd(nq);
	if (cmd || !can_wait)
		return cmd;

	do {
		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
		cmd = __alloc_cmd(nq);
		if (cmd)
			break;

		io_schedule();
	} while (1);

	finish_wait(&nq->wait, &wait);
	return cmd;
}
static void end_cmd(struct nullb_cmd *cmd)
{
	switch (queue_mode) {
	case NULL_Q_MQ:
		blk_mq_end_request(cmd->rq, 0);
		return;
	case NULL_Q_RQ:
		INIT_LIST_HEAD(&cmd->rq->queuelist);
		blk_end_request_all(cmd->rq, 0);
		break;
	case NULL_Q_BIO:
		bio_endio(cmd->bio);
		break;
	}

	free_cmd(cmd);
}
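
/*
 * Timer-mode completion: commands are parked on a per-cpu llist and
 * completed in batches from the hrtimer callback below.
 */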
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
	struct completion_queue *cq;
	struct llist_node *entry;
	struct nullb_cmd *cmd;

	cq = &per_cpu(completion_queues, smp_processor_id());

	while ((entry = llist_del_all(&cq->list)) != NULL) {
		entry = llist_reverse_order(entry);
		do {
			struct request_queue *q = NULL;

			cmd = container_of(entry, struct nullb_cmd, ll_list);
			entry = entry->next;
			if (cmd->rq)
				q = cmd->rq->q;
			end_cmd(cmd);

			if (q && !q->mq_ops && blk_queue_stopped(q)) {
				spin_lock(q->queue_lock);
				if (blk_queue_stopped(q))
					blk_start_queue(q);
				spin_unlock(q->queue_lock);
			}
		} while (entry);
	}

	return HRTIMER_NORESTART;
}
static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

	cmd->ll_list.next = NULL;
	if (llist_add(&cmd->ll_list, &cq->list)) {
		ktime_t kt = ktime_set(0, completion_nsec);

		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
	}

	put_cpu();
}
static void null_softirq_done_fn(struct request *rq)
{
	if (queue_mode == NULL_Q_MQ)
		end_cmd(blk_mq_rq_to_pdu(rq));
	else
		end_cmd(rq->special);
}
static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
	/* Complete IO by inline, softirq or timer */
	switch (irqmode) {
	case NULL_IRQ_SOFTIRQ:
		switch (queue_mode) {
		case NULL_Q_MQ:
			blk_mq_complete_request(cmd->rq, cmd->rq->errors);
			break;
		case NULL_Q_RQ:
			blk_complete_request(cmd->rq);
			break;
		case NULL_Q_BIO:
			/*
			 * XXX: no proper submitting cpu information available.
			 */
			end_cmd(cmd);
			break;
		}
		break;
	case NULL_IRQ_NONE:
		end_cmd(cmd);
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}
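
/*
 * Spread submitting CPUs evenly over the queues: divide the CPU id by
 * ceil(nr_cpu_ids / nr_queues).
 */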
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
	int index = 0;

	if (nullb->nr_queues != 1)
		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

	return &nullb->queues[index];
}
static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 1);
	cmd->bio = bio;

	null_handle_cmd(cmd);
	return BLK_QC_T_NONE;
}
static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 0);
	if (cmd) {
		cmd->rq = req;
		req->special = cmd;
		return BLKPREP_OK;
	}
	blk_stop_queue(q);

	return BLKPREP_DEFER;
}
static void null_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		struct nullb_cmd *cmd = rq->special;

		spin_unlock_irq(q->queue_lock);
		null_handle_cmd(cmd);
		spin_lock_irq(q->queue_lock);
	}
}
static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

	cmd->rq = bd->rq;
	cmd->nq = hctx->driver_data;

	blk_mq_start_request(bd->rq);

	null_handle_cmd(cmd);
	return BLK_MQ_RQ_QUEUE_OK;
}
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	BUG_ON(!nullb);
	BUG_ON(!nq);

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
}
static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int index)
{
	struct nullb *nullb = data;
	struct nullb_queue *nq = &nullb->queues[index];

	hctx->driver_data = nq;
	null_init_queue(nullb, nq);
	nullb->nr_queues++;

	return 0;
}
static struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_hctx	= null_init_hctx,
	.complete	= null_softirq_done_fn,
};
static void cleanup_queue(struct nullb_queue *nq)
{
	kfree(nq->tag_map);
	kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
	int i;

	for (i = 0; i < nullb->nr_queues; i++)
		cleanup_queue(&nullb->queues[i]);

	kfree(nullb->queues);
}
static void null_del_dev(struct nullb *nullb)
{
	list_del_init(&nullb->list);

	if (use_lightnvm)
		nvm_unregister(nullb->disk_name);
	else
		del_gendisk(nullb->disk);
	blk_cleanup_queue(nullb->q);
	if (queue_mode == NULL_Q_MQ)
		blk_mq_free_tag_set(&nullb->tag_set);
	if (!use_lightnvm)
		put_disk(nullb->disk);
	cleanup_queues(nullb);
	kfree(nullb);
}
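
/*
 * LightNVM (open-channel SSD) emulation, compiled in only with CONFIG_NVM;
 * the #else branch below provides an empty ops table otherwise.
 */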
#ifdef CONFIG_NVM

static void null_lnvm_end_io(struct request *rq, int error)
{
	struct nvm_rq *rqd = rq->end_io_data;
	struct nvm_dev *dev = rqd->dev;

	dev->mt->end_io(rqd, error);

	blk_put_request(rq);
}
static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
{
	struct request *rq;
	struct bio *bio = rqd->bio;

	rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
	if (IS_ERR(rq))
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_DRV_PRIV;
	rq->__sector = bio->bi_iter.bi_sector;
	rq->ioprio = bio_prio(bio);

	if (bio_has_data(bio))
		rq->nr_phys_segments = bio_phys_segments(q, bio);

	rq->__data_len = bio->bi_iter.bi_size;
	rq->bio = rq->biotail = bio;

	rq->end_io_data = rqd;

	blk_execute_rq_nowait(q, NULL, rq, 0, null_lnvm_end_io);

	return 0;
}
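
/*
 * Report a synthetic device identity; the ppaf fields describe how block,
 * page, sector, plane, LUN and channel indices pack into a 64-bit address.
 */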
static int null_lnvm_id(struct request_queue *q, struct nvm_id *id)
{
	sector_t size = gb * 1024 * 1024 * 1024ULL;
	sector_t blksize;
	struct nvm_id_group *grp;

	id->ver_id = 0x1;
	id->vmnt = 0;
	id->cgrps = 1;
	id->cap = 0x3;
	id->dom = 0x1;

	id->ppaf.blk_offset = 0;
	id->ppaf.blk_len = 16;
	id->ppaf.pg_offset = 16;
	id->ppaf.pg_len = 16;
	id->ppaf.sect_offset = 32;
	id->ppaf.sect_len = 8;
	id->ppaf.pln_offset = 40;
	id->ppaf.pln_len = 8;
	id->ppaf.lun_offset = 48;
	id->ppaf.lun_len = 8;
	id->ppaf.ch_offset = 56;
	id->ppaf.ch_len = 8;

	do_div(size, bs); /* convert size to pages */
	do_div(size, 256); /* convert size to pages per block */
	grp = &id->groups[0];
	grp->mtype = 0;
	grp->fmtype = 0;
	grp->num_ch = 1;
	grp->num_pg = 256;
	blksize = size;
	do_div(size, (1 << 16));
	grp->num_lun = size + 1;
	do_div(blksize, grp->num_lun);
	grp->num_blk = blksize;
	grp->num_pln = 1;

	grp->fpg_sz = bs;
	grp->csecs = bs;
	grp->trdt = 25000;
	grp->trdm = 25000;
	grp->tprt = 500000;
	grp->tprm = 500000;
	grp->tbet = 1500000;
	grp->tbem = 1500000;
	grp->mpos = 0x010101; /* single plane rwe */
	grp->cpar = hw_queue_depth;

	return 0;
}
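
/*
 * A worked example of the geometry computed above, assuming the defaults
 * gb=250 and bs=512: size becomes 250GiB / 512 = 524,288,000 pages,
 * / 256 = 2,048,000 blocks; then 2,048,000 / 65536 = 31, so num_lun = 32
 * and num_blk = 2,048,000 / 32 = 64,000 blocks per LUN.
 */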
static void *null_lnvm_create_dma_pool(struct request_queue *q, char *name)
{
	mempool_t *virtmem_pool;

	virtmem_pool = mempool_create_slab_pool(64, ppa_cache);
	if (!virtmem_pool) {
		pr_err("null_blk: Unable to create virtual memory pool\n");
		return NULL;
	}

	return virtmem_pool;
}
static void null_lnvm_destroy_dma_pool(void *pool)
{
	mempool_destroy(pool);
}

static void *null_lnvm_dev_dma_alloc(struct request_queue *q, void *pool,
				gfp_t mem_flags, dma_addr_t *dma_handler)
{
	return mempool_alloc(pool, mem_flags);
}

static void null_lnvm_dev_dma_free(void *pool, void *entry,
							dma_addr_t dma_handler)
{
	mempool_free(entry, pool);
}
static struct nvm_dev_ops null_lnvm_dev_ops = {
	.identity		= null_lnvm_id,
	.submit_io		= null_lnvm_submit_io,

	.create_dma_pool	= null_lnvm_create_dma_pool,
	.destroy_dma_pool	= null_lnvm_destroy_dma_pool,
	.dev_dma_alloc		= null_lnvm_dev_dma_alloc,
	.dev_dma_free		= null_lnvm_dev_dma_free,

	/* Simulate nvme protocol restriction */
	.max_phys_sect		= 64,
};
#else
static struct nvm_dev_ops null_lnvm_dev_ops;
#endif /* CONFIG_NVM */
static int null_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
	.owner		= THIS_MODULE,
	.open		= null_open,
	.release	= null_release,
};
static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		INIT_LIST_HEAD(&cmd->list);
		cmd->ll_list.next = NULL;
		cmd->tag = -1U;
	}

	return 0;
}
static int setup_queues(struct nullb *nullb)
{
	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
								GFP_KERNEL);
	if (!nullb->queues)
		return -ENOMEM;

	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	return 0;
}
static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			return ret;
		nullb->nr_queues++;
	}
	return 0;
}
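
/*
 * Create one null block device: allocate the nullb, set up its queues for
 * the chosen queue_mode, then register either a LightNVM device or a
 * gendisk.
 */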
static int null_add_dev(void)
{
	struct gendisk *disk;
	struct nullb *nullb;
	sector_t size;
	int rv;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
	if (!nullb) {
		rv = -ENOMEM;
		goto out;
	}

	spin_lock_init(&nullb->lock);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
		submit_queues = nr_online_nodes;

	rv = setup_queues(nullb);
	if (rv)
		goto out_free_nullb;

	if (queue_mode == NULL_Q_MQ) {
		nullb->tag_set.ops = &null_mq_ops;
		nullb->tag_set.nr_hw_queues = submit_queues;
		nullb->tag_set.queue_depth = hw_queue_depth;
		nullb->tag_set.numa_node = home_node;
		nullb->tag_set.cmd_size = sizeof(struct nullb_cmd);
		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
		nullb->tag_set.driver_data = nullb;

		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
		if (rv)
			goto out_cleanup_queues;

		nullb->q = blk_mq_init_queue(&nullb->tag_set);
		if (IS_ERR(nullb->q)) {
			rv = -ENOMEM;
			goto out_cleanup_tags;
		}
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_make_request(nullb->q, null_queue_bio);
		rv = init_driver_queues(nullb);
		if (rv)
			goto out_cleanup_blk_queue;
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		if (!nullb->q) {
			rv = -ENOMEM;
			goto out_cleanup_queues;
		}
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
		rv = init_driver_queues(nullb);
		if (rv)
			goto out_cleanup_blk_queue;
	}

	nullb->q->queuedata = nullb;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	nullb->index = nullb_indexes++;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, bs);
	blk_queue_physical_block_size(nullb->q, bs);

	sprintf(nullb->disk_name, "nullb%d", nullb->index);

	if (use_lightnvm) {
		rv = nvm_register(nullb->q, nullb->disk_name,
							&null_lnvm_dev_ops);
		if (rv)
			goto out_cleanup_blk_queue;
		goto done;
	}

	disk = nullb->disk = alloc_disk_node(1, home_node);
	if (!disk) {
		rv = -ENOMEM;
		goto out_cleanup_lightnvm;
	}
	size = gb * 1024 * 1024 * 1024ULL;
	set_capacity(disk, size >> 9);

	disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->fops		= &null_fops;
	disk->private_data	= nullb;
	disk->queue		= nullb->q;
	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);

	add_disk(disk);

done:
	return 0;

out_cleanup_lightnvm:
	if (use_lightnvm)
		nvm_unregister(nullb->disk_name);
out_cleanup_blk_queue:
	blk_cleanup_queue(nullb->q);
out_cleanup_tags:
	if (queue_mode == NULL_Q_MQ)
		blk_mq_free_tag_set(&nullb->tag_set);
out_cleanup_queues:
	cleanup_queues(nullb);
out_free_nullb:
	kfree(nullb);
out:
	return rv;
}
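
/*
 * Module init: sanitize parameters, prepare the per-cpu completion queues,
 * register the block major and instantiate nr_devices devices.
 */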
static int __init null_init(void)
{
	unsigned int i;

	if (bs > PAGE_SIZE) {
		pr_warn("null_blk: invalid block size\n");
		pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
		bs = PAGE_SIZE;
	}

	if (use_lightnvm && bs != 4096) {
		pr_warn("null_blk: LightNVM only supports 4k block size\n");
		pr_warn("null_blk: defaults block size to 4k\n");
		bs = 4096;
	}

	if (use_lightnvm && queue_mode != NULL_Q_MQ) {
		pr_warn("null_blk: LightNVM only supported for blk-mq\n");
		pr_warn("null_blk: defaults queue mode to blk-mq\n");
		queue_mode = NULL_Q_MQ;
	}

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn("null_blk: submit_queues param is set to %u.",
							nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	/* Initialize a separate list for each CPU for issuing softirqs */
	for_each_possible_cpu(i) {
		struct completion_queue *cq = &per_cpu(completion_queues, i);

		init_llist_head(&cq->list);

		if (irqmode != NULL_IRQ_TIMER)
			continue;

		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cq->timer.function = null_cmd_timer_expired;
	}

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0)
		return null_major;

	if (use_lightnvm) {
		ppa_cache = kmem_cache_create("ppa_cache", 64 * sizeof(u64),
								0, 0, NULL);
		if (!ppa_cache) {
			pr_err("null_blk: unable to create ppa cache\n");
			return -ENOMEM;
		}
	}

	for (i = 0; i < nr_devices; i++) {
		if (null_add_dev()) {
			unregister_blkdev(null_major, "nullb");
			goto err_ppa;
		}
	}

	pr_info("null: module loaded\n");
	return 0;
err_ppa:
	kmem_cache_destroy(ppa_cache);
	return -EINVAL;
}
static void __exit null_exit(void)
{
	struct nullb *nullb;

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);

	kmem_cache_destroy(ppa_cache);
}
module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");