Merge remote-tracking branch 'nvdimm/libnvdimm-for-next'
drivers/dax/dax.c
/*
 * Copyright(c) 2016 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mount.h>
#include <linux/pfn_t.h>
#include <linux/hash.h>
#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include "dax.h"

static dev_t dax_devt;
static struct class *dax_class;
static DEFINE_IDA(dax_minor_ida);
static int nr_dax = CONFIG_NR_DEV_DAX;
module_param(nr_dax, int, S_IRUGO);
static struct vfsmount *dax_mnt;
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;
MODULE_PARM_DESC(nr_dax, "max number of device-dax instances");

/**
 * struct dax_region - mapping infrastructure for dax devices
 * @id: kernel-wide unique region id for a memory range
 * @ida: instance id allocator for child dax devices
 * @base: linear address corresponding to @res
 * @kref: to pin while other agents have a need to do lookups
 * @lock: synchronize changes / consistent-access to the resource tree (@res)
 * @dev: parent device backing this region
 * @seed: next device for dynamic allocation / configuration
 * @align: allocation and mapping alignment for child dax devices
 * @res: physical address range of the region
 * @child_count: number of registered dax device instances
 * @pfn_flags: identify whether the pfns are paged back or not
 */
struct dax_region {
        int id;
        struct ida ida;
        void *base;
        struct kref kref;
        struct mutex lock;
        struct device *dev;
        struct device *seed;
        unsigned int align;
        struct resource res;
        atomic_t child_count;
        unsigned long pfn_flags;
};

/**
 * struct dax_dev - subdivision of a dax region
 * @region: parent region
 * @inode: inode backing this device's character device address space
 * @dev: device backing the character device
 * @cdev: core chardev data
 * @alive: !alive + rcu grace period == no new mappings can be established
 * @id: child id in the region
 * @num_resources: number of physical address extents in this device
 * @res: array of physical address ranges
 */
struct dax_dev {
        struct dax_region *region;
        struct inode *inode;
        struct device dev;
        struct cdev cdev;
        bool alive;
        int id;
        int num_resources;
        struct resource **res;
};

#define for_each_dax_region_resource(dax_region, res) \
        for (res = (dax_region)->res.child; res; res = res->sibling)

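/*
 * Available capacity is the region size minus everything already claimed
 * by child device-dax instances (tracked as children of dax_region->res).
 */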
static unsigned long long dax_region_avail_size(
                struct dax_region *dax_region)
{
        unsigned long long size;
        struct resource *res;

        mutex_lock(&dax_region->lock);
        size = resource_size(&dax_region->res);
        for_each_dax_region_resource(dax_region, res)
                size -= resource_size(res);
        mutex_unlock(&dax_region->lock);

        return size;
}

static ssize_t available_size_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct dax_region *dax_region;
        ssize_t rc = -ENXIO;

        device_lock(dev);
        dax_region = dev_get_drvdata(dev);
        if (dax_region)
                rc = sprintf(buf, "%llu\n", dax_region_avail_size(dax_region));
        device_unlock(dev);

        return rc;
}
static DEVICE_ATTR_RO(available_size);

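/*
 * The region's "seed" device is the next, not-yet-configured child that
 * userspace can use to carve a new allocation out of the region.
 */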
static ssize_t seed_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct dax_region *dax_region;
        ssize_t rc = -ENXIO;

        device_lock(dev);
        dax_region = dev_get_drvdata(dev);
        if (dax_region) {
                mutex_lock(&dax_region->lock);
                if (dax_region->seed)
                        rc = sprintf(buf, "%s\n", dev_name(dax_region->seed));
                mutex_unlock(&dax_region->lock);
        }
        device_unlock(dev);

        return rc;
}
static DEVICE_ATTR_RO(seed);

static struct attribute *dax_region_attributes[] = {
        &dev_attr_available_size.attr,
        &dev_attr_seed.attr,
        NULL,
};

static const struct attribute_group dax_region_attribute_group = {
        .name = "dax_region",
        .attrs = dax_region_attributes,
};

static const struct attribute_group *dax_region_attribute_groups[] = {
        &dax_region_attribute_group,
        NULL,
};

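/*
 * Device-dax instances allocate their backing inodes from a private "dax"
 * pseudo filesystem so that each character device gets its own
 * address_space for tracking and invalidating mappings on teardown.
 */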
static struct inode *dax_alloc_inode(struct super_block *sb)
{
        return kmem_cache_alloc(dax_cache, GFP_KERNEL);
}

static void dax_i_callback(struct rcu_head *head)
{
        struct inode *inode = container_of(head, struct inode, i_rcu);

        kmem_cache_free(dax_cache, inode);
}

static void dax_destroy_inode(struct inode *inode)
{
        call_rcu(&inode->i_rcu, dax_i_callback);
}

static const struct super_operations dax_sops = {
        .statfs = simple_statfs,
        .alloc_inode = dax_alloc_inode,
        .destroy_inode = dax_destroy_inode,
        .drop_inode = generic_delete_inode,
};

static struct dentry *dax_mount(struct file_system_type *fs_type,
                int flags, const char *dev_name, void *data)
{
        return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
}

static struct file_system_type dax_type = {
        .name = "dax",
        .mount = dax_mount,
        .kill_sb = kill_anon_super,
};

static int dax_test(struct inode *inode, void *data)
{
        return inode->i_cdev == data;
}

static int dax_set(struct inode *inode, void *data)
{
        inode->i_cdev = data;
        return 0;
}

static struct inode *dax_inode_get(struct cdev *cdev, dev_t devt)
{
        struct inode *inode;

        inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
                        dax_test, dax_set, cdev);

        if (!inode)
                return NULL;

        if (inode->i_state & I_NEW) {
                inode->i_mode = S_IFCHR;
                inode->i_flags = S_DAX;
                inode->i_rdev = devt;
                mapping_set_gfp_mask(&inode->i_data, GFP_USER);
                unlock_new_inode(inode);
        }
        return inode;
}

static void init_once(void *inode)
{
        inode_init_once(inode);
}

static int dax_inode_init(void)
{
        int rc;

        dax_cache = kmem_cache_create("dax_cache", sizeof(struct inode), 0,
                        (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
                         SLAB_MEM_SPREAD|SLAB_ACCOUNT),
                        init_once);
        if (!dax_cache)
                return -ENOMEM;

        rc = register_filesystem(&dax_type);
        if (rc)
                goto err_register_fs;

        dax_mnt = kern_mount(&dax_type);
        if (IS_ERR(dax_mnt)) {
                rc = PTR_ERR(dax_mnt);
                goto err_mount;
        }
        dax_superblock = dax_mnt->mnt_sb;

        return 0;

 err_mount:
        unregister_filesystem(&dax_type);
 err_register_fs:
        kmem_cache_destroy(dax_cache);

        return rc;
}

static void dax_inode_exit(void)
{
        kern_unmount(dax_mnt);
        unregister_filesystem(&dax_type);
        kmem_cache_destroy(dax_cache);
}

static void dax_region_free(struct kref *kref)
{
        struct dax_region *dax_region;

        dax_region = container_of(kref, struct dax_region, kref);
        WARN(atomic_read(&dax_region->child_count),
                        "%s: child count not zero\n",
                        dev_name(dax_region->dev));
        kfree(dax_region);
}

void dax_region_put(struct dax_region *dax_region)
{
        kref_put(&dax_region->kref, dax_region_free);
}
EXPORT_SYMBOL_GPL(dax_region_put);

static void dax_region_unregister(void *region)
{
        struct dax_region *dax_region = region;

        sysfs_remove_groups(&dax_region->dev->kobj,
                        dax_region_attribute_groups);
        dax_region_put(dax_region);
}

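/*
 * A bus driver (dax_pmem, for example) calls alloc_dax_region() once per
 * physical memory range and then carves device-dax instances out of it
 * with devm_create_dax_dev(). The region holds a reference for each child
 * and is unregistered automatically via the devm action set up below.
 */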
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
                struct resource *res, unsigned int align, void *addr,
                unsigned long pfn_flags)
{
        struct dax_region *dax_region;

        if (dev_get_drvdata(parent)) {
                dev_WARN(parent, "dax core found drvdata already in use\n");
                return NULL;
        }

        if (!IS_ALIGNED(res->start, align)
                        || !IS_ALIGNED(resource_size(res), align))
                return NULL;

        dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
        if (!dax_region)
                return NULL;
        dev_set_drvdata(parent, dax_region);
        dax_region->res.name = dev_name(parent);
        dax_region->res.start = res->start;
        dax_region->res.end = res->end;
        dax_region->pfn_flags = pfn_flags;
        mutex_init(&dax_region->lock);
        kref_init(&dax_region->kref);
        dax_region->id = region_id;
        ida_init(&dax_region->ida);
        dax_region->align = align;
        dax_region->dev = parent;
        dax_region->base = addr;
        if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
                kfree(dax_region);
                return NULL;
        }

        kref_get(&dax_region->kref);
        if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
                return NULL;
        return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);

static struct dax_dev *to_dax_dev(struct device *dev)
{
        return container_of(dev, struct dax_dev, dev);
}

static ssize_t size_show(struct device *dev,
                struct device_attribute *attr, char *buf)
{
        struct dax_dev *dax_dev = to_dax_dev(dev);
        unsigned long long size = 0;
        int i;

        for (i = 0; i < dax_dev->num_resources; i++)
                size += resource_size(dax_dev->res[i]);

        return sprintf(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(size);

static struct attribute *dax_device_attributes[] = {
        &dev_attr_size.attr,
        NULL,
};

static const struct attribute_group dax_device_attribute_group = {
        .attrs = dax_device_attributes,
};

static const struct attribute_group *dax_attribute_groups[] = {
        &dax_device_attribute_group,
        NULL,
};

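/*
 * Validate that a vma is suitable for device-dax before establishing
 * mappings: the device must still be alive, the mapping must be shared,
 * the vma bounds must honor the region alignment, and PFN_DEV-only (no
 * struct page) regions must not be copied across fork().
 */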
static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma,
                const char *func)
{
        struct dax_region *dax_region = dax_dev->region;
        struct device *dev = &dax_dev->dev;
        unsigned long mask;

        if (!dax_dev->alive)
                return -ENXIO;

        /* prevent private / writable mappings from being established */
        if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) {
                dev_info(dev, "%s: %s: fail, attempted private mapping\n",
                                current->comm, func);
                return -EINVAL;
        }

        mask = dax_region->align - 1;
        if (vma->vm_start & mask || vma->vm_end & mask) {
                dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
                                current->comm, func, vma->vm_start, vma->vm_end,
                                mask);
                return -EINVAL;
        }

        if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
                        && (vma->vm_flags & VM_DONTCOPY) == 0) {
                dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n",
                                current->comm, func);
                return -EINVAL;
        }

        if (!vma_is_dax(vma)) {
                dev_info(dev, "%s: %s: fail, vma is not DAX capable\n",
                                current->comm, func);
                return -EINVAL;
        }

        return 0;
}

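/*
 * Translate a file page offset to a physical address by walking the
 * device's resource array as one linear range. Returns -1 if the offset
 * (plus @size) does not fall entirely within a single resource.
 */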
static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
                unsigned long size)
{
        struct resource *res;
        phys_addr_t phys;
        int i;

        for (i = 0; i < dax_dev->num_resources; i++) {
                res = dax_dev->res[i];
                phys = pgoff * PAGE_SIZE + res->start;
                if (phys >= res->start && phys <= res->end)
                        break;
                pgoff -= PHYS_PFN(resource_size(res));
        }

        if (i < dax_dev->num_resources) {
                res = dax_dev->res[i];
                if (phys + size - 1 <= res->end)
                        return phys;
        }

        return -1;
}

static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
                struct vm_fault *vmf)
{
        unsigned long vaddr = (unsigned long) vmf->virtual_address;
        struct device *dev = &dax_dev->dev;
        struct dax_region *dax_region;
        int rc = VM_FAULT_SIGBUS;
        phys_addr_t phys;
        pfn_t pfn;

        if (check_vma(dax_dev, vma, __func__))
                return VM_FAULT_SIGBUS;

        dax_region = dax_dev->region;
        if (dax_region->align > PAGE_SIZE) {
                dev_dbg(dev, "%s: alignment > fault size\n", __func__);
                return VM_FAULT_SIGBUS;
        }

        phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
        if (phys == -1) {
                dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                vmf->pgoff);
                return VM_FAULT_SIGBUS;
        }

        pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);

        rc = vm_insert_mixed(vma, vaddr, pfn);

        if (rc == -ENOMEM)
                return VM_FAULT_OOM;
        if (rc < 0 && rc != -EBUSY)
                return VM_FAULT_SIGBUS;

        return VM_FAULT_NOPAGE;
}

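/*
 * Fault handlers run under rcu_read_lock() so that unregister_dax_dev()
 * can flip ->alive to false and use synchronize_rcu() to wait for
 * in-flight faults before tearing down the device.
 */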
static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        int rc;
        struct file *filp = vma->vm_file;
        struct dax_dev *dax_dev = filp->private_data;

        dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
                        current->comm, (vmf->flags & FAULT_FLAG_WRITE)
                        ? "write" : "read", vma->vm_start, vma->vm_end);
        rcu_read_lock();
        rc = __dax_dev_fault(dax_dev, vma, vmf);
        rcu_read_unlock();

        return rc;
}

static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
                struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd,
                unsigned int flags)
{
        unsigned long pmd_addr = addr & PMD_MASK;
        struct device *dev = &dax_dev->dev;
        struct dax_region *dax_region;
        phys_addr_t phys;
        pgoff_t pgoff;
        pfn_t pfn;

        if (check_vma(dax_dev, vma, __func__))
                return VM_FAULT_SIGBUS;

        dax_region = dax_dev->region;
        if (dax_region->align > PMD_SIZE) {
                dev_dbg(dev, "%s: alignment > fault size\n", __func__);
                return VM_FAULT_SIGBUS;
        }

        /* dax pmd mappings require pfn_t_devmap() */
        if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
                dev_dbg(dev, "%s: region lacks devmap pages\n", __func__);
                return VM_FAULT_SIGBUS;
        }

        pgoff = linear_page_index(vma, pmd_addr);
        phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
        if (phys == -1) {
                dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
                                pgoff);
                return VM_FAULT_SIGBUS;
        }

        pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);

        return vmf_insert_pfn_pmd(vma, addr, pmd, pfn,
                        flags & FAULT_FLAG_WRITE);
}

static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
                pmd_t *pmd, unsigned int flags)
{
        int rc;
        struct file *filp = vma->vm_file;
        struct dax_dev *dax_dev = filp->private_data;

        dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
                        current->comm, (flags & FAULT_FLAG_WRITE)
                        ? "write" : "read", vma->vm_start, vma->vm_end);

        rcu_read_lock();
        rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
        rcu_read_unlock();

        return rc;
}

static const struct vm_operations_struct dax_dev_vm_ops = {
        .fault = dax_dev_fault,
        .pmd_fault = dax_dev_pmd_fault,
};

static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct dax_dev *dax_dev = filp->private_data;
        int rc;

        dev_dbg(&dax_dev->dev, "%s\n", __func__);

        rc = check_vma(dax_dev, vma, __func__);
        if (rc)
                return rc;

        vma->vm_ops = &dax_dev_vm_ops;
        vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
        return 0;
}

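/*
 * The search below asks for @len plus one extra @align worth of space and
 * then rounds the returned address up so that the file offset and the
 * virtual address share the same alignment, which is what allows the
 * huge-page fault path above to succeed.
 */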
/* return an unmapped area aligned to the dax region specified alignment */
static unsigned long dax_get_unmapped_area(struct file *filp,
                unsigned long addr, unsigned long len, unsigned long pgoff,
                unsigned long flags)
{
        unsigned long off, off_end, off_align, len_align, addr_align, align;
        struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
        struct dax_region *dax_region;

        if (!dax_dev || addr)
                goto out;

        dax_region = dax_dev->region;
        align = dax_region->align;
        off = pgoff << PAGE_SHIFT;
        off_end = off + len;
        off_align = round_up(off, align);

        if ((off_end <= off_align) || ((off_end - off_align) < align))
                goto out;

        len_align = len + align;
        if ((off + len_align) < off)
                goto out;

        addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
                        pgoff, flags);
        if (!IS_ERR_VALUE(addr_align)) {
                addr_align += (off - addr_align) & (align - 1);
                return addr_align;
        }
 out:
        return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}

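/*
 * Open redirects the devtmpfs inode's mapping to the device's private dax
 * inode so that all opens share one address_space, keeping unmap on
 * device removal coherent across every file handle.
 */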
static int dax_open(struct inode *inode, struct file *filp)
{
        struct dax_dev *dax_dev;

        dax_dev = container_of(inode->i_cdev, struct dax_dev, cdev);
        dev_dbg(&dax_dev->dev, "%s\n", __func__);
        inode->i_mapping = dax_dev->inode->i_mapping;
        inode->i_mapping->host = dax_dev->inode;
        filp->f_mapping = inode->i_mapping;
        filp->private_data = dax_dev;
        inode->i_flags = S_DAX;

        return 0;
}

static int dax_release(struct inode *inode, struct file *filp)
{
        struct dax_dev *dax_dev = filp->private_data;

        dev_dbg(&dax_dev->dev, "%s\n", __func__);
        return 0;
}

static const struct file_operations dax_fops = {
        .llseek = noop_llseek,
        .owner = THIS_MODULE,
        .open = dax_open,
        .release = dax_release,
        .get_unmapped_area = dax_get_unmapped_area,
        .mmap = dax_mmap,
};

static void dax_dev_release(struct device *dev)
{
        struct dax_dev *dax_dev = to_dax_dev(dev);
        struct dax_region *dax_region = dax_dev->region;

        ida_simple_remove(&dax_region->ida, dax_dev->id);
        ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
        dax_region_put(dax_region);
        iput(dax_dev->inode);
        kfree(dax_dev->res);
        kfree(dax_dev);
}

static void unregister_dax_dev(void *dev)
{
        struct dax_dev *dax_dev = to_dax_dev(dev);
        struct dax_region *dax_region = dax_dev->region;
        struct cdev *cdev = &dax_dev->cdev;
        int i;

        dev_dbg(dev, "%s\n", __func__);

        /*
         * Note, rcu is not protecting the liveness of dax_dev, rcu is
         * ensuring that any fault handlers that might have seen
         * dax_dev->alive == true, have completed. Any fault handlers
         * that start after synchronize_rcu() has started will abort
         * upon seeing dax_dev->alive == false.
         */
        dax_dev->alive = false;
        synchronize_rcu();
        unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);

        mutex_lock(&dax_region->lock);
        for (i = 0; i < dax_dev->num_resources; i++)
                __release_region(&dax_region->res, dax_dev->res[i]->start,
                                resource_size(dax_dev->res[i]));
        if (dax_region->seed == dev)
                dax_region->seed = NULL;
        mutex_unlock(&dax_region->lock);
        atomic_dec(&dax_region->child_count);

        cdev_del(cdev);
        device_unregister(dev);
}

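/*
 * devm_create_dax_dev() - create a character device for part of a region
 * @dax_region: parent region allocated by alloc_dax_region()
 * @res: array of @count physical address extents to claim from the region
 * @count: number of extents (0 creates an empty "seed" placeholder device)
 *
 * The first successfully created child also triggers creation of the
 * region's next seed device so userspace always has an unconfigured
 * instance available for dynamic allocation.
 */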
struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
                struct resource *res, int count)
{
        struct device *parent = dax_region->dev;
        struct dax_dev *dax_dev;
        int rc = 0, minor, i;
        struct device *dev;
        struct cdev *cdev;
        dev_t dev_t;

        dax_dev = kzalloc(sizeof(*dax_dev), GFP_KERNEL);
        if (!dax_dev)
                return ERR_PTR(-ENOMEM);

        dax_dev->res = kzalloc(sizeof(res) * count, GFP_KERNEL);
        if (!dax_dev->res) {
                rc = -ENOMEM;
                goto err_res;
        }

        for (i = 0; i < count; i++) {
                struct resource *dax_res;

                if (!IS_ALIGNED(res[i].start, dax_region->align)
                                || !IS_ALIGNED(resource_size(&res[i]),
                                        dax_region->align)) {
                        rc = -EINVAL;
                        break;
                }

                mutex_lock(&dax_region->lock);
                dax_res = __request_region(&dax_region->res, res[i].start,
                                resource_size(&res[i]), NULL, 0);
                mutex_unlock(&dax_region->lock);
                if (!dax_res) {
                        rc = -EBUSY;
                        break;
                }
                dax_dev->res[i] = dax_res;
        }

        if (i < count)
                goto err_request_region;

        dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
        if (dax_dev->id < 0) {
                rc = dax_dev->id;
                goto err_request_region;
        }

        minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
        if (minor < 0) {
                rc = minor;
                goto err_minor;
        }

        dev_t = MKDEV(MAJOR(dax_devt), minor);
        dev = &dax_dev->dev;
        dax_dev->inode = dax_inode_get(&dax_dev->cdev, dev_t);
        if (!dax_dev->inode) {
                rc = -ENOMEM;
                goto err_inode;
        }

        /* device_initialize() so cdev can reference kobj parent */
        device_initialize(dev);

        cdev = &dax_dev->cdev;
        cdev_init(cdev, &dax_fops);
        cdev->owner = parent->driver->owner;
        cdev->kobj.parent = &dev->kobj;
        rc = cdev_add(&dax_dev->cdev, dev_t, 1);
        if (rc)
                goto err_cdev;

        /* from here on we're committed to teardown via dax_dev_release() */
        dax_dev->num_resources = count;
        dax_dev->alive = true;
        dax_dev->region = dax_region;
        kref_get(&dax_region->kref);

        dev->devt = dev_t;
        dev->class = dax_class;
        dev->parent = parent;
        dev->groups = dax_attribute_groups;
        dev->release = dax_dev_release;
        dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id);
        /* update resource names now that the owner device is named */
        for (i = 0; i < dax_dev->num_resources; i++)
                dax_dev->res[i]->name = dev_name(dev);

        rc = device_add(dev);
        if (rc) {
                put_device(dev);
                return ERR_PTR(rc);
        }

        rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
        if (rc)
                return ERR_PTR(rc);

        if (atomic_inc_return(&dax_region->child_count) == 1) {
                struct dax_dev *seed;

                seed = devm_create_dax_dev(dax_region, NULL, 0);
                if (IS_ERR(seed))
                        dev_warn(parent, "failed to create region seed\n");
                else
                        dax_region->seed = &seed->dev;
        }

        return dax_dev;

 err_cdev:
        iput(dax_dev->inode);
 err_inode:
        ida_simple_remove(&dax_minor_ida, minor);
 err_minor:
        ida_simple_remove(&dax_region->ida, dax_dev->id);
 err_request_region:
        mutex_lock(&dax_region->lock);
        for (i--; i >= 0; i--)
                __release_region(&dax_region->res, dax_dev->res[i]->start,
                                resource_size(dax_dev->res[i]));
        mutex_unlock(&dax_region->lock);
        kfree(dax_dev->res);
 err_res:
        kfree(dax_dev);

        return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(devm_create_dax_dev);

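/*
 * Module init reserves a dynamic chrdev major with at least 256 minors
 * (the nr_dax module parameter can only raise that count) and registers
 * the "dax" device class plus the backing pseudo filesystem.
 */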
static int __init dax_init(void)
{
        int rc;

        rc = dax_inode_init();
        if (rc)
                return rc;

        nr_dax = max(nr_dax, 256);
        rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
        if (rc)
                goto err_chrdev;

        dax_class = class_create(THIS_MODULE, "dax");
        if (IS_ERR(dax_class)) {
                rc = PTR_ERR(dax_class);
                goto err_class;
        }

        return 0;

 err_class:
        unregister_chrdev_region(dax_devt, nr_dax);
 err_chrdev:
        dax_inode_exit();
        return rc;
}

static void __exit dax_exit(void)
{
        class_destroy(dax_class);
        unregister_chrdev_region(dax_devt, nr_dax);
        ida_destroy(&dax_minor_ida);
        dax_inode_exit();
}

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
subsys_initcall(dax_init);
module_exit(dax_exit);