2 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 #include <linux/memremap.h>
14 #include <linux/blkdev.h>
15 #include <linux/device.h>
16 #include <linux/genhd.h>
17 #include <linux/sizes.h>
18 #include <linux/slab.h>
/*
 * nd_pfn_release() - release callback wired into nd_pfn_device_type.
 * @dev: the struct device embedded in the nd_pfn being torn down
 *
 * Visible steps: look up the parent region and the nd_pfn that embeds
 * @dev, drop the namespace claim with nd_detach_ndns(), then hand the
 * instance id back to the region's pfn_ida.
 *
 * NOTE(review): original lines 33-34 are not visible in this listing;
 * presumably they free the uuid and the nd_pfn itself -- confirm
 * against the complete file.
 */
25 static void nd_pfn_release(struct device
*dev
)
27 struct nd_region
*nd_region
= to_nd_region(dev
->parent
);
28 struct nd_pfn
*nd_pfn
= to_nd_pfn(dev
);
30 dev_dbg(dev
, "%s\n", __func__
);
31 nd_detach_ndns(&nd_pfn
->dev
, &nd_pfn
->ndns
);
32 ida_simple_remove(&nd_region
->pfn_ida
, nd_pfn
->id
);
/*
 * Device type assigned to every pfn device in nd_pfn_alloc(); is_nd_pfn()
 * recognises a pfn device by comparing dev->type with this object's
 * address.
 * NOTE(review): original line 38 (another initializer member) is not
 * visible in this listing.
 */
37 static struct device_type nd_pfn_device_type
= {
39 .release
= nd_pfn_release
,
42 bool is_nd_pfn(struct device
*dev
)
44 return dev
? dev
->type
== &nd_pfn_device_type
: false;
46 EXPORT_SYMBOL(is_nd_pfn
);
48 struct nd_pfn
*to_nd_pfn(struct device
*dev
)
50 struct nd_pfn
*nd_pfn
= container_of(dev
, struct nd_pfn
, dev
);
52 WARN_ON(!is_nd_pfn(dev
));
55 EXPORT_SYMBOL(to_nd_pfn
);
/*
 * to_nd_pfn_safe() - device-to-nd_pfn conversion used by the sysfs
 * attribute handlers in this file.
 * @dev: a pfn device, or a dax device that embeds an nd_pfn
 *
 * Two results are visible: to_nd_pfn(dev) directly, or the nd_pfn
 * member of the nd_dax obtained via to_nd_dax(dev).
 *
 * NOTE(review): the condition choosing between the two paths (original
 * line 63) is not visible in this listing -- presumably is_nd_pfn(dev);
 * confirm against the complete file.
 */
57 static struct nd_pfn
*to_nd_pfn_safe(struct device
*dev
)
60 * pfn device attributes are re-used by dax device instances, so we
61 * need to be careful to correct device-to-nd_pfn conversion.
64 return to_nd_pfn(dev
);
67 struct nd_dax
*nd_dax
= to_nd_dax(dev
);
69 return &nd_dax
->nd_pfn
;
/*
 * mode_show() - sysfs read handler for the "mode" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused in the visible code
 * @buf: output buffer
 *
 * Switches on nd_pfn->mode and prints one of "ram", "pmem" or "none",
 * returning sprintf()'s byte count.
 *
 * NOTE(review): the case labels (original lines 82, 84, 86) are not
 * visible in this listing; the enum-value-to-string mapping presumably
 * follows the names used in mode_store() -- confirm.
 */
76 static ssize_t
mode_show(struct device
*dev
,
77 struct device_attribute
*attr
, char *buf
)
79 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
81 switch (nd_pfn
->mode
) {
83 return sprintf(buf
, "ram\n");
85 return sprintf(buf
, "pmem\n");
87 return sprintf(buf
, "none\n");
/*
 * mode_store() - sysfs write handler for the "mode" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused in the visible code
 * @buf: user-supplied text
 * @len: number of bytes in @buf
 *
 * Accepts "pmem", "ram" or "none", each with or without a trailing
 * newline, and stores PFN_MODE_PMEM / PFN_MODE_RAM / PFN_MODE_NONE in
 * nd_pfn->mode.  Logs the outcome, drops the nvdimm bus lock and
 * returns rc when it is non-zero, otherwise @len.
 *
 * NOTE(review): original lines 95-103 and 113-115 are not visible in
 * this listing (declarations of rc and n, lock acquisition, the
 * fall-through branch) -- the comments here cover visible code only.
 */
91 static ssize_t
mode_store(struct device
*dev
,
92 struct device_attribute
*attr
, const char *buf
, size_t len
)
94 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
/*
 * NOTE(review): strncmp() bounded by n matches any input whose first n
 * bytes agree with the keyword; n's definition is not visible here --
 * confirm it cannot be 0, which would make the first branch match
 * unconditionally.
 */
104 if (strncmp(buf
, "pmem\n", n
) == 0
105 || strncmp(buf
, "pmem", n
) == 0) {
106 nd_pfn
->mode
= PFN_MODE_PMEM
;
107 } else if (strncmp(buf
, "ram\n", n
) == 0
108 || strncmp(buf
, "ram", n
) == 0)
109 nd_pfn
->mode
= PFN_MODE_RAM
;
110 else if (strncmp(buf
, "none\n", n
) == 0
111 || strncmp(buf
, "none", n
) == 0)
112 nd_pfn
->mode
= PFN_MODE_NONE
;
/*
 * Append a newline to the debug message only when the input lacks one.
 * NOTE(review): buf[len - 1] indexes before the buffer if len is 0 --
 * confirm callers never pass a zero-length write, or guard it.
 */
116 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
117 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
118 nvdimm_bus_unlock(dev
);
/* sysfs convention: report the whole write as consumed on success */
121 return rc
? rc
: len
;
123 static DEVICE_ATTR_RW(mode
);
125 static ssize_t
align_show(struct device
*dev
,
126 struct device_attribute
*attr
, char *buf
)
128 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
130 return sprintf(buf
, "%lx\n", nd_pfn
->align
);
/*
 * __align_store() - parse and validate a new alignment for @nd_pfn.
 * @nd_pfn: instance being configured
 * @buf: user-supplied text
 *
 * Visible checks, in order:
 *   - kstrtoul() with base 0 parses @buf into val;
 *   - val must be a power of two within [PAGE_SIZE, SZ_1G];
 *   - nd_pfn->dev.driver is tested (i.e. whether a driver is bound).
 *
 * NOTE(review): the outcome of each check and the final assignment
 * (original lines 134-137, 139-141, 143-144, 146-151) are not visible
 * in this listing.
 */
133 static ssize_t
__align_store(struct nd_pfn
*nd_pfn
, const char *buf
)
138 rc
= kstrtoul(buf
, 0, &val
);
142 if (!is_power_of_2(val
) || val
< PAGE_SIZE
|| val
> SZ_1G
)
145 if (nd_pfn
->dev
.driver
)
/*
 * align_store() - sysfs write handler for the "align" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused in the visible code
 * @buf: user-supplied text
 * @len: number of bytes in @buf
 *
 * Runs __align_store() while holding the nvdimm bus lock, logs the
 * result, and returns rc when non-zero, otherwise @len.
 *
 * NOTE(review): original lines 157-159 and 165 are not visible in this
 * listing (declaration of rc and, presumably, device_lock/unlock).
 */
153 static ssize_t
align_store(struct device
*dev
,
154 struct device_attribute
*attr
, const char *buf
, size_t len
)
156 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
160 nvdimm_bus_lock(dev
);
161 rc
= __align_store(nd_pfn
, buf
);
/*
 * NOTE(review): buf[len - 1] indexes before the buffer if len is 0 --
 * confirm zero-length writes cannot reach this handler.
 */
162 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
163 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
164 nvdimm_bus_unlock(dev
);
167 return rc
? rc
: len
;
169 static DEVICE_ATTR_RW(align
);
/*
 * uuid_show() - sysfs read handler for the "uuid" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused
 * @buf: output buffer
 *
 * Prints nd_pfn->uuid with the %pUb format, or just a newline.
 *
 * NOTE(review): the condition selecting the first return (original
 * line 176) is not visible in this listing -- presumably a non-NULL
 * uuid check; confirm.
 */
171 static ssize_t
uuid_show(struct device
*dev
,
172 struct device_attribute
*attr
, char *buf
)
174 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
177 return sprintf(buf
, "%pUb\n", nd_pfn
->uuid
);
178 return sprintf(buf
, "\n");
/*
 * uuid_store() - sysfs write handler for the "uuid" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused in the visible code
 * @buf: user-supplied text
 * @len: number of bytes in @buf
 *
 * Delegates parsing and assignment to nd_uuid_store(), logs the result,
 * and returns rc when non-zero, otherwise @len.
 *
 * NOTE(review): original lines 185-187 and 191-192 are not visible in
 * this listing (declaration of rc and any locking around the call).
 */
181 static ssize_t
uuid_store(struct device
*dev
,
182 struct device_attribute
*attr
, const char *buf
, size_t len
)
184 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
188 rc
= nd_uuid_store(dev
, &nd_pfn
->uuid
, buf
, len
);
/*
 * NOTE(review): buf[len - 1] indexes before the buffer if len is 0 --
 * confirm zero-length writes cannot reach this handler.
 */
189 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
190 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
193 return rc
? rc
: len
;
195 static DEVICE_ATTR_RW(uuid
);
/*
 * namespace_show() - sysfs read handler for the "namespace" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused
 * @buf: output buffer
 *
 * Under the nvdimm bus lock, prints the device name of the claimed
 * namespace, or an empty line when nd_pfn->ndns is NULL.
 *
 * NOTE(review): the declaration of rc and the return statement
 * (original lines 201, 207) are not visible in this listing.
 */
197 static ssize_t
namespace_show(struct device
*dev
,
198 struct device_attribute
*attr
, char *buf
)
200 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
203 nvdimm_bus_lock(dev
);
204 rc
= sprintf(buf
, "%s\n", nd_pfn
->ndns
205 ? dev_name(&nd_pfn
->ndns
->dev
) : "");
206 nvdimm_bus_unlock(dev
);
/*
 * namespace_store() - sysfs write handler for the "namespace" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused in the visible code
 * @buf: user-supplied text
 * @len: number of bytes in @buf
 *
 * Under the nvdimm bus lock, delegates to nd_namespace_store() to
 * update nd_pfn->ndns, then logs the result.
 *
 * NOTE(review): original lines 214-216 and 222-224 are not visible in
 * this listing (declaration of rc, any device_lock/unlock, and the
 * return statement).
 */
210 static ssize_t
namespace_store(struct device
*dev
,
211 struct device_attribute
*attr
, const char *buf
, size_t len
)
213 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
217 nvdimm_bus_lock(dev
);
218 rc
= nd_namespace_store(dev
, &nd_pfn
->ndns
, buf
, len
);
/*
 * NOTE(review): buf[len - 1] indexes before the buffer if len is 0 --
 * confirm zero-length writes cannot reach this handler.
 */
219 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
220 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
221 nvdimm_bus_unlock(dev
);
226 static DEVICE_ATTR_RW(namespace);
/*
 * resource_show() - sysfs read handler for the "resource" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused
 * @buf: output buffer
 *
 * In the visible branch, prints (as "%#llx") the namespace resource
 * start plus the superblock's start_pad and dataoff values.
 *
 * NOTE(review): the condition guarding this branch, the alternative
 * branch and the return (original lines 232-235, 244-250) are not
 * visible in this listing; the surviving comment indicates the other
 * branch handles a disabled pfn instance.
 */
228 static ssize_t
resource_show(struct device
*dev
,
229 struct device_attribute
*attr
, char *buf
)
231 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
/* on-media fields are little-endian; convert before doing arithmetic */
236 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
237 u64 offset
= __le64_to_cpu(pfn_sb
->dataoff
);
238 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
239 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
240 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
242 rc
= sprintf(buf
, "%#llx\n", (unsigned long long) nsio
->res
.start
243 + start_pad
+ offset
);
245 /* no address to convey if the pfn instance is disabled */
252 static DEVICE_ATTR_RO(resource
);
/*
 * size_show() - sysfs read handler for the "size" attribute.
 * @dev: pfn (or dax) device
 * @attr: unused
 * @buf: output buffer
 *
 * In the visible branch, prints (as "%llu") the namespace resource size
 * minus the superblock's start_pad, end_trunc and dataoff values.
 *
 * NOTE(review): the condition guarding this branch, the alternative
 * branch and the return (original lines 258-261, 272-278) are not
 * visible in this listing; the surviving comment indicates the other
 * branch handles a disabled pfn instance.
 */
254 static ssize_t
size_show(struct device
*dev
,
255 struct device_attribute
*attr
, char *buf
)
257 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
/* on-media fields are little-endian; convert before doing arithmetic */
262 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
263 u64 offset
= __le64_to_cpu(pfn_sb
->dataoff
);
264 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
265 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
266 u32 end_trunc
= __le32_to_cpu(pfn_sb
->end_trunc
);
267 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
269 rc
= sprintf(buf
, "%llu\n", (unsigned long long)
270 resource_size(&nsio
->res
) - start_pad
271 - end_trunc
- offset
);
273 /* no size to convey if the pfn instance is disabled */
280 static DEVICE_ATTR_RO(size
);
/*
 * sysfs attributes exposed through nd_pfn_attribute_group.
 * NOTE(review): only the namespace, align and resource entries are
 * visible in this listing; original lines 283, 285 and 288-290 (other
 * entries and the terminator) are not shown.
 */
282 static struct attribute
*nd_pfn_attributes
[] = {
284 &dev_attr_namespace
.attr
,
286 &dev_attr_align
.attr
,
287 &dev_attr_resource
.attr
,
292 struct attribute_group nd_pfn_attribute_group
= {
293 .attrs
= nd_pfn_attributes
,
/*
 * Attribute groups installed on every pfn device (assigned to
 * dev->groups in nd_pfn_alloc()): the pfn-specific group plus the
 * nd_device and nd_numa groups.
 * NOTE(review): the array terminator (original line 300) is not visible
 * in this listing.
 */
296 static const struct attribute_group
*nd_pfn_attribute_groups
[] = {
297 &nd_pfn_attribute_group
,
298 &nd_device_attribute_group
,
299 &nd_numa_attribute_group
,
/*
 * nd_pfn_devinit() - finish initialising a freshly allocated nd_pfn.
 * @nd_pfn: instance to initialise
 * @ndns: namespace to claim, or NULL for an unclaimed instance
 *
 * Sets the defaults (mode PFN_MODE_NONE, align HPAGE_SIZE), calls
 * device_initialize() on the embedded device and, when @ndns is given,
 * tries to claim it with __nd_attach_ndns().  A failed claim is logged
 * together with the name of the current claimant.
 *
 * NOTE(review): original lines 307-310 and 318-322 are not visible in
 * this listing (any NULL check on @nd_pfn, the cleanup after a failed
 * claim, and the return statements).
 */
303 struct device
*nd_pfn_devinit(struct nd_pfn
*nd_pfn
,
304 struct nd_namespace_common
*ndns
)
306 struct device
*dev
= &nd_pfn
->dev
;
311 nd_pfn
->mode
= PFN_MODE_NONE
;
312 nd_pfn
->align
= HPAGE_SIZE
;
314 device_initialize(&nd_pfn
->dev
);
315 if (ndns
&& !__nd_attach_ndns(&nd_pfn
->dev
, ndns
, &nd_pfn
->ndns
)) {
316 dev_dbg(&ndns
->dev
, "%s failed, already claimed by %s\n",
317 __func__
, dev_name(ndns
->claim
));
/*
 * nd_pfn_alloc() - allocate an nd_pfn and set up its device identity.
 * @nd_region: parent region
 *
 * Zero-allocates the nd_pfn, takes an id from the region's pfn_ida,
 * names the device "pfn<region id>.<pfn id>" and sets its attribute
 * groups, device type and parent.
 *
 * NOTE(review): original lines 327-328, 330-332, 335-339 and 344-346
 * are not visible in this listing (declaration of dev, the allocation
 * failure check, the id failure path and the return statement).
 */
324 static struct nd_pfn
*nd_pfn_alloc(struct nd_region
*nd_region
)
326 struct nd_pfn
*nd_pfn
;
329 nd_pfn
= kzalloc(sizeof(*nd_pfn
), GFP_KERNEL
);
333 nd_pfn
->id
= ida_simple_get(&nd_region
->pfn_ida
, 0, 0, GFP_KERNEL
);
/* a negative id is ida_simple_get()'s error return */
334 if (nd_pfn
->id
< 0) {
340 dev_set_name(dev
, "pfn%d.%d", nd_region
->id
, nd_pfn
->id
);
341 dev
->groups
= nd_pfn_attribute_groups
;
342 dev
->type
= &nd_pfn_device_type
;
343 dev
->parent
= &nd_region
->dev
;
/*
 * nd_pfn_create() - create and register an unclaimed pfn device.
 * @nd_region: parent region; checked with is_nd_pmem()
 *
 * Allocates an nd_pfn, initialises it without a namespace (NULL ndns)
 * and registers the resulting device with __nd_device_register().
 *
 * NOTE(review): original lines 351-352, 354-355, 358 and 360-361 are
 * not visible in this listing (declaration of dev, the result of the
 * is_nd_pmem() check, any guard before registration, and the return).
 */
348 struct device
*nd_pfn_create(struct nd_region
*nd_region
)
350 struct nd_pfn
*nd_pfn
;
353 if (!is_nd_pmem(&nd_region
->dev
))
356 nd_pfn
= nd_pfn_alloc(nd_region
);
357 dev
= nd_pfn_devinit(nd_pfn
, NULL
);
359 __nd_device_register(dev
);
/*
 * nd_pfn_validate() - read the pfn superblock and sanity-check it.
 * @nd_pfn: instance whose ->pfn_sb buffer and ->ndns are already set
 *
 * Visible checks, in order: buffer and namespace present; parent is a
 * pmem region; superblock readable at offset SZ_4K; signature matches
 * PFN_SIG; checksum matches; parent_uuid matches the namespace; mode
 * switch; uuid allocated (probe path) or compared (init path);
 * alignment does not exceed namespace capacity; dataoff is inside the
 * namespace resource; dataoff is a power of two >= PAGE_SIZE.
 *
 * NOTE(review): the return statement / error code after each check is
 * not visible in this listing, so the error values are not documented.
 */
363 int nd_pfn_validate(struct nd_pfn
*nd_pfn
)
365 u64 checksum
, offset
;
366 struct nd_namespace_io
*nsio
;
367 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
368 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
/*
 * NOTE(review): &ndns->dev is evaluated and handed to nd_dev_to_uuid()
 * before the !ndns check below -- consider deferring this lookup until
 * after that check.
 */
369 const u8
*parent_uuid
= nd_dev_to_uuid(&ndns
->dev
);
371 if (!pfn_sb
|| !ndns
)
374 if (!is_nd_pmem(nd_pfn
->dev
.parent
))
/* the info block is read from byte offset SZ_4K of the namespace */
377 if (nvdimm_read_bytes(ndns
, SZ_4K
, pfn_sb
, sizeof(*pfn_sb
)))
380 if (memcmp(pfn_sb
->signature
, PFN_SIG
, PFN_SIG_LEN
) != 0)
/*
 * The checksum field is zeroed while nd_sb_checksum() runs over the
 * block, then the stored value is put back.
 */
383 checksum
= le64_to_cpu(pfn_sb
->checksum
);
384 pfn_sb
->checksum
= 0;
385 if (checksum
!= nd_sb_checksum((struct nd_gen_sb
*) pfn_sb
))
387 pfn_sb
->checksum
= cpu_to_le64(checksum
);
389 if (memcmp(pfn_sb
->parent_uuid
, parent_uuid
, 16) != 0)
/* superblocks with minor version < 1: padding fields are forced to 0 */
392 if (__le16_to_cpu(pfn_sb
->version_minor
) < 1) {
393 pfn_sb
->start_pad
= 0;
394 pfn_sb
->end_trunc
= 0;
/* NOTE(review): the statement guarded by this test is not visible */
397 if (__le16_to_cpu(pfn_sb
->version_minor
) < 2)
400 switch (le32_to_cpu(pfn_sb
->mode
)) {
409 /* from probe we allocate */
410 nd_pfn
->uuid
= kmemdup(pfn_sb
->uuid
, 16, GFP_KERNEL
);
414 /* from init we validate */
415 if (memcmp(nd_pfn
->uuid
, pfn_sb
->uuid
, 16) != 0)
/*
 * NOTE(review): this compares nd_pfn->align before it is reloaded from
 * the superblock further down, so it checks the previously configured
 * value -- confirm that ordering is intended.
 */
419 if (nd_pfn
->align
> nvdimm_namespace_capacity(ndns
)) {
420 dev_err(&nd_pfn
->dev
, "alignment: %lx exceeds capacity %llx\n",
421 nd_pfn
->align
, nvdimm_namespace_capacity(ndns
));
426 * These warnings are verbose because they can only trigger in
427 * the case where the physical address alignment of the
428 * namespace has changed since the pfn superblock was
431 offset
= le64_to_cpu(pfn_sb
->dataoff
);
432 nsio
= to_nd_namespace_io(&ndns
->dev
);
433 if (offset
>= resource_size(&nsio
->res
)) {
434 dev_err(&nd_pfn
->dev
, "pfn array size exceeds capacity of %s\n",
435 dev_name(&ndns
->dev
));
439 nd_pfn
->align
= le32_to_cpu(pfn_sb
->align
);
/*
 * NOTE(review): this requires dataoff itself to be a power of two,
 * while nd_pfn_init() derives dataoff as ALIGN(...) - start, which is
 * not necessarily one -- confirm the intended constraint.
 */
440 if (!is_power_of_2(offset
) || offset
< PAGE_SIZE
) {
441 dev_err(&nd_pfn
->dev
, "bad offset: %#llx dax disabled\n",
448 EXPORT_SYMBOL(nd_pfn_validate
);
/*
 * nd_pfn_probe() - check whether @ndns carries a pfn superblock.
 * @dev: device used as the owner of the devm-allocated superblock buffer
 * @ndns: namespace being probed
 *
 * Under the nvdimm bus lock, allocates an nd_pfn and initialises it
 * with @ndns claimed.  Then allocates a superblock buffer with
 * devm_kzalloc(), attaches it to the nd_pfn and runs nd_pfn_validate().
 * Both __nd_detach_ndns() and __nd_device_register() appear afterwards.
 *
 * NOTE(review): original lines 451-452, 457-460, 465-466, 473, 475-476
 * and 478-479 are not visible in this listing (declaration of rc, early
 * exits, the conditions selecting detach versus register, and the
 * return) -- presumably detach on validation failure and register on
 * success; confirm.
 */
450 int nd_pfn_probe(struct device
*dev
, struct nd_namespace_common
*ndns
)
453 struct nd_pfn
*nd_pfn
;
454 struct device
*pfn_dev
;
455 struct nd_pfn_sb
*pfn_sb
;
456 struct nd_region
*nd_region
= to_nd_region(ndns
->dev
.parent
);
461 nvdimm_bus_lock(&ndns
->dev
);
462 nd_pfn
= nd_pfn_alloc(nd_region
);
463 pfn_dev
= nd_pfn_devinit(nd_pfn
, ndns
);
464 nvdimm_bus_unlock(&ndns
->dev
);
/*
 * The allocation result is not tested here; nd_pfn_validate() starts
 * with a !pfn_sb check.
 */
467 pfn_sb
= devm_kzalloc(dev
, sizeof(*pfn_sb
), GFP_KERNEL
);
468 nd_pfn
= to_nd_pfn(pfn_dev
);
469 nd_pfn
->pfn_sb
= pfn_sb
;
470 rc
= nd_pfn_validate(nd_pfn
);
471 dev_dbg(dev
, "%s: pfn: %s\n", __func__
,
472 rc
== 0 ? dev_name(pfn_dev
) : "<none>");
474 __nd_detach_ndns(pfn_dev
, &nd_pfn
->ndns
);
477 __nd_device_register(pfn_dev
);
481 EXPORT_SYMBOL(nd_pfn_probe
);
484 * We hotplug memory at section granularity, pad the reserved area from
485 * the previous section base to the namespace base address.
487 static unsigned long init_altmap_base(resource_size_t base
)
489 unsigned long base_pfn
= PHYS_PFN(base
);
491 return PFN_SECTION_ALIGN_DOWN(base_pfn
);
494 static unsigned long init_altmap_reserve(resource_size_t base
)
496 unsigned long reserve
= PHYS_PFN(SZ_8K
);
497 unsigned long base_pfn
= PHYS_PFN(base
);
499 reserve
+= base_pfn
- PFN_SECTION_ALIGN_DOWN(base_pfn
);
/*
 * __nvdimm_setup_pfn() - derive the usable resource range and the
 * vmem_altmap from an already validated superblock.
 * @nd_pfn: instance with ->pfn_sb and ->ndns populated
 * @res: output; a copy of the namespace resource trimmed by start_pad
 *       and end_trunc
 * @altmap: output; filled in on the PFN_MODE_PMEM path
 *
 * The mode is taken from the superblock.  For PFN_MODE_RAM, npfns is
 * copied from the superblock; for PFN_MODE_PMEM it is recomputed from
 * the trimmed resource and @altmap is populated.  ERR_PTR(-EINVAL) and
 * ERR_PTR(-ENXIO) are the visible error returns.
 *
 * NOTE(review): original lines 516-517, 524, 527, 534, 537-538 and
 * 540-542 are not visible in this listing (the condition guarding
 * -EINVAL, the successful return values and the final else arm).
 */
503 static struct vmem_altmap
*__nvdimm_setup_pfn(struct nd_pfn
*nd_pfn
,
504 struct resource
*res
, struct vmem_altmap
*altmap
)
506 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
507 u64 offset
= le64_to_cpu(pfn_sb
->dataoff
);
508 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
509 u32 end_trunc
= __le32_to_cpu(pfn_sb
->end_trunc
);
510 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
511 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
512 resource_size_t base
= nsio
->res
.start
+ start_pad
;
/* template later copied into the caller's altmap on the PMEM path */
513 struct vmem_altmap __altmap
= {
514 .base_pfn
= init_altmap_base(base
),
515 .reserve
= init_altmap_reserve(base
),
/* hand back the namespace range with the padding trimmed off both ends */
518 memcpy(res
, &nsio
->res
, sizeof(*res
));
519 res
->start
+= start_pad
;
520 res
->end
-= end_trunc
;
522 nd_pfn
->mode
= le32_to_cpu(nd_pfn
->pfn_sb
->mode
);
523 if (nd_pfn
->mode
== PFN_MODE_RAM
) {
525 return ERR_PTR(-EINVAL
);
526 nd_pfn
->npfns
= le64_to_cpu(pfn_sb
->npfns
);
528 } else if (nd_pfn
->mode
== PFN_MODE_PMEM
) {
/* pages available after the data offset, in PAGE_SIZE units */
529 nd_pfn
->npfns
= (resource_size(res
) - offset
) / PAGE_SIZE
;
530 if (le64_to_cpu(nd_pfn
->pfn_sb
->npfns
) > nd_pfn
->npfns
)
531 dev_info(&nd_pfn
->dev
,
532 "number of pfns truncated from %lld to %ld\n",
533 le64_to_cpu(nd_pfn
->pfn_sb
->npfns
),
535 memcpy(altmap
, &__altmap
, sizeof(*altmap
));
/* pfns between the first SZ_8K and the data offset */
536 altmap
->free
= PHYS_PFN(offset
- SZ_8K
);
539 return ERR_PTR(-ENXIO
);
/*
 * nd_pfn_init() - validate an existing pfn superblock or write a new one.
 * @nd_pfn: instance with ->ndns (and ->uuid, used below) already set
 *
 * Allocates the superblock buffer and runs nd_pfn_validate().  The
 * remainder builds a fresh superblock: it computes start_pad/end_trunc
 * so the range does not share a memory section with 'System RAM',
 * sizes the memmap reservation, derives the data offset for the
 * configured mode and alignment, fills in and checksums the block, and
 * writes it at byte offset SZ_4K of the namespace.
 *
 * Return: the result of nvdimm_write_bytes() on the visible final path.
 *
 * NOTE(review): many original lines are not visible in this listing
 * (declarations of rc, npfns, offset and checksum; the test on the
 * validate result; the read-only check; early returns), so the exact
 * conditions for reaching the init path are not documented here.
 */
544 static int nd_pfn_init(struct nd_pfn
*nd_pfn
)
/* dax devices reserve an extra SZ_128K in front of the data area */
546 u32 dax_label_reserve
= is_nd_dax(&nd_pfn
->dev
) ? SZ_128K
: 0;
547 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
548 u32 start_pad
= 0, end_trunc
= 0;
549 resource_size_t start
, size
;
550 struct nd_namespace_io
*nsio
;
551 struct nd_region
*nd_region
;
552 struct nd_pfn_sb
*pfn_sb
;
558 pfn_sb
= devm_kzalloc(&nd_pfn
->dev
, sizeof(*pfn_sb
), GFP_KERNEL
);
562 nd_pfn
->pfn_sb
= pfn_sb
;
563 rc
= nd_pfn_validate(nd_pfn
);
567 /* no info block, do init */;
568 nd_region
= to_nd_region(nd_pfn
->dev
.parent
);
570 dev_info(&nd_pfn
->dev
,
571 "%s is read-only, unable to init metadata\n",
572 dev_name(&nd_region
->dev
));
/* start from a clean block; nd_pfn_validate() may have read into it */
576 memset(pfn_sb
, 0, sizeof(*pfn_sb
));
579 * Check if pmem collides with 'System RAM' when section aligned and
580 * trim it accordingly
/* first probe: range extended down to the section boundary */
582 nsio
= to_nd_namespace_io(&ndns
->dev
);
583 start
= PHYS_SECTION_ALIGN_DOWN(nsio
->res
.start
);
584 size
= resource_size(&nsio
->res
);
585 if (region_intersects(start
, size
, IORESOURCE_SYSTEM_RAM
,
586 IORES_DESC_NONE
) == REGION_MIXED
) {
587 start
= nsio
->res
.start
;
588 start_pad
= PHYS_SECTION_ALIGN_UP(start
) - start
;
/* second probe: range extended up to the next section boundary */
591 start
= nsio
->res
.start
;
592 size
= PHYS_SECTION_ALIGN_UP(start
+ size
) - start
;
593 if (region_intersects(start
, size
, IORESOURCE_SYSTEM_RAM
,
594 IORES_DESC_NONE
) == REGION_MIXED
) {
595 size
= resource_size(&nsio
->res
);
596 end_trunc
= start
+ size
- PHYS_SECTION_ALIGN_DOWN(start
+ size
);
/*
 * NOTE(review): start_pad + end_trunc is a u32 sum printed with %d;
 * %u would match the argument type.
 */
599 if (start_pad
+ end_trunc
)
600 dev_info(&nd_pfn
->dev
, "%s section collision, truncate %d bytes\n",
601 dev_name(&ndns
->dev
), start_pad
+ end_trunc
);
604 * Note, we use 64 here for the standard size of struct page,
605 * debugging options may cause it to be larger in which case the
606 * implementation will limit the pfns advertised through
607 * ->direct_access() to those that are included in the memmap.
/* provisional page count: everything but padding and the first SZ_8K */
610 size
= resource_size(&nsio
->res
);
611 npfns
= (size
- start_pad
- end_trunc
- SZ_8K
) / SZ_4K
;
612 if (nd_pfn
->mode
== PFN_MODE_PMEM
) {
613 unsigned long memmap_size
;
616 * vmemmap_populate_hugepages() allocates the memmap array in
/* PMEM mode: the memmap (64 bytes per pfn) sits before the data */
619 memmap_size
= ALIGN(64 * npfns
, HPAGE_SIZE
);
620 offset
= ALIGN(start
+ SZ_8K
+ memmap_size
+ dax_label_reserve
,
621 nd_pfn
->align
) - start
;
/* RAM mode: no on-media memmap, only the header and optional reserve */
622 } else if (nd_pfn
->mode
== PFN_MODE_RAM
)
623 offset
= ALIGN(start
+ SZ_8K
+ dax_label_reserve
,
624 nd_pfn
->align
) - start
;
/* metadata plus padding must leave room for at least some data */
628 if (offset
+ start_pad
+ end_trunc
>= size
) {
629 dev_err(&nd_pfn
->dev
, "%s unable to satisfy requested alignment\n",
630 dev_name(&ndns
->dev
));
/* final page count now that the data offset is known */
634 npfns
= (size
- offset
- start_pad
- end_trunc
) / SZ_4K
;
635 pfn_sb
->mode
= cpu_to_le32(nd_pfn
->mode
);
636 pfn_sb
->dataoff
= cpu_to_le64(offset
);
637 pfn_sb
->npfns
= cpu_to_le64(npfns
);
638 memcpy(pfn_sb
->signature
, PFN_SIG
, PFN_SIG_LEN
);
639 memcpy(pfn_sb
->uuid
, nd_pfn
->uuid
, 16);
640 memcpy(pfn_sb
->parent_uuid
, nd_dev_to_uuid(&ndns
->dev
), 16);
641 pfn_sb
->version_major
= cpu_to_le16(1);
642 pfn_sb
->version_minor
= cpu_to_le16(2);
643 pfn_sb
->start_pad
= cpu_to_le32(start_pad
);
644 pfn_sb
->end_trunc
= cpu_to_le32(end_trunc
);
645 pfn_sb
->align
= cpu_to_le32(nd_pfn
->align
);
/* checksum is computed after every other field has been filled in */
646 checksum
= nd_sb_checksum((struct nd_gen_sb
*) pfn_sb
);
647 pfn_sb
->checksum
= cpu_to_le64(checksum
);
649 return nvdimm_write_bytes(ndns
, SZ_4K
, pfn_sb
, sizeof(*pfn_sb
));
/*
 * nvdimm_setup_pfn() - exported entry point combining nd_pfn_init() and
 * __nvdimm_setup_pfn().
 * @nd_pfn: instance to set up; needs both a uuid and a claimed namespace
 * @res: output resource range, filled by __nvdimm_setup_pfn()
 * @altmap: output altmap, filled by __nvdimm_setup_pfn()
 *
 * Return: ERR_PTR(-ENODEV) when the uuid or namespace is missing,
 * otherwise (on the visible path) the result of __nvdimm_setup_pfn().
 *
 * NOTE(review): original lines 654-655, 658-660 and 665-667 are not
 * visible in this listing (rest of the header comment, declaration of
 * rc, and the handling of a failing nd_pfn_init()).
 */
653 * Determine the effective resource range and vmem_altmap from an nd_pfn
656 struct vmem_altmap
*nvdimm_setup_pfn(struct nd_pfn
*nd_pfn
,
657 struct resource
*res
, struct vmem_altmap
*altmap
)
661 if (!nd_pfn
->uuid
|| !nd_pfn
->ndns
)
662 return ERR_PTR(-ENODEV
);
664 rc
= nd_pfn_init(nd_pfn
);
668 /* we need a valid pfn_sb before we can init a vmem_altmap */
669 return __nvdimm_setup_pfn(nd_pfn
, res
, altmap
);
671 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn
);