[RFC PATCH] libnvdimm: Update the meaning for persistence_domain values
by Aneesh Kumar K.V
Currently, the kernel shows the values below:
"persistence_domain":"cpu_cache"
"persistence_domain":"memory_controller"
"persistence_domain":"unknown"
This patch updates the meaning of these values such that:
"cpu_cache" indicates no extra instructions are needed to ensure the persistence
of data in the pmem media on power failure.
"memory_controller" indicates platform-provided instructions need to be issued
as per the documented sequence to make sure data flushed is guaranteed to be on
pmem media in case of system power loss.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
---
arch/powerpc/platforms/pseries/papr_scm.c | 7 ++++++-
include/linux/libnvdimm.h | 6 +++---
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index c2ef320ba1bf..26a5ef263758 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -360,8 +360,13 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
if (p->is_volatile)
p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
- else
+ else {
+ /*
+ * We need to flush things correctly to guarantee persistence
+ */
+ set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+ }
if (!p->region) {
dev_err(dev, "Error registering region %pR from %pOF\n",
ndr_desc.res, p->dn);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index f2a33f2e3ba8..9126737377e1 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -52,9 +52,9 @@ enum {
*/
ND_REGION_PERSIST_CACHE = 1,
/*
- * Platform provides mechanisms to automatically flush outstanding
- * write data from memory controler to pmem on system power loss.
- * (ADR)
+ * Platform provides instructions to flush data such that on completion
+ * of the instructions, data flushed is guaranteed to be on pmem even
+ * in case of a system power loss.
*/
ND_REGION_PERSIST_MEMCTRL = 2,
--
2.24.1
1 year
KASAN: vmalloc-out-of-bounds Read in acpi_nfit_ctl
by syzbot
Hello,
syzbot found the following crash on:
HEAD commit: 040a3c33 Merge tag 'iommu-fixes-v5.5-rc5' of git://git.ker..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=120a5d8ee00000
kernel config: https://syzkaller.appspot.com/x/.config?x=7e89bd00623fe71e
dashboard link: https://syzkaller.appspot.com/bug?extid=002f559bf34c2c7467d0
compiler: gcc (GCC) 9.0.0 20181231 (experimental)
userspace arch: i386
Unfortunately, I don't have any reproducer for this crash yet.
IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+002f559bf34c2c7467d0(a)syzkaller.appspotmail.com
==================================================================
BUG: KASAN: vmalloc-out-of-bounds in test_bit
include/asm-generic/bitops/instrumented-non-atomic.h:110 [inline]
BUG: KASAN: vmalloc-out-of-bounds in acpi_nfit_ctl+0x47f/0x1840
drivers/acpi/nfit/core.c:495
Read of size 8 at addr ffffc90002ddbbb8 by task syz-executor.1/5941
CPU: 3 PID: 5941 Comm: syz-executor.1 Not tainted 5.5.0-rc5-syzkaller #0
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
Call Trace:
__dump_stack lib/dump_stack.c:77 [inline]
dump_stack+0x197/0x210 lib/dump_stack.c:118
print_address_description.constprop.0.cold+0x5/0x30b mm/kasan/report.c:374
__kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506
kasan_report+0x12/0x20 mm/kasan/common.c:639
check_memory_region_inline mm/kasan/generic.c:185 [inline]
check_memory_region+0x134/0x1a0 mm/kasan/generic.c:192
__kasan_check_read+0x11/0x20 mm/kasan/common.c:95
test_bit include/asm-generic/bitops/instrumented-non-atomic.h:110 [inline]
acpi_nfit_ctl+0x47f/0x1840 drivers/acpi/nfit/core.c:495
__nd_ioctl drivers/nvdimm/bus.c:1152 [inline]
nd_ioctl.isra.0+0xfe2/0x1580 drivers/nvdimm/bus.c:1230
bus_ioctl+0x59/0x70 drivers/nvdimm/bus.c:1242
compat_ptr_ioctl+0x6e/0xa0 fs/ioctl.c:788
__do_compat_sys_ioctl fs/compat_ioctl.c:214 [inline]
__se_compat_sys_ioctl fs/compat_ioctl.c:142 [inline]
__ia32_compat_sys_ioctl+0x233/0x610 fs/compat_ioctl.c:142
do_syscall_32_irqs_on arch/x86/entry/common.c:337 [inline]
do_fast_syscall_32+0x27b/0xe16 arch/x86/entry/common.c:408
entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139
RIP: 0023:0xf7f37a39
Code: 00 00 00 89 d3 5b 5e 5f 5d c3 b8 80 96 98 00 eb c4 8b 04 24 c3 8b 1c
24 c3 8b 34 24 c3 8b 3c 24 c3 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90
90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90
RSP: 002b:00000000f5d330cc EFLAGS: 00000296 ORIG_RAX: 0000000000000036
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 000000000000560a
RDX: 0000000020000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
Memory state around the buggy address:
ffffc90002ddba80: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9
ffffc90002ddbb00: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9
> ffffc90002ddbb80: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9
^
ffffc90002ddbc00: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9
ffffc90002ddbc80: f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9 f9
==================================================================
---
This bug is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkaller(a)googlegroups.com.
syzbot will keep track of this bug report. See:
https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
1 year
[PATCH v3 1/6] libnvdimm/namespace: Make namespace size validation arch dependent
by Aneesh Kumar K.V
The page size used to map the namespace is arch dependent. For example,
architectures like ppc64 use a 16MB page size for direct-mapping. If the namespace
size is not aligned to the mapping page size, we can observe a kernel crash
during namespace init and destroy.
This is due to the kernel doing a partial map/unmap of the resource range:
BUG: Unable to handle kernel data access at 0xc001000406000000
Faulting instruction address: 0xc000000000090790
NIP [c000000000090790] arch_add_memory+0xc0/0x130
LR [c000000000090744] arch_add_memory+0x74/0x130
Call Trace:
arch_add_memory+0x74/0x130 (unreliable)
memremap_pages+0x74c/0xa30
devm_memremap_pages+0x3c/0xa0
pmem_attach_disk+0x188/0x770
nvdimm_bus_probe+0xd8/0x470
really_probe+0x148/0x570
driver_probe_device+0x19c/0x1d0
device_driver_attach+0xcc/0x100
bind_store+0x134/0x1c0
drv_attr_store+0x44/0x60
sysfs_kf_write+0x74/0xc0
kernfs_fop_write+0x1b4/0x290
__vfs_write+0x3c/0x70
vfs_write+0xd0/0x260
ksys_write+0xdc/0x130
system_call+0x5c/0x68
The kernel should also ensure that the namespace size is a multiple of the subsection size.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar(a)linux.ibm.com>
---
Changes from v2:
* Use SUBSECTION_SIZE instead of PAGE_SIZE. Namespace size should be multiple
of SUBSECTION size.
arch/arm64/mm/flush.c | 6 ++++++
arch/powerpc/lib/pmem.c | 11 +++++++++++
arch/x86/mm/pageattr.c | 7 +++++++
include/linux/libnvdimm.h | 1 +
4 files changed, 25 insertions(+)
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index ac485163a4a7..5d82484ac8ca 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -91,4 +91,10 @@ void arch_invalidate_pmem(void *addr, size_t size)
__inval_dcache_area(addr, size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+unsigned long arch_namespace_align_size(void)
+{
+ return (1UL << SUBSECTION_SHIFT);
+}
+EXPORT_SYMBOL_GPL(arch_namespace_align_size);
#endif
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 0666a8d29596..b94e7d4876d1 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -26,6 +26,17 @@ void arch_invalidate_pmem(void *addr, size_t size)
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+unsigned long arch_namespace_align_size(void)
+{
+ unsigned long sub_section_size = (1UL << SUBSECTION_SHIFT);
+
+ if (radix_enabled())
+ return sub_section_size;
+ return max(sub_section_size, (1UL << mmu_psize_defs[mmu_linear_psize].shift));
+
+}
+EXPORT_SYMBOL_GPL(arch_namespace_align_size);
+
/*
* CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
*/
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1b99ad05b117..0bcd22e11dd0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -310,6 +310,13 @@ void arch_invalidate_pmem(void *addr, size_t size)
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+unsigned long arch_namespace_align_size(void)
+{
+ return (1UL << SUBSECTION_SHIFT);
+}
+EXPORT_SYMBOL_GPL(arch_namespace_align_size);
+
+
static void __cpa_flush_all(void *arg)
{
unsigned long cache = (unsigned long)arg;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 9df091bd30ba..f2a33f2e3ba8 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -284,4 +284,5 @@ static inline void arch_invalidate_pmem(void *addr, size_t size)
}
#endif
+unsigned long arch_namespace_align_size(void);
#endif /* __LIBNVDIMM_H__ */
--
2.24.1
1 year
Re: Linux-nvdimm Digest, Vol 64, Issue 18
by Tom Zhou
hello
On Fri, Jan 10, 2020 at 11:05 AM <linux-nvdimm-request(a)lists.01.org> wrote:
> Send Linux-nvdimm mailing list submissions to
> linux-nvdimm(a)lists.01.org
>
> To subscribe or unsubscribe via email, send a message with subject or
> body 'help' to
> linux-nvdimm-request(a)lists.01.org
>
> You can reach the person managing the list at
> linux-nvdimm-owner(a)lists.01.org
>
> When replying, please edit your Subject line so it is more specific
> than "Re: Contents of Linux-nvdimm digest..."
>
> Today's Topics:
>
> 1. [PATCH ndctl] ndctl/namespace: Fix enable-namespace error for seed
> namespaces
> (Santosh Sivaraj)
> 2. Re: dax: Get rid of fs_dax_get_by_host() helper
> (Christoph Hellwig)
> 3. Re: [PATCH 01/19] dax: remove block device dependencies
> (Christoph Hellwig)
> 4. Re: [PATCH ndctl] ndctl/namespace: Fix enable-namespace error for
> seed namespaces
> (Dan Williams)
> 5. [PATCH RFC 06/10] device-dax: Introduce pfn_flags helper
> (Joao Martins)
> 6. [PATCH RFC 07/10] device-dax: Add support for PFN_SPECIAL flags
> (Joao Martins)
>
>
> ----------------------------------------------------------------------
>
> Date: Fri, 10 Jan 2020 13:50:17 +0530
> From: Santosh Sivaraj <santosh(a)fossix.org>
> Subject: [PATCH ndctl] ndctl/namespace: Fix enable-namespace error for
> seed namespaces
> To: linux-nvdimm(a)lists.01.org, Dan Williams
> <dan.j.williams(a)intel.com>
> Cc: harish(a)linux.ibm.com
> Message-ID: <20200110082017.3485529-1-santosh(a)fossix.org>
>
> 'ndctl enable-namespace all' tries to enable seed namespaces too, which
> results in an error like
>
> libndctl: ndctl_namespace_enable: namespace1.0: failed to enable
>
> Signed-off-by: Santosh Sivaraj <santosh(a)fossix.org>
> ---
> ndctl/lib/libndctl.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
> index 6596f94..4839214 100644
> --- a/ndctl/lib/libndctl.c
> +++ b/ndctl/lib/libndctl.c
> @@ -4010,11 +4010,16 @@ NDCTL_EXPORT int ndctl_namespace_enable(struct
> ndctl_namespace *ndns)
> const char *devname = ndctl_namespace_get_devname(ndns);
> struct ndctl_ctx *ctx = ndctl_namespace_get_ctx(ndns);
> struct ndctl_region *region = ndns->region;
> + unsigned long long size = ndctl_namespace_get_size(ndns);
> int rc;
>
> if (ndctl_namespace_is_enabled(ndns))
> return 0;
>
> + /* Don't try to enable idle namespace (no capacity allocated) */
> + if (size == 0)
> + return -1;
> +
> rc = ndctl_bind(ctx, ndns->module, devname);
>
> /*
> --
> 2.24.1
>
> ------------------------------
>
> Date: Fri, 10 Jan 2020 04:31:27 -0800
> From: Christoph Hellwig <hch(a)infradead.org>
> Subject: Re: dax: Get rid of fs_dax_get_by_host() helper
> To: Vivek Goyal <vgoyal(a)redhat.com>
> Cc: linux-nvdimm(a)lists.01.org, linux-fsdevel(a)vger.kernel.org,
> linux-kernel(a)vger.kernel.org
> Message-ID: <20200110123127.GA6558(a)infradead.org>
> Content-Type: text/plain; charset=us-ascii
>
> On Mon, Jan 06, 2020 at 01:11:17PM -0500, Vivek Goyal wrote:
> > Looks like nobody is using fs_dax_get_by_host() except
> fs_dax_get_by_bdev()
> > and it can easily use dax_get_by_host() instead.
> >
> > IIUC, fs_dax_get_by_host() was only introduced so that one could compile
> > with CONFIG_FS_DAX=n and CONFIG_DAX=m. fs_dax_get_by_bdev() achieves
> > the same purpose and hence it looks like fs_dax_get_by_host() is not
> > needed anymore.
> >
> > Signed-off-by: Vivek Goyal <vgoyal(a)redhat.com>
>
> Looks good,
>
> Reviewed-by: Christoph Hellwig <hch(a)lst.de>
>
> ------------------------------
>
> Date: Fri, 10 Jan 2020 04:36:31 -0800
> From: Christoph Hellwig <hch(a)infradead.org>
> Subject: Re: [PATCH 01/19] dax: remove block device dependencies
> To: Dan Williams <dan.j.williams(a)intel.com>
> Cc: Jan Kara <jack(a)suse.cz>, "Darrick J. Wong"
> <darrick.wong(a)oracle.com>, Christoph Hellwig <hch(a)infradead.org>,
> Dave
> Chinner <david(a)fromorbit.com>, Miklos Szeredi <miklos(a)szeredi.hu>,
> linux-nvdimm <linux-nvdimm(a)lists.01.org>, Linux Kernel Mailing
> List
> <linux-kernel(a)vger.kernel.org>, "Dr. David Alan Gilbert"
> <dgilbert(a)redhat.com>, virtio-fs(a)redhat.com, Stefan Hajnoczi
> <stefanha(a)redhat.com>, linux-fsdevel <
> linux-fsdevel(a)vger.kernel.org>
> Message-ID: <20200110123631.GA16268(a)infradead.org>
> Content-Type: text/plain; charset=us-ascii
>
> On Thu, Jan 09, 2020 at 12:03:01PM -0800, Dan Williams wrote:
> > > So I'd find two options reasonably consistent:
> > > 1) Keep status quo where partitions are created and support DAX.
> > > 2) Stop partition creation altogether, if anyones wants to split pmem
> > > device further, he can use dm-linear for that (i.e., kpartx).
> > >
> > > But I'm not sure if the ship hasn't already sailed for option 2) to be
> > > feasible without angry users and Linus reverting the change.
> >
> > Christoph? I feel myself leaning more and more to the "keep pmem
> > partitions" camp.
> >
> > I don't see "drop partition support" effort ending well given the long
> > standing "ext4 fails to mount when dax is not available" precedent.
>
> Do we have any evidence of existing setups with DAX and partitions?
> Can we just throw in a patch to reject that case for now before actually
> removing the code and see if anyone screams. And fix ext4 up while
> we are at it.
>
> > I think the next least bad option is to have a dax_get_by_host()
> > variant that passes an offset and length pair rather than requiring a
> > later bdev_dax_pgoff() to recall the offset. This also prevents
> > needing to add another dax-device object representation.
>
> IFF we have to keep partition support, yes. But keeping it just seems
> like a really bad idea.
>
> ------------------------------
>
> Date: Fri, 10 Jan 2020 09:56:26 -0800
> From: Dan Williams <dan.j.williams(a)intel.com>
> Subject: Re: [PATCH ndctl] ndctl/namespace: Fix enable-namespace error
> for seed namespaces
> To: Santosh Sivaraj <santosh(a)fossix.org>
> Cc: linux-nvdimm <linux-nvdimm(a)lists.01.org>, harish(a)linux.ibm.com
> Message-ID:
> <
> CAPcyv4h-ke4Jorqx6md+gfgVupNgXn-qm8Yx7vaLNa4O+91jeg(a)mail.gmail.com>
> Content-Type: text/plain; charset="UTF-8"
>
> On Fri, Jan 10, 2020 at 12:21 AM Santosh Sivaraj <santosh(a)fossix.org>
> wrote:
> >
> > 'ndctl enable-namespace all' tries to enable seed namespaces too, which
> > results in an error like
> >
> > libndctl: ndctl_namespace_enable: namespace1.0: failed to enable
> >
> > Signed-off-by: Santosh Sivaraj <santosh(a)fossix.org>
> > ---
> > ndctl/lib/libndctl.c | 5 +++++
> > 1 file changed, 5 insertions(+)
> >
> > diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
> > index 6596f94..4839214 100644
> > --- a/ndctl/lib/libndctl.c
> > +++ b/ndctl/lib/libndctl.c
> > @@ -4010,11 +4010,16 @@ NDCTL_EXPORT int ndctl_namespace_enable(struct
> ndctl_namespace *ndns)
> > const char *devname = ndctl_namespace_get_devname(ndns);
> > struct ndctl_ctx *ctx = ndctl_namespace_get_ctx(ndns);
> > struct ndctl_region *region = ndns->region;
> > + unsigned long long size = ndctl_namespace_get_size(ndns);
> > int rc;
> >
> > if (ndctl_namespace_is_enabled(ndns))
> > return 0;
> >
> > + /* Don't try to enable idle namespace (no capacity allocated) */
> > + if (size == 0)
> > + return -1;
>
> Concept looks good to me, just resend with that -1 changed to a named
> error code (-ENXIO).
>
> ------------------------------
>
> Date: Fri, 10 Jan 2020 19:03:09 +0000
> From: Joao Martins <joao.m.martins(a)oracle.com>
> Subject: [PATCH RFC 06/10] device-dax: Introduce pfn_flags helper
> To: linux-nvdimm(a)lists.01.org
> Cc: Alex Williamson <alex.williamson(a)redhat.com>, Cornelia Huck
> <cohuck(a)redhat.com>, kvm(a)vger.kernel.org, Andrew Morton
> <akpm(a)linux-foundation.org>, linux-mm(a)kvack.org,
> linux-kernel(a)vger.kernel.org, Thomas Gleixner <tglx(a)linutronix.de
> >,
> Ingo Molnar <mingo(a)redhat.com>, Borislav Petkov <bp(a)alien8.de>,
> "H .
> Peter Anvin" <hpa(a)zytor.com>, x86(a)kernel.org, Liran Alon
> <liran.alon(a)oracle.com>, Nikita Leshenko
> <nikita.leshchenko(a)oracle.com>, Barret Rhoden <brho(a)google.com>,
> Boris
> Ostrovsky <boris.ostrovsky(a)oracle.com>, Matthew Wilcox
> <willy(a)infradead.org>, Konrad Rzeszutek Wilk <
> konrad.wilk(a)oracle.com>
> Message-ID: <20200110190313.17144-7-joao.m.martins(a)oracle.com>
>
> Replace PFN_DEV|PFN_MAP check call sites with two helper functions
> dax_is_pfn_dev() and dax_is_pfn_map().
>
> Signed-off-by: Joao Martins <joao.m.martins(a)oracle.com>
> ---
> drivers/dax/device.c | 18 ++++++++++++++----
> 1 file changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index c6a7f5e12c54..113a554de3ee 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -14,6 +14,17 @@
> #include "dax-private.h"
> #include "bus.h"
>
> +static int dax_is_pfn_dev(struct dev_dax *dev_dax)
> +{
> + return (dev_dax->region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV;
> +}
> +
> +static int dax_is_pfn_map(struct dev_dax *dev_dax)
> +{
> + return (dev_dax->region->pfn_flags &
> + (PFN_DEV|PFN_MAP)) == (PFN_DEV|PFN_MAP);
> +}
> +
> static int check_vma_mmap(struct dev_dax *dev_dax, struct vm_area_struct
> *vma,
> const char *func)
> {
> @@ -60,8 +71,7 @@ static int check_vma(struct dev_dax *dev_dax, struct
> vm_area_struct *vma,
> if (rc < 0)
> return rc;
>
> - if ((dev_dax->region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
> - && (vma->vm_flags & VM_DONTCOPY) == 0) {
> + if (dax_is_pfn_dev(dev_dax) && (vma->vm_flags & VM_DONTCOPY) == 0)
> {
> dev_info_ratelimited(&dev_dax->dev,
> "%s: %s: fail, dax range requires
> MADV_DONTFORK\n",
> current->comm, func);
> @@ -140,7 +150,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax
> *dev_dax,
> }
>
> /* dax pmd mappings require pfn_t_devmap() */
> - if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) !=
> (PFN_DEV|PFN_MAP)) {
> + if (!dax_is_pfn_map(dev_dax)) {
> dev_dbg(dev, "region lacks devmap flags\n");
> return VM_FAULT_SIGBUS;
> }
> @@ -190,7 +200,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax
> *dev_dax,
> }
>
> /* dax pud mappings require pfn_t_devmap() */
> - if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) !=
> (PFN_DEV|PFN_MAP)) {
> + if (!dax_is_pfn_map(dev_dax)) {
> dev_dbg(dev, "region lacks devmap flags\n");
> return VM_FAULT_SIGBUS;
> }
> --
> 2.17.1
>
> ------------------------------
>
> Date: Fri, 10 Jan 2020 19:03:10 +0000
> From: Joao Martins <joao.m.martins(a)oracle.com>
> Subject: [PATCH RFC 07/10] device-dax: Add support for PFN_SPECIAL
> flags
> To: linux-nvdimm(a)lists.01.org
> Cc: Alex Williamson <alex.williamson(a)redhat.com>, Cornelia Huck
> <cohuck(a)redhat.com>, kvm(a)vger.kernel.org, Andrew Morton
> <akpm(a)linux-foundation.org>, linux-mm(a)kvack.org,
> linux-kernel(a)vger.kernel.org, Thomas Gleixner <tglx(a)linutronix.de
> >,
> Ingo Molnar <mingo(a)redhat.com>, Borislav Petkov <bp(a)alien8.de>,
> "H .
> Peter Anvin" <hpa(a)zytor.com>, x86(a)kernel.org, Liran Alon
> <liran.alon(a)oracle.com>, Nikita Leshenko
> <nikita.leshchenko(a)oracle.com>, Barret Rhoden <brho(a)google.com>,
> Boris
> Ostrovsky <boris.ostrovsky(a)oracle.com>, Matthew Wilcox
> <willy(a)infradead.org>, Konrad Rzeszutek Wilk <
> konrad.wilk(a)oracle.com>
> Message-ID: <20200110190313.17144-8-joao.m.martins(a)oracle.com>
>
> Right now we assume there's gonna be a PFN_DEV|PFN_MAP which
> means it will have a struct page backing the PFN but that is
> not placed in normal system RAM zones.
>
> Add support for PFN_DEV|PFN_SPECIAL only and therefore the
> underlying vma won't have a struct page. For device dax, this
> means not assuming callers will pass a dev_pagemap, and avoid
> returning SIGBUS for the lack of PFN_MAP region pfn flag and
> finally not setting struct page index/mapping on fault.
>
> Signed-off-by: Joao Martins <joao.m.martins(a)oracle.com>
> ---
> drivers/dax/bus.c | 3 ++-
> drivers/dax/device.c | 40 ++++++++++++++++++++++------------------
> 2 files changed, 24 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 46e46047a1f7..96ca3ac85278 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -414,7 +414,8 @@ struct dev_dax *__devm_create_dev_dax(struct
> dax_region *dax_region, int id,
> if (!dev_dax)
> return ERR_PTR(-ENOMEM);
>
> - memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap));
> + if (pgmap)
> + memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap));
>
> /*
> * No 'host' or dax_operations since there is no access to this
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index 113a554de3ee..aa38f5ff180a 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -14,6 +14,12 @@
> #include "dax-private.h"
> #include "bus.h"
>
> +static int dax_is_pfn_special(struct dev_dax *dev_dax)
> +{
> + return (dev_dax->region->pfn_flags &
> + (PFN_DEV|PFN_SPECIAL)) == (PFN_DEV|PFN_SPECIAL);
> +}
> +
> static int dax_is_pfn_dev(struct dev_dax *dev_dax)
> {
> return (dev_dax->region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV;
> @@ -104,6 +110,7 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax
> *dev_dax,
> struct dax_region *dax_region;
> phys_addr_t phys;
> unsigned int fault_size = PAGE_SIZE;
> + int rc;
>
> if (check_vma(dev_dax, vmf->vma, __func__))
> return VM_FAULT_SIGBUS;
> @@ -126,7 +133,12 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax
> *dev_dax,
>
> *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
> + if (dax_is_pfn_special(dev_dax))
> + rc = vmf_insert_pfn(vmf->vma, vmf->address,
> pfn_t_to_pfn(*pfn));
> + else
> + rc = vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
> +
> + return rc;
> }
>
> static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
> @@ -149,12 +161,6 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax
> *dev_dax,
> return VM_FAULT_SIGBUS;
> }
>
> - /* dax pmd mappings require pfn_t_devmap() */
> - if (!dax_is_pfn_map(dev_dax)) {
> - dev_dbg(dev, "region lacks devmap flags\n");
> - return VM_FAULT_SIGBUS;
> - }
> -
> if (fault_size < dax_region->align)
> return VM_FAULT_SIGBUS;
> else if (fault_size > dax_region->align)
> @@ -199,12 +205,6 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax
> *dev_dax,
> return VM_FAULT_SIGBUS;
> }
>
> - /* dax pud mappings require pfn_t_devmap() */
> - if (!dax_is_pfn_map(dev_dax)) {
> - dev_dbg(dev, "region lacks devmap flags\n");
> - return VM_FAULT_SIGBUS;
> - }
> -
> if (fault_size < dax_region->align)
> return VM_FAULT_SIGBUS;
> else if (fault_size > dax_region->align)
> @@ -266,7 +266,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault
> *vmf,
> rc = VM_FAULT_SIGBUS;
> }
>
> - if (rc == VM_FAULT_NOPAGE) {
> + if (dax_is_pfn_map(dev_dax) && (rc == VM_FAULT_NOPAGE)) {
> unsigned long i;
> pgoff_t pgoff;
>
> @@ -344,6 +344,8 @@ static int dax_mmap(struct file *filp, struct
> vm_area_struct *vma)
>
> vma->vm_ops = &dax_vm_ops;
> vma->vm_flags |= VM_HUGEPAGE;
> + if (dax_is_pfn_special(dev_dax))
> + vma->vm_flags |= VM_PFNMAP;
> return 0;
> }
>
> @@ -450,10 +452,12 @@ int dev_dax_probe(struct device *dev)
> return -EBUSY;
> }
>
> - dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
> - addr = devm_memremap_pages(dev, &dev_dax->pgmap);
> - if (IS_ERR(addr))
> - return PTR_ERR(addr);
> + if (dax_is_pfn_map(dev_dax)) {
> + dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
> + addr = devm_memremap_pages(dev, &dev_dax->pgmap);
> + if (IS_ERR(addr))
> + return PTR_ERR(addr);
> + }
>
> inode = dax_inode(dax_dev);
> cdev = inode->i_cdev;
> --
> 2.17.1
>
> ------------------------------
>
> Subject: Digest Footer
>
> _______________________________________________
> Linux-nvdimm mailing list -- linux-nvdimm(a)lists.01.org
> To unsubscribe send an email to linux-nvdimm-leave(a)lists.01.org
>
>
> ------------------------------
>
> End of Linux-nvdimm Digest, Vol 64, Issue 18
> ********************************************
>
1 year
[ndctl PATCH] ndctl/namespace: Fix destroy-namespace accounting
relative to seed devices
by Dan Williams
Seed namespaces are included in "ndctl destroy-namespace all". However,
since the user never "creates" them, it is surprising to see
"destroy-namespace" report 1 more namespace relative to the number that
have been created. Catch attempts to destroy a zero-sized namespace:
Before:
# ndctl create-namespace -s 500M
{
"dev":"namespace1.0",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1"
}
# ndctl create-namespace -s 500M
{
"dev":"namespace1.1",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.1"
}
# ndctl create-namespace -s 500M
{
"dev":"namespace1.2",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.2"
}
# ndctl destroy-namespace -r 1 all -f
destroyed 4 namespaces
After:
# ndctl create-namespace -s 500M
{
"dev":"namespace1.0",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1"
}
# ndctl create-namespace -s 500M
{
"dev":"namespace1.3",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.3"
}
# ndctl create-namespace -s 500M
{
"dev":"namespace1.1",
"size":"492.00 MiB (515.90 MB)",
"blockdev":"pmem1.1"
}
# ndctl destroy-namespace -r 1 all -f
destroyed 3 namespaces
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
---
ndctl/namespace.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/ndctl/namespace.c b/ndctl/namespace.c
index 2f463509f8ca..994b4e8791ea 100644
--- a/ndctl/namespace.c
+++ b/ndctl/namespace.c
@@ -907,6 +907,7 @@ static int namespace_destroy(struct ndctl_region *region,
struct ndctl_pfn *pfn = ndctl_namespace_get_pfn(ndns);
struct ndctl_dax *dax = ndctl_namespace_get_dax(ndns);
struct ndctl_btt *btt = ndctl_namespace_get_btt(ndns);
+ unsigned long long size;
bool did_zero = false;
int rc;
@@ -953,10 +954,19 @@ static int namespace_destroy(struct ndctl_region *region,
goto out;
}
+ size = ndctl_namespace_get_size(ndns);
+
rc = ndctl_namespace_delete(ndns);
if (rc)
debug("%s: failed to reclaim\n", devname);
+ /*
+ * Don't report a destroyed namespace when no capacity was
+ * allocated.
+ */
+ if (size == 0 && rc == 0)
+ rc = 1;
+
out:
return rc;
}
1 year
[PATCH] daxctl: Change region input type from INTEGER to STRING.
by Li, Redhairer
Allow daxctl to accept either a <region-id> or a region name as the region parameter.
For example:
daxctl list -r region5
daxctl list -r 5
Link: https://github.com/pmem/ndctl/issues/109
Signed-off-by: Redhairer Li <redhairer.li(a)intel.com>
---
daxctl/device.c | 11 ++++-------
daxctl/list.c | 14 ++++++--------
util/filter.c | 16 ++++++++++++++++
util/filter.h | 2 ++
4 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/daxctl/device.c b/daxctl/device.c
index 72e506e..d9db2f9 100644
--- a/daxctl/device.c
+++ b/daxctl/device.c
@@ -19,15 +19,13 @@
static struct {
const char *dev;
const char *mode;
- int region_id;
+ const char *region;
bool no_online;
bool no_movable;
bool force;
bool human;
bool verbose;
-} param = {
- .region_id = -1,
-};
+} param;
enum dev_mode {
DAXCTL_DEV_MODE_UNKNOWN,
@@ -51,7 +49,7 @@ enum device_action {
};
#define BASE_OPTIONS() \
-OPT_INTEGER('r', "region", &param.region_id, "restrict to the given region"), \
+OPT_STRING('r', "region", &param.region, "region-id", "filter by region"), \
OPT_BOOLEAN('u', "human", &param.human, "use human friendly number formats"), \
OPT_BOOLEAN('v', "verbose", &param.verbose, "emit more debug messages")
@@ -484,8 +482,7 @@ static int do_xaction_device(const char *device, enum device_action action,
*processed = 0;
daxctl_region_foreach(ctx, region) {
- if (param.region_id >= 0 && param.region_id
- != daxctl_region_get_id(region))
+ if (!util_daxctl_region_filter(region, device))
continue;
daxctl_dev_foreach(region, dev) {
diff --git a/daxctl/list.c b/daxctl/list.c
index e56300d..6c6251b 100644
--- a/daxctl/list.c
+++ b/daxctl/list.c
@@ -44,10 +44,8 @@ static unsigned long listopts_to_flags(void)
static struct {
const char *dev;
- int region_id;
-} param = {
- .region_id = -1,
-};
+ const char *region;
+} param;
static int did_fail;
@@ -66,7 +64,8 @@ static int num_list_flags(void)
int cmd_list(int argc, const char **argv, struct daxctl_ctx *ctx)
{
const struct option options[] = {
- OPT_INTEGER('r', "region", &param.region_id, "filter by region"),
+ OPT_STRING('r', "region", &param.region, "region-id",
"filter by region"),
OPT_STRING('d', "dev", &param.dev, "dev-id",
"filter by dax device instance name"),
OPT_BOOLEAN('D', "devices", &list.devs, "include dax device info"),
@@ -94,7 +93,7 @@ int cmd_list(int argc, const char **argv, struct daxctl_ctx *ctx)
usage_with_options(u, options);
if (num_list_flags() == 0) {
- list.regions = param.region_id >= 0;
+ list.regions = !!param.region;
list.devs = !!param.dev;
}
@@ -106,8 +105,7 @@ int cmd_list(int argc, const char **argv, struct daxctl_ctx *ctx)
daxctl_region_foreach(ctx, region) {
struct json_object *jregion = NULL;
- if (param.region_id >= 0 && param.region_id
- != daxctl_region_get_id(region))
+ if (!util_daxctl_region_filter(region, param.region))
continue;
if (list.regions) {
diff --git a/util/filter.c b/util/filter.c
index 1734bce..877d6c7 100644
--- a/util/filter.c
+++ b/util/filter.c
@@ -335,6 +335,22 @@ struct daxctl_dev *util_daxctl_dev_filter(struct daxctl_dev *dev,
return NULL;
}
+struct daxctl_region *util_daxctl_region_filter(struct daxctl_region *region,
+ const char *ident)
+{
+ int region_id;
+
+ if (!ident || strcmp(ident, "all") == 0)
+ return region;
+
+ if ((sscanf(ident, "%d", &region_id) == 1
+ || sscanf(ident, "region%d", &region_id) == 1)
+ && daxctl_region_get_id(region) == region_id)
+ return region;
+
+ return NULL;
+}
+
static enum ndctl_namespace_mode mode_to_type(const char *mode)
{
if (!mode)
diff --git a/util/filter.h b/util/filter.h
index c2cdddf..0c12b94 100644
--- a/util/filter.h
+++ b/util/filter.h
@@ -37,6 +37,8 @@ struct ndctl_region *util_region_filter_by_namespace(struct ndctl_region *region
const char *ident);
struct daxctl_dev *util_daxctl_dev_filter(struct daxctl_dev *dev,
const char *ident);
+struct daxctl_region *util_daxctl_region_filter(struct daxctl_region *region,
+ const char *ident);
struct json_object;
--
2.20.1.windows.1
1 year