mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 00:20:52 -05:00
DAX error handling for 4.7
- Until now, dax has been disabled if media errors were found on any device. This enables the use of DAX in the presence of these errors by making all sector-aligned zeroing go through the driver. - The driver (already) has the ability to clear errors on writes that are sent through the block layer using 'DSMs' defined in ACPI 6.1. Other misc changes: - When mounting DAX filesystems, check to make sure the partition is page aligned. This is a requirement for DAX, and previously, we allowed such unaligned mounts to succeed, but subsequent reads/writes would fail. - Misc/cleanup fixes from Jan that remove unused code from DAX related to zeroing, writeback, and some size checks. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABAgAGBQJXQ4GKAAoJEHr6Yb6juE3/zowP/iclIhgXXXMQJRUHJlePMXC8 15sGZ32JS1ak9g7vrsmNVEDNynfNtiMYdBxtUyRuj6xqgwdZvFk3F55KOCPtaeA1 +yADkgeRkTAcwzmHw9WQVEzBCqyzSisdrwtEfH817qdq9FJdH66x2Kos6i+HeAVr 5Q/e4gs7lKrjf384/QBl+wxNZOndJaQAPd2VRHQqx2A9F33v0ljdwRaUG1r4fjK2 dtmhcZCqdQyuAGXW3piTnZc5ZFc3DPqO4FkEfqkEK3lFOflK0fd8wMsAZRp/Jd0j GJsgnVSWSqG0Dz476djlG0w8t2p5Jv1g9cKChV+ZZEdFLKWHCOUFqXNj8uI8I4k5 cOEKCHyJ3IwfSHhNQqktEWrQN4T8ZXhWtuc9GuV4UZYuqJqHci6EdR/YsWsJjV+L lm/qvK4ipDS1pivxOy8KX/iN0z7Io8J9GXpStDx3g8iWjLlh4YYlbJLWeeRepo/z aPlV/QAKcHiGY6jzLExrZIyCWkzwo6O+0p1Kxerv9/7K/32HWbOodZ+tC8eD+N25 pV69nCGf+u50T2TtIx1+iann4NC1r7zg5yqnT9AgpyZpiwR5joCDzI5sXW+D0rcS vPtfM84Ccdeq/e6mvfIpZgR0/npQapKnrmUest0J7P2BFPHiFPji1KzZ7M+1aFOo 9R6JdrAj0Sc+FBa+cGzH =v6Of -----END PGP SIGNATURE----- Merge tag 'dax-misc-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm Pull misc DAX updates from Vishal Verma: "DAX error handling for 4.7 - Until now, dax has been disabled if media errors were found on any device. This enables the use of DAX in the presence of these errors by making all sector-aligned zeroing go through the driver. - The driver (already) has the ability to clear errors on writes that are sent through the block layer using 'DSMs' defined in ACPI 6.1. 
Other misc changes: - When mounting DAX filesystems, check to make sure the partition is page aligned. This is a requirement for DAX, and previously, we allowed such unaligned mounts to succeed, but subsequent reads/writes would fail. - Misc/cleanup fixes from Jan that remove unused code from DAX related to zeroing, writeback, and some size checks" * tag 'dax-misc-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: dax: fix a comment in dax_zero_page_range and dax_truncate_page dax: for truncate/hole-punch, do zeroing through the driver if possible dax: export a low-level __dax_zero_page_range helper dax: use sb_issue_zerout instead of calling dax_clear_sectors dax: enable dax in the presence of known media errors (badblocks) dax: fallback from pmd to pte on error block: Update blkdev_dax_capable() for consistency xfs: Add alignment check for DAX mount ext2: Add alignment check for DAX mount ext4: Add alignment check for DAX mount block: Add bdev_dax_supported() for dax mount checks block: Add vfs_msg() interface dax: Remove redundant inode size checks dax: Remove pointless writeback from dax_do_io() dax: Remove zeroing from dax_io() dax: Remove dead zeroing code from fault handlers ext2: Avoid DAX zeroing to corrupt data ext2: Fix block zeroing in ext2_get_blocks() for DAX dax: Remove complete_unwritten argument DAX: move RADIX_DAX_ definitions to dax.c
This commit is contained in:
commit
315227f6da
19 changed files with 246 additions and 293 deletions
|
@ -79,6 +79,38 @@ These filesystems may be used for inspiration:
|
|||
- ext4: the fourth extended filesystem, see Documentation/filesystems/ext4.txt
|
||||
|
||||
|
||||
Handling Media Errors
|
||||
---------------------
|
||||
|
||||
The libnvdimm subsystem stores a record of known media error locations for
|
||||
each pmem block device (in gendisk->badblocks). If we fault at such a location,
|
||||
or one with a latent error not yet discovered, the application can expect
|
||||
to receive a SIGBUS. Libnvdimm also allows clearing of these errors by simply
|
||||
writing the affected sectors (through the pmem driver, and if the underlying
|
||||
NVDIMM supports the clear_poison DSM defined by ACPI).
|
||||
|
||||
Since DAX IO normally doesn't go through the driver/bio path, applications or
|
||||
sysadmins have an option to restore the lost data from a prior backup/inbuilt
|
||||
redundancy in the following ways:
|
||||
|
||||
1. Delete the affected file, and restore from a backup (sysadmin route):
|
||||
This will free the file system blocks that were being used by the file,
|
||||
and the next time they're allocated, they will be zeroed first, which
|
||||
happens through the driver, and will clear bad sectors.
|
||||
|
||||
2. Truncate or hole-punch the part of the file that has a bad-block (at least
|
||||
an entire aligned sector has to be hole-punched, but not necessarily an
|
||||
entire filesystem block).
|
||||
|
||||
These are the two basic paths that allow DAX filesystems to continue operating
|
||||
in the presence of media errors. More robust error recovery mechanisms can be
|
||||
built on top of this in the future, for example, involving redundancy/mirroring
|
||||
provided at the block layer through DM, or additionally, at the filesystem
|
||||
level. These would have to rely on the above two tenets, that error clearing
|
||||
can happen either by sending an IO through the driver, or zeroing (also through
|
||||
the driver).
|
||||
|
||||
|
||||
Shortcomings
|
||||
------------
|
||||
|
||||
|
|
|
@ -143,7 +143,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
|
|||
*/
|
||||
static long
|
||||
axon_ram_direct_access(struct block_device *device, sector_t sector,
|
||||
void __pmem **kaddr, pfn_t *pfn)
|
||||
void __pmem **kaddr, pfn_t *pfn, long size)
|
||||
{
|
||||
struct axon_ram_bank *bank = device->bd_disk->private_data;
|
||||
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#include <linux/gfp.h>
|
||||
#include <linux/blkpg.h>
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/badblocks.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blktrace_api.h>
|
||||
|
|
|
@ -381,7 +381,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
|
|||
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
static long brd_direct_access(struct block_device *bdev, sector_t sector,
|
||||
void __pmem **kaddr, pfn_t *pfn)
|
||||
void __pmem **kaddr, pfn_t *pfn, long size)
|
||||
{
|
||||
struct brd_device *brd = bdev->bd_disk->private_data;
|
||||
struct page *page;
|
||||
|
|
|
@ -164,14 +164,22 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
|
|||
}
|
||||
|
||||
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
|
||||
void __pmem **kaddr, pfn_t *pfn)
|
||||
void __pmem **kaddr, pfn_t *pfn, long size)
|
||||
{
|
||||
struct pmem_device *pmem = bdev->bd_queue->queuedata;
|
||||
resource_size_t offset = sector * 512 + pmem->data_offset;
|
||||
|
||||
if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
|
||||
return -EIO;
|
||||
*kaddr = pmem->virt_addr + offset;
|
||||
*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
|
||||
|
||||
/*
|
||||
* If badblocks are present, limit known good range to the
|
||||
* requested range.
|
||||
*/
|
||||
if (unlikely(pmem->bb.count))
|
||||
return size;
|
||||
return pmem->size - pmem->pfn_pad - offset;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
|
|||
static blk_qc_t dcssblk_make_request(struct request_queue *q,
|
||||
struct bio *bio);
|
||||
static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
|
||||
void __pmem **kaddr, pfn_t *pfn);
|
||||
void __pmem **kaddr, pfn_t *pfn, long size);
|
||||
|
||||
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
|
||||
|
||||
|
@ -884,7 +884,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
|
|||
|
||||
static long
|
||||
dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
|
||||
void __pmem **kaddr, pfn_t *pfn)
|
||||
void __pmem **kaddr, pfn_t *pfn, long size)
|
||||
{
|
||||
struct dcssblk_dev_info *dev_info;
|
||||
unsigned long offset, dev_sz;
|
||||
|
|
114
fs/block_dev.c
114
fs/block_dev.c
|
@ -51,6 +51,18 @@ struct block_device *I_BDEV(struct inode *inode)
|
|||
}
|
||||
EXPORT_SYMBOL(I_BDEV);
|
||||
|
||||
/*
 * __vfs_msg() - print a rate-limited message of the form
 * "<prefix>VFS (<sb->s_id>): <formatted text>".
 * @sb: superblock whose s_id identifies the filesystem instance in the output
 * @prefix: string emitted ahead of "VFS"; bdev_dax_supported() supplies
 *          KERN_ERR through vfs_msg(), presumably a wrapper around this
 *          function (wrapper definition not visible here - confirm)
 * @fmt: printf-style format string describing the trailing arguments
 *
 * The variadic arguments are not expanded here; they are packaged into a
 * struct va_format and handed to printk's %pV specifier.
 */
void __vfs_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
|
||||
{
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
|
||||
va_start(args, fmt);
|
||||
vaf.fmt = fmt;
|
||||
vaf.va = &args;
|
||||
/* A trailing newline is appended here, so callers pass fmt without one. */
printk_ratelimited("%sVFS (%s): %pV\n", prefix, sb->s_id, &vaf);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
static void bdev_write_inode(struct block_device *bdev)
|
||||
{
|
||||
struct inode *inode = bdev->bd_inode;
|
||||
|
@ -489,7 +501,7 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax)
|
|||
sector += get_start_sect(bdev);
|
||||
if (sector % (PAGE_SIZE / 512))
|
||||
return -EINVAL;
|
||||
avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn);
|
||||
avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn, size);
|
||||
if (!avail)
|
||||
return -ERANGE;
|
||||
if (avail > 0 && avail & ~PAGE_MASK)
|
||||
|
@ -498,6 +510,75 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_direct_access);
|
||||
|
||||
/**
|
||||
* bdev_dax_supported() - Check if the device supports dax for filesystem
|
||||
* @sb: The superblock of the device
|
||||
* @blocksize: The block size of the device
* (compared against PAGE_SIZE; any other value is rejected)
|
||||
*
|
||||
* This is a library function for filesystems to check if the block device
|
||||
* can be mounted with dax option.
|
||||
*
* The check has two steps: @blocksize must equal PAGE_SIZE, and a
* PAGE_SIZE bdev_direct_access() probe at sector 0 of sb->s_bdev must not
* fail. Every failure is logged through vfs_msg() at KERN_ERR.
*
|
||||
* Return: negative errno if unsupported, 0 if supported.
* -EINVAL is returned directly for a wrong @blocksize; otherwise the errno
* is whatever bdev_direct_access() reported.
|
||||
*/
|
||||
int bdev_dax_supported(struct super_block *sb, int blocksize)
|
||||
{
|
||||
struct blk_dax_ctl dax = {
|
||||
.sector = 0,
|
||||
.size = PAGE_SIZE,
|
||||
};
|
||||
int err;
|
||||
|
||||
if (blocksize != PAGE_SIZE) {
|
||||
vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Probe a single page at the very start of the filesystem's device. */
err = bdev_direct_access(sb->s_bdev, &dax);
|
||||
if (err < 0) {
|
||||
/* Translate the probe's errno into a specific log message. */
switch (err) {
|
||||
case -EOPNOTSUPP:
|
||||
vfs_msg(sb, KERN_ERR,
|
||||
"error: device does not support dax");
|
||||
break;
|
||||
/*
 * bdev_direct_access() returns -EINVAL when the sector, after adding
 * the partition start, is not a multiple of PAGE_SIZE / 512.
 */
case -EINVAL:
|
||||
vfs_msg(sb, KERN_ERR,
|
||||
"error: unaligned partition for dax");
|
||||
break;
|
||||
default:
|
||||
vfs_msg(sb, KERN_ERR,
|
||||
"error: dax access failed (%d)", err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bdev_dax_supported);
|
||||
|
||||
/**
|
||||
* bdev_dax_capable() - Return whether the raw device is capable of dax
|
||||
* @bdev: The device for raw block device access
*
* Issues two PAGE_SIZE bdev_direct_access() probes: one at sector 0 and one
* at the last PAGE_SIZE worth of sectors of the partition
* (bd_part->nr_sects - PAGE_SIZE / 512).
*
* Return: false if CONFIG_FS_DAX is not enabled or either probe returns a
* negative value, true otherwise.
|
||||
*/
|
||||
bool bdev_dax_capable(struct block_device *bdev)
|
||||
{
|
||||
struct blk_dax_ctl dax = {
|
||||
.size = PAGE_SIZE,
|
||||
};
|
||||
|
||||
if (!IS_ENABLED(CONFIG_FS_DAX))
|
||||
return false;
|
||||
|
||||
/* First probe: the first page of the device/partition. */
dax.sector = 0;
|
||||
if (bdev_direct_access(bdev, &dax) < 0)
|
||||
return false;
|
||||
|
||||
/* Second probe: the page that ends exactly at nr_sects. */
dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512);
|
||||
if (bdev_direct_access(bdev, &dax) < 0)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* pseudo-fs
|
||||
*/
|
||||
|
@ -1160,33 +1241,6 @@ void bd_set_size(struct block_device *bdev, loff_t size)
|
|||
}
|
||||
EXPORT_SYMBOL(bd_set_size);
|
||||
|
||||
/*
 * blkdev_dax_capable() - decide from static properties whether DAX I/O may
 * be done to @bdev.
 *
 * Returns false when the disk has no direct_access operation, when
 * CONFIG_FS_DAX is not enabled, when the partition's start or length is not
 * a multiple of PAGE_SIZE / 512 sectors, or when the disk's badblocks list
 * is non-empty. Returns true otherwise. No I/O is issued by this check.
 */
static bool blkdev_dax_capable(struct block_device *bdev)
|
||||
{
|
||||
struct gendisk *disk = bdev->bd_disk;
|
||||
|
||||
if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If the partition is not aligned on a page boundary, we can't
|
||||
* do dax I/O to it.
|
||||
*/
|
||||
if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
|
||||
|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If the device has known bad blocks, force all I/O through the
|
||||
* driver / page cache.
|
||||
*
|
||||
* TODO: support finer grained dax error handling
|
||||
*/
|
||||
if (disk->bb && disk->bb->count)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
|
||||
|
||||
/*
|
||||
|
@ -1266,7 +1320,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
|
|||
|
||||
if (!ret) {
|
||||
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
|
||||
if (!blkdev_dax_capable(bdev))
|
||||
if (!bdev_dax_capable(bdev))
|
||||
bdev->bd_inode->i_flags &= ~S_DAX;
|
||||
}
|
||||
|
||||
|
@ -1303,7 +1357,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
|
|||
goto out_clear;
|
||||
}
|
||||
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
|
||||
if (!blkdev_dax_capable(bdev))
|
||||
if (!bdev_dax_capable(bdev))
|
||||
bdev->bd_inode->i_flags &= ~S_DAX;
|
||||
}
|
||||
} else {
|
||||
|
|
257
fs/dax.c
257
fs/dax.c
|
@ -87,50 +87,6 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
|
|||
return page;
|
||||
}
|
||||
|
||||
/*
|
||||
* dax_clear_sectors() is called from within transaction context from XFS,
|
||||
* and hence this means the stack from this point must follow GFP_NOFS
|
||||
* semantics for all operations.
*
* Zeroes @_size bytes starting at sector @_sector of @bdev by mapping the
* range with dax_map_atomic() and calling clear_pmem() on it, at most
* SZ_128K per loop iteration. A single wmb_pmem() is issued after the
* whole range has been cleared.
*
* Returns 0 on success, or the negative value returned by
* dax_map_atomic() if a mapping attempt fails.
|
||||
*/
|
||||
int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
|
||||
{
|
||||
struct blk_dax_ctl dax = {
|
||||
.sector = _sector,
|
||||
.size = _size,
|
||||
};
|
||||
|
||||
might_sleep();
|
||||
do {
|
||||
long count, sz;
|
||||
|
||||
count = dax_map_atomic(bdev, &dax);
|
||||
if (count < 0)
|
||||
return count;
|
||||
/* Cap the work per iteration; the loop reschedules between chunks. */
sz = min_t(long, count, SZ_128K);
|
||||
clear_pmem(dax.addr, sz);
|
||||
/* Advance the request past the bytes that were just cleared. */
dax.size -= sz;
|
||||
dax.sector += sz / 512;
|
||||
dax_unmap_atomic(bdev, &dax);
|
||||
cond_resched();
|
||||
} while (dax.size);
|
||||
|
||||
wmb_pmem();
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_clear_sectors);
|
||||
|
||||
/*
 * dax_new_buf() - zero the parts of a mapped buffer that lie outside the
 * range about to be written.
 * @addr: start of the mapped buffer
 * @size: length of the buffer in bytes
 * @first: byte offset within the buffer at which the caller's data begins
 * @pos: file position corresponding to offset @first
 * @end: file position at which the caller's data ends
 *
 * Clears [0, @first) and, when end - pos + first is smaller than @size,
 * [end - pos + first, @size). No write barrier is issued here.
 */
/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
|
||||
static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
|
||||
loff_t pos, loff_t end)
|
||||
{
|
||||
loff_t final = end - pos + first; /* The final byte of the buffer */
|
||||
|
||||
if (first > 0)
|
||||
clear_pmem(addr, first);
|
||||
if (final < size)
|
||||
clear_pmem(addr + final, size - final);
|
||||
}
|
||||
|
||||
static bool buffer_written(struct buffer_head *bh)
|
||||
{
|
||||
return buffer_mapped(bh) && !buffer_unwritten(bh);
|
||||
|
@ -169,6 +125,9 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
|
|||
struct blk_dax_ctl dax = {
|
||||
.addr = (void __pmem *) ERR_PTR(-EIO),
|
||||
};
|
||||
unsigned blkbits = inode->i_blkbits;
|
||||
sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
|
||||
>> blkbits;
|
||||
|
||||
if (rw == READ)
|
||||
end = min(end, i_size_read(inode));
|
||||
|
@ -176,7 +135,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
|
|||
while (pos < end) {
|
||||
size_t len;
|
||||
if (pos == max) {
|
||||
unsigned blkbits = inode->i_blkbits;
|
||||
long page = pos >> PAGE_SHIFT;
|
||||
sector_t block = page << (PAGE_SHIFT - blkbits);
|
||||
unsigned first = pos - (block << blkbits);
|
||||
|
@ -192,6 +150,13 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
|
|||
bh->b_size = 1 << blkbits;
|
||||
bh_max = pos - first + bh->b_size;
|
||||
bdev = bh->b_bdev;
|
||||
/*
|
||||
* We allow uninitialized buffers for writes
|
||||
* beyond EOF as those cannot race with faults
|
||||
*/
|
||||
WARN_ON_ONCE(
|
||||
(buffer_new(bh) && block < file_blks) ||
|
||||
(rw == WRITE && buffer_unwritten(bh)));
|
||||
} else {
|
||||
unsigned done = bh->b_size -
|
||||
(bh_max - (pos - first));
|
||||
|
@ -211,11 +176,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
|
|||
rc = map_len;
|
||||
break;
|
||||
}
|
||||
if (buffer_unwritten(bh) || buffer_new(bh)) {
|
||||
dax_new_buf(dax.addr, map_len, first,
|
||||
pos, end);
|
||||
need_wmb = true;
|
||||
}
|
||||
dax.addr += first;
|
||||
size = map_len - first;
|
||||
}
|
||||
|
@ -276,15 +236,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
|
|||
memset(&bh, 0, sizeof(bh));
|
||||
bh.b_bdev = inode->i_sb->s_bdev;
|
||||
|
||||
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
|
||||
inode_lock(inode);
|
||||
retval = filemap_write_and_wait_range(mapping, pos, end - 1);
|
||||
if (retval) {
|
||||
inode_unlock(inode);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Protects against truncate */
|
||||
if (!(flags & DIO_SKIP_DIO_COUNT))
|
||||
|
@ -305,7 +258,6 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
|
|||
|
||||
if (!(flags & DIO_SKIP_DIO_COUNT))
|
||||
inode_dio_end(inode);
|
||||
out:
|
||||
return retval;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_do_io);
|
||||
|
@ -321,20 +273,11 @@ EXPORT_SYMBOL_GPL(dax_do_io);
|
|||
static int dax_load_hole(struct address_space *mapping, struct page *page,
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
unsigned long size;
|
||||
struct inode *inode = mapping->host;
|
||||
if (!page)
|
||||
page = find_or_create_page(mapping, vmf->pgoff,
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page)
|
||||
return VM_FAULT_OOM;
|
||||
/* Recheck i_size under page lock to avoid truncate race */
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (vmf->pgoff >= size) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
|
||||
vmf->page = page;
|
||||
return VM_FAULT_LOCKED;
|
||||
|
@ -565,33 +508,14 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
|
|||
.sector = to_sector(bh, inode),
|
||||
.size = bh->b_size,
|
||||
};
|
||||
pgoff_t size;
|
||||
int error;
|
||||
|
||||
i_mmap_lock_read(mapping);
|
||||
|
||||
/*
|
||||
* Check truncate didn't happen while we were allocating a block.
|
||||
* If it did, this block may or may not be still allocated to the
|
||||
* file. We can't tell the filesystem to free it because we can't
|
||||
* take i_mutex here. In the worst case, the file still has blocks
|
||||
* allocated past the end of the file.
|
||||
*/
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (unlikely(vmf->pgoff >= size)) {
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (dax_map_atomic(bdev, &dax) < 0) {
|
||||
error = PTR_ERR(dax.addr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (buffer_unwritten(bh) || buffer_new(bh)) {
|
||||
clear_pmem(dax.addr, PAGE_SIZE);
|
||||
wmb_pmem();
|
||||
}
|
||||
dax_unmap_atomic(bdev, &dax);
|
||||
|
||||
error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
|
||||
|
@ -612,19 +536,13 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
|
|||
* @vma: The virtual memory area where the fault occurred
|
||||
* @vmf: The description of the fault
|
||||
* @get_block: The filesystem method used to translate file offsets to blocks
|
||||
* @complete_unwritten: The filesystem method used to convert unwritten blocks
|
||||
* to written so the data written to them is exposed. This is required for
|
||||
* write faults for filesystems that will return unwritten
|
||||
* extent mappings from @get_block, but it is optional for reads as
|
||||
* dax_insert_mapping() will always zero unwritten blocks. If the fs does
|
||||
* not support unwritten extents, then it should pass NULL.
|
||||
*
|
||||
* When a page fault occurs, filesystems may call this helper in their
|
||||
* fault handler for DAX files. __dax_fault() assumes the caller has done all
|
||||
* the necessary locking for the page fault to proceed successfully.
|
||||
*/
|
||||
int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
||||
get_block_t get_block, dax_iodone_t complete_unwritten)
|
||||
get_block_t get_block)
|
||||
{
|
||||
struct file *file = vma->vm_file;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
|
@ -659,15 +577,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
|||
put_page(page);
|
||||
goto repeat;
|
||||
}
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (unlikely(vmf->pgoff >= size)) {
|
||||
/*
|
||||
* We have a struct page covering a hole in the file
|
||||
* from a read fault and we've raced with a truncate
|
||||
*/
|
||||
error = -EIO;
|
||||
goto unlock_page;
|
||||
}
|
||||
}
|
||||
|
||||
error = get_block(inode, block, &bh, 0);
|
||||
|
@ -700,17 +609,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
|||
if (error)
|
||||
goto unlock_page;
|
||||
vmf->page = page;
|
||||
if (!page) {
|
||||
if (!page)
|
||||
i_mmap_lock_read(mapping);
|
||||
/* Check we didn't race with truncate */
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >>
|
||||
PAGE_SHIFT;
|
||||
if (vmf->pgoff >= size) {
|
||||
i_mmap_unlock_read(mapping);
|
||||
error = -EIO;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
return VM_FAULT_LOCKED;
|
||||
}
|
||||
|
||||
|
@ -727,23 +627,9 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
|||
page = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we successfully insert the new mapping over an unwritten extent,
|
||||
* we need to ensure we convert the unwritten extent. If there is an
|
||||
* error inserting the mapping, the filesystem needs to leave it as
|
||||
* unwritten to prevent exposure of the stale underlying data to
|
||||
* userspace, but we still need to call the completion function so
|
||||
* the private resources on the mapping buffer can be released. We
|
||||
* indicate what the callback should do via the uptodate variable, same
|
||||
* as for normal BH based IO completions.
|
||||
*/
|
||||
/* Filesystem should not return unwritten buffers to us! */
|
||||
WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
|
||||
error = dax_insert_mapping(inode, &bh, vma, vmf);
|
||||
if (buffer_unwritten(&bh)) {
|
||||
if (complete_unwritten)
|
||||
complete_unwritten(&bh, !error);
|
||||
else
|
||||
WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
|
||||
}
|
||||
|
||||
out:
|
||||
if (error == -ENOMEM)
|
||||
|
@ -772,7 +658,7 @@ EXPORT_SYMBOL(__dax_fault);
|
|||
* fault handler for DAX files.
|
||||
*/
|
||||
int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
||||
get_block_t get_block, dax_iodone_t complete_unwritten)
|
||||
get_block_t get_block)
|
||||
{
|
||||
int result;
|
||||
struct super_block *sb = file_inode(vma->vm_file)->i_sb;
|
||||
|
@ -781,7 +667,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
|
|||
sb_start_pagefault(sb);
|
||||
file_update_time(vma->vm_file);
|
||||
}
|
||||
result = __dax_fault(vma, vmf, get_block, complete_unwritten);
|
||||
result = __dax_fault(vma, vmf, get_block);
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
sb_end_pagefault(sb);
|
||||
|
||||
|
@ -815,8 +701,7 @@ static void __dax_dbg(struct buffer_head *bh, unsigned long address,
|
|||
#define dax_pmd_dbg(bh, address, reason) __dax_dbg(bh, address, reason, "dax_pmd")
|
||||
|
||||
int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
||||
pmd_t *pmd, unsigned int flags, get_block_t get_block,
|
||||
dax_iodone_t complete_unwritten)
|
||||
pmd_t *pmd, unsigned int flags, get_block_t get_block)
|
||||
{
|
||||
struct file *file = vma->vm_file;
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
|
@ -875,6 +760,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
|||
if (get_block(inode, block, &bh, 1) != 0)
|
||||
return VM_FAULT_SIGBUS;
|
||||
alloc = true;
|
||||
WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
|
||||
}
|
||||
|
||||
bdev = bh.b_bdev;
|
||||
|
@ -902,23 +788,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
|||
|
||||
i_mmap_lock_read(mapping);
|
||||
|
||||
/*
|
||||
* If a truncate happened while we were allocating blocks, we may
|
||||
* leave blocks allocated to the file that are beyond EOF. We can't
|
||||
* take i_mutex here, so just leave them hanging; they'll be freed
|
||||
* when the file is deleted.
|
||||
*/
|
||||
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
if (pgoff >= size) {
|
||||
result = VM_FAULT_SIGBUS;
|
||||
goto out;
|
||||
}
|
||||
if ((pgoff | PG_PMD_COLOUR) >= size) {
|
||||
dax_pmd_dbg(&bh, address,
|
||||
"offset + huge page size > file size");
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
|
||||
spinlock_t *ptl;
|
||||
pmd_t entry;
|
||||
|
@ -954,8 +823,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
|||
long length = dax_map_atomic(bdev, &dax);
|
||||
|
||||
if (length < 0) {
|
||||
result = VM_FAULT_SIGBUS;
|
||||
goto out;
|
||||
dax_pmd_dbg(&bh, address, "dax-error fallback");
|
||||
goto fallback;
|
||||
}
|
||||
if (length < PMD_SIZE) {
|
||||
dax_pmd_dbg(&bh, address, "dax-length too small");
|
||||
|
@ -973,14 +842,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
|||
dax_pmd_dbg(&bh, address, "pfn not in memmap");
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
if (buffer_unwritten(&bh) || buffer_new(&bh)) {
|
||||
clear_pmem(dax.addr, PMD_SIZE);
|
||||
wmb_pmem();
|
||||
count_vm_event(PGMAJFAULT);
|
||||
mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
|
||||
result |= VM_FAULT_MAJOR;
|
||||
}
|
||||
dax_unmap_atomic(bdev, &dax);
|
||||
|
||||
/*
|
||||
|
@ -1020,9 +881,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
|||
out:
|
||||
i_mmap_unlock_read(mapping);
|
||||
|
||||
if (buffer_unwritten(&bh))
|
||||
complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
|
||||
|
||||
return result;
|
||||
|
||||
fallback:
|
||||
|
@ -1042,8 +900,7 @@ EXPORT_SYMBOL_GPL(__dax_pmd_fault);
|
|||
* pmd_fault handler for DAX files.
|
||||
*/
|
||||
int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
||||
pmd_t *pmd, unsigned int flags, get_block_t get_block,
|
||||
dax_iodone_t complete_unwritten)
|
||||
pmd_t *pmd, unsigned int flags, get_block_t get_block)
|
||||
{
|
||||
int result;
|
||||
struct super_block *sb = file_inode(vma->vm_file)->i_sb;
|
||||
|
@ -1052,8 +909,7 @@ int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
|
|||
sb_start_pagefault(sb);
|
||||
file_update_time(vma->vm_file);
|
||||
}
|
||||
result = __dax_pmd_fault(vma, address, pmd, flags, get_block,
|
||||
complete_unwritten);
|
||||
result = __dax_pmd_fault(vma, address, pmd, flags, get_block);
|
||||
if (flags & FAULT_FLAG_WRITE)
|
||||
sb_end_pagefault(sb);
|
||||
|
||||
|
@ -1091,6 +947,43 @@ int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
|
||||
|
||||
/*
 * dax_range_is_aligned() - report whether both @offset and @length are
 * multiples of @bdev's logical block size, as returned by
 * bdev_logical_block_size().
 *
 * Returns true only when both values are aligned.
 */
static bool dax_range_is_aligned(struct block_device *bdev,
|
||||
unsigned int offset, unsigned int length)
|
||||
{
|
||||
unsigned short sector_size = bdev_logical_block_size(bdev);
|
||||
|
||||
if (!IS_ALIGNED(offset, sector_size))
|
||||
return false;
|
||||
if (!IS_ALIGNED(length, sector_size))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * __dax_zero_page_range() - zero @length bytes at byte @offset within the
 * page that starts at @sector of @bdev.
 *
 * When @offset and @length are both aligned to the device's logical block
 * size, the zeroing is submitted through the block layer with
 * blkdev_issue_zeroout(), using 512-byte sector units. Otherwise one
 * PAGE_SIZE mapping is obtained with dax_map_atomic() and the bytes are
 * cleared directly with clear_pmem() followed by wmb_pmem().
 *
 * Returns the result of blkdev_issue_zeroout() on the aligned path;
 * on the unaligned path, PTR_ERR(dax.addr) if mapping fails, else 0.
 */
int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
|
||||
unsigned int offset, unsigned int length)
|
||||
{
|
||||
struct blk_dax_ctl dax = {
|
||||
.sector = sector,
|
||||
.size = PAGE_SIZE,
|
||||
};
|
||||
|
||||
if (dax_range_is_aligned(bdev, offset, length)) {
|
||||
/* Convert the byte offset and length into 512-byte sectors. */
sector_t start_sector = dax.sector + (offset >> 9);
|
||||
|
||||
return blkdev_issue_zeroout(bdev, start_sector,
|
||||
length >> 9, GFP_NOFS, true);
|
||||
} else {
|
||||
/*
 * NOTE(review): the error is taken from dax.addr, so dax_map_atomic()
 * presumably stores an ERR_PTR there on failure - confirm.
 */
if (dax_map_atomic(bdev, &dax) < 0)
|
||||
return PTR_ERR(dax.addr);
|
||||
clear_pmem(dax.addr + offset, length);
|
||||
wmb_pmem();
|
||||
dax_unmap_atomic(bdev, &dax);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__dax_zero_page_range);
|
||||
|
||||
/**
|
||||
* dax_zero_page_range - zero a range within a page of a DAX file
|
||||
* @inode: The file being truncated
|
||||
|
@ -1102,12 +995,6 @@ EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
|
|||
* page in a DAX file. This is intended for hole-punch operations. If
|
||||
* you are truncating a file, the helper function dax_truncate_page() may be
|
||||
* more convenient.
|
||||
*
|
||||
* We work in terms of PAGE_SIZE here for commonality with
|
||||
* block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
|
||||
* took care of disposing of the unnecessary blocks. Even if the filesystem
|
||||
* block size is smaller than PAGE_SIZE, we have to zero the rest of the page
|
||||
* since the file might be mmapped.
|
||||
*/
|
||||
int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
|
||||
get_block_t get_block)
|
||||
|
@ -1126,23 +1013,11 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
|
|||
bh.b_bdev = inode->i_sb->s_bdev;
|
||||
bh.b_size = PAGE_SIZE;
|
||||
err = get_block(inode, index, &bh, 0);
|
||||
if (err < 0)
|
||||
if (err < 0 || !buffer_written(&bh))
|
||||
return err;
|
||||
if (buffer_written(&bh)) {
|
||||
struct block_device *bdev = bh.b_bdev;
|
||||
struct blk_dax_ctl dax = {
|
||||
.sector = to_sector(&bh, inode),
|
||||
.size = PAGE_SIZE,
|
||||
};
|
||||
|
||||
if (dax_map_atomic(bdev, &dax) < 0)
|
||||
return PTR_ERR(dax.addr);
|
||||
clear_pmem(dax.addr + offset, length);
|
||||
wmb_pmem();
|
||||
dax_unmap_atomic(bdev, &dax);
|
||||
}
|
||||
|
||||
return 0;
|
||||
return __dax_zero_page_range(bh.b_bdev, to_sector(&bh, inode),
|
||||
offset, length);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_zero_page_range);
|
||||
|
||||
|
@ -1154,12 +1029,6 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range);
|
|||
*
|
||||
* Similar to block_truncate_page(), this function can be called by a
|
||||
* filesystem when it is truncating a DAX file to handle the partial page.
|
||||
*
|
||||
* We work in terms of PAGE_SIZE here for commonality with
|
||||
* block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
|
||||
* took care of disposing of the unnecessary blocks. Even if the filesystem
|
||||
* block size is smaller than PAGE_SIZE, we have to zero the rest of the page
|
||||
* since the file might be mmapped.
|
||||
*/
|
||||
int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
|
||||
{
|
||||
|
|
|
@ -51,7 +51,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
}
|
||||
down_read(&ei->dax_sem);
|
||||
|
||||
ret = __dax_fault(vma, vmf, ext2_get_block, NULL);
|
||||
ret = __dax_fault(vma, vmf, ext2_get_block);
|
||||
|
||||
up_read(&ei->dax_sem);
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
|
@ -72,7 +72,7 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
|
|||
}
|
||||
down_read(&ei->dax_sem);
|
||||
|
||||
ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL);
|
||||
ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block);
|
||||
|
||||
up_read(&ei->dax_sem);
|
||||
if (flags & FAULT_FLAG_WRITE)
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include <linux/highuid.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/quotaops.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/buffer_head.h>
|
||||
|
@ -737,19 +738,18 @@ static int ext2_get_blocks(struct inode *inode,
|
|||
* so that it's not found by another thread before it's
|
||||
* initialised
|
||||
*/
|
||||
err = dax_clear_sectors(inode->i_sb->s_bdev,
|
||||
le32_to_cpu(chain[depth-1].key) <<
|
||||
(inode->i_blkbits - 9),
|
||||
1 << inode->i_blkbits);
|
||||
err = sb_issue_zeroout(inode->i_sb,
|
||||
le32_to_cpu(chain[depth-1].key), count,
|
||||
GFP_NOFS);
|
||||
if (err) {
|
||||
mutex_unlock(&ei->truncate_mutex);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
} else
|
||||
set_buffer_new(bh_result);
|
||||
|
||||
ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
|
||||
mutex_unlock(&ei->truncate_mutex);
|
||||
set_buffer_new(bh_result);
|
||||
got_it:
|
||||
map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
|
||||
if (count > blocks_to_boundary)
|
||||
|
|
|
@ -922,16 +922,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
|
|||
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
|
||||
|
||||
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
|
||||
if (blocksize != PAGE_SIZE) {
|
||||
ext2_msg(sb, KERN_ERR,
|
||||
"error: unsupported blocksize for dax");
|
||||
err = bdev_dax_supported(sb, blocksize);
|
||||
if (err)
|
||||
goto failed_mount;
|
||||
}
|
||||
if (!sb->s_bdev->bd_disk->fops->direct_access) {
|
||||
ext2_msg(sb, KERN_ERR,
|
||||
"error: device does not support dax");
|
||||
goto failed_mount;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the blocksize doesn't match, re-read the thing.. */
|
||||
|
|
|
@ -202,7 +202,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|||
if (IS_ERR(handle))
|
||||
result = VM_FAULT_SIGBUS;
|
||||
else
|
||||
result = __dax_fault(vma, vmf, ext4_dax_get_block, NULL);
|
||||
result = __dax_fault(vma, vmf, ext4_dax_get_block);
|
||||
|
||||
if (write) {
|
||||
if (!IS_ERR(handle))
|
||||
|
@ -238,7 +238,7 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
|
|||
result = VM_FAULT_SIGBUS;
|
||||
else
|
||||
result = __dax_pmd_fault(vma, addr, pmd, flags,
|
||||
ext4_dax_get_block, NULL);
|
||||
ext4_dax_get_block);
|
||||
|
||||
if (write) {
|
||||
if (!IS_ERR(handle))
|
||||
|
|
|
@ -3417,16 +3417,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
|||
}
|
||||
|
||||
if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
|
||||
if (blocksize != PAGE_SIZE) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"error: unsupported blocksize for dax");
|
||||
err = bdev_dax_supported(sb, blocksize);
|
||||
if (err)
|
||||
goto failed_mount;
|
||||
}
|
||||
if (!sb->s_bdev->bd_disk->fops->direct_access) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"error: device does not support dax");
|
||||
goto failed_mount;
|
||||
}
|
||||
}
|
||||
|
||||
if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
|
||||
|
|
|
@ -72,18 +72,11 @@ xfs_zero_extent(
|
|||
struct xfs_mount *mp = ip->i_mount;
|
||||
xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
|
||||
sector_t block = XFS_BB_TO_FSBT(mp, sector);
|
||||
ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
|
||||
|
||||
if (IS_DAX(VFS_I(ip)))
|
||||
return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
|
||||
sector, size);
|
||||
|
||||
/*
|
||||
* let the block layer decide on the fastest method of
|
||||
* implementing the zeroing.
|
||||
*/
|
||||
return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS);
|
||||
|
||||
return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
|
||||
block << (mp->m_super->s_blocksize_bits - 9),
|
||||
count_fsb << (mp->m_super->s_blocksize_bits - 9),
|
||||
GFP_NOFS, true);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1551,7 +1551,7 @@ xfs_filemap_page_mkwrite(
|
|||
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
|
||||
if (IS_DAX(inode)) {
|
||||
ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL);
|
||||
ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
|
||||
} else {
|
||||
ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
|
||||
ret = block_page_mkwrite_return(ret);
|
||||
|
@ -1585,7 +1585,7 @@ xfs_filemap_fault(
|
|||
* changes to xfs_get_blocks_direct() to map unwritten extent
|
||||
* ioend for conversion on read-only mappings.
|
||||
*/
|
||||
ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL);
|
||||
ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
|
||||
} else
|
||||
ret = filemap_fault(vma, vmf);
|
||||
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
|
@ -1622,8 +1622,7 @@ xfs_filemap_pmd_fault(
|
|||
}
|
||||
|
||||
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault,
|
||||
NULL);
|
||||
ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault);
|
||||
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
|
||||
|
||||
if (flags & FAULT_FLAG_WRITE)
|
||||
|
|
|
@ -1555,14 +1555,12 @@ xfs_fs_fill_super(
|
|||
|
||||
if (mp->m_flags & XFS_MOUNT_DAX) {
|
||||
xfs_warn(mp,
|
||||
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
|
||||
if (sb->s_blocksize != PAGE_SIZE) {
|
||||
"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
|
||||
|
||||
error = bdev_dax_supported(sb, sb->s_blocksize);
|
||||
if (error) {
|
||||
xfs_alert(mp,
|
||||
"Filesystem block size invalid for DAX Turning DAX off.");
|
||||
mp->m_flags &= ~XFS_MOUNT_DAX;
|
||||
} else if (!sb->s_bdev->bd_disk->fops->direct_access) {
|
||||
xfs_alert(mp,
|
||||
"Block device does not support DAX Turning DAX off.");
|
||||
"DAX unsupported by block device. Turning off DAX.");
|
||||
mp->m_flags &= ~XFS_MOUNT_DAX;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -768,6 +768,17 @@ static inline void rq_flush_dcache_pages(struct request *rq)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
#define vfs_msg(sb, level, fmt, ...) \
|
||||
__vfs_msg(sb, level, fmt, ##__VA_ARGS__)
|
||||
#else
|
||||
#define vfs_msg(sb, level, fmt, ...) \
|
||||
do { \
|
||||
no_printk(fmt, ##__VA_ARGS__); \
|
||||
__vfs_msg(sb, "", " "); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
extern int blk_register_queue(struct gendisk *disk);
|
||||
extern void blk_unregister_queue(struct gendisk *disk);
|
||||
extern blk_qc_t generic_make_request(struct bio *bio);
|
||||
|
@ -1660,7 +1671,7 @@ struct block_device_operations {
|
|||
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
|
||||
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
|
||||
long (*direct_access)(struct block_device *, sector_t, void __pmem **,
|
||||
pfn_t *);
|
||||
pfn_t *, long);
|
||||
unsigned int (*check_events) (struct gendisk *disk,
|
||||
unsigned int clearing);
|
||||
/* ->media_changed() is DEPRECATED, use ->check_events() instead */
|
||||
|
@ -1680,6 +1691,8 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *);
|
|||
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
|
||||
struct writeback_control *);
|
||||
extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *);
|
||||
extern int bdev_dax_supported(struct super_block *, int);
|
||||
extern bool bdev_dax_capable(struct block_device *);
|
||||
#else /* CONFIG_BLOCK */
|
||||
|
||||
struct block_device;
|
||||
|
|
|
@ -7,41 +7,44 @@
|
|||
|
||||
ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
|
||||
get_block_t, dio_iodone_t, int flags);
|
||||
int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size);
|
||||
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
|
||||
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
|
||||
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
|
||||
dax_iodone_t);
|
||||
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
|
||||
dax_iodone_t);
|
||||
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
|
||||
int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
struct page *read_dax_sector(struct block_device *bdev, sector_t n);
|
||||
int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
|
||||
unsigned int offset, unsigned int length);
|
||||
#else
|
||||
static inline struct page *read_dax_sector(struct block_device *bdev,
|
||||
sector_t n)
|
||||
{
|
||||
return ERR_PTR(-ENXIO);
|
||||
}
|
||||
static inline int __dax_zero_page_range(struct block_device *bdev,
|
||||
sector_t sector, unsigned int offset, unsigned int length)
|
||||
{
|
||||
return -ENXIO;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
|
||||
unsigned int flags, get_block_t, dax_iodone_t);
|
||||
unsigned int flags, get_block_t);
|
||||
int __dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
|
||||
unsigned int flags, get_block_t, dax_iodone_t);
|
||||
unsigned int flags, get_block_t);
|
||||
#else
|
||||
static inline int dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
|
||||
pmd_t *pmd, unsigned int flags, get_block_t gb,
|
||||
dax_iodone_t di)
|
||||
pmd_t *pmd, unsigned int flags, get_block_t gb)
|
||||
{
|
||||
return VM_FAULT_FALLBACK;
|
||||
}
|
||||
#define __dax_pmd_fault dax_pmd_fault
|
||||
#endif
|
||||
int dax_pfn_mkwrite(struct vm_area_struct *, struct vm_fault *);
|
||||
#define dax_mkwrite(vma, vmf, gb, iod) dax_fault(vma, vmf, gb, iod)
|
||||
#define __dax_mkwrite(vma, vmf, gb, iod) __dax_fault(vma, vmf, gb, iod)
|
||||
#define dax_mkwrite(vma, vmf, gb) dax_fault(vma, vmf, gb)
|
||||
#define __dax_mkwrite(vma, vmf, gb) __dax_fault(vma, vmf, gb)
|
||||
|
||||
static inline bool vma_is_dax(struct vm_area_struct *vma)
|
||||
{
|
||||
|
|
|
@ -74,7 +74,6 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
|
|||
struct buffer_head *bh_result, int create);
|
||||
typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
|
||||
ssize_t bytes, void *private);
|
||||
typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
|
||||
|
||||
#define MAY_EXEC 0x00000001
|
||||
#define MAY_WRITE 0x00000002
|
||||
|
|
Loading…
Reference in a new issue