1
0
Fork 0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-01-22 07:53:11 -05:00
linux/lib/kunit_iov_iter.c

1037 lines
25 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* I/O iterator tests. This can only test kernel-backed iterator types.
*
* Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/bvec.h>
mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios Define a data structure, struct folio_queue, to represent a sequence of folios and a kernel-internal I/O iterator type, ITER_FOLIOQ, to allow a list of folio_queue structures to be used to provide a buffer to iov_iter-taking functions, such as sendmsg and recvmsg. The folio_queue structure looks like: struct folio_queue { struct folio_batch vec; u8 orders[PAGEVEC_SIZE]; struct folio_queue *next; struct folio_queue *prev; unsigned long marks; unsigned long marks2; }; It does not use a list_head so that next and/or prev can be set to NULL at the ends of the list, allowing iov_iter-handling routines to determine that they *are* the ends without needing to store a head pointer in the iov_iter struct. A folio_batch struct is used to hold the folio pointers which allows the batch to be passed to batch handling functions. Two mark bits are available per slot. The intention is to use at least one of them to mark folios that need putting, but that might not be ultimately necessary. Accessor functions are used to access the slots to do the masking and an additional accessor function is used to indicate the size of the array. The order of each folio is also stored in the structure to avoid the need for iov_iter_advance() and iov_iter_revert() to have to query each folio to find its size. With careful barriering, this can be used as an extending buffer with new folios inserted and new folio_queue structs added without the need for a lock. Further, provided we always keep at least one struct in the buffer, we can also remove consumed folios and consumed structs from the head end as we without the need for locks. [Questions/thoughts] (1) To manage this, I need a head pointer, a tail pointer, a tail slot number (assuming insertion happens at the tail end and the next pointers point from head to tail). Should I put these into a struct of their own, say "folio_queue_head" or "rolling_buffer"? 
I will end up with two of these in netfs_io_request eventually, one keeping track of the pagecache I'm dealing with for buffered I/O and the other to hold a bounce buffer when we need one. (2) Should I make the slots {folio,off,len} or bio_vec? (3) This is intended to replace ITER_XARRAY eventually. Using an xarray in I/O iteration requires the taking of the RCU read lock, doing copying under the RCU read lock, walking the xarray (which may change under us), handling retries and dealing with special values. The advantage of ITER_XARRAY is that when we're dealing with the pagecache directly, we don't need any allocation - but if we're doing encrypted comms, there's a good chance we'd be using a bounce buffer anyway. This will require afs, erofs, cifs, orangefs and fscache to be converted to not use this. afs still uses it for dirs and symlinks; some of erofs usages should be easy to change, but there's one which won't be so easy; ceph's use via fscache can be fixed by porting ceph to netfslib; cifs is using xarray as a bounce buffer - that can be moved to use sheaves instead; and orangefs has a similar problem to erofs - maybe orangefs could use netfslib? Signed-off-by: David Howells <dhowells@redhat.com> cc: Matthew Wilcox <willy@infradead.org> cc: Jeff Layton <jlayton@kernel.org> cc: Steve French <sfrench@samba.org> cc: Ilya Dryomov <idryomov@gmail.com> cc: Gao Xiang <xiang@kernel.org> cc: Mike Marshall <hubcap@omnibond.com> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: devel@lists.orangefs.org Link: https://lore.kernel.org/r/20240814203850.2240469-13-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner <brauner@kernel.org>
2024-06-18 19:20:42 -04:00
#include <linux/folio_queue.h>
#include <kunit/test.h>
MODULE_DESCRIPTION("iov_iter testing");
MODULE_AUTHOR("David Howells <dhowells@redhat.com>");
MODULE_LICENSE("GPL");
/*
 * A half-open byte range [from, to) expressed as offsets into a test buffer.
 * A negative @from marks the end of a table of ranges.
 */
struct kvec_test_range {
int from, to;
};
/*
 * Byte ranges used by the kvec, folioq and xarray tests.  The table includes
 * empty ranges (from == to) as well as ranges that do not start or end on
 * round offsets.  The final entry, with from == -1, terminates the table.
 */
static const struct kvec_test_range kvec_test_ranges[] = {
{ 0x00002, 0x00002 },
{ 0x00027, 0x03000 },
{ 0x05193, 0x18794 },
{ 0x20000, 0x20000 },
{ 0x20000, 0x24000 },
{ 0x24000, 0x27001 },
{ 0x29000, 0xffffb },
{ 0xffffd, 0xffffe },
{ -1 }
};
static inline u8 pattern(unsigned long x)
{
return x & 0xff;
}
/*
 * Deferred KUnit action: remove a mapping with vunmap().  @data is the mapped
 * address that was passed when the action was registered.
 */
static void iov_kunit_unmap(void *data)
{
vunmap(data);
}
/*
 * Allocate @npages pages and map them into one virtually contiguous buffer.
 *
 * The array of page pointers is allocated with kunit_kcalloc() and handed back
 * through @ppages.  The mapping is torn down by a deferred iov_kunit_unmap()
 * action.  Any failure aborts the test through a KUnit assertion.
 *
 * Returns the address of the mapping.
 */
static void *__init iov_kunit_create_buffer(struct kunit *test,
					    struct page ***ppages,
					    size_t npages)
{
	struct page **pages;
	unsigned long got;
	void *buffer;
	int ret;

	pages = kunit_kcalloc(test, npages, sizeof(struct page *), GFP_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, pages);
	*ppages = pages;

	got = alloc_pages_bulk_array(GFP_KERNEL, npages, pages);
	if (got != npages) {
		/* Hand back the partial allocation before the assertion aborts. */
		release_pages(pages, got);
		KUNIT_ASSERT_EQ(test, got, npages);
	}

	/* Record each page's position within the buffer. */
	for (size_t i = 0; i < npages; i++)
		pages[i]->index = i;

	buffer = vmap(pages, npages, VM_MAP | VM_MAP_PUT_PAGES, PAGE_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buffer);

	/*
	 * If the action cannot be registered it is run straight away, which
	 * unmaps the buffer; abort rather than return a stale address.
	 */
	ret = kunit_add_action_or_reset(test, iov_kunit_unmap, buffer);
	KUNIT_ASSERT_EQ(test, ret, 0);
	return buffer;
}
/*
 * Fill in @kvec from a table of byte ranges and initialise @iter over it.
 *
 * Ranges are taken from @pr until either @kvmax entries have been used or an
 * entry with a negative 'from' is reached.  Each range is turned into one
 * kvec segment pointing into @buffer.  The iterator is set up in direction
 * @dir with a count equal to the sum of the segment lengths.
 */
static void __init iov_kunit_load_kvec(struct kunit *test,
				       struct iov_iter *iter, int dir,
				       struct kvec *kvec, unsigned int kvmax,
				       void *buffer, size_t bufsize,
				       const struct kvec_test_range *pr)
{
	size_t size = 0;
	int nr = 0;

	while (nr < kvmax && pr->from >= 0) {
		struct kvec *seg = &kvec[nr];

		/* Each range must be well-formed and lie inside the buffer. */
		KUNIT_ASSERT_GE(test, pr->to, pr->from);
		KUNIT_ASSERT_LE(test, pr->to, bufsize);

		seg->iov_base = buffer + pr->from;
		seg->iov_len = pr->to - pr->from;
		size += pr->to - pr->from;

		nr++;
		pr++;
	}

	KUNIT_ASSERT_LE(test, size, bufsize);
	iov_iter_kvec(iter, dir, kvec, nr, size);
}
/*
 * Test copying to an ITER_KVEC-type iterator.
 *
 * A patterned scratch buffer is copied through a kvec iterator built from
 * kvec_test_ranges into a zeroed buffer.  The buffer is then compared byte by
 * byte against an expected image built separately in the scratch buffer.
 */
static void __init iov_kunit_copy_to_kvec(struct kunit *test)
{
	const struct kvec_test_range *pr;
	struct page **spages, **bpages;
	struct iov_iter iter;
	struct kvec kvec[8];
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size, copied;
	u8 *scratch, *buffer;
	int i, patt;

	/* Source data: every byte holds the low bits of its own offset. */
	scratch = iov_kunit_create_buffer(test, &spages, npages);
	i = 0;
	while (i < bufsize) {
		scratch[i] = pattern(i);
		i++;
	}

	/* The destination starts out zeroed so untouched bytes stay zero. */
	buffer = iov_kunit_create_buffer(test, &bpages, npages);
	memset(buffer, 0, bufsize);

	iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec),
			    buffer, bufsize, kvec_test_ranges);
	size = iter.count;

	copied = copy_to_iter(scratch, size, &iter);

	/* The whole iterator must have been consumed. */
	KUNIT_EXPECT_EQ(test, copied, size);
	KUNIT_EXPECT_EQ(test, iter.count, 0);
	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);

	/* Build the expected image in the scratch buffer. */
	memset(scratch, 0, bufsize);
	patt = 0;
	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		for (i = pr->from; i < pr->to; i++, patt++)
			scratch[i] = pattern(patt);
	}

	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
		if (buffer[i] != scratch[i])
			return;
	}

	KUNIT_SUCCEED(test);
}
/*
 * Test copying from an ITER_KVEC-type iterator.
 *
 * A patterned buffer is exposed through a kvec iterator built from
 * kvec_test_ranges and copied out into a zeroed scratch buffer.  The scratch
 * buffer is then compared byte by byte against an expected image rebuilt in
 * the main buffer.
 */
static void __init iov_kunit_copy_from_kvec(struct kunit *test)
{
	const struct kvec_test_range *pr;
	struct page **spages, **bpages;
	struct iov_iter iter;
	struct kvec kvec[8];
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size, copied;
	u8 *scratch, *buffer;
	int i, j;

	/* Source data: every byte holds the low bits of its own offset. */
	buffer = iov_kunit_create_buffer(test, &bpages, npages);
	i = 0;
	while (i < bufsize) {
		buffer[i] = pattern(i);
		i++;
	}

	/* The destination starts out zeroed so untouched bytes stay zero. */
	scratch = iov_kunit_create_buffer(test, &spages, npages);
	memset(scratch, 0, bufsize);

	iov_kunit_load_kvec(test, &iter, WRITE, kvec, ARRAY_SIZE(kvec),
			    buffer, bufsize, kvec_test_ranges);
	size = min(iter.count, bufsize);

	copied = copy_from_iter(scratch, size, &iter);

	/* The whole iterator must have been consumed. */
	KUNIT_EXPECT_EQ(test, copied, size);
	KUNIT_EXPECT_EQ(test, iter.count, 0);
	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);

	/*
	 * Build the expected image in the main buffer: the contents of each
	 * range packed one after another from offset 0, stopping once the
	 * buffer is full.
	 */
	memset(buffer, 0, bufsize);
	i = 0;
	for (pr = kvec_test_ranges; pr->from >= 0 && i < bufsize; pr++) {
		for (j = pr->from; j < pr->to && i < bufsize; j++)
			buffer[i++] = pattern(j);
	}

	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
		if (scratch[i] != buffer[i])
			return;
	}

	KUNIT_SUCCEED(test);
}
/*
 * A half-open byte range [from, to) inside the page at index @page of a test
 * buffer's page array.  A negative @from marks the end of a table of ranges.
 */
struct bvec_test_range {
int page, from, to;
};
/*
 * Per-page ranges used by the bvec tests.  The table includes an empty range,
 * partial-page ranges, 0x0000-0x1000 ranges on consecutive page indices and
 * two ranges within the same page.  The final entry, with negative fields,
 * terminates the table.
 * NOTE(review): the 0x1000 upper bounds presumably correspond to a 4KiB
 * PAGE_SIZE - confirm for other page sizes.
 */
static const struct bvec_test_range bvec_test_ranges[] = {
{ 0, 0x0002, 0x0002 },
{ 1, 0x0027, 0x0893 },
{ 2, 0x0193, 0x0794 },
{ 3, 0x0000, 0x1000 },
{ 4, 0x0000, 0x1000 },
{ 5, 0x0000, 0x1000 },
{ 6, 0x0000, 0x0ffb },
{ 6, 0x0ffd, 0x0ffe },
{ -1, -1, -1 }
};
/*
 * Fill in @bvec from a table of per-page ranges and initialise @iter over it.
 *
 * Ranges are taken from @pr until either @bvmax slots have been used or an
 * entry with a negative 'from' is reached.  A range that starts at offset 0
 * of the page recorded in can_merge is folded into the previous bio_vec
 * instead of taking a new slot.  The iterator is set up in direction @dir
 * with a count equal to the sum of the range lengths.
 */
static void __init iov_kunit_load_bvec(struct kunit *test,
struct iov_iter *iter, int dir,
struct bio_vec *bvec, unsigned int bvmax,
struct page **pages, size_t npages,
size_t bufsize,
const struct bvec_test_range *pr)
{
/* Page that the next range must refer to for it to be mergeable. */
struct page *can_merge = NULL, *page;
size_t size = 0;
int i;
for (i = 0; i < bvmax; i++, pr++) {
if (pr->from < 0)
break;
/* The range must name a valid page and lie within that page. */
KUNIT_ASSERT_LT(test, pr->page, npages);
KUNIT_ASSERT_LT(test, pr->page * PAGE_SIZE, bufsize);
KUNIT_ASSERT_GE(test, pr->from, 0);
KUNIT_ASSERT_GE(test, pr->to, pr->from);
KUNIT_ASSERT_LE(test, pr->to, PAGE_SIZE);
page = pages[pr->page];
if (pr->from == 0 && pr->from != pr->to && page == can_merge) {
/*
 * Step back to the previous slot and extend it; the loop's
 * i++ then leaves i pointing at the next unused slot.
 */
i--;
bvec[i].bv_len += pr->to;
} else {
bvec_set_page(&bvec[i], page, pr->to - pr->from, pr->from);
}
size += pr->to - pr->from;
/*
 * A range whose end offset is a multiple of PAGE_SIZE records a merge
 * candidate: the struct page pointer advanced by pr->to / PAGE_SIZE.
 * NOTE(review): merging presumably relies on adjacent struct pages
 * being physically contiguous - confirm.
 */
if ((pr->to & ~PAGE_MASK) == 0)
can_merge = page + pr->to / PAGE_SIZE;
else
can_merge = NULL;
}
iov_iter_bvec(iter, dir, bvec, i, size);
}
/*
 * Test copying to an ITER_BVEC-type iterator.
 *
 * A patterned scratch buffer is copied through a bvec iterator built from
 * bvec_test_ranges into the pages of a zeroed buffer.  The buffer is then
 * compared byte by byte against an expected image built separately in the
 * scratch buffer.
 */
static void __init iov_kunit_copy_to_bvec(struct kunit *test)
{
	const struct bvec_test_range *pr;
	struct iov_iter iter;
	struct bio_vec bvec[8];
	struct page **spages, **bpages;
	u8 *scratch, *buffer;
	size_t bufsize, npages, size, copied;
	int i, patt;

	bufsize = 0x100000;
	npages = bufsize / PAGE_SIZE;

	/* Source data: every byte holds the low bits of its own offset. */
	scratch = iov_kunit_create_buffer(test, &spages, npages);
	for (i = 0; i < bufsize; i++)
		scratch[i] = pattern(i);

	/* The destination starts out zeroed so untouched bytes stay zero. */
	buffer = iov_kunit_create_buffer(test, &bpages, npages);
	memset(buffer, 0, bufsize);

	iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec),
			    bpages, npages, bufsize, bvec_test_ranges);
	size = iter.count;

	copied = copy_to_iter(scratch, size, &iter);

	/* The whole iterator must have been consumed. */
	KUNIT_EXPECT_EQ(test, copied, size);
	KUNIT_EXPECT_EQ(test, iter.count, 0);
	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);

	/* Build the expected image in the scratch buffer. */
	patt = 0;
	memset(scratch, 0, bufsize);
	for (pr = bvec_test_ranges; pr->from >= 0; pr++) {
		/* Start of the page this range refers to. */
		u8 *p = scratch + pr->page * PAGE_SIZE;

		for (i = pr->from; i < pr->to; i++)
			p[i] = pattern(patt++);
	}

	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
		if (buffer[i] != scratch[i])
			return;
	}

	KUNIT_SUCCEED(test);
}
/*
 * Test copying from an ITER_BVEC-type iterator.
 *
 * The pages of a patterned buffer are exposed through a bvec iterator built
 * from bvec_test_ranges and copied out into a zeroed scratch buffer.  The
 * scratch buffer is then compared byte by byte against an expected image
 * rebuilt in the main buffer.
 */
static void __init iov_kunit_copy_from_bvec(struct kunit *test)
{
	const struct bvec_test_range *pr;
	struct page **spages, **bpages;
	struct bio_vec bvec[8];
	struct iov_iter iter;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size, copied;
	u8 *scratch, *buffer;
	int i, j;

	/* Source data: every byte holds the low bits of its own offset. */
	buffer = iov_kunit_create_buffer(test, &bpages, npages);
	i = 0;
	while (i < bufsize) {
		buffer[i] = pattern(i);
		i++;
	}

	/* The destination starts out zeroed so untouched bytes stay zero. */
	scratch = iov_kunit_create_buffer(test, &spages, npages);
	memset(scratch, 0, bufsize);

	iov_kunit_load_bvec(test, &iter, WRITE, bvec, ARRAY_SIZE(bvec),
			    bpages, npages, bufsize, bvec_test_ranges);
	size = iter.count;

	copied = copy_from_iter(scratch, size, &iter);

	/* The whole iterator must have been consumed. */
	KUNIT_EXPECT_EQ(test, copied, size);
	KUNIT_EXPECT_EQ(test, iter.count, 0);
	KUNIT_EXPECT_EQ(test, iter.nr_segs, 0);

	/*
	 * Build the expected image in the main buffer: the contents of each
	 * range packed one after another from offset 0, stopping once the
	 * buffer is full.
	 */
	memset(buffer, 0, bufsize);
	i = 0;
	for (pr = bvec_test_ranges; pr->from >= 0 && i < bufsize; pr++) {
		/* Buffer offset at which this range's page begins. */
		size_t patt = pr->page * PAGE_SIZE;

		for (j = pr->from; j < pr->to && i < bufsize; j++)
			buffer[i++] = pattern(patt + j);
	}

	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
		if (scratch[i] != buffer[i])
			return;
	}

	KUNIT_SUCCEED(test);
}
mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios Define a data structure, struct folio_queue, to represent a sequence of folios and a kernel-internal I/O iterator type, ITER_FOLIOQ, to allow a list of folio_queue structures to be used to provide a buffer to iov_iter-taking functions, such as sendmsg and recvmsg. The folio_queue structure looks like: struct folio_queue { struct folio_batch vec; u8 orders[PAGEVEC_SIZE]; struct folio_queue *next; struct folio_queue *prev; unsigned long marks; unsigned long marks2; }; It does not use a list_head so that next and/or prev can be set to NULL at the ends of the list, allowing iov_iter-handling routines to determine that they *are* the ends without needing to store a head pointer in the iov_iter struct. A folio_batch struct is used to hold the folio pointers which allows the batch to be passed to batch handling functions. Two mark bits are available per slot. The intention is to use at least one of them to mark folios that need putting, but that might not be ultimately necessary. Accessor functions are used to access the slots to do the masking and an additional accessor function is used to indicate the size of the array. The order of each folio is also stored in the structure to avoid the need for iov_iter_advance() and iov_iter_revert() to have to query each folio to find its size. With careful barriering, this can be used as an extending buffer with new folios inserted and new folio_queue structs added without the need for a lock. Further, provided we always keep at least one struct in the buffer, we can also remove consumed folios and consumed structs from the head end as we without the need for locks. [Questions/thoughts] (1) To manage this, I need a head pointer, a tail pointer, a tail slot number (assuming insertion happens at the tail end and the next pointers point from head to tail). Should I put these into a struct of their own, say "folio_queue_head" or "rolling_buffer"? 
I will end up with two of these in netfs_io_request eventually, one keeping track of the pagecache I'm dealing with for buffered I/O and the other to hold a bounce buffer when we need one. (2) Should I make the slots {folio,off,len} or bio_vec? (3) This is intended to replace ITER_XARRAY eventually. Using an xarray in I/O iteration requires the taking of the RCU read lock, doing copying under the RCU read lock, walking the xarray (which may change under us), handling retries and dealing with special values. The advantage of ITER_XARRAY is that when we're dealing with the pagecache directly, we don't need any allocation - but if we're doing encrypted comms, there's a good chance we'd be using a bounce buffer anyway. This will require afs, erofs, cifs, orangefs and fscache to be converted to not use this. afs still uses it for dirs and symlinks; some of erofs usages should be easy to change, but there's one which won't be so easy; ceph's use via fscache can be fixed by porting ceph to netfslib; cifs is using xarray as a bounce buffer - that can be moved to use sheaves instead; and orangefs has a similar problem to erofs - maybe orangefs could use netfslib? Signed-off-by: David Howells <dhowells@redhat.com> cc: Matthew Wilcox <willy@infradead.org> cc: Jeff Layton <jlayton@kernel.org> cc: Steve French <sfrench@samba.org> cc: Ilya Dryomov <idryomov@gmail.com> cc: Gao Xiang <xiang@kernel.org> cc: Mike Marshall <hubcap@omnibond.com> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: devel@lists.orangefs.org Link: https://lore.kernel.org/r/20240814203850.2240469-13-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner <brauner@kernel.org>
2024-06-18 19:20:42 -04:00
/*
 * Deferred KUnit action: release a chain of folio_queue segments.
 *
 * @data is the first segment.  For each segment in the chain, every occupied
 * slot has its folio put and the segment itself is then freed.
 */
static void iov_kunit_destroy_folioq(void *data)
{
	struct folio_queue *folioq = data;

	while (folioq) {
		/* Fetch the link first; the segment is freed below. */
		struct folio_queue *next = folioq->next;

		for (int slot = 0; slot < folioq_nr_slots(folioq); slot++) {
			if (!folioq_folio(folioq, slot))
				continue;
			folio_put(folioq_folio(folioq, slot));
		}

		kfree(folioq);
		folioq = next;
	}
}
/*
 * Append the folios of @pages to a folio queue and initialise @iter over it.
 *
 * @folioq is the head segment.  Whenever the current segment is full, a new
 * one is allocated and linked onto the tail.  The iterator is set up in
 * direction @dir, starting at slot 0 and offset 0, with a count of
 * @npages * PAGE_SIZE.
 *
 * The extra segments are not registered for cleanup here.  The callers in
 * this file get @folioq from iov_kunit_create_folioq(), whose cleanup action
 * walks the ->next chain.
 */
static void __init iov_kunit_load_folioq(struct kunit *test,
					 struct iov_iter *iter, int dir,
					 struct folio_queue *folioq,
					 struct page **pages, size_t npages)
{
	struct folio_queue *p = folioq;
	size_t size = 0;
	size_t i;

	for (i = 0; i < npages; i++) {
		if (folioq_full(p)) {
			/* Current segment is full: chain a new one on the end. */
			p->next = kzalloc(sizeof(*p->next), GFP_KERNEL);
			KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p->next);
			folioq_init(p->next, 0);
			p->next->prev = p;
			p = p->next;
		}
		folioq_append(p, page_folio(pages[i]));
		size += PAGE_SIZE;
	}

	iov_iter_folio_queue(iter, dir, folioq, 0, 0, size);
}
/*
 * Allocate and initialise the head segment of a folio queue.
 *
 * The segment is registered with a deferred iov_kunit_destroy_folioq()
 * action.  Any failure aborts the test through a KUnit assertion.
 *
 * Returns the new, empty segment.
 */
static struct folio_queue *iov_kunit_create_folioq(struct kunit *test)
{
	struct folio_queue *folioq;
	int ret;

	folioq = kzalloc(sizeof(*folioq), GFP_KERNEL);
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, folioq);

	/* Initialise the segment before handing ownership to the action. */
	folioq_init(folioq, 0);

	/*
	 * If the action cannot be registered it is run straight away, which
	 * frees the segment; abort rather than touch or return freed memory.
	 */
	ret = kunit_add_action_or_reset(test, iov_kunit_destroy_folioq, folioq);
	KUNIT_ASSERT_EQ(test, ret, 0);
	return folioq;
}
/*
 * Test copying to an ITER_FOLIOQ-type iterator.
 *
 * The pages of a zeroed buffer are loaded into a folio queue.  For each entry
 * in kvec_test_ranges the iterator is re-created to cover [0, to), advanced
 * to 'from', and the next chunk of a patterned scratch buffer is copied in.
 * The buffer is then compared byte by byte against an expected image built
 * separately in the scratch buffer.
 */
static void __init iov_kunit_copy_to_folioq(struct kunit *test)
{
	const struct kvec_test_range *pr;
	struct page **spages, **bpages;
	struct folio_queue *folioq;
	struct iov_iter iter;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size, copied;
	u8 *scratch, *buffer;
	int i, patt;

	folioq = iov_kunit_create_folioq(test);

	/* Source data: every byte holds the low bits of its own offset. */
	scratch = iov_kunit_create_buffer(test, &spages, npages);
	i = 0;
	while (i < bufsize) {
		scratch[i] = pattern(i);
		i++;
	}

	/* The destination starts out zeroed so untouched bytes stay zero. */
	buffer = iov_kunit_create_buffer(test, &bpages, npages);
	memset(buffer, 0, bufsize);

	iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages);

	/* i tracks how much of the scratch buffer has been consumed so far. */
	i = 0;
	pr = kvec_test_ranges;
	while (pr->from >= 0) {
		KUNIT_ASSERT_LE(test, pr->to, bufsize);
		size = pr->to - pr->from;

		iov_iter_folio_queue(&iter, READ, folioq, 0, 0, pr->to);
		iov_iter_advance(&iter, pr->from);
		copied = copy_to_iter(scratch + i, size, &iter);

		KUNIT_EXPECT_EQ(test, copied, size);
		KUNIT_EXPECT_EQ(test, iter.count, 0);
		KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE);

		i += size;
		/* Skip the remaining ranges and the image check after a failure. */
		if (test->status == KUNIT_FAILURE)
			goto stop;
		pr++;
	}

	/* Build the expected image in the scratch buffer. */
	memset(scratch, 0, bufsize);
	patt = 0;
	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		for (i = pr->from; i < pr->to; i++, patt++)
			scratch[i] = pattern(patt);
	}

	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
		if (buffer[i] != scratch[i])
			return;
	}

stop:
	KUNIT_SUCCEED(test);
}
/*
 * Test copying from an ITER_FOLIOQ-type iterator.
 *
 * The pages of a patterned buffer are loaded into a folio queue.  For each
 * entry in kvec_test_ranges the iterator is re-created to cover [0, to),
 * advanced to 'from', and the range is copied out to the next free position
 * in a zeroed scratch buffer.  The scratch buffer is then compared byte by
 * byte against an expected image rebuilt in the main buffer.
 */
static void __init iov_kunit_copy_from_folioq(struct kunit *test)
{
	const struct kvec_test_range *pr;
	struct iov_iter iter;
	struct folio_queue *folioq;
	struct page **spages, **bpages;
	u8 *scratch, *buffer;
	size_t bufsize, npages, size, copied;
	int i, j;

	bufsize = 0x100000;
	npages = bufsize / PAGE_SIZE;

	folioq = iov_kunit_create_folioq(test);

	/* Source data: every byte holds the low bits of its own offset. */
	buffer = iov_kunit_create_buffer(test, &bpages, npages);
	for (i = 0; i < bufsize; i++)
		buffer[i] = pattern(i);

	/* The destination starts out zeroed so untouched bytes stay zero. */
	scratch = iov_kunit_create_buffer(test, &spages, npages);
	memset(scratch, 0, bufsize);

	/*
	 * The iterator is a data source here, so use WRITE, matching the
	 * per-range re-initialisation below and the other copy-from tests.
	 */
	iov_kunit_load_folioq(test, &iter, WRITE, folioq, bpages, npages);

	/* i tracks how much of the scratch buffer has been filled so far. */
	i = 0;
	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		size = pr->to - pr->from;
		KUNIT_ASSERT_LE(test, pr->to, bufsize);

		iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to);
		iov_iter_advance(&iter, pr->from);
		copied = copy_from_iter(scratch + i, size, &iter);

		KUNIT_EXPECT_EQ(test, copied, size);
		KUNIT_EXPECT_EQ(test, iter.count, 0);
		KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to % PAGE_SIZE);
		i += size;
	}

	/*
	 * Build the expected image in the main buffer: the contents of each
	 * range packed one after another from offset 0, stopping once the
	 * buffer is full.
	 */
	i = 0;
	memset(buffer, 0, bufsize);
	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		for (j = pr->from; j < pr->to; j++) {
			buffer[i++] = pattern(j);
			if (i >= bufsize)
				goto stop;
		}
	}
stop:

	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
		if (scratch[i] != buffer[i])
			return;
	}

	KUNIT_SUCCEED(test);
}
/*
 * Cleanup action for an xarray made by iov_kunit_create_xarray(): @data is
 * the xarray, which is destroyed and then freed.
 */
static void iov_kunit_destroy_xarray(void *data)
{
	struct xarray *xa = data;

	xa_destroy(xa);
	kfree(xa);
}
/*
 * Store @pages[0..@npages-1] in @xarray at indices 0..@npages-1, then set up
 * @iter as an xarray iterator with direction @dir starting at position 0 and
 * spanning PAGE_SIZE bytes per stored page.
 *
 * A failing xa_store() trips a KUnit assertion.
 */
static void __init iov_kunit_load_xarray(struct kunit *test,
					 struct iov_iter *iter, int dir,
					 struct xarray *xarray,
					 struct page **pages, size_t npages)
{
	size_t size = 0;
	int i = 0;

	while (i < npages) {
		void *x;

		x = xa_store(xarray, i, pages[i], GFP_KERNEL);
		KUNIT_ASSERT_FALSE(test, xa_is_err(x));
		size += PAGE_SIZE;
		i++;
	}

	iov_iter_xarray(iter, dir, xarray, 0, size);
}
/*
 * Allocate and initialise an empty xarray and register
 * iov_kunit_destroy_xarray() as the cleanup action for it on @test.
 *
 * Returns the new xarray.  An allocation failure, or a failure to register
 * the cleanup action, trips a KUnit assertion instead of returning.
 */
static struct xarray *iov_kunit_create_xarray(struct kunit *test)
{
	struct xarray *xarray;
	int ret;

	xarray = kzalloc(sizeof(*xarray), GFP_KERNEL);
	/* Check before xa_init(), which would dereference a NULL pointer. */
	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xarray);
	xa_init(xarray);

	/*
	 * On failure the "_or_reset" variant has already run the destroy
	 * action, so the xarray must not be handed back to the caller.
	 */
	ret = kunit_add_action_or_reset(test, iov_kunit_destroy_xarray, xarray);
	KUNIT_ASSERT_EQ(test, ret, 0);

	return xarray;
}
/*
 * Test copying to an ITER_XARRAY-type iterator.
 *
 * The scratch buffer is filled with a byte pattern and the pages of the
 * zeroed main buffer are loaded into an xarray.  For every entry in
 * kvec_test_ranges the iterator is pointed at that range and the next chunk
 * of the scratch buffer is copied into it.  The expected image is then
 * rebuilt in the scratch buffer and compared with the main buffer.
 */
static void __init iov_kunit_copy_to_xarray(struct kunit *test)
{
	const struct kvec_test_range *pr;
	struct xarray *xarray;
	struct iov_iter iter;
	struct page **scratch_pages, **buffer_pages;
	u8 *scratch, *buffer;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size, copied;
	int i, seq;

	xarray = iov_kunit_create_xarray(test);

	/* Source data: every byte carries the pattern for its own index. */
	scratch = iov_kunit_create_buffer(test, &scratch_pages, npages);
	i = 0;
	while (i < bufsize) {
		scratch[i] = pattern(i);
		i++;
	}

	/* Destination starts out zeroed. */
	buffer = iov_kunit_create_buffer(test, &buffer_pages, npages);
	memset(buffer, 0, bufsize);

	iov_kunit_load_xarray(test, &iter, READ, xarray, buffer_pages, npages);

	/* Feed consecutive chunks of the source into each range in turn. */
	i = 0;
	pr = kvec_test_ranges;
	while (pr->from >= 0) {
		size = pr->to - pr->from;
		KUNIT_ASSERT_LE(test, pr->to, bufsize);

		iov_iter_xarray(&iter, READ, xarray, pr->from, size);
		copied = copy_to_iter(scratch + i, size, &iter);

		KUNIT_EXPECT_EQ(test, copied, size);
		KUNIT_EXPECT_EQ(test, iter.count, 0);
		KUNIT_EXPECT_EQ(test, iter.iov_offset, size);
		i += size;
		pr++;
	}

	/* Build the expected image in the scratch buffer. */
	memset(scratch, 0, bufsize);
	seq = 0;
	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		for (i = pr->from; i < pr->to; i++) {
			scratch[i] = pattern(seq);
			seq++;
		}
	}

	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, buffer[i], scratch[i], "at i=%x", i);
		if (buffer[i] != scratch[i])
			return;
	}

	KUNIT_SUCCEED(test);
}
/*
 * Test copying from an ITER_XARRAY-type iterator.
 *
 * The main buffer is filled with a byte pattern and its pages are loaded into
 * an xarray.  For every entry in kvec_test_ranges the iterator is pointed at
 * that range and the range is copied out to the next free position in the
 * scratch buffer.  The expected packed image is then rebuilt in the main
 * buffer and compared with the scratch buffer.
 */
static void __init iov_kunit_copy_from_xarray(struct kunit *test)
{
	const struct kvec_test_range *pr;
	struct xarray *xarray;
	struct iov_iter iter;
	struct page **buffer_pages, **scratch_pages;
	u8 *buffer, *scratch;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size, copied;
	int i, src;

	xarray = iov_kunit_create_xarray(test);

	/* Source data: every byte carries the pattern for its own index. */
	buffer = iov_kunit_create_buffer(test, &buffer_pages, npages);
	i = 0;
	while (i < bufsize) {
		buffer[i] = pattern(i);
		i++;
	}

	/* Destination starts out zeroed. */
	scratch = iov_kunit_create_buffer(test, &scratch_pages, npages);
	memset(scratch, 0, bufsize);

	iov_kunit_load_xarray(test, &iter, READ, xarray, buffer_pages, npages);

	/* Copy the ranges out one by one, packing them end to end. */
	i = 0;
	pr = kvec_test_ranges;
	while (pr->from >= 0) {
		size = pr->to - pr->from;
		KUNIT_ASSERT_LE(test, pr->to, bufsize);

		iov_iter_xarray(&iter, WRITE, xarray, pr->from, size);
		copied = copy_from_iter(scratch + i, size, &iter);

		KUNIT_EXPECT_EQ(test, copied, size);
		KUNIT_EXPECT_EQ(test, iter.count, 0);
		KUNIT_EXPECT_EQ(test, iter.iov_offset, size);
		i += size;
		pr++;
	}

	/* Build the expected image in the main buffer. */
	memset(buffer, 0, bufsize);
	i = 0;
	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		for (src = pr->from; src < pr->to; src++) {
			buffer[i] = pattern(src);
			i++;
			/* Never write past the end of the buffer. */
			if (i >= bufsize)
				goto compare;
		}
	}

compare:
	/* Compare the images, giving up at the first mismatch. */
	for (i = 0; i < bufsize; i++) {
		KUNIT_EXPECT_EQ_MSG(test, scratch[i], buffer[i], "at i=%x", i);
		if (scratch[i] != buffer[i])
			return;
	}

	KUNIT_SUCCEED(test);
}
/*
 * Test the extraction of ITER_KVEC-type iterators.
 *
 * kvec_test_ranges is loaded into a kvec iterator over the test buffer and
 * pages are then extracted in batches (up to ARRAY_SIZE(pagelist) pages and
 * 100 * 1024 bytes per call).  Each returned page pointer and starting offset
 * is checked against the buffer page that the current position within the
 * ranges falls in.
 */
static void __init iov_kunit_extract_pages_kvec(struct kunit *test)
{
	/* Marker written to every pagelist[] slot before each extraction. */
	struct page *const poison = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
	const struct kvec_test_range *pr;
	struct iov_iter iter;
	struct page **bpages;
	struct page *pagelist[8];
	struct page **pages = pagelist;
	struct kvec kvec[8];
	u8 *buffer;
	ssize_t len;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size = 0;
	int i, from;

	buffer = iov_kunit_create_buffer(test, &bpages, npages);

	iov_kunit_load_kvec(test, &iter, READ, kvec, ARRAY_SIZE(kvec),
			    buffer, bufsize, kvec_test_ranges);
	size = iter.count;

	/* 'from' tracks the expected buffer position across batches. */
	pr = kvec_test_ranges;
	from = pr->from;
	for (;;) {
		size_t offset0 = LONG_MAX;

		for (i = 0; i < ARRAY_SIZE(pagelist); i++)
			pagelist[i] = poison;

		len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
					     ARRAY_SIZE(pagelist), 0, &offset0);
		KUNIT_EXPECT_GE(test, len, 0);
		if (len < 0)
			break;
		KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
		KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
		KUNIT_EXPECT_LE(test, len, size);
		KUNIT_EXPECT_EQ(test, iter.count, size - len);
		size -= len;

		if (len == 0)
			break;

		/* Walk the batch one page-sized piece at a time. */
		for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
			ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
			struct page *p;
			int ix;

			KUNIT_ASSERT_GE(test, part, 0);

			/* Step to the next range once this one is used up. */
			while (from == pr->to) {
				pr++;
				from = pr->from;
				if (from < 0)
					goto out;
			}

			ix = from / PAGE_SIZE;
			KUNIT_ASSERT_LT(test, ix, npages);
			p = bpages[ix];
			KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
			KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);

			from += part;
			len -= part;
			KUNIT_ASSERT_GE(test, len, 0);
			if (len == 0)
				break;

			/* Only the first page of a batch has an offset. */
			offset0 = 0;
		}

		if (test->status == KUNIT_FAILURE ||
		    iov_iter_count(&iter) == 0)
			break;
	}

out:
	KUNIT_EXPECT_EQ(test, size, 0);
	KUNIT_EXPECT_EQ(test, iter.count, 0);
	KUNIT_SUCCEED(test);
}
/*
 * Test the extraction of ITER_BVEC-type iterators.
 *
 * bvec_test_ranges is loaded into a bvec iterator over the test pages and
 * pages are then extracted in batches (up to ARRAY_SIZE(pagelist) pages and
 * 100 * 1024 bytes per call).  Each returned page pointer and starting offset
 * is checked against the page selected by the current range's page index plus
 * the current position within that range.
 */
static void __init iov_kunit_extract_pages_bvec(struct kunit *test)
{
	/* Marker written to every pagelist[] slot before each extraction. */
	struct page *const poison = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
	const struct bvec_test_range *pr;
	struct iov_iter iter;
	struct page **bpages;
	struct page *pagelist[8];
	struct page **pages = pagelist;
	struct bio_vec bvec[8];
	ssize_t len;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size = 0;
	int i, from;

	iov_kunit_create_buffer(test, &bpages, npages);
	iov_kunit_load_bvec(test, &iter, READ, bvec, ARRAY_SIZE(bvec),
			    bpages, npages, bufsize, bvec_test_ranges);
	size = iter.count;

	/* 'from' tracks the expected position across batches. */
	pr = bvec_test_ranges;
	from = pr->from;
	for (;;) {
		size_t offset0 = LONG_MAX;

		for (i = 0; i < ARRAY_SIZE(pagelist); i++)
			pagelist[i] = poison;

		len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
					     ARRAY_SIZE(pagelist), 0, &offset0);
		KUNIT_EXPECT_GE(test, len, 0);
		if (len < 0)
			break;
		KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
		KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);
		KUNIT_EXPECT_LE(test, len, size);
		KUNIT_EXPECT_EQ(test, iter.count, size - len);
		size -= len;

		if (len == 0)
			break;

		/* Walk the batch one page-sized piece at a time. */
		for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
			ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
			struct page *p;
			int ix;

			KUNIT_ASSERT_GE(test, part, 0);

			/* Step to the next range once this one is used up. */
			while (from == pr->to) {
				pr++;
				from = pr->from;
				if (from < 0)
					goto out;
			}

			ix = pr->page + from / PAGE_SIZE;
			KUNIT_ASSERT_LT(test, ix, npages);
			p = bpages[ix];
			KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
			KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);

			from += part;
			len -= part;
			KUNIT_ASSERT_GE(test, len, 0);
			if (len == 0)
				break;

			/* Only the first page of a batch has an offset. */
			offset0 = 0;
		}

		if (test->status == KUNIT_FAILURE ||
		    iov_iter_count(&iter) == 0)
			break;
	}

out:
	KUNIT_EXPECT_EQ(test, size, 0);
	KUNIT_EXPECT_EQ(test, iter.count, 0);
	KUNIT_SUCCEED(test);
}
mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios Define a data structure, struct folio_queue, to represent a sequence of folios and a kernel-internal I/O iterator type, ITER_FOLIOQ, to allow a list of folio_queue structures to be used to provide a buffer to iov_iter-taking functions, such as sendmsg and recvmsg. The folio_queue structure looks like: struct folio_queue { struct folio_batch vec; u8 orders[PAGEVEC_SIZE]; struct folio_queue *next; struct folio_queue *prev; unsigned long marks; unsigned long marks2; }; It does not use a list_head so that next and/or prev can be set to NULL at the ends of the list, allowing iov_iter-handling routines to determine that they *are* the ends without needing to store a head pointer in the iov_iter struct. A folio_batch struct is used to hold the folio pointers which allows the batch to be passed to batch handling functions. Two mark bits are available per slot. The intention is to use at least one of them to mark folios that need putting, but that might not be ultimately necessary. Accessor functions are used to access the slots to do the masking and an additional accessor function is used to indicate the size of the array. The order of each folio is also stored in the structure to avoid the need for iov_iter_advance() and iov_iter_revert() to have to query each folio to find its size. With careful barriering, this can be used as an extending buffer with new folios inserted and new folio_queue structs added without the need for a lock. Further, provided we always keep at least one struct in the buffer, we can also remove consumed folios and consumed structs from the head end as we without the need for locks. [Questions/thoughts] (1) To manage this, I need a head pointer, a tail pointer, a tail slot number (assuming insertion happens at the tail end and the next pointers point from head to tail). Should I put these into a struct of their own, say "folio_queue_head" or "rolling_buffer"? 
I will end up with two of these in netfs_io_request eventually, one keeping track of the pagecache I'm dealing with for buffered I/O and the other to hold a bounce buffer when we need one. (2) Should I make the slots {folio,off,len} or bio_vec? (3) This is intended to replace ITER_XARRAY eventually. Using an xarray in I/O iteration requires the taking of the RCU read lock, doing copying under the RCU read lock, walking the xarray (which may change under us), handling retries and dealing with special values. The advantage of ITER_XARRAY is that when we're dealing with the pagecache directly, we don't need any allocation - but if we're doing encrypted comms, there's a good chance we'd be using a bounce buffer anyway. This will require afs, erofs, cifs, orangefs and fscache to be converted to not use this. afs still uses it for dirs and symlinks; some of erofs usages should be easy to change, but there's one which won't be so easy; ceph's use via fscache can be fixed by porting ceph to netfslib; cifs is using xarray as a bounce buffer - that can be moved to use sheaves instead; and orangefs has a similar problem to erofs - maybe orangefs could use netfslib? Signed-off-by: David Howells <dhowells@redhat.com> cc: Matthew Wilcox <willy@infradead.org> cc: Jeff Layton <jlayton@kernel.org> cc: Steve French <sfrench@samba.org> cc: Ilya Dryomov <idryomov@gmail.com> cc: Gao Xiang <xiang@kernel.org> cc: Mike Marshall <hubcap@omnibond.com> cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: devel@lists.orangefs.org Link: https://lore.kernel.org/r/20240814203850.2240469-13-dhowells@redhat.com/ # v2 Signed-off-by: Christian Brauner <brauner@kernel.org>
2024-06-18 19:20:42 -04:00
/*
 * Test the extraction of ITER_FOLIOQ-type iterators.
 *
 * The test pages are loaded into a folio queue.  For every entry in
 * kvec_test_ranges the iterator is set up afresh and advanced to the start of
 * the range, then pages are extracted in batches (up to ARRAY_SIZE(pagelist)
 * pages and 100 * 1024 bytes per call) until the range is used up.  Each
 * returned page pointer and starting offset is checked against the buffer
 * page that the current position falls in.
 */
static void __init iov_kunit_extract_pages_folioq(struct kunit *test)
{
	/* Marker written to every pagelist[] slot before each extraction. */
	struct page *const poison = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
	const struct kvec_test_range *pr;
	struct folio_queue *folioq;
	struct iov_iter iter;
	struct page **bpages;
	struct page *pagelist[8];
	struct page **pages = pagelist;
	ssize_t len;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size = 0;
	int i, from;

	folioq = iov_kunit_create_folioq(test);

	iov_kunit_create_buffer(test, &bpages, npages);
	iov_kunit_load_folioq(test, &iter, READ, folioq, bpages, npages);

	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		from = pr->from;
		size = pr->to - from;
		KUNIT_ASSERT_LE(test, pr->to, bufsize);

		/* Count is pr->to; skipping 'from' leaves just the range. */
		iov_iter_folio_queue(&iter, WRITE, folioq, 0, 0, pr->to);
		iov_iter_advance(&iter, from);

		for (;;) {
			size_t offset0 = LONG_MAX;

			for (i = 0; i < ARRAY_SIZE(pagelist); i++)
				pagelist[i] = poison;

			len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
						     ARRAY_SIZE(pagelist), 0,
						     &offset0);
			KUNIT_EXPECT_GE(test, len, 0);
			if (len < 0)
				break;
			KUNIT_EXPECT_LE(test, len, size);
			KUNIT_EXPECT_EQ(test, iter.count, size - len);
			if (len == 0)
				break;
			size -= len;
			KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
			KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);

			/* Walk the batch one page-sized piece at a time. */
			for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
				ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
				struct page *p;
				int ix;

				KUNIT_ASSERT_GE(test, part, 0);
				ix = from / PAGE_SIZE;
				KUNIT_ASSERT_LT(test, ix, npages);
				p = bpages[ix];
				KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
				KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);

				from += part;
				len -= part;
				KUNIT_ASSERT_GE(test, len, 0);
				if (len == 0)
					break;

				/* Only the first page has an offset. */
				offset0 = 0;
			}

			if (test->status == KUNIT_FAILURE)
				goto out;
			if (iov_iter_count(&iter) == 0)
				break;
		}

		KUNIT_EXPECT_EQ(test, size, 0);
		KUNIT_EXPECT_EQ(test, iter.count, 0);
	}

out:
	KUNIT_SUCCEED(test);
}
/*
 * Test the extraction of ITER_XARRAY-type iterators.
 *
 * The test pages are loaded into an xarray.  For every entry in
 * kvec_test_ranges the iterator is pointed at that range, then pages are
 * extracted in batches (up to ARRAY_SIZE(pagelist) pages and 100 * 1024 bytes
 * per call) until the range is used up.  Each returned page pointer and
 * starting offset is checked against the buffer page that the current
 * position falls in.
 */
static void __init iov_kunit_extract_pages_xarray(struct kunit *test)
{
	/* Marker written to every pagelist[] slot before each extraction. */
	struct page *const poison = (void *)(unsigned long)0xaa55aa55aa55aa55ULL;
	const struct kvec_test_range *pr;
	struct xarray *xarray;
	struct iov_iter iter;
	struct page **bpages;
	struct page *pagelist[8];
	struct page **pages = pagelist;
	ssize_t len;
	size_t bufsize = 0x100000;
	size_t npages = bufsize / PAGE_SIZE;
	size_t size = 0;
	int i, from;

	xarray = iov_kunit_create_xarray(test);

	iov_kunit_create_buffer(test, &bpages, npages);
	iov_kunit_load_xarray(test, &iter, READ, xarray, bpages, npages);

	for (pr = kvec_test_ranges; pr->from >= 0; pr++) {
		from = pr->from;
		size = pr->to - from;
		KUNIT_ASSERT_LE(test, pr->to, bufsize);

		iov_iter_xarray(&iter, WRITE, xarray, from, size);

		for (;;) {
			size_t offset0 = LONG_MAX;

			for (i = 0; i < ARRAY_SIZE(pagelist); i++)
				pagelist[i] = poison;

			len = iov_iter_extract_pages(&iter, &pages, 100 * 1024,
						     ARRAY_SIZE(pagelist), 0,
						     &offset0);
			KUNIT_EXPECT_GE(test, len, 0);
			if (len < 0)
				break;
			KUNIT_EXPECT_LE(test, len, size);
			KUNIT_EXPECT_EQ(test, iter.count, size - len);
			if (len == 0)
				break;
			size -= len;
			KUNIT_EXPECT_GE(test, (ssize_t)offset0, 0);
			KUNIT_EXPECT_LT(test, offset0, PAGE_SIZE);

			/* Walk the batch one page-sized piece at a time. */
			for (i = 0; i < ARRAY_SIZE(pagelist); i++) {
				ssize_t part = min_t(ssize_t, len, PAGE_SIZE - offset0);
				struct page *p;
				int ix;

				KUNIT_ASSERT_GE(test, part, 0);
				ix = from / PAGE_SIZE;
				KUNIT_ASSERT_LT(test, ix, npages);
				p = bpages[ix];
				KUNIT_EXPECT_PTR_EQ(test, pagelist[i], p);
				KUNIT_EXPECT_EQ(test, offset0, from % PAGE_SIZE);

				from += part;
				len -= part;
				KUNIT_ASSERT_GE(test, len, 0);
				if (len == 0)
					break;

				/* Only the first page has an offset. */
				offset0 = 0;
			}

			if (test->status == KUNIT_FAILURE)
				goto out;
			if (iov_iter_count(&iter) == 0)
				break;
		}

		KUNIT_EXPECT_EQ(test, size, 0);
		KUNIT_EXPECT_EQ(test, iter.count, 0);
		KUNIT_EXPECT_EQ(test, iter.iov_offset, pr->to - pr->from);
	}

out:
	KUNIT_SUCCEED(test);
}
/*
 * Test cases referenced by iov_kunit_suite: copy to/from and page extraction
 * for each kernel-backed iterator type.  The table ends with an empty entry.
 */
static struct kunit_case __refdata iov_kunit_cases[] = {
	KUNIT_CASE(iov_kunit_copy_to_kvec),
	KUNIT_CASE(iov_kunit_copy_from_kvec),
	KUNIT_CASE(iov_kunit_copy_to_bvec),
	KUNIT_CASE(iov_kunit_copy_from_bvec),
	KUNIT_CASE(iov_kunit_copy_to_folioq),
	KUNIT_CASE(iov_kunit_copy_from_folioq),
	KUNIT_CASE(iov_kunit_copy_to_xarray),
	KUNIT_CASE(iov_kunit_copy_from_xarray),
	KUNIT_CASE(iov_kunit_extract_pages_kvec),
	KUNIT_CASE(iov_kunit_extract_pages_bvec),
	KUNIT_CASE(iov_kunit_extract_pages_folioq),
	KUNIT_CASE(iov_kunit_extract_pages_xarray),
	{}
};
/*
 * Suite descriptor: named "iov_iter", with iov_kunit_cases as its table of
 * test cases.
 */
static struct kunit_suite iov_kunit_suite = {
.name = "iov_iter",
.test_cases = iov_kunit_cases,
};
/* Hand the suite to KUnit through the kunit_test_suites() macro. */
kunit_test_suites(&iov_kunit_suite);