1
0
Fork 0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-01-23 08:35:19 -05:00

net/smc: Unbind r/w buffer size from clcsock and make them tunable

Currently, SMC uses smc->sk.sk_{rcv|snd}buf to size the send buffer
and the RMB, and these values are taken from tcp_{w|r}mem of the
clcsock.

The buffer sizes of the TCP socket don't fit SMC well. SMC-R/-D
generally needs larger buffers than TCP to get higher performance,
because they use different underlying devices and paths.

So this patch unbinds the buffer sizes from TCP and introduces two
sysctl knobs to tune them independently. These knobs are per net
namespace, so they also work for containers.

Signed-off-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
This commit is contained in:
Tony Lu 2022-09-20 17:52:22 +08:00 committed by Paolo Abeni
parent 77eee32514
commit 0227f058aa
5 changed files with 47 additions and 7 deletions

View file

@ -41,3 +41,21 @@ smcr_testlink_time - INTEGER
disabling TEST_LINK. disabling TEST_LINK.
Default: 30 seconds. Default: 30 seconds.
wmem - INTEGER
Initial size of send buffer used by SMC sockets.
The default value is inherited from net.ipv4.tcp_wmem[1].
The minimum value is 16KiB. There is no hard upper limit on this value,
but at most 512KiB is allowed for SMC-R and 1MiB for SMC-D.
Default: 16K
rmem - INTEGER
Initial size of receive buffer (RMB) used by SMC sockets.
The default value is inherited from net.ipv4.tcp_rmem[1].
The minimum value is 16KiB. There is no hard upper limit on this value,
but at most 512KiB is allowed for SMC-R and 1MiB for SMC-D.
Default: 128K

View file

@ -20,5 +20,7 @@ struct netns_smc {
unsigned int sysctl_autocorking_size; unsigned int sysctl_autocorking_size;
unsigned int sysctl_smcr_buf_type; unsigned int sysctl_smcr_buf_type;
int sysctl_smcr_testlink_time; int sysctl_smcr_testlink_time;
int sysctl_wmem;
int sysctl_rmem;
}; };
#endif #endif

View file

@ -379,6 +379,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT; sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct; sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol; sk->sk_protocol = protocol;
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk); smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work); INIT_WORK(&smc->connect_work, smc_connect_work);
@ -3253,9 +3255,6 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
smc->clcsock = clcsock; smc->clcsock = clcsock;
} }
smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
out: out:
return rc; return rc;
} }

View file

@ -2307,10 +2307,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */ /* use socket recv buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_rcvbuf / 2; sk_buf_size = smc->sk.sk_rcvbuf;
else else
/* use socket send buffer size (w/o overhead) as start value */ /* use socket send buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_sndbuf / 2; sk_buf_size = smc->sk.sk_sndbuf;
for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
bufsize_short >= 0; bufsize_short--) { bufsize_short >= 0; bufsize_short--) {
@ -2369,7 +2369,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) { if (is_rmb) {
conn->rmb_desc = buf_desc; conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short; conn->rmbe_size_short = bufsize_short;
smc->sk.sk_rcvbuf = bufsize * 2; smc->sk.sk_rcvbuf = bufsize;
atomic_set(&conn->bytes_to_rcv, 0); atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit = conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len); smc_rmb_wnd_update_limit(buf_desc->len);
@ -2377,7 +2377,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else { } else {
conn->sndbuf_desc = buf_desc; conn->sndbuf_desc = buf_desc;
smc->sk.sk_sndbuf = bufsize * 2; smc->sk.sk_sndbuf = bufsize;
atomic_set(&conn->sndbuf_space, bufsize); atomic_set(&conn->sndbuf_space, bufsize);
} }
return 0; return 0;

View file

@ -19,6 +19,9 @@
#include "smc_llc.h" #include "smc_llc.h"
#include "smc_sysctl.h" #include "smc_sysctl.h"
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
static struct ctl_table smc_table[] = { static struct ctl_table smc_table[] = {
{ {
.procname = "autocorking_size", .procname = "autocorking_size",
@ -43,6 +46,22 @@ static struct ctl_table smc_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec_jiffies,
}, },
{
.procname = "wmem",
.data = &init_net.smc.sysctl_wmem,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
},
{
.procname = "rmem",
.data = &init_net.smc.sysctl_rmem,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
},
{ } { }
}; };
@ -69,6 +88,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
return 0; return 0;