mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-22 16:06:04 -05:00
bpf: fix filed access without lock
The tcp_bpf_recvmsg_parser() function, running in user context, retrieves seq_copied from tcp_sk without holding the socket lock, and stores it in a local variable seq. However, the softirq context can modify tcp_sk->seq_copied concurrently, for example, n tcp_read_sock(). As a result, the seq value is stale when it is assigned back to tcp_sk->copied_seq at the end of tcp_bpf_recvmsg_parser(), leading to incorrect behavior. Due to concurrency, the copied_seq field in tcp_bpf_recvmsg_parser() might be set to an incorrect value (less than the actual copied_seq) at the end of function: 'WRITE_ONCE(tcp->copied_seq, seq)'. This causes the 'offset' to be negative in tcp_read_sock()->tcp_recv_skb() when processing new incoming packets (sk->copied_seq - skb->seq becomes less than 0), and all subsequent packets will be dropped. Signed-off-by: Jiayuan Chen <mrpre@163.com> Link: https://lore.kernel.org/r/20241028065226.35568-1-mrpre@163.com Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
This commit is contained in:
parent
740be3b9a6
commit
a32aee8f0d
1 changed files with 4 additions and 3 deletions
|
@ -221,11 +221,11 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
|
|||
int flags,
|
||||
int *addr_len)
|
||||
{
|
||||
struct tcp_sock *tcp = tcp_sk(sk);
|
||||
int peek = flags & MSG_PEEK;
|
||||
u32 seq = tcp->copied_seq;
|
||||
struct sk_psock *psock;
|
||||
struct tcp_sock *tcp;
|
||||
int copied = 0;
|
||||
u32 seq;
|
||||
|
||||
if (unlikely(flags & MSG_ERRQUEUE))
|
||||
return inet_recv_error(sk, msg, len, addr_len);
|
||||
|
@ -238,7 +238,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
|
|||
return tcp_recvmsg(sk, msg, len, flags, addr_len);
|
||||
|
||||
lock_sock(sk);
|
||||
|
||||
tcp = tcp_sk(sk);
|
||||
seq = tcp->copied_seq;
|
||||
/* We may have received data on the sk_receive_queue pre-accept and
|
||||
* then we can not use read_skb in this context because we haven't
|
||||
* assigned a sk_socket yet so have no link to the ops. The work-around
|
||||
|
|
Loading…
Reference in a new issue