From 10f41d0710fc81b7af93fa6106678d57b1ff24a7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:29 +0100 Subject: [PATCH 1/5] tls: break out of main loop when PEEK gets a non-data record PEEK needs to leave decrypted records on the rx_list so that we can receive them later on, so it jumps back into the async code that queues the skb. Unfortunately that makes us skip the TLS_RECORD_TYPE_DATA check at the bottom of the main loop, so if two records of the same (non-DATA) type are queued, we end up merging them. Add the same record type check, and mark it unlikely() so that it does not penalize the async fastpath. Async decrypt only applies to data records, so this check is only needed for PEEK. process_rx_list also has similar issues. Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/3df2eef4fdae720c55e69472b5bea668772b45a2.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9fbc70200cd0..78aedfc682ba 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2064,6 +2064,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } From fdfbaec5923d9359698cbb286bc0deadbb717504 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:30 +0100 Subject: [PATCH 2/5] tls: stop recv() if initial process_rx_list gave us non-DATA If we have a non-DATA record on the rx_list and another record of the same type still on the queue, we will end up merging them: - process_rx_list copies the non-DATA record - we start the loop and process the first available record since it's of the same type - we break out of the loop since the record was not DATA Just check the record type and jump to the end in case process_rx_list did some work. 
Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/bd31449e43bd4b6ff546f5c51cf958c31c511deb.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 78aedfc682ba..43dd0d82b6ed 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1971,7 +1971,7 @@ int tls_sw_recvmsg(struct sock *sk, goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); From ec823bf3a479d42c589dc0f28ef4951c49cd2d2a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:31 +0100 Subject: [PATCH 3/5] tls: don't skip over different type records from the rx_list If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA and do a recv(PEEK), the rx_list will contain the first two records. The next large recv will walk through the rx_list and copy data from record 1, then stop because record 2 is a different type. Since we haven't filled up our buffer, we will process the next available record. It's also DATA, so we can merge it with the current read. We shouldn't do that, since there was a record in between that we ignored. Add a flag to let process_rx_list inform tls_sw_recvmsg that it had more data available. 
Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records") Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/f00c0c0afa080c60f016df1471158c1caf983c34.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 43dd0d82b6ed..de96959336c4 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1772,7 +1772,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1785,7 +1786,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1803,12 +1804,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1844,6 +1845,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1947,6 +1952,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1966,12 +1972,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. 
It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA)) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2130,10 +2136,10 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); + async_copy_bytes, is_peek, NULL); } copied += decrypted; From 7b2a4c2a623a9f9c5fd9cff499bb9457fa2192ec Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:32 +0100 Subject: [PATCH 4/5] selftests: tls: add test for merging of same-type control messages Two consecutive control messages of the same type should never be merged into one large received blob of data. 
Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/018f1633d5471684c65def5fe390de3b15c3d683.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 49c84602707f..2714c230a0f9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1485,6 +1485,32 @@ TEST_F(tls, control_msg) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, control_msg_nomerge) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec2, send_len), 0); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; From 2bf6172632e1d4a6ec7ee7e734d53a43a145f5c6 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 15 Feb 2024 17:17:33 +0100 Subject: [PATCH 5/5] selftests: tls: add test for peeking past a record of a different type If we queue 3 records: - record 1, type DATA - record 2, some other type - record 3, type DATA the current code can look past the 2nd record and merge the 2 data records. 
Signed-off-by: Sabrina Dubroca Link: https://lore.kernel.org/r/4623550f8617c239581030c13402d3262f2bd14f.1708007371.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2714c230a0f9..b95c249f81c2 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1511,6 +1511,25 @@ TEST_F(tls, control_msg_nomerge) EXPECT_EQ(memcmp(buf, rec2, send_len), 0); } +TEST_F(tls, data_control_data) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + char *rec3 = "3333"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read";