// SPDX-License-Identifier: GPL-2.0
/* Copyright Amazon.com Inc. or its affiliates. */
36df96146SKuniyuki Iwashima #define _GNU_SOURCE
46df96146SKuniyuki Iwashima #include <sched.h>
56df96146SKuniyuki Iwashima
6*ff149e92SKuniyuki Iwashima #include <fcntl.h>
7*ff149e92SKuniyuki Iwashima
86df96146SKuniyuki Iwashima #include <netinet/in.h>
96df96146SKuniyuki Iwashima #include <sys/socket.h>
106df96146SKuniyuki Iwashima #include <sys/sysinfo.h>
116df96146SKuniyuki Iwashima
126df96146SKuniyuki Iwashima #include "../kselftest_harness.h"
136df96146SKuniyuki Iwashima
/* State carried through setup, the test body and teardown. */
FIXTURE(so_incoming_cpu)
{
	/* Listener fds; FIXTURE_SETUP allocates nr_server slots. */
	int *servers;

	/* A single address, seen generically by the socket calls and as
	 * IPv4 when the family/address/port are filled in.
	 */
	union {
		struct sockaddr addr;
		struct sockaddr_in in_addr;
	};

	/* Length passed alongside addr to the socket calls. */
	socklen_t addrlen;
};
236df96146SKuniyuki Iwashima
/* Point in a listener's setup at which SO_INCOMING_CPU is applied. */
enum when_to_set {
	BEFORE_REUSEPORT = 0,	/* right after socket(), before SO_REUSEPORT */
	BEFORE_LISTEN,		/* after bind(), before listen() */
	AFTER_LISTEN,		/* right after this socket's listen() */
	AFTER_ALL_LISTEN,	/* once every listener has been created */
};
306df96146SKuniyuki Iwashima
/* The one parameter that differs between variants of this fixture. */
FIXTURE_VARIANT(so_incoming_cpu)
{
	/* An enum when_to_set value; read by create_server() and
	 * create_servers().
	 */
	int when_to_set;
};
356df96146SKuniyuki Iwashima
FIXTURE_VARIANT_ADD(so_incoming_cpu,before_reuseport)366df96146SKuniyuki Iwashima FIXTURE_VARIANT_ADD(so_incoming_cpu, before_reuseport)
376df96146SKuniyuki Iwashima {
386df96146SKuniyuki Iwashima .when_to_set = BEFORE_REUSEPORT,
396df96146SKuniyuki Iwashima };
406df96146SKuniyuki Iwashima
FIXTURE_VARIANT_ADD(so_incoming_cpu,before_listen)416df96146SKuniyuki Iwashima FIXTURE_VARIANT_ADD(so_incoming_cpu, before_listen)
426df96146SKuniyuki Iwashima {
436df96146SKuniyuki Iwashima .when_to_set = BEFORE_LISTEN,
446df96146SKuniyuki Iwashima };
456df96146SKuniyuki Iwashima
FIXTURE_VARIANT_ADD(so_incoming_cpu,after_listen)466df96146SKuniyuki Iwashima FIXTURE_VARIANT_ADD(so_incoming_cpu, after_listen)
476df96146SKuniyuki Iwashima {
486df96146SKuniyuki Iwashima .when_to_set = AFTER_LISTEN,
496df96146SKuniyuki Iwashima };
506df96146SKuniyuki Iwashima
FIXTURE_VARIANT_ADD(so_incoming_cpu,after_all_listen)516df96146SKuniyuki Iwashima FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen)
526df96146SKuniyuki Iwashima {
536df96146SKuniyuki Iwashima .when_to_set = AFTER_ALL_LISTEN,
546df96146SKuniyuki Iwashima };
556df96146SKuniyuki Iwashima
/* Write @string to the file at @filename (callers pass /proc/sys knobs).
 *
 * The test fails if the file cannot be opened write-only or if write()
 * does not consume the whole string.  The descriptor is closed on both
 * the success and the short-write outcome.
 */
static void write_sysctl(struct __test_metadata *_metadata,
			 char *filename, char *string)
{
	int fd, len, ret;

	fd = open(filename, O_WRONLY);
	ASSERT_NE(fd, -1);

	len = strlen(string);
	ret = write(fd, string, len);

	/* Close before asserting so the fd is not left open if the check
	 * below ends the test.
	 */
	close(fd);

	ASSERT_EQ(ret, len);
}
68*ff149e92SKuniyuki Iwashima
/* Unshare the network namespace, bring lo up with the ip tool, then
 * write the two net.ipv4 sysctls listed below.  Any failing step fails
 * the test.
 */
static void setup_netns(struct __test_metadata *_metadata)
{
	/* { path, value } pairs, written in this order. */
	static char *const sysctls[][2] = {
		{ "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001" },
		{ "/proc/sys/net/ipv4/tcp_tw_reuse", "0" },
	};
	unsigned int i;

	ASSERT_EQ(unshare(CLONE_NEWNET), 0);
	ASSERT_EQ(system("ip link set lo up"), 0);

	for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
		write_sysctl(_metadata, sysctls[i][0], sysctls[i][1]);
}
77*ff149e92SKuniyuki Iwashima
/* Port budget for client connections; the bounds mirror the values
 * written to ip_local_port_range in setup_netns().
 */
#define NR_PORT (60001 - 10000 - 1)

/* Preferred number of connections aimed at each listener. */
#define NR_CLIENT_PER_SERVER_DEFAULT 32

/* All three are computed in FIXTURE_SETUP from get_nprocs(). */
static int nr_client_per_server;
static int nr_server;
static int nr_client;
81*ff149e92SKuniyuki Iwashima
FIXTURE_SETUP(so_incoming_cpu)826df96146SKuniyuki Iwashima FIXTURE_SETUP(so_incoming_cpu)
836df96146SKuniyuki Iwashima {
84*ff149e92SKuniyuki Iwashima setup_netns(_metadata);
856df96146SKuniyuki Iwashima
86*ff149e92SKuniyuki Iwashima nr_server = get_nprocs();
87*ff149e92SKuniyuki Iwashima ASSERT_LE(2, nr_server);
88*ff149e92SKuniyuki Iwashima
89*ff149e92SKuniyuki Iwashima if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT)
90*ff149e92SKuniyuki Iwashima nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT;
91*ff149e92SKuniyuki Iwashima else
92*ff149e92SKuniyuki Iwashima nr_client_per_server = NR_PORT / nr_server;
93*ff149e92SKuniyuki Iwashima
94*ff149e92SKuniyuki Iwashima nr_client = nr_client_per_server * nr_server;
95*ff149e92SKuniyuki Iwashima
96*ff149e92SKuniyuki Iwashima self->servers = malloc(sizeof(int) * nr_server);
976df96146SKuniyuki Iwashima ASSERT_NE(self->servers, NULL);
986df96146SKuniyuki Iwashima
996df96146SKuniyuki Iwashima self->in_addr.sin_family = AF_INET;
1006df96146SKuniyuki Iwashima self->in_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1016df96146SKuniyuki Iwashima self->in_addr.sin_port = htons(0);
1026df96146SKuniyuki Iwashima self->addrlen = sizeof(struct sockaddr_in);
1036df96146SKuniyuki Iwashima }
1046df96146SKuniyuki Iwashima
FIXTURE_TEARDOWN(so_incoming_cpu)1056df96146SKuniyuki Iwashima FIXTURE_TEARDOWN(so_incoming_cpu)
1066df96146SKuniyuki Iwashima {
1076df96146SKuniyuki Iwashima int i;
1086df96146SKuniyuki Iwashima
109*ff149e92SKuniyuki Iwashima for (i = 0; i < nr_server; i++)
1106df96146SKuniyuki Iwashima close(self->servers[i]);
1116df96146SKuniyuki Iwashima
1126df96146SKuniyuki Iwashima free(self->servers);
1136df96146SKuniyuki Iwashima }
1146df96146SKuniyuki Iwashima
set_so_incoming_cpu(struct __test_metadata * _metadata,int fd,int cpu)1156df96146SKuniyuki Iwashima void set_so_incoming_cpu(struct __test_metadata *_metadata, int fd, int cpu)
1166df96146SKuniyuki Iwashima {
1176df96146SKuniyuki Iwashima int ret;
1186df96146SKuniyuki Iwashima
1196df96146SKuniyuki Iwashima ret = setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(int));
1206df96146SKuniyuki Iwashima ASSERT_EQ(ret, 0);
1216df96146SKuniyuki Iwashima }
1226df96146SKuniyuki Iwashima
/* Create one non-blocking SO_REUSEPORT TCP listener bound to self->addr.
 *
 * @cpu is applied with SO_INCOMING_CPU at the step selected by
 * variant->when_to_set.  For AFTER_ALL_LISTEN nothing is applied here;
 * create_servers() does it afterwards.
 *
 * Returns the listening fd.  Any failing step fails the test.
 */
int create_server(struct __test_metadata *_metadata,
		  FIXTURE_DATA(so_incoming_cpu) *self,
		  const FIXTURE_VARIANT(so_incoming_cpu) *variant,
		  int cpu)
{
	const int when = variant->when_to_set;
	int one = 1;
	int fd, ret;

	fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
	ASSERT_NE(fd, -1);

	if (when == BEFORE_REUSEPORT)
		set_so_incoming_cpu(_metadata, fd, cpu);

	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	ASSERT_EQ(ret, 0);

	ret = bind(fd, &self->addr, self->addrlen);
	ASSERT_EQ(ret, 0);

	if (when == BEFORE_LISTEN)
		set_so_incoming_cpu(_metadata, fd, cpu);

	/* The backlog is nr_client rather than nr_client_per_server so
	 * that connect() does not block this test if SO_INCOMING_CPU
	 * is broken.
	 */
	ret = listen(fd, nr_client);
	ASSERT_EQ(ret, 0);

	if (when == AFTER_LISTEN)
		set_so_incoming_cpu(_metadata, fd, cpu);

	return fd;
}
1566df96146SKuniyuki Iwashima
/* Fill self->servers with nr_server listeners, the i-th one created for
 * CPU i.
 *
 * After the first listener is bound, its address is read back into
 * self->addr, so the remaining listeners (and later the clients) use
 * the port it was given.  For the AFTER_ALL_LISTEN variant,
 * SO_INCOMING_CPU is applied in a second pass over all listeners.
 */
void create_servers(struct __test_metadata *_metadata,
		    FIXTURE_DATA(so_incoming_cpu) *self,
		    const FIXTURE_VARIANT(so_incoming_cpu) *variant)
{
	int i, ret;

	for (i = 0; i < nr_server; i++) {
		self->servers[i] = create_server(_metadata, self, variant, i);

		if (i != 0)
			continue;

		ret = getsockname(self->servers[i], &self->addr, &self->addrlen);
		ASSERT_EQ(ret, 0);
	}

	if (variant->when_to_set != AFTER_ALL_LISTEN)
		return;

	for (i = 0; i < nr_server; i++)
		set_so_incoming_cpu(_metadata, self->servers[i], i);
}
1776df96146SKuniyuki Iwashima
/* For each CPU index i below nr_server, pin this process to CPU i and
 * open nr_client_per_server connections to self->addr, closing each one
 * right after connect() succeeds.
 *
 * The affinity is not restored afterwards; the process stays pinned to
 * the last CPU used.
 */
void create_clients(struct __test_metadata *_metadata,
		    FIXTURE_DATA(so_incoming_cpu) *self)
{
	cpu_set_t cpu_set;
	int i, fd, ret;

	for (i = 0; i < nr_server; i++) {
		int remaining = nr_client_per_server;

		/* Build a mask holding CPU i only and double-check it. */
		CPU_ZERO(&cpu_set);
		CPU_SET(i, &cpu_set);
		ASSERT_EQ(CPU_COUNT(&cpu_set), 1);
		ASSERT_NE(CPU_ISSET(i, &cpu_set), 0);

		/* Run on CPU i so that the SYN is processed there and
		 * ends up on the i-th listener.
		 */
		ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
		ASSERT_EQ(ret, 0);

		while (remaining-- > 0) {
			fd = socket(AF_INET, SOCK_STREAM, 0);
			ASSERT_NE(fd, -1);

			ret = connect(fd, &self->addr, self->addrlen);
			ASSERT_EQ(ret, 0);

			close(fd);
		}
	}
}
2086df96146SKuniyuki Iwashima
/* Accept nr_client_per_server connections from each listener and check
 * that SO_INCOMING_CPU on every accepted socket equals the listener's
 * index.  The accepted sockets are closed as they are checked.
 *
 * Note that accept() stores the peer address into self->addr.
 */
void verify_incoming_cpu(struct __test_metadata *_metadata,
			 FIXTURE_DATA(so_incoming_cpu) *self)
{
	int i, fd, cpu, ret;
	int total = 0;
	socklen_t len = sizeof(cpu);

	for (i = 0; i < nr_server; i++) {
		int j = 0;

		while (j < nr_client_per_server) {
			/* The listeners are non-blocking: -EAGAIN here
			 * means SO_INCOMING_CPU is broken.
			 */
			fd = accept(self->servers[i], &self->addr, &self->addrlen);
			ASSERT_NE(fd, -1);

			ret = getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len);
			ASSERT_EQ(ret, 0);
			ASSERT_EQ(cpu, i);

			close(fd);

			total++;
			j++;
		}
	}

	ASSERT_EQ(total, nr_client);
	TH_LOG("SO_INCOMING_CPU is very likely to be "
	       "working correctly with %d sockets.", total);
}
2346df96146SKuniyuki Iwashima
TEST_F(so_incoming_cpu,test1)2356df96146SKuniyuki Iwashima TEST_F(so_incoming_cpu, test1)
2366df96146SKuniyuki Iwashima {
2376df96146SKuniyuki Iwashima create_servers(_metadata, self, variant);
2386df96146SKuniyuki Iwashima create_clients(_metadata, self);
2396df96146SKuniyuki Iwashima verify_incoming_cpu(_metadata, self);
2406df96146SKuniyuki Iwashima }
2416df96146SKuniyuki Iwashima
TEST_F(so_incoming_cpu,test2)2426df96146SKuniyuki Iwashima TEST_F(so_incoming_cpu, test2)
2436df96146SKuniyuki Iwashima {
2446df96146SKuniyuki Iwashima int server;
2456df96146SKuniyuki Iwashima
2466df96146SKuniyuki Iwashima create_servers(_metadata, self, variant);
2476df96146SKuniyuki Iwashima
2486df96146SKuniyuki Iwashima /* No CPU specified */
2496df96146SKuniyuki Iwashima server = create_server(_metadata, self, variant, -1);
2506df96146SKuniyuki Iwashima close(server);
2516df96146SKuniyuki Iwashima
2526df96146SKuniyuki Iwashima create_clients(_metadata, self);
2536df96146SKuniyuki Iwashima verify_incoming_cpu(_metadata, self);
2546df96146SKuniyuki Iwashima }
2556df96146SKuniyuki Iwashima
TEST_F(so_incoming_cpu,test3)2566df96146SKuniyuki Iwashima TEST_F(so_incoming_cpu, test3)
2576df96146SKuniyuki Iwashima {
2586df96146SKuniyuki Iwashima int server, client;
2596df96146SKuniyuki Iwashima
2606df96146SKuniyuki Iwashima create_servers(_metadata, self, variant);
2616df96146SKuniyuki Iwashima
2626df96146SKuniyuki Iwashima /* No CPU specified */
2636df96146SKuniyuki Iwashima server = create_server(_metadata, self, variant, -1);
2646df96146SKuniyuki Iwashima
2656df96146SKuniyuki Iwashima create_clients(_metadata, self);
2666df96146SKuniyuki Iwashima
2676df96146SKuniyuki Iwashima /* Never receive any requests */
2686df96146SKuniyuki Iwashima client = accept(server, &self->addr, &self->addrlen);
2696df96146SKuniyuki Iwashima ASSERT_EQ(client, -1);
2706df96146SKuniyuki Iwashima
2716df96146SKuniyuki Iwashima verify_incoming_cpu(_metadata, self);
2726df96146SKuniyuki Iwashima }
2736df96146SKuniyuki Iwashima
2746df96146SKuniyuki Iwashima TEST_HARNESS_MAIN
275