| 1 | // SPDX-License-Identifier: GPL-2.0-or-later | 
|---|
| 2 | /* | 
|---|
| 3 | * NET		Generic infrastructure for Network protocols. | 
|---|
| 4 | * | 
|---|
| 5 | * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br> | 
|---|
| 6 | * | 
|---|
| 7 | * 		From code originally in include/net/tcp.h | 
|---|
| 8 | */ | 
|---|
| 9 |  | 
|---|
| 10 | #include <linux/module.h> | 
|---|
| 11 | #include <linux/random.h> | 
|---|
| 12 | #include <linux/slab.h> | 
|---|
| 13 | #include <linux/string.h> | 
|---|
| 14 | #include <linux/tcp.h> | 
|---|
| 15 | #include <linux/vmalloc.h> | 
|---|
| 16 |  | 
|---|
| 17 | #include <net/request_sock.h> | 
|---|
| 18 |  | 
|---|
| 19 | /* | 
|---|
| 20 | * Maximum number of SYN_RECV sockets in queue per LISTEN socket. | 
|---|
| 21 | * One SYN_RECV socket costs about 80 bytes on a 32-bit machine. | 
|---|
| 22 | * It would be better to replace it with a global counter for all sockets | 
|---|
| 23 | * but then some measure against one socket starving all other sockets | 
|---|
| 24 | * would be needed. | 
|---|
| 25 | * | 
|---|
| 26 | * The minimum value of it is 128. Experiments with real servers show that | 
|---|
| 27 | * it is absolutely not enough even at 100conn/sec. 256 cures most | 
|---|
| 28 | * of problems. | 
|---|
| 29 | * This value is adjusted to 128 for low memory machines, | 
|---|
| 30 | * and it will increase in proportion to the memory of machine. | 
|---|
| 31 | * Note: Don't forget somaxconn, which may limit the backlog too. | 
|---|
| 32 | */ | 
|---|
| 33 |  | 
|---|
| 34 | void reqsk_queue_alloc(struct request_sock_queue *queue) | 
|---|
| 35 | { | 
|---|
| 36 | queue->fastopenq.rskq_rst_head = NULL; | 
|---|
| 37 | queue->fastopenq.rskq_rst_tail = NULL; | 
|---|
| 38 | queue->fastopenq.qlen = 0; | 
|---|
| 39 |  | 
|---|
| 40 | queue->rskq_accept_head = NULL; | 
|---|
| 41 | } | 
|---|
| 42 |  | 
|---|
| 43 | /* | 
|---|
| 44 | * This function is called to set a Fast Open socket's "fastopen_rsk" field | 
|---|
| 45 | * to NULL when a TFO socket no longer needs to access the request_sock. | 
|---|
| 46 | * This happens only after 3WHS has been either completed or aborted (e.g., | 
|---|
| 47 | * RST is received). | 
|---|
| 48 | * | 
|---|
| 49 | * Before TFO, a child socket is created only after 3WHS is completed, | 
|---|
| 50 | * hence it never needs to access the request_sock. Things get a lot more | 
|---|
| 51 | * complex with TFO. A child socket, accepted or not, has to access its | 
|---|
| 52 | * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts, | 
|---|
| 53 | * until 3WHS is either completed or aborted. Afterwards the req will stay | 
|---|
| 54 | * until either the child socket is accepted, or in the rare case when the | 
|---|
| 55 | * listener is closed before the child is accepted. | 
|---|
| 56 | * | 
|---|
| 57 | * In short, a request socket is only freed after BOTH 3WHS has completed | 
|---|
| 58 | * (or aborted) and the child socket has been accepted (or listener closed). | 
|---|
| 59 | * When a child socket is accepted, its corresponding req->sk is set to | 
|---|
| 60 | * NULL since it's no longer needed. More importantly, "req->sk == NULL" | 
|---|
| 61 | * will be used by the code below to determine if a child socket has been | 
|---|
| 62 | * accepted or not, and the check is protected by the fastopenq->lock | 
|---|
| 63 | * described below. | 
|---|
| 64 | * | 
|---|
| 65 | * Note that fastopen_rsk is only accessed from the child socket's context | 
|---|
| 66 | * with its socket lock held. But a request_sock (req) can be accessed by | 
|---|
| 67 | * both its child socket through fastopen_rsk, and a listener socket through | 
|---|
| 68 | * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin | 
|---|
| 69 | * lock per listener "icsk->icsk_accept_queue.fastopenq.lock" is created. | 
|---|
| 70 | * Only in the rare case when both the listener and the child locks are held, | 
|---|
| 71 | * e.g., in inet_csk_listen_stop() do we not need to acquire the lock. | 
|---|
| 72 | * The lock also protects other fields such as fastopenq->qlen, which is | 
|---|
| 73 | * decremented by this function when fastopen_rsk is no longer needed. | 
|---|
| 74 | * | 
|---|
| 75 | * Note that another solution was to simply use the existing socket lock | 
|---|
| 76 | * from the listener. But first, the socket lock is difficult to use. It is not | 
|---|
| 77 | * a simple spin lock - one must consider sock_owned_by_user() and arrange | 
|---|
| 78 | * to use sk_add_backlog() stuff. But what really makes it infeasible is the | 
|---|
| 79 | * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to | 
|---|
| 80 | * acquire a child's lock while holding listener's socket lock. | 
|---|
| 81 | * | 
|---|
| 82 | * This function also sets "treq->tfo_listener" to false. | 
|---|
| 83 | * treq->tfo_listener is used by the listener so it is protected by the | 
|---|
| 84 | * fastopenq->lock in this function. | 
|---|
| 85 | */ | 
|---|
| 86 | void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, | 
|---|
| 87 | bool reset) | 
|---|
| 88 | { | 
|---|
| 89 | struct sock *lsk = req->rsk_listener; | 
|---|
| 90 | struct fastopen_queue *fastopenq; | 
|---|
| 91 |  | 
|---|
| 92 | fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; | 
|---|
| 93 |  | 
|---|
| 94 | RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL); | 
|---|
| 95 | spin_lock_bh(lock: &fastopenq->lock); | 
|---|
| 96 | fastopenq->qlen--; | 
|---|
| 97 | tcp_rsk(req)->tfo_listener = false; | 
|---|
| 98 | if (req->sk)	/* the child socket hasn't been accepted yet */ | 
|---|
| 99 | goto out; | 
|---|
| 100 |  | 
|---|
| 101 | if (!reset || lsk->sk_state != TCP_LISTEN) { | 
|---|
| 102 | /* If the listener has been closed don't bother with the | 
|---|
| 103 | * special RST handling below. | 
|---|
| 104 | */ | 
|---|
| 105 | spin_unlock_bh(lock: &fastopenq->lock); | 
|---|
| 106 | reqsk_put(req); | 
|---|
| 107 | return; | 
|---|
| 108 | } | 
|---|
| 109 | /* Wait for 60secs before removing a req that has triggered RST. | 
|---|
| 110 | * This is a simple defense against TFO spoofing attack - by | 
|---|
| 111 | * counting the req against fastopen.max_qlen, and disabling | 
|---|
| 112 | * TFO when the qlen exceeds max_qlen. | 
|---|
| 113 | * | 
|---|
| 114 | * For more details see CoNext'11 "TCP Fast Open" paper. | 
|---|
| 115 | */ | 
|---|
| 116 | req->rsk_timer.expires = jiffies + 60*HZ; | 
|---|
| 117 | if (fastopenq->rskq_rst_head == NULL) | 
|---|
| 118 | fastopenq->rskq_rst_head = req; | 
|---|
| 119 | else | 
|---|
| 120 | fastopenq->rskq_rst_tail->dl_next = req; | 
|---|
| 121 |  | 
|---|
| 122 | req->dl_next = NULL; | 
|---|
| 123 | fastopenq->rskq_rst_tail = req; | 
|---|
| 124 | fastopenq->qlen++; | 
|---|
| 125 | out: | 
|---|
| 126 | spin_unlock_bh(lock: &fastopenq->lock); | 
|---|
| 127 | } | 
|---|
| 128 |  | 
|---|