// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/rcupdate.h>
#include <net/tcp.h>
#include <net/busy_poll.h>

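/* Lazily install a random primary key for this netns the first time a
 * key is needed; a no-op if a context has already been published.
 */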
void tcp_fastopen_init_key_once(struct net *net)
{
	u8 key[TCP_FASTOPEN_KEY_LENGTH];
	struct tcp_fastopen_context *ctxt;

	rcu_read_lock();
	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
	if (ctxt) {
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	/* tcp_fastopen_reset_cipher() publishes the new context
	 * atomically, so we allow this race to happen here.
	 *
	 * All call sites of tcp_fastopen_cookie_gen() also check
	 * for a valid cookie, so this is an acceptable risk.
	 */
	get_random_bytes(key, sizeof(key));
	tcp_fastopen_reset_cipher(net, NULL, key, NULL);
}

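/* RCU callback: scrub and free a retired key context. */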
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
	struct tcp_fastopen_context *ctx =
		container_of(head, struct tcp_fastopen_context, rcu);

	kfree_sensitive(ctx);
}

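/* Release a listener's private key context, if one was installed (e.g.
 * via the TCP_FASTOPEN_KEY socket option). Runs at socket destruction,
 * hence the unconditional rcu_dereference_protected(..., 1).
 */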
void tcp_fastopen_destroy_cipher(struct sock *sk)
{
	struct tcp_fastopen_context *ctx;

	ctx = rcu_dereference_protected(
		inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
	if (ctx)
		call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
}

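/* Retire the per-netns key context (e.g. on netns teardown); the old
 * context is freed after an RCU grace period.
 */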
void tcp_fastopen_ctx_destroy(struct net *net)
{
	struct tcp_fastopen_context *ctxt;

	ctxt = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, NULL));

	if (ctxt)
		call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
}

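/* Publish a new key context built from @primary_key (and @backup_key if
 * non-NULL), either for one listener (@sk != NULL) or for the whole
 * netns. Any previous context is freed after an RCU grace period, so
 * concurrent cookie generation always sees a complete key.
 */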
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
			      void *primary_key, void *backup_key)
{
	struct tcp_fastopen_context *ctx, *octx;
	struct fastopen_queue *q;
	int err = 0;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	ctx->key[0].key[0] = get_unaligned_le64(primary_key);
	ctx->key[0].key[1] = get_unaligned_le64(primary_key + 8);
	if (backup_key) {
		ctx->key[1].key[0] = get_unaligned_le64(backup_key);
		ctx->key[1].key[1] = get_unaligned_le64(backup_key + 8);
		ctx->num = 2;
	} else {
		ctx->num = 1;
	}

	if (sk) {
		q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
		octx = unrcu_pointer(xchg(&q->ctx, RCU_INITIALIZER(ctx)));
	} else {
		octx = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx,
					  RCU_INITIALIZER(ctx)));
	}

	if (octx)
		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
out:
	return err;
}

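/* A hedged userspace sketch of the paths into the function above (the
 * TCP_FASTOPEN_KEY socket option is assumed here; the key bytes are
 * illustrative):
 *
 *	u8 key[16];	(filled with random bytes, e.g. via getrandom(2))
 *
 *	setsockopt(listen_fd, SOL_TCP, TCP_FASTOPEN_KEY, key, sizeof(key));
 *
 * reaches tcp_fastopen_reset_cipher() with @sk set, while writing the
 * net.ipv4.tcp_fastopen_key sysctl reaches it with @sk == NULL.
 */

/* Copy the active keys into @key, primary key first, and return how
 * many were copied (0, 1 or 2). Used to report keys back to userspace.
 */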
int tcp_fastopen_get_cipher(struct net *net, struct inet_connection_sock *icsk,
			    u64 *key)
{
	struct tcp_fastopen_context *ctx;
	int n_keys = 0, i;

	rcu_read_lock();
	if (icsk)
		ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
	else
		ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
	if (ctx) {
		n_keys = tcp_fastopen_context_len(ctx);
		for (i = 0; i < n_keys; i++) {
			put_unaligned_le64(ctx->key[i].key[0], key + (i * 2));
			put_unaligned_le64(ctx->key[i].key[1], key + (i * 2) + 1);
		}
	}
	rcu_read_unlock();

	return n_keys;
}

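/* Compute the 8-byte cookie for one key: SipHash over the SYN's source
 * and destination addresses (v4 or v6). Returns false for an unknown
 * address family.
 */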
static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
					     struct sk_buff *syn,
					     const siphash_key_t *key,
					     struct tcp_fastopen_cookie *foc)
{
	BUILD_BUG_ON(TCP_FASTOPEN_COOKIE_SIZE != sizeof(u64));

	if (req->rsk_ops->family == AF_INET) {
		const struct iphdr *iph = ip_hdr(syn);

		foc->val[0] = cpu_to_le64(siphash(&iph->saddr,
						  sizeof(iph->saddr) +
						  sizeof(iph->daddr),
						  key));
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
		return true;
	}
#if IS_ENABLED(CONFIG_IPV6)
	if (req->rsk_ops->family == AF_INET6) {
		const struct ipv6hdr *ip6h = ipv6_hdr(syn);

		foc->val[0] = cpu_to_le64(siphash(&ip6h->saddr,
						  sizeof(ip6h->saddr) +
						  sizeof(ip6h->daddr),
						  key));
		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
		return true;
	}
#endif
	return false;
}

/* Generate the fastopen cookie by applying SipHash to both the source and
 * destination addresses.
 */
static void tcp_fastopen_cookie_gen(struct sock *sk,
				    struct request_sock *req,
				    struct sk_buff *syn,
				    struct tcp_fastopen_cookie *foc)
{
	struct tcp_fastopen_context *ctx;

	rcu_read_lock();
	ctx = tcp_fastopen_get_ctx(sk);
	if (ctx)
		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[0], foc);
	rcu_read_unlock();
}

/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
 * queue this additional data / FIN.
 */
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
		return;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (!skb)
		return;

	tcp_cleanup_skb(skb);
	/* segs_in has been initialized to 1 in tcp_create_openreq_child().
	 * Hence, reset segs_in to 0 before calling tcp_segs_in()
	 * to avoid double counting.  Also, tcp_segs_in() expects
	 * skb->len to include the tcp_hdrlen.  Hence, it should
	 * be called before __skb_pull().
	 */
	tp->segs_in = 0;
	tcp_segs_in(tp, skb);
	__skb_pull(skb, tcp_hdrlen(skb));
	sk_forced_mem_schedule(sk, skb->truesize);
	skb_set_owner_r(skb, sk);

	TCP_SKB_CB(skb)->seq++;
	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;

	tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
	tcp_add_receive_queue(sk, skb);
	tp->syn_data_acked = 1;

	/* u64_stats_update_begin(&tp->syncp) not needed here,
	 * as we certainly are not changing upper 32bit value (0)
	 */
	tp->bytes_received = skb->len;

	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		tcp_fin(sk);
}

/* Check the client's cookie against cookies generated from each
 * installed key. Returns 0 on no match, 1 for the primary key and 2 for
 * the backup key. The first iteration writes into @valid_foc and later
 * ones into a scratch cookie, so @valid_foc holds the primary-key cookie
 * the caller can echo back to the client.
 */
static int tcp_fastopen_cookie_gen_check(struct sock *sk,
					 struct request_sock *req,
					 struct sk_buff *syn,
					 struct tcp_fastopen_cookie *orig,
					 struct tcp_fastopen_cookie *valid_foc)
{
	struct tcp_fastopen_cookie search_foc = { .len = -1 };
	struct tcp_fastopen_cookie *foc = valid_foc;
	struct tcp_fastopen_context *ctx;
	int i, ret = 0;

	rcu_read_lock();
	ctx = tcp_fastopen_get_ctx(sk);
	if (!ctx)
		goto out;
	for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
		__tcp_fastopen_cookie_gen_cipher(req, syn, &ctx->key[i], foc);
		if (tcp_fastopen_cookie_match(foc, orig)) {
			ret = i + 1;
			goto out;
		}
		foc = &search_foc;
	}
out:
	rcu_read_unlock();
	return ret;
}

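/* Create a full child socket straight from the TFO SYN, so the payload
 * carried in the SYN can be accepted before the 3WHS completes. The
 * caller sends the SYN-ACK and queues the child on the accept queue.
 */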
static struct sock *tcp_fastopen_create_child(struct sock *sk,
					      struct sk_buff *skb,
					      struct request_sock *req)
{
	struct tcp_sock *tp;
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	struct sock *child;
	bool own_req;

	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
							 NULL, &own_req);
	if (!child)
		return NULL;

	spin_lock(&queue->fastopenq.lock);
	queue->fastopenq.qlen++;
	spin_unlock(&queue->fastopenq.lock);

	/* Initialize the child socket. Have to fix some values to take
	 * into account the child is a Fast Open socket and is created
	 * only out of the bits carried in the SYN packet.
	 */
	tp = tcp_sk(child);

	rcu_assign_pointer(tp->fastopen_rsk, req);
	tcp_rsk(req)->tfo_listener = true;

	/* RFC1323: The window in SYN & SYN/ACK segments is never
	 * scaled. So correct it appropriately.
	 */
	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
	tp->max_window = tp->snd_wnd;

	/* Activate the retrans timer so that SYNACK can be retransmitted.
	 * The request socket is not added to the ehash
	 * because it's been added to the accept queue directly.
	 */
	req->timeout = tcp_timeout_init(child);
	tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS,
			     req->timeout, false);

	refcount_set(&req->rsk_refcnt, 2);

	sk_mark_napi_id_set(child, skb);

	/* Now finish processing the fastopen child socket. */
	tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb);

	tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;

	tcp_fastopen_add_skb(child, skb);

	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
	tp->rcv_wup = tp->rcv_nxt;
	/* tcp_conn_request() will send the SYNACK and queue the child
	 * into the listener's accept queue.
	 */
	return child;
}

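/* A hedged sketch of how a server typically enables TFO on a listener
 * and sets the fastopenq.max_qlen checked below (the queue length value
 * is illustrative):
 *
 *	int qlen = 128;
 *
 *	setsockopt(listen_fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
 *
 * Server-side TFO must also be allowed globally; see the
 * TFO_SERVER_ENABLE check against the net.ipv4.tcp_fastopen sysctl in
 * tcp_try_fastopen() below.
 */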
static bool tcp_fastopen_queue_check(struct sock *sk)
{
	struct fastopen_queue *fastopenq;
	int max_qlen;

	/* Make sure the listener has enabled fastopen, and we don't
	 * exceed the max # of pending TFO requests allowed before trying
	 * to validate the cookie, in order to avoid burning CPU cycles
	 * unnecessarily.
	 *
	 * XXX (TFO) - The implication of checking the max_qlen before
	 * processing a cookie request is that clients can't differentiate
	 * between qlen overflow causing Fast Open to be disabled
	 * temporarily vs a server not supporting Fast Open at all.
	 */
	fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
	max_qlen = READ_ONCE(fastopenq->max_qlen);
	if (max_qlen == 0)
		return false;

	if (fastopenq->qlen >= max_qlen) {
		struct request_sock *req1;

		spin_lock(&fastopenq->lock);
		req1 = fastopenq->rskq_rst_head;
		if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) {
			__NET_INC_STATS(sock_net(sk),
					LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
			spin_unlock(&fastopenq->lock);
			return false;
		}
		fastopenq->rskq_rst_head = req1->dl_next;
		fastopenq->qlen--;
		spin_unlock(&fastopenq->lock);
		reqsk_put(req1);
	}
	return true;
}

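/* True if a cookie is not required: allowed globally by the sysctl bit
 * in @flag, per socket via tcp_sk(sk)->fastopen_no_cookie, or per route
 * via the RTAX_FASTOPEN_NO_COOKIE metric.
 */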
static bool tcp_fastopen_no_cookie(const struct sock *sk,
				   const struct dst_entry *dst,
				   int flag)
{
	return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
	       tcp_sk(sk)->fastopen_no_cookie ||
	       (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}

/* Returns the child socket if we perform Fast Open on the SYN, or NULL
 * otherwise. The cookie (foc) may be updated and returned to the client
 * in the SYN-ACK later, e.g. for a Fast Open cookie request
 * (foc->len == 0).
 */
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      const struct dst_entry *dst)
{
	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
	int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
	struct sock *child;
	int ret = 0;

	if (foc->len == 0) /* Client requests a cookie */
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);

	if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
	      (syn_data || foc->len >= 0) &&
	      tcp_fastopen_queue_check(sk))) {
		foc->len = -1;
		return NULL;
	}

	if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
		goto fastopen;

	if (foc->len == 0) {
		/* Client requests a cookie. */
		tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
	} else if (foc->len > 0) {
		ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
						    &valid_foc);
		if (!ret) {
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		} else {
			/* Cookie is valid. Create a (full) child socket to
			 * accept the data in SYN before returning a SYN-ACK to
			 * ack the data. If we fail to create the socket, fall
			 * back and ack only the ISN, but include the same
			 * cookie.
			 *
			 * Note: Data-less SYN with valid cookie is allowed to
			 * send data in SYN_RECV state.
			 */
fastopen:
			child = tcp_fastopen_create_child(sk, skb, req);
			if (child) {
				if (ret == 2) {
					valid_foc.exp = foc->exp;
					*foc = valid_foc;
					NET_INC_STATS(sock_net(sk),
						      LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
				} else {
					foc->len = -1;
				}
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPFASTOPENPASSIVE);
				tcp_sk(child)->syn_fastopen_child = 1;
				return child;
			}
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
		}
	}
	valid_foc.exp = foc->exp;
	*foc = valid_foc;
	return NULL;
}

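/* A hedged client-side sketch (MSG_FASTOPEN is assumed; buffers and
 * addresses are illustrative):
 *
 *	sendto(fd, buf, len, MSG_FASTOPEN,
 *	       (struct sockaddr *)&daddr, sizeof(daddr));
 *
 * sends a SYN carrying data plus either a cached cookie or a cookie
 * request, which is what tcp_try_fastopen() above receives. The
 * TCP_FASTOPEN_CONNECT socket option is the alternative entry point and
 * leads into tcp_fastopen_defer_connect() further below.
 */

/* Client side: check whether we currently hold what we need (a cached
 * cookie from the TCP metrics cache, or permission to go cookie-less)
 * to send data in the SYN.
 */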
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
			       struct tcp_fastopen_cookie *cookie)
{
	const struct dst_entry *dst;

	tcp_fastopen_cache_get(sk, mss, cookie);

	/* Firewall blackhole issue check */
	if (tcp_fastopen_active_should_disable(sk)) {
		cookie->len = -1;
		return false;
	}

	dst = __sk_dst_get(sk);

	if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
		cookie->len = -1;
		return true;
	}
	if (cookie->len > 0)
		return true;
	tcp_sk(sk)->fastopen_client_fail = TFO_COOKIE_UNAVAILABLE;
	return false;
}

/* This function checks if we want to defer sending SYN until the first
 * write().  We defer under the following conditions:
 * 1. fastopen_connect sockopt is set
 * 2. we have a valid cookie
 * Return value: return true if we want to defer until application writes data
 *               return false if we want to send out SYN immediately
 */
bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
{
	struct tcp_fastopen_cookie cookie = { .len = 0 };
	struct tcp_sock *tp = tcp_sk(sk);
	u16 mss;

	if (tp->fastopen_connect && !tp->fastopen_req) {
		if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
			inet_set_bit(DEFER_CONNECT, sk);
			return true;
		}

		/* Alloc fastopen_req in order for FO option to be included
		 * in SYN
		 */
		tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
					   sk->sk_allocation);
		if (tp->fastopen_req)
			tp->fastopen_req->cookie = cookie;
		else
			*err = -ENOBUFS;
	}
	return false;
}
EXPORT_IPV6_MOD(tcp_fastopen_defer_connect);

/*
 * The following code block is to deal with middlebox issues with TFO:
 * Middlebox firewall issues can potentially cause the server's data to be
 * blackholed after a successful 3WHS using TFO.
 * The proposed solution is to disable active TFO globally under the
 * following circumstances:
 *   1. client side TFO socket receives out of order FIN
 *   2. client side TFO socket receives out of order RST
 *   3. client side TFO socket has timed out three times consecutively during
 *      or after handshake
 * We disable active side TFO globally for 1hr at first. Then if it
 * happens again, we disable it for 2h, then 4h, 8h, ...
 * And we reset the timeout back to 1hr when we see a successful active
 * TFO connection with data exchanges.
 */

/* Disable active TFO; record the current time (jiffies) and bump
 * tfo_active_disable_times.
 */
void tcp_fastopen_active_disable(struct sock *sk)
{
	struct net *net = sock_net(sk);

	if (!READ_ONCE(net->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
		return;

	/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
	WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies);

	/* Paired with smp_rmb() in tcp_fastopen_active_should_disable().
	 * We want net->ipv4.tfo_active_disable_stamp to be updated first.
	 */
	smp_mb__before_atomic();
	atomic_inc(&net->ipv4.tfo_active_disable_times);

	NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}

/* Calculate the timeout for TFO active disable.
 * Return true if we are still in the active TFO disable period.
 * Return false if the timeout has already expired and active TFO may be
 * used again.
 */
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
	unsigned int tfo_bh_timeout =
		READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
	unsigned long timeout;
	int tfo_da_times;
	int multiplier;

	if (!tfo_bh_timeout)
		return false;

	tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
	if (!tfo_da_times)
		return false;

	/* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */
	smp_rmb();

	/* Limit timeout to max: 2^6 * initial timeout */
	multiplier = 1 << min(tfo_da_times - 1, 6);

	/* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */
	timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) +
		  multiplier * tfo_bh_timeout * HZ;
	if (time_before(jiffies, timeout))
		return true;

	/* Mark check bit so we can check for successful active TFO
	 * condition and reset tfo_active_disable_times
	 */
	tcp_sk(sk)->syn_fastopen_ch = 1;
	return false;
}

/* Disable active TFO if FIN is the only packet in the ofo queue
 * and no data is received.
 * Also check if we can reset tfo_active_disable_times if data is
 * received successfully on a marked active TFO socket opened on
 * a non-loopback interface.
 */
void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct net_device *dev;
	struct dst_entry *dst;
	struct sk_buff *skb;

	if (!tp->syn_fastopen)
		return;

	if (!tp->data_segs_in) {
		skb = skb_rb_first(&tp->out_of_order_queue);
		if (skb && !skb_rb_next(skb)) {
			if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
				tcp_fastopen_active_disable(sk);
				return;
			}
		}
	} else if (tp->syn_fastopen_ch &&
		   atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
		rcu_read_lock();
		dst = __sk_dst_get(sk);
		dev = dst ? dst_dev_rcu(dst) : NULL;
		if (!(dev && (dev->flags & IFF_LOOPBACK)))
			atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
		rcu_read_unlock();
	}
}

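/* Called from the retransmit timeout path: if a Fast Open connection
 * keeps timing out during or after the handshake, assume a blackholing
 * middlebox and pause active TFO globally.
 */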
void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired)
{
	u32 timeouts = inet_csk(sk)->icsk_retransmits;
	struct tcp_sock *tp = tcp_sk(sk);

	/* Broken middle-boxes may black-hole a Fast Open connection during
	 * or even after the handshake. Be extremely conservative and pause
	 * Fast Open globally after hitting the third consecutive timeout or
	 * exceeding the configured timeout limit.
	 */
	if ((tp->syn_fastopen || tp->syn_data || tp->syn_data_acked) &&
	    (timeouts == 2 || (timeouts < 2 && expired))) {
		tcp_fastopen_active_disable(sk);
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL);
	}
}