| 1 | // SPDX-License-Identifier: GPL-2.0-or-later | 
|---|
| 2 | /* | 
|---|
| 3 | *  Syncookies implementation for the Linux kernel | 
|---|
| 4 | * | 
|---|
| 5 | *  Copyright (C) 1997 Andi Kleen | 
|---|
| 6 | *  Based on ideas by D.J.Bernstein and Eric Schenk. | 
|---|
| 7 | */ | 
|---|
| 8 |  | 
|---|
| 9 | #include <linux/tcp.h> | 
|---|
| 10 | #include <linux/siphash.h> | 
|---|
| 11 | #include <linux/kernel.h> | 
|---|
| 12 | #include <linux/export.h> | 
|---|
| 13 | #include <net/secure_seq.h> | 
|---|
| 14 | #include <net/tcp.h> | 
|---|
| 15 | #include <net/tcp_ecn.h> | 
|---|
| 16 | #include <net/route.h> | 
|---|
| 17 |  | 
|---|
| 18 | static siphash_aligned_key_t syncookie_secret[2]; | 
|---|
| 19 |  | 
|---|
| 20 | #define COOKIEBITS 24	/* Upper bits store count */ | 
|---|
| 21 | #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) | 
|---|
| 22 |  | 
|---|
| 23 | /* TCP Timestamp: 6 lowest bits of timestamp sent in the cookie SYN-ACK | 
|---|
| 24 | * stores TCP options: | 
|---|
| 25 | * | 
|---|
| 26 | * MSB                               LSB | 
|---|
| 27 | * | 31 ...   6 |  5  |  4   | 3 2 1 0 | | 
|---|
| 28 | * |  Timestamp | ECN | SACK | WScale  | | 
|---|
| 29 | * | 
|---|
| 30 | * When we receive a valid cookie-ACK, we look at the echoed tsval (if | 
|---|
| 31 | * any) to figure out which TCP options we should use for the rebuilt | 
|---|
| 32 | * connection. | 
|---|
| 33 | * | 
|---|
| 34 | * A WScale setting of '0xf' (which is an invalid scaling value) | 
|---|
| 35 | * means that original syn did not include the TCP window scaling option. | 
|---|
| 36 | */ | 
|---|
| 37 | #define TS_OPT_WSCALE_MASK	0xf | 
|---|
| 38 | #define TS_OPT_SACK		BIT(4) | 
|---|
| 39 | #define TS_OPT_ECN		BIT(5) | 
|---|
| 40 | /* There is no TS_OPT_TIMESTAMP: | 
|---|
| 41 | * if ACK contains timestamp option, we already know it was | 
|---|
| 42 | * requested/supported by the syn/synack exchange. | 
|---|
| 43 | */ | 
|---|
| 44 | #define TSBITS	6 | 
|---|
| 45 |  | 
|---|
| 46 | static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | 
|---|
| 47 | u32 count, int c) | 
|---|
| 48 | { | 
|---|
| 49 | net_get_random_once(syncookie_secret, sizeof(syncookie_secret)); | 
|---|
| 50 | return siphash_4u32(a: (__force u32)saddr, b: (__force u32)daddr, | 
|---|
| 51 | c: (__force u32)sport << 16 | (__force u32)dport, | 
|---|
| 52 | d: count, key: &syncookie_secret[c]); | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | /* | 
|---|
| 56 | * when syncookies are in effect and tcp timestamps are enabled we encode | 
|---|
| 57 | * tcp options in the lower bits of the timestamp value that will be | 
|---|
| 58 | * sent in the syn-ack. | 
|---|
| 59 | * Since subsequent timestamps use the normal tcp_time_stamp value, we | 
|---|
| 60 | * must make sure that the resulting initial timestamp is <= tcp_time_stamp. | 
|---|
| 61 | */ | 
|---|
| 62 | u64 cookie_init_timestamp(struct request_sock *req, u64 now) | 
|---|
| 63 | { | 
|---|
| 64 | const struct inet_request_sock *ireq = inet_rsk(sk: req); | 
|---|
| 65 | u64 ts, ts_now = tcp_ns_to_ts(usec_ts: false, val: now); | 
|---|
| 66 | u32 options = 0; | 
|---|
| 67 |  | 
|---|
| 68 | options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK; | 
|---|
| 69 | if (ireq->sack_ok) | 
|---|
| 70 | options |= TS_OPT_SACK; | 
|---|
| 71 | if (ireq->ecn_ok) | 
|---|
| 72 | options |= TS_OPT_ECN; | 
|---|
| 73 |  | 
|---|
| 74 | ts = (ts_now >> TSBITS) << TSBITS; | 
|---|
| 75 | ts |= options; | 
|---|
| 76 | if (ts > ts_now) | 
|---|
| 77 | ts -= (1UL << TSBITS); | 
|---|
| 78 |  | 
|---|
| 79 | if (tcp_rsk(req)->req_usec_ts) | 
|---|
| 80 | return ts * NSEC_PER_USEC; | 
|---|
| 81 | return ts * NSEC_PER_MSEC; | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 |  | 
|---|
| 85 | static __u32 secure_tcp_syn_cookie(__be32 saddr, __be32 daddr, __be16 sport, | 
|---|
| 86 | __be16 dport, __u32 sseq, __u32 data) | 
|---|
| 87 | { | 
|---|
| 88 | /* | 
|---|
| 89 | * Compute the secure sequence number. | 
|---|
| 90 | * The output should be: | 
|---|
| 91 | *   HASH(sec1,saddr,sport,daddr,dport,sec1) + sseq + (count * 2^24) | 
|---|
| 92 | *      + (HASH(sec2,saddr,sport,daddr,dport,count,sec2) % 2^24). | 
|---|
| 93 | * Where sseq is their sequence number and count increases every | 
|---|
| 94 | * minute by 1. | 
|---|
| 95 | * As an extra hack, we add a small "data" value that encodes the | 
|---|
| 96 | * MSS into the second hash value. | 
|---|
| 97 | */ | 
|---|
| 98 | u32 count = tcp_cookie_time(); | 
|---|
| 99 | return (cookie_hash(saddr, daddr, sport, dport, count: 0, c: 0) + | 
|---|
| 100 | sseq + (count << COOKIEBITS) + | 
|---|
| 101 | ((cookie_hash(saddr, daddr, sport, dport, count, c: 1) + data) | 
|---|
| 102 | & COOKIEMASK)); | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 | /* | 
|---|
| 106 | * This retrieves the small "data" value from the syncookie. | 
|---|
| 107 | * If the syncookie is bad, the data returned will be out of | 
|---|
| 108 | * range.  This must be checked by the caller. | 
|---|
| 109 | * | 
|---|
| 110 | * The count value used to generate the cookie must be less than | 
|---|
| 111 | * MAX_SYNCOOKIE_AGE minutes in the past. | 
|---|
| 112 | * The return value (__u32)-1 if this test fails. | 
|---|
| 113 | */ | 
|---|
| 114 | static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, | 
|---|
| 115 | __be16 sport, __be16 dport, __u32 sseq) | 
|---|
| 116 | { | 
|---|
| 117 | u32 diff, count = tcp_cookie_time(); | 
|---|
| 118 |  | 
|---|
| 119 | /* Strip away the layers from the cookie */ | 
|---|
| 120 | cookie -= cookie_hash(saddr, daddr, sport, dport, count: 0, c: 0) + sseq; | 
|---|
| 121 |  | 
|---|
| 122 | /* Cookie is now reduced to (count * 2^24) ^ (hash % 2^24) */ | 
|---|
| 123 | diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); | 
|---|
| 124 | if (diff >= MAX_SYNCOOKIE_AGE) | 
|---|
| 125 | return (__u32)-1; | 
|---|
| 126 |  | 
|---|
| 127 | return (cookie - | 
|---|
| 128 | cookie_hash(saddr, daddr, sport, dport, count: count - diff, c: 1)) | 
|---|
| 129 | & COOKIEMASK;	/* Leaving the data behind */ | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 | /* | 
|---|
| 133 | * MSS Values are chosen based on the 2011 paper | 
|---|
| 134 | * 'An Analysis of TCP Maximum Segement Sizes' by S. Alcock and R. Nelson. | 
|---|
| 135 | * Values .. | 
|---|
| 136 | *  .. lower than 536 are rare (< 0.2%) | 
|---|
| 137 | *  .. between 537 and 1299 account for less than < 1.5% of observed values | 
|---|
| 138 | *  .. in the 1300-1349 range account for about 15 to 20% of observed mss values | 
|---|
| 139 | *  .. exceeding 1460 are very rare (< 0.04%) | 
|---|
| 140 | * | 
|---|
| 141 | *  1460 is the single most frequently announced mss value (30 to 46% depending | 
|---|
| 142 | *  on monitor location).  Table must be sorted. | 
|---|
| 143 | */ | 
|---|
| 144 | static __u16 const msstab[] = { | 
|---|
| 145 | 536, | 
|---|
| 146 | 1300, | 
|---|
| 147 | 1440,	/* 1440, 1452: PPPoE */ | 
|---|
| 148 | 1460, | 
|---|
| 149 | }; | 
|---|
| 150 |  | 
|---|
| 151 | /* | 
|---|
| 152 | * Generate a syncookie.  mssp points to the mss, which is returned | 
|---|
| 153 | * rounded down to the value encoded in the cookie. | 
|---|
| 154 | */ | 
|---|
| 155 | u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, | 
|---|
| 156 | u16 *mssp) | 
|---|
| 157 | { | 
|---|
| 158 | int mssind; | 
|---|
| 159 | const __u16 mss = *mssp; | 
|---|
| 160 |  | 
|---|
| 161 | for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) | 
|---|
| 162 | if (mss >= msstab[mssind]) | 
|---|
| 163 | break; | 
|---|
| 164 | *mssp = msstab[mssind]; | 
|---|
| 165 |  | 
|---|
| 166 | return secure_tcp_syn_cookie(saddr: iph->saddr, daddr: iph->daddr, | 
|---|
| 167 | sport: th->source, dport: th->dest, ntohl(th->seq), | 
|---|
| 168 | data: mssind); | 
|---|
| 169 | } | 
|---|
| 170 | EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); | 
|---|
| 171 |  | 
|---|
| 172 | __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) | 
|---|
| 173 | { | 
|---|
| 174 | const struct iphdr *iph = ip_hdr(skb); | 
|---|
| 175 | const struct tcphdr *th = tcp_hdr(skb); | 
|---|
| 176 |  | 
|---|
| 177 | return __cookie_v4_init_sequence(iph, th, mssp); | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | /* | 
|---|
| 181 | * Check if a ack sequence number is a valid syncookie. | 
|---|
| 182 | * Return the decoded mss if it is, or 0 if not. | 
|---|
| 183 | */ | 
|---|
| 184 | int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th) | 
|---|
| 185 | { | 
|---|
| 186 | __u32 cookie = ntohl(th->ack_seq) - 1; | 
|---|
| 187 | __u32 seq = ntohl(th->seq) - 1; | 
|---|
| 188 | __u32 mssind; | 
|---|
| 189 |  | 
|---|
| 190 | mssind = check_tcp_syn_cookie(cookie, saddr: iph->saddr, daddr: iph->daddr, | 
|---|
| 191 | sport: th->source, dport: th->dest, sseq: seq); | 
|---|
| 192 |  | 
|---|
| 193 | return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; | 
|---|
| 194 | } | 
|---|
| 195 | EXPORT_SYMBOL_GPL(__cookie_v4_check); | 
|---|
| 196 |  | 
|---|
| 197 | struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, | 
|---|
| 198 | struct request_sock *req, | 
|---|
| 199 | struct dst_entry *dst) | 
|---|
| 200 | { | 
|---|
| 201 | struct inet_connection_sock *icsk = inet_csk(sk); | 
|---|
| 202 | struct sock *child; | 
|---|
| 203 | bool own_req; | 
|---|
| 204 |  | 
|---|
| 205 | child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, | 
|---|
| 206 | NULL, &own_req); | 
|---|
| 207 | if (child) { | 
|---|
| 208 | refcount_set(r: &req->rsk_refcnt, n: 1); | 
|---|
| 209 | sock_rps_save_rxhash(sk: child, skb); | 
|---|
| 210 |  | 
|---|
| 211 | if (rsk_drop_req(req)) { | 
|---|
| 212 | reqsk_put(req); | 
|---|
| 213 | return child; | 
|---|
| 214 | } | 
|---|
| 215 |  | 
|---|
| 216 | if (inet_csk_reqsk_queue_add(sk, req, child)) | 
|---|
| 217 | return child; | 
|---|
| 218 |  | 
|---|
| 219 | bh_unlock_sock(child); | 
|---|
| 220 | sock_put(sk: child); | 
|---|
| 221 | } | 
|---|
| 222 | __reqsk_free(req); | 
|---|
| 223 |  | 
|---|
| 224 | return NULL; | 
|---|
| 225 | } | 
|---|
| 226 | EXPORT_IPV6_MOD(tcp_get_cookie_sock); | 
|---|
| 227 |  | 
|---|
| 228 | /* | 
|---|
| 229 | * when syncookies are in effect and tcp timestamps are enabled we stored | 
|---|
| 230 | * additional tcp options in the timestamp. | 
|---|
| 231 | * This extracts these options from the timestamp echo. | 
|---|
| 232 | * | 
|---|
| 233 | * return false if we decode a tcp option that is disabled | 
|---|
| 234 | * on the host. | 
|---|
| 235 | */ | 
|---|
| 236 | bool cookie_timestamp_decode(const struct net *net, | 
|---|
| 237 | struct tcp_options_received *tcp_opt) | 
|---|
| 238 | { | 
|---|
| 239 | /* echoed timestamp, lowest bits contain options */ | 
|---|
| 240 | u32 options = tcp_opt->rcv_tsecr; | 
|---|
| 241 |  | 
|---|
| 242 | if (!tcp_opt->saw_tstamp)  { | 
|---|
| 243 | tcp_clear_options(rx_opt: tcp_opt); | 
|---|
| 244 | return true; | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 | if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps)) | 
|---|
| 248 | return false; | 
|---|
| 249 |  | 
|---|
| 250 | tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0; | 
|---|
| 251 |  | 
|---|
| 252 | if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack)) | 
|---|
| 253 | return false; | 
|---|
| 254 |  | 
|---|
| 255 | if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK) | 
|---|
| 256 | return true; /* no window scaling */ | 
|---|
| 257 |  | 
|---|
| 258 | tcp_opt->wscale_ok = 1; | 
|---|
| 259 | tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK; | 
|---|
| 260 |  | 
|---|
| 261 | return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; | 
|---|
| 262 | } | 
|---|
| 263 | EXPORT_IPV6_MOD(cookie_timestamp_decode); | 
|---|
| 264 |  | 
|---|
| 265 | static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, | 
|---|
| 266 | struct request_sock *req) | 
|---|
| 267 | { | 
|---|
| 268 | struct inet_request_sock *ireq = inet_rsk(sk: req); | 
|---|
| 269 | struct tcp_request_sock *treq = tcp_rsk(req); | 
|---|
| 270 | const struct tcphdr *th = tcp_hdr(skb); | 
|---|
| 271 |  | 
|---|
| 272 | req->num_retrans = 0; | 
|---|
| 273 |  | 
|---|
| 274 | ireq->ir_num = ntohs(th->dest); | 
|---|
| 275 | ireq->ir_rmt_port = th->source; | 
|---|
| 276 | ireq->ir_iif = inet_request_bound_dev_if(sk, skb); | 
|---|
| 277 | ireq->ir_mark = inet_request_mark(sk, skb); | 
|---|
| 278 |  | 
|---|
| 279 | if (IS_ENABLED(CONFIG_SMC)) | 
|---|
| 280 | ireq->smc_ok = 0; | 
|---|
| 281 |  | 
|---|
| 282 | treq->snt_synack = 0; | 
|---|
| 283 | treq->snt_tsval_first = 0; | 
|---|
| 284 | treq->tfo_listener = false; | 
|---|
| 285 | treq->txhash = net_tx_rndhash(); | 
|---|
| 286 | treq->rcv_isn = ntohl(th->seq) - 1; | 
|---|
| 287 | treq->snt_isn = ntohl(th->ack_seq) - 1; | 
|---|
| 288 | treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; | 
|---|
| 289 | treq->req_usec_ts = false; | 
|---|
| 290 |  | 
|---|
| 291 | #if IS_ENABLED(CONFIG_MPTCP) | 
|---|
| 292 | treq->is_mptcp = sk_is_mptcp(sk); | 
|---|
| 293 | if (treq->is_mptcp) | 
|---|
| 294 | return mptcp_subflow_init_cookie_req(req, sk, skb); | 
|---|
| 295 | #endif | 
|---|
| 296 |  | 
|---|
| 297 | return 0; | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | #if IS_ENABLED(CONFIG_BPF) | 
|---|
| 301 | struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb) | 
|---|
| 302 | { | 
|---|
| 303 | struct request_sock *req = inet_reqsk(sk: skb->sk); | 
|---|
| 304 |  | 
|---|
| 305 | skb->sk = NULL; | 
|---|
| 306 | skb->destructor = NULL; | 
|---|
| 307 |  | 
|---|
| 308 | if (cookie_tcp_reqsk_init(sk, skb, req)) { | 
|---|
| 309 | reqsk_free(req); | 
|---|
| 310 | req = NULL; | 
|---|
| 311 | } | 
|---|
| 312 |  | 
|---|
| 313 | return req; | 
|---|
| 314 | } | 
|---|
| 315 | EXPORT_IPV6_MOD_GPL(cookie_bpf_check); | 
|---|
| 316 | #endif | 
|---|
| 317 |  | 
|---|
| 318 | struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, | 
|---|
| 319 | struct sock *sk, struct sk_buff *skb, | 
|---|
| 320 | struct tcp_options_received *tcp_opt, | 
|---|
| 321 | int mss, u32 tsoff) | 
|---|
| 322 | { | 
|---|
| 323 | struct inet_request_sock *ireq; | 
|---|
| 324 | struct tcp_request_sock *treq; | 
|---|
| 325 | struct request_sock *req; | 
|---|
| 326 |  | 
|---|
| 327 | if (sk_is_mptcp(sk)) | 
|---|
| 328 | req = mptcp_subflow_reqsk_alloc(ops, sk_listener: sk, attach_listener: false); | 
|---|
| 329 | else | 
|---|
| 330 | req = inet_reqsk_alloc(ops, sk_listener: sk, attach_listener: false); | 
|---|
| 331 |  | 
|---|
| 332 | if (!req) | 
|---|
| 333 | return NULL; | 
|---|
| 334 |  | 
|---|
| 335 | if (cookie_tcp_reqsk_init(sk, skb, req)) { | 
|---|
| 336 | reqsk_free(req); | 
|---|
| 337 | return NULL; | 
|---|
| 338 | } | 
|---|
| 339 |  | 
|---|
| 340 | ireq = inet_rsk(sk: req); | 
|---|
| 341 | treq = tcp_rsk(req); | 
|---|
| 342 |  | 
|---|
| 343 | req->mss = mss; | 
|---|
| 344 | req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0; | 
|---|
| 345 |  | 
|---|
| 346 | ireq->snd_wscale = tcp_opt->snd_wscale; | 
|---|
| 347 | ireq->tstamp_ok = tcp_opt->saw_tstamp; | 
|---|
| 348 | ireq->sack_ok = tcp_opt->sack_ok; | 
|---|
| 349 | ireq->wscale_ok = tcp_opt->wscale_ok; | 
|---|
| 350 | ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN); | 
|---|
| 351 |  | 
|---|
| 352 | treq->ts_off = tsoff; | 
|---|
| 353 |  | 
|---|
| 354 | return req; | 
|---|
| 355 | } | 
|---|
| 356 | EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc); | 
|---|
| 357 |  | 
|---|
| 358 | static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, | 
|---|
| 359 | struct sk_buff *skb) | 
|---|
| 360 | { | 
|---|
| 361 | struct tcp_options_received tcp_opt; | 
|---|
| 362 | u32 tsoff = 0; | 
|---|
| 363 | int mss; | 
|---|
| 364 |  | 
|---|
| 365 | if (tcp_synq_no_recent_overflow(sk)) | 
|---|
| 366 | goto out; | 
|---|
| 367 |  | 
|---|
| 368 | mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb)); | 
|---|
| 369 | if (!mss) { | 
|---|
| 370 | __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); | 
|---|
| 371 | goto out; | 
|---|
| 372 | } | 
|---|
| 373 |  | 
|---|
| 374 | __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); | 
|---|
| 375 |  | 
|---|
| 376 | /* check for timestamp cookie support */ | 
|---|
| 377 | memset(s: &tcp_opt, c: 0, n: sizeof(tcp_opt)); | 
|---|
| 378 | tcp_parse_options(net, skb, opt_rx: &tcp_opt, estab: 0, NULL); | 
|---|
| 379 |  | 
|---|
| 380 | if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { | 
|---|
| 381 | tsoff = secure_tcp_ts_off(net, | 
|---|
| 382 | saddr: ip_hdr(skb)->daddr, | 
|---|
| 383 | daddr: ip_hdr(skb)->saddr); | 
|---|
| 384 | tcp_opt.rcv_tsecr -= tsoff; | 
|---|
| 385 | } | 
|---|
| 386 |  | 
|---|
| 387 | if (!cookie_timestamp_decode(net, tcp_opt: &tcp_opt)) | 
|---|
| 388 | goto out; | 
|---|
| 389 |  | 
|---|
| 390 | return cookie_tcp_reqsk_alloc(ops: &tcp_request_sock_ops, sk, skb, | 
|---|
| 391 | tcp_opt: &tcp_opt, mss, tsoff); | 
|---|
| 392 | out: | 
|---|
| 393 | return ERR_PTR(error: -EINVAL); | 
|---|
| 394 | } | 
|---|
| 395 |  | 
|---|
| 396 | /* On input, sk is a listener. | 
|---|
| 397 | * Output is listener if incoming packet would not create a child | 
|---|
| 398 | *           NULL if memory could not be allocated. | 
|---|
| 399 | */ | 
|---|
| 400 | struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) | 
|---|
| 401 | { | 
|---|
| 402 | struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; | 
|---|
| 403 | const struct tcphdr *th = tcp_hdr(skb); | 
|---|
| 404 | struct tcp_sock *tp = tcp_sk(sk); | 
|---|
| 405 | struct inet_request_sock *ireq; | 
|---|
| 406 | struct net *net = sock_net(sk); | 
|---|
| 407 | struct tcp_request_sock *treq; | 
|---|
| 408 | struct request_sock *req; | 
|---|
| 409 | struct sock *ret = sk; | 
|---|
| 410 | struct flowi4 fl4; | 
|---|
| 411 | struct rtable *rt; | 
|---|
| 412 | __u8 rcv_wscale; | 
|---|
| 413 | int full_space; | 
|---|
| 414 | SKB_DR(reason); | 
|---|
| 415 |  | 
|---|
| 416 | if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || | 
|---|
| 417 | !th->ack || th->rst) | 
|---|
| 418 | goto out; | 
|---|
| 419 |  | 
|---|
| 420 | if (cookie_bpf_ok(skb)) { | 
|---|
| 421 | req = cookie_bpf_check(sk, skb); | 
|---|
| 422 | } else { | 
|---|
| 423 | req = cookie_tcp_check(net, sk, skb); | 
|---|
| 424 | if (IS_ERR(ptr: req)) | 
|---|
| 425 | goto out; | 
|---|
| 426 | } | 
|---|
| 427 | if (!req) { | 
|---|
| 428 | SKB_DR_SET(reason, NO_SOCKET); | 
|---|
| 429 | goto out_drop; | 
|---|
| 430 | } | 
|---|
| 431 |  | 
|---|
| 432 | ireq = inet_rsk(sk: req); | 
|---|
| 433 | treq = tcp_rsk(req); | 
|---|
| 434 |  | 
|---|
| 435 | sk_rcv_saddr_set(sk: req_to_sk(req), addr: ip_hdr(skb)->daddr); | 
|---|
| 436 | sk_daddr_set(sk: req_to_sk(req), addr: ip_hdr(skb)->saddr); | 
|---|
| 437 |  | 
|---|
| 438 | /* We throwed the options of the initial SYN away, so we hope | 
|---|
| 439 | * the ACK carries the same options again (see RFC1122 4.2.3.8) | 
|---|
| 440 | */ | 
|---|
| 441 | RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); | 
|---|
| 442 |  | 
|---|
| 443 | if (security_inet_conn_request(sk, skb, req)) { | 
|---|
| 444 | SKB_DR_SET(reason, SECURITY_HOOK); | 
|---|
| 445 | goto out_free; | 
|---|
| 446 | } | 
|---|
| 447 |  | 
|---|
| 448 | tcp_ao_syncookie(sk, skb, req, AF_INET); | 
|---|
| 449 |  | 
|---|
| 450 | /* | 
|---|
| 451 | * We need to lookup the route here to get at the correct | 
|---|
| 452 | * window size. We should better make sure that the window size | 
|---|
| 453 | * hasn't changed since we received the original syn, but I see | 
|---|
| 454 | * no easy way to do this. | 
|---|
| 455 | */ | 
|---|
| 456 | flowi4_init_output(fl4: &fl4, oif: ireq->ir_iif, mark: ireq->ir_mark, | 
|---|
| 457 | tos: ip_sock_rt_tos(sk), scope: ip_sock_rt_scope(sk), | 
|---|
| 458 | IPPROTO_TCP, flags: inet_sk_flowi_flags(sk), | 
|---|
| 459 | daddr: opt->srr ? opt->faddr : ireq->ir_rmt_addr, | 
|---|
| 460 | saddr: ireq->ir_loc_addr, dport: th->source, sport: th->dest, | 
|---|
| 461 | uid: sk_uid(sk)); | 
|---|
| 462 | security_req_classify_flow(req, flic: flowi4_to_flowi_common(fl4: &fl4)); | 
|---|
| 463 | rt = ip_route_output_key(net, flp: &fl4); | 
|---|
| 464 | if (IS_ERR(ptr: rt)) { | 
|---|
| 465 | SKB_DR_SET(reason, IP_OUTNOROUTES); | 
|---|
| 466 | goto out_free; | 
|---|
| 467 | } | 
|---|
| 468 |  | 
|---|
| 469 | /* Try to redo what tcp_v4_send_synack did. */ | 
|---|
| 470 | req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? : | 
|---|
| 471 | dst_metric(dst: &rt->dst, RTAX_WINDOW); | 
|---|
| 472 | /* limit the window selection if the user enforce a smaller rx buffer */ | 
|---|
| 473 | full_space = tcp_full_space(sk); | 
|---|
| 474 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && | 
|---|
| 475 | (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) | 
|---|
| 476 | req->rsk_window_clamp = full_space; | 
|---|
| 477 |  | 
|---|
| 478 | tcp_select_initial_window(sk, space: full_space, mss: req->mss, | 
|---|
| 479 | rcv_wnd: &req->rsk_rcv_wnd, window_clamp: &req->rsk_window_clamp, | 
|---|
| 480 | wscale_ok: ireq->wscale_ok, rcv_wscale: &rcv_wscale, | 
|---|
| 481 | init_rcv_wnd: dst_metric(dst: &rt->dst, RTAX_INITRWND)); | 
|---|
| 482 |  | 
|---|
| 483 | /* req->syncookie is set true only if ACK is validated | 
|---|
| 484 | * by BPF kfunc, then, rcv_wscale is already configured. | 
|---|
| 485 | */ | 
|---|
| 486 | if (!req->syncookie) | 
|---|
| 487 | ireq->rcv_wscale = rcv_wscale; | 
|---|
| 488 | ireq->ecn_ok &= cookie_ecn_ok(net, dst: &rt->dst); | 
|---|
| 489 | treq->accecn_ok = ireq->ecn_ok && cookie_accecn_ok(th); | 
|---|
| 490 |  | 
|---|
| 491 | ret = tcp_get_cookie_sock(sk, skb, req, dst: &rt->dst); | 
|---|
| 492 | /* ip_queue_xmit() depends on our flow being setup | 
|---|
| 493 | * Normal sockets get it right from inet_csk_route_child_sock() | 
|---|
| 494 | */ | 
|---|
| 495 | if (!ret) { | 
|---|
| 496 | SKB_DR_SET(reason, NO_SOCKET); | 
|---|
| 497 | goto out_drop; | 
|---|
| 498 | } | 
|---|
| 499 | inet_sk(ret)->cork.fl.u.ip4 = fl4; | 
|---|
| 500 | out: | 
|---|
| 501 | return ret; | 
|---|
| 502 | out_free: | 
|---|
| 503 | reqsk_free(req); | 
|---|
| 504 | out_drop: | 
|---|
| 505 | sk_skb_reason_drop(sk, skb, reason); | 
|---|
| 506 | return NULL; | 
|---|
| 507 | } | 
|---|
| 508 |  | 
|---|