1/* SPDX-License-Identifier: GPL-2.0-or-later */
2#ifndef _TCP_ECN_H
3#define _TCP_ECN_H
4
5#include <linux/tcp.h>
6#include <linux/skbuff.h>
7#include <linux/bitfield.h>
8
9#include <net/inet_connection_sock.h>
10#include <net/sock.h>
11#include <net/tcp.h>
12#include <net/inet_ecn.h>
13
/* Values compared against the tcp_ecn sysctl: the highest ECN variant
 * (Accurate ECN, classic ECN, or no ECN) that is attempted to be
 * negotiated for incoming connections (IN) and requested on outgoing
 * connections (OUT), respectively.
 */
enum tcp_ecn_mode {
	TCP_ECN_IN_NOECN_OUT_NOECN	= 0,	/* in: no ECN,  out: no ECN */
	TCP_ECN_IN_ECN_OUT_ECN		= 1,	/* in: ECN,     out: ECN    */
	TCP_ECN_IN_ECN_OUT_NOECN	= 2,	/* in: ECN,     out: no ECN */
	TCP_ECN_IN_ACCECN_OUT_ACCECN	= 3,	/* in: AccECN,  out: AccECN */
	TCP_ECN_IN_ACCECN_OUT_ECN	= 4,	/* in: AccECN,  out: ECN    */
	TCP_ECN_IN_ACCECN_OUT_NOECN	= 5,	/* in: AccECN,  out: no ECN */
};
26
/* Policy for sending the AccECN option once AccECN has been
 * successfully negotiated.
 */
enum tcp_accecn_option {
	TCP_ACCECN_OPTION_DISABLED	= 0,
	TCP_ACCECN_OPTION_MINIMUM	= 1,
	TCP_ACCECN_OPTION_FULL		= 2,
};
33
34static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
35{
36 /* Do not set CWR if in AccECN mode! */
37 if (tcp_ecn_mode_rfc3168(tp))
38 tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
39}
40
41static inline void tcp_ecn_accept_cwr(struct sock *sk,
42 const struct sk_buff *skb)
43{
44 struct tcp_sock *tp = tcp_sk(sk);
45
46 if (tcp_ecn_mode_rfc3168(tp) && tcp_hdr(skb)->cwr) {
47 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
48
49 /* If the sender is telling us it has entered CWR, then its
50 * cwnd may be very low (even just 1 packet), so we should ACK
51 * immediately.
52 */
53 if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
54 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
55 }
56}
57
58static inline void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
59{
60 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
61}
62
/* Bit flags kept in tp->accecn_fail_mode. They are OR-ed in by
 * tcp_accecn_fail_mode_set() and tested by the tcp_accecn_*_fail_*()
 * helpers. The ACE/OPT and SEND/RECV parts of each name presumably
 * identify the ACE field vs. the AccECN option and the direction that
 * failed. NOTE(review): confirm against the users of these flags.
 */
#define TCP_ACCECN_ACE_FAIL_SEND	BIT(0)
#define TCP_ACCECN_ACE_FAIL_RECV	BIT(1)
#define TCP_ACCECN_OPT_FAIL_SEND	BIT(2)
#define TCP_ACCECN_OPT_FAIL_RECV	BIT(3)
68
69static inline bool tcp_accecn_ace_fail_send(const struct tcp_sock *tp)
70{
71 return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_SEND;
72}
73
74static inline bool tcp_accecn_ace_fail_recv(const struct tcp_sock *tp)
75{
76 return tp->accecn_fail_mode & TCP_ACCECN_ACE_FAIL_RECV;
77}
78
79static inline bool tcp_accecn_opt_fail_send(const struct tcp_sock *tp)
80{
81 return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_SEND;
82}
83
84static inline bool tcp_accecn_opt_fail_recv(const struct tcp_sock *tp)
85{
86 return tp->accecn_fail_mode & TCP_ACCECN_OPT_FAIL_RECV;
87}
88
89static inline void tcp_accecn_fail_mode_set(struct tcp_sock *tp, u8 mode)
90{
91 tp->accecn_fail_mode |= mode;
92}
93
/* Values stored in tp->saw_accecn_opt (see tcp_accecn_saw_opt_fail_recv())
 * and returned by tcp_accecn_option_init().
 */
#define TCP_ACCECN_OPT_NOT_SEEN		0x0
#define TCP_ACCECN_OPT_EMPTY_SEEN	0x1
#define TCP_ACCECN_OPT_COUNTER_SEEN	0x2
#define TCP_ACCECN_OPT_FAIL_SEEN	0x3
98
99static inline u8 tcp_accecn_ace(const struct tcphdr *th)
100{
101 return (th->ae << 2) | (th->cwr << 1) | th->ece;
102}
103
104/* Infer the ECT value our SYN arrived with from the echoed ACE field */
105static inline int tcp_accecn_extract_syn_ect(u8 ace)
106{
107 /* Below is an excerpt from the 1st block of Table 2 of AccECN spec */
108 static const int ace_to_ecn[8] = {
109 INET_ECN_ECT_0, /* 0b000 (Undefined) */
110 INET_ECN_ECT_1, /* 0b001 (Undefined) */
111 INET_ECN_NOT_ECT, /* 0b010 (Not-ECT is received) */
112 INET_ECN_ECT_1, /* 0b011 (ECT-1 is received) */
113 INET_ECN_ECT_0, /* 0b100 (ECT-0 is received) */
114 INET_ECN_ECT_1, /* 0b101 (Reserved) */
115 INET_ECN_CE, /* 0b110 (CE is received) */
116 INET_ECN_ECT_1 /* 0b111 (Undefined) */
117 };
118
119 return ace_to_ecn[ace & 0x7];
120}
121
122/* Check ECN field transition to detect invalid transitions */
123static inline bool tcp_ect_transition_valid(u8 snt, u8 rcv)
124{
125 if (rcv == snt)
126 return true;
127
128 /* Non-ECT altered to something or something became non-ECT */
129 if (snt == INET_ECN_NOT_ECT || rcv == INET_ECN_NOT_ECT)
130 return false;
131 /* CE -> ECT(0/1)? */
132 if (snt == INET_ECN_CE)
133 return false;
134 return true;
135}
136
137static inline bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace,
138 u8 sent_ect)
139{
140 u8 ect = tcp_accecn_extract_syn_ect(ace);
141 struct tcp_sock *tp = tcp_sk(sk);
142
143 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
144 return true;
145
146 if (!tcp_ect_transition_valid(snt: sent_ect, rcv: ect)) {
147 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
148 return false;
149 }
150
151 return true;
152}
153
154static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp,
155 u8 saw_opt)
156{
157 tp->saw_accecn_opt = saw_opt;
158 if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN)
159 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV);
160}
161
162/* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */
163static inline void tcp_accecn_third_ack(struct sock *sk,
164 const struct sk_buff *skb, u8 sent_ect)
165{
166 u8 ace = tcp_accecn_ace(th: tcp_hdr(skb));
167 struct tcp_sock *tp = tcp_sk(sk);
168
169 switch (ace) {
170 case 0x0:
171 /* Invalid value */
172 tcp_accecn_fail_mode_set(tp, TCP_ACCECN_ACE_FAIL_RECV);
173 break;
174 case 0x7:
175 case 0x5:
176 case 0x1:
177 /* Unused but legal values */
178 break;
179 default:
180 /* Validation only applies to first non-data packet */
181 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
182 !TCP_SKB_CB(skb)->sacked &&
183 tcp_accecn_validate_syn_feedback(sk, ace, sent_ect)) {
184 if ((tcp_accecn_extract_syn_ect(ace) == INET_ECN_CE) &&
185 !tp->delivered_ce)
186 tp->delivered_ce++;
187 }
188 break;
189 }
190}
191
192/* Demand the minimum # to send AccECN optnio */
193static inline void tcp_accecn_opt_demand_min(struct sock *sk,
194 u8 opt_demand_min)
195{
196 struct tcp_sock *tp = tcp_sk(sk);
197 u8 opt_demand;
198
199 opt_demand = max_t(u8, opt_demand_min, tp->accecn_opt_demand);
200 tp->accecn_opt_demand = opt_demand;
201}
202
203/* Maps IP ECN field ECT/CE code point to AccECN option field number, given
204 * we are sending fields with Accurate ECN Order 1: ECT(1), CE, ECT(0).
205 */
206static inline u8 tcp_ecnfield_to_accecn_optfield(u8 ecnfield)
207{
208 switch (ecnfield & INET_ECN_MASK) {
209 case INET_ECN_NOT_ECT:
210 return 0; /* AccECN does not send counts of NOT_ECT */
211 case INET_ECN_ECT_1:
212 return 1;
213 case INET_ECN_CE:
214 return 2;
215 case INET_ECN_ECT_0:
216 return 3;
217 }
218 return 0;
219}
220
221/* Maps IP ECN field ECT/CE code point to AccECN option field value offset.
222 * Some fields do not start from zero, to detect zeroing by middleboxes.
223 */
224static inline u32 tcp_accecn_field_init_offset(u8 ecnfield)
225{
226 switch (ecnfield & INET_ECN_MASK) {
227 case INET_ECN_NOT_ECT:
228 return 0; /* AccECN does not send counts of NOT_ECT */
229 case INET_ECN_ECT_1:
230 return TCP_ACCECN_E1B_INIT_OFFSET;
231 case INET_ECN_CE:
232 return TCP_ACCECN_CEB_INIT_OFFSET;
233 case INET_ECN_ECT_0:
234 return TCP_ACCECN_E0B_INIT_OFFSET;
235 }
236 return 0;
237}
238
239/* Maps AccECN option field #nr to IP ECN field ECT/CE bits */
240static inline unsigned int tcp_accecn_optfield_to_ecnfield(unsigned int option,
241 bool order)
242{
243 /* Based on Table 5 of the AccECN spec to map (option, order) to
244 * the corresponding ECN conuters (ECT-1, ECT-0, or CE).
245 */
246 static const u8 optfield_lookup[2][3] = {
247 /* order = 0: 1st field ECT-0, 2nd field CE, 3rd field ECT-1 */
248 { INET_ECN_ECT_0, INET_ECN_CE, INET_ECN_ECT_1 },
249 /* order = 1: 1st field ECT-1, 2nd field CE, 3rd field ECT-0 */
250 { INET_ECN_ECT_1, INET_ECN_CE, INET_ECN_ECT_0 }
251 };
252
253 return optfield_lookup[order][option % 3];
254}
255
256/* Handles AccECN option ECT and CE 24-bit byte counters update into
257 * the u32 value in tcp_sock. As we're processing TCP options, it is
258 * safe to access from - 1.
259 */
260static inline s32 tcp_update_ecn_bytes(u32 *cnt, const char *from,
261 u32 init_offset)
262{
263 u32 truncated = (get_unaligned_be32(p: from - 1) - init_offset) &
264 0xFFFFFFU;
265 u32 delta = (truncated - *cnt) & 0xFFFFFFU;
266
267 /* If delta has the highest bit set (24th bit) indicating
268 * negative, sign extend to correct an estimation using
269 * sign_extend32(delta, 24 - 1)
270 */
271 delta = sign_extend32(value: delta, index: 23);
272 *cnt += delta;
273 return (s32)delta;
274}
275
276/* Updates Accurate ECN received counters from the received IP ECN field */
277static inline void tcp_ecn_received_counters(struct sock *sk,
278 const struct sk_buff *skb, u32 len)
279{
280 u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
281 u8 is_ce = INET_ECN_is_ce(dsfield: ecnfield);
282 struct tcp_sock *tp = tcp_sk(sk);
283 bool ecn_edge;
284
285 if (!INET_ECN_is_not_ect(dsfield: ecnfield)) {
286 u32 pcount = is_ce * max_t(u16, 1, skb_shinfo(skb)->gso_segs);
287
288 /* As for accurate ECN, the TCP_ECN_SEEN flag is set by
289 * tcp_ecn_received_counters() when the ECN codepoint of
290 * received TCP data or ACK contains ECT(0), ECT(1), or CE.
291 */
292 if (!tcp_ecn_mode_rfc3168(tp))
293 tp->ecn_flags |= TCP_ECN_SEEN;
294
295 /* ACE counter tracks *all* segments including pure ACKs */
296 tp->received_ce += pcount;
297 tp->received_ce_pending = min(tp->received_ce_pending + pcount,
298 0xfU);
299
300 if (len > 0) {
301 u8 minlen = tcp_ecnfield_to_accecn_optfield(ecnfield);
302 u32 oldbytes = tp->received_ecn_bytes[ecnfield - 1];
303 u32 bytes_mask = GENMASK_U32(31, 22);
304
305 tp->received_ecn_bytes[ecnfield - 1] += len;
306 tp->accecn_minlen = max_t(u8, tp->accecn_minlen,
307 minlen);
308
309 /* Send AccECN option at least once per 2^22-byte
310 * increase in any ECN byte counter.
311 */
312 if ((tp->received_ecn_bytes[ecnfield - 1] ^ oldbytes) &
313 bytes_mask) {
314 tcp_accecn_opt_demand_min(sk, opt_demand_min: 1);
315 }
316 }
317 }
318
319 ecn_edge = tp->prev_ecnfield != ecnfield;
320 if (ecn_edge || is_ce) {
321 tp->prev_ecnfield = ecnfield;
322 /* Demand Accurate ECN change-triggered ACKs. Two ACK are
323 * demanded to indicate unambiguously the ecnfield value
324 * in the latter ACK.
325 */
326 if (tcp_ecn_mode_accecn(tp)) {
327 if (ecn_edge)
328 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
329 tp->accecn_opt_demand = 2;
330 }
331 }
332}
333
334/* AccECN specification, 2.2: [...] A Data Receiver maintains four counters
335 * initialized at the start of the half-connection. [...] These byte counters
336 * reflect only the TCP payload length, excluding TCP header and TCP options.
337 */
338static inline void tcp_ecn_received_counters_payload(struct sock *sk,
339 const struct sk_buff *skb)
340{
341 const struct tcphdr *th = (const struct tcphdr *)skb->data;
342
343 tcp_ecn_received_counters(sk, skb, len: skb->len - th->doff * 4);
344}
345
346/* AccECN specification, 5.1: [...] a server can determine that it
347 * negotiated AccECN as [...] if the ACK contains an ACE field with
348 * the value 0b010 to 0b111 (decimal 2 to 7).
349 */
350static inline bool cookie_accecn_ok(const struct tcphdr *th)
351{
352 return tcp_accecn_ace(th) > 0x1;
353}
354
355/* Used to form the ACE flags for SYN/ACK */
356static inline u16 tcp_accecn_reflector_flags(u8 ect)
357{
358 /* TCP ACE flags of SYN/ACK are set based on IP-ECN received from SYN.
359 * Below is an excerpt from the 1st block of Table 2 of AccECN spec,
360 * in which TCP ACE flags are encoded as: (AE << 2) | (CWR << 1) | ECE
361 */
362 static const u8 ecn_to_ace_flags[4] = {
363 0b010, /* Not-ECT is received */
364 0b011, /* ECT(1) is received */
365 0b100, /* ECT(0) is received */
366 0b110 /* CE is received */
367 };
368
369 return FIELD_PREP(TCPHDR_ACE, ecn_to_ace_flags[ect & 0x3]);
370}
371
372/* AccECN specification, 3.1.2: If a TCP server that implements AccECN
373 * receives a SYN with the three TCP header flags (AE, CWR and ECE) set
374 * to any combination other than 000, 011 or 111, it MUST negotiate the
375 * use of AccECN as if they had been set to 111.
376 */
377static inline bool tcp_accecn_syn_requested(const struct tcphdr *th)
378{
379 u8 ace = tcp_accecn_ace(th);
380
381 return ace && ace != 0x3;
382}
383
384static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
385{
386 BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
387 BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
388 BUILD_BUG_ON(INET_ECN_CE != 0x3);
389
390 counter_array[INET_ECN_ECT_1 - 1] = 0;
391 counter_array[INET_ECN_ECT_0 - 1] = 0;
392 counter_array[INET_ECN_CE - 1] = 0;
393}
394
395static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
396{
397 tp->received_ce = 0;
398 tp->received_ce_pending = 0;
399 __tcp_accecn_init_bytes_counters(counter_array: tp->received_ecn_bytes);
400 __tcp_accecn_init_bytes_counters(counter_array: tp->delivered_ecn_bytes);
401 tp->accecn_minlen = 0;
402 tp->accecn_opt_demand = 0;
403 tp->est_ecnfield = 0;
404}
405
406/* Used for make_synack to form the ACE flags */
407static inline void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect)
408{
409 /* TCP ACE flags of SYN/ACK are set based on IP-ECN codepoint received
410 * from SYN. Below is an excerpt from Table 2 of the AccECN spec:
411 * +====================+====================================+
412 * | IP-ECN codepoint | Respective ACE falgs on SYN/ACK |
413 * | received on SYN | AE CWR ECE |
414 * +====================+====================================+
415 * | Not-ECT | 0 1 0 |
416 * | ECT(1) | 0 1 1 |
417 * | ECT(0) | 1 0 0 |
418 * | CE | 1 1 0 |
419 * +====================+====================================+
420 */
421 th->ae = !!(ect & INET_ECN_ECT_0);
422 th->cwr = ect != INET_ECN_ECT_0;
423 th->ece = ect == INET_ECN_ECT_1;
424}
425
426static inline void tcp_accecn_set_ace(struct tcp_sock *tp, struct sk_buff *skb,
427 struct tcphdr *th)
428{
429 u32 wire_ace;
430
431 /* The final packet of the 3WHS or anything like it must reflect
432 * the SYN/ACK ECT instead of putting CEP into ACE field, such
433 * case show up in tcp_flags.
434 */
435 if (likely(!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACE))) {
436 wire_ace = tp->received_ce + TCP_ACCECN_CEP_INIT_OFFSET;
437 th->ece = !!(wire_ace & 0x1);
438 th->cwr = !!(wire_ace & 0x2);
439 th->ae = !!(wire_ace & 0x4);
440 tp->received_ce_pending = 0;
441 }
442}
443
444static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
445 u8 opt_offset)
446{
447 u8 *ptr = skb_transport_header(skb) + opt_offset;
448 unsigned int optlen = ptr[1] - 2;
449
450 if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1))
451 return TCP_ACCECN_OPT_FAIL_SEEN;
452 ptr += 2;
453
454 /* Detect option zeroing: an AccECN connection "MAY check that the
455 * initial value of the EE0B field or the EE1B field is non-zero"
456 */
457 if (optlen < TCPOLEN_ACCECN_PERFIELD)
458 return TCP_ACCECN_OPT_EMPTY_SEEN;
459 if (get_unaligned_be24(p: ptr) == 0)
460 return TCP_ACCECN_OPT_FAIL_SEEN;
461 if (optlen < TCPOLEN_ACCECN_PERFIELD * 3)
462 return TCP_ACCECN_OPT_COUNTER_SEEN;
463 ptr += TCPOLEN_ACCECN_PERFIELD * 2;
464 if (get_unaligned_be24(p: ptr) == 0)
465 return TCP_ACCECN_OPT_FAIL_SEEN;
466
467 return TCP_ACCECN_OPT_COUNTER_SEEN;
468}
469
470/* See Table 2 of the AccECN draft */
471static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
472 const struct tcphdr *th, u8 ip_dsfield)
473{
474 struct tcp_sock *tp = tcp_sk(sk);
475 u8 ace = tcp_accecn_ace(th);
476
477 switch (ace) {
478 case 0x0:
479 case 0x7:
480 /* +========+========+============+=============+
481 * | A | B | SYN/ACK | Feedback |
482 * | | | B->A | Mode of A |
483 * | | | AE CWR ECE | |
484 * +========+========+============+=============+
485 * | AccECN | No ECN | 0 0 0 | Not ECN |
486 * | AccECN | Broken | 1 1 1 | Not ECN |
487 * +========+========+============+=============+
488 */
489 tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
490 break;
491 case 0x1:
492 case 0x5:
493 /* +========+========+============+=============+
494 * | A | B | SYN/ACK | Feedback |
495 * | | | B->A | Mode of A |
496 * | | | AE CWR ECE | |
497 * +========+========+============+=============+
498 * | AccECN | Nonce | 1 0 1 | (Reserved) |
499 * | AccECN | ECN | 0 0 1 | Classic ECN |
500 * | Nonce | AccECN | 0 0 1 | Classic ECN |
501 * | ECN | AccECN | 0 0 1 | Classic ECN |
502 * +========+========+============+=============+
503 */
504 if (tcp_ecn_mode_pending(tp))
505 /* Downgrade from AccECN, or requested initially */
506 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
507 break;
508 default:
509 tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
510 tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
511 if (tp->rx_opt.accecn &&
512 tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
513 u8 saw_opt = tcp_accecn_option_init(skb, opt_offset: tp->rx_opt.accecn);
514
515 tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
516 tp->accecn_opt_demand = 2;
517 }
518 if (INET_ECN_is_ce(dsfield: ip_dsfield) &&
519 tcp_accecn_validate_syn_feedback(sk, ace,
520 sent_ect: tp->syn_ect_snt)) {
521 tp->received_ce++;
522 tp->received_ce_pending++;
523 }
524 break;
525 }
526}
527
528static inline void tcp_ecn_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th,
529 const struct sk_buff *skb)
530{
531 if (tcp_ecn_mode_pending(tp)) {
532 if (!tcp_accecn_syn_requested(th)) {
533 /* Downgrade to classic ECN feedback */
534 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
535 } else {
536 tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
537 INET_ECN_MASK;
538 tp->prev_ecnfield = tp->syn_ect_rcv;
539 tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
540 }
541 }
542 if (tcp_ecn_mode_rfc3168(tp) && (!th->ece || !th->cwr))
543 tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
544}
545
546static inline bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp,
547 const struct tcphdr *th)
548{
549 if (th->ece && !th->syn && tcp_ecn_mode_rfc3168(tp))
550 return true;
551 return false;
552}
553
554/* Packet ECN state for a SYN-ACK */
555static inline void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
556{
557 struct tcp_sock *tp = tcp_sk(sk);
558
559 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
560 if (tcp_ecn_disabled(tp))
561 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
562 else if (tcp_ca_needs_ecn(sk) ||
563 tcp_bpf_ca_needs_ecn(sk))
564 INET_ECN_xmit(sk);
565
566 if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
567 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
568 TCP_SKB_CB(skb)->tcp_flags |=
569 tcp_accecn_reflector_flags(ect: tp->syn_ect_rcv);
570 tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
571 }
572}
573
574/* Packet ECN state for a SYN. */
575static inline void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
576{
577 struct tcp_sock *tp = tcp_sk(sk);
578 bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
579 bool use_ecn, use_accecn;
580 u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
581
582 use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
583 use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
584 tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
585 tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
586
587 if (!use_ecn) {
588 const struct dst_entry *dst = __sk_dst_get(sk);
589
590 if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
591 use_ecn = true;
592 }
593
594 tp->ecn_flags = 0;
595
596 if (use_ecn) {
597 if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
598 INET_ECN_xmit(sk);
599
600 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
601 if (use_accecn) {
602 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_AE;
603 tcp_ecn_mode_set(tp, TCP_ECN_MODE_PENDING);
604 tp->syn_ect_snt = inet_sk(sk)->tos & INET_ECN_MASK;
605 } else {
606 tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
607 }
608 }
609}
610
611static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
612{
613 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) {
614 /* tp->ecn_flags are cleared at a later point in time when
615 * SYN ACK is ultimatively being received.
616 */
617 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
618 }
619}
620
621static inline void
622tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
623{
624 if (tcp_rsk(req)->accecn_ok)
625 tcp_accecn_echo_syn_ect(th, ect: tcp_rsk(req)->syn_ect_rcv);
626 else if (inet_rsk(sk: req)->ecn_ok)
627 th->ece = 1;
628}
629
630static inline bool tcp_accecn_option_beacon_check(const struct sock *sk)
631{
632 u32 ecn_beacon = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option_beacon);
633 const struct tcp_sock *tp = tcp_sk(sk);
634
635 if (!ecn_beacon)
636 return false;
637
638 return tcp_stamp_us_delta(t1: tp->tcp_mstamp, t0: tp->accecn_opt_tstamp) * ecn_beacon >=
639 (tp->srtt_us >> 3);
640}
641
#endif /* _TCP_ECN_H */
643