/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the TCP protocol.
 *
 * Version:	@(#)tcp.h	1.0.2	04/28/93
 *
 * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 */
#ifndef _LINUX_TCP_H
#define _LINUX_TCP_H


#include <linux/skbuff.h>
#include <linux/win_minmax.h>
#include <net/sock.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <uapi/linux/tcp.h>

static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
{
	return (struct tcphdr *)skb_transport_header(skb);
}

static inline unsigned int __tcp_hdrlen(const struct tcphdr *th)
{
	return th->doff * 4;
}

static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
{
	return __tcp_hdrlen(tcp_hdr(skb));
}

static inline struct tcphdr *inner_tcp_hdr(const struct sk_buff *skb)
{
	return (struct tcphdr *)skb_inner_transport_header(skb);
}

static inline unsigned int inner_tcp_hdrlen(const struct sk_buff *skb)
{
	return inner_tcp_hdr(skb)->doff * 4;
}

/**
 * skb_tcp_all_headers - Returns size of all headers for a TCP packet
 * @skb: buffer
 *
 * Used in TX path, for a packet known to be a TCP one.
 *
 * if (skb_is_gso(skb)) {
 *         int hlen = skb_tcp_all_headers(skb);
 *         ...
 */
static inline int skb_tcp_all_headers(const struct sk_buff *skb)
{
	return skb_transport_offset(skb) + tcp_hdrlen(skb);
}

/**
 * skb_inner_tcp_all_headers - Returns size of all headers for an encap TCP packet
 * @skb: buffer
 *
 * Used in TX path, for a packet known to be a TCP one.
 *
 * if (skb_is_gso(skb) && skb->encapsulation) {
 *         int hlen = skb_inner_tcp_all_headers(skb);
 *         ...
 */
static inline int skb_inner_tcp_all_headers(const struct sk_buff *skb)
{
	return skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
}

static inline unsigned int tcp_optlen(const struct sk_buff *skb)
{
	return (tcp_hdr(skb)->doff - 5) * 4;
}

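/* Example (illustrative): a TX-path caller could combine the helpers above
 * like this.  parse_opts() is a hypothetical consumer; everything else is
 * defined in this file.
 *
 *	const struct tcphdr *th = tcp_hdr(skb);
 *	unsigned int hdrlen = tcp_hdrlen(skb);	(doff * 4, i.e. 20..60 bytes)
 *	unsigned int optlen = tcp_optlen(skb);	(hdrlen - 20)
 *
 *	if (optlen)
 *		parse_opts((const u8 *)(th + 1), optlen);
 */
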
/* TCP Fast Open */
#define TCP_FASTOPEN_COOKIE_MIN	4	/* Min Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_MAX	16	/* Max Fast Open Cookie size in bytes */
#define TCP_FASTOPEN_COOKIE_SIZE 8	/* the size employed by this impl. */

/* TCP Fast Open Cookie as stored in memory */
struct tcp_fastopen_cookie {
	__le64	val[DIV_ROUND_UP(TCP_FASTOPEN_COOKIE_MAX, sizeof(u64))];
	s8	len;
	bool	exp;	/* In RFC6994 experimental option format */
};

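/* Example (illustrative): a consumer of the cookie might validate the stored
 * length against the bounds above before using it; "foc" is a hypothetical
 * local variable.
 *
 *	bool cookie_len_ok = foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
 *			     foc->len <= TCP_FASTOPEN_COOKIE_MAX;
 */
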
/* This defines a selective acknowledgement block. */
struct tcp_sack_block_wire {
	__be32	start_seq;
	__be32	end_seq;
};

struct tcp_sack_block {
	u32	start_seq;
	u32	end_seq;
};

/*These are used to set the sack_ok field in struct tcp_options_received */
#define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
#define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
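
/* Example (illustrative): these flags are OR-ed into rx_opt->sack_ok, e.g.
 * when a SYN carrying the SACK-permitted option is parsed (variable names
 * here are hypothetical):
 *
 *	rx_opt->sack_ok |= TCP_SACK_SEEN;
 *	...
 *	if (dsack_block_seen)
 *		rx_opt->sack_ok |= TCP_DSACK_SEEN;
 */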

struct tcp_options_received {
/*	PAWS/RTTM data	*/
	int	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
	u32	ts_recent;	/* Time stamp to echo next		*/
	u32	rcv_tsval;	/* Time stamp value             	*/
	u32	rcv_tsecr;	/* Time stamp echo reply        	*/
	u16	saw_tstamp : 1,	/* Saw TIMESTAMP on last packet		*/
		tstamp_ok : 1,	/* TIMESTAMP seen on SYN packet		*/
		dsack : 1,	/* D-SACK is scheduled			*/
		wscale_ok : 1,	/* Wscale seen on SYN packet		*/
		sack_ok : 3,	/* SACK seen on SYN packet		*/
		smc_ok : 1,	/* SMC seen on SYN packet		*/
		snd_wscale : 4,	/* Window scaling received from sender	*/
		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
	u8	accecn:6,	/* AccECN index in header, 0=no options	*/
		saw_unknown:1,	/* Received unknown option		*/
		unused:1;
	u8	num_sacks;	/* Number of SACK blocks		*/
	u16	user_mss;	/* mss requested by user in ioctl	*/
	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
};

static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
#if IS_ENABLED(CONFIG_SMC)
	rx_opt->smc_ok = 0;
#endif
}

/* This is the max number of SACKS that we'll generate and process. It's safe
 * to increase this, although since:
 *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
 * only four options will fit in a standard TCP header */
#define TCP_NUM_SACKS 4
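
/* Worked example for the bound above: the TCP option space is at most
 * 60 - 20 = 40 bytes.  With n = 4 SACK blocks,
 *	size = 4 + 4 * 8 = 36 bytes,
 * which still fits; n = 5 would need 44 bytes and does not.  (When the
 * timestamp option is also in use, its 12 aligned bytes leave room for
 * only 3 blocks: 12 + 4 + 3 * 8 = 40.)
 */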

struct tcp_request_sock_ops;

struct tcp_request_sock {
	struct inet_request_sock 	req;
	const struct tcp_request_sock_ops *af_specific;
	u64				snt_synack; /* first SYNACK sent time */
	bool				tfo_listener;
	bool				is_mptcp;
	bool				req_usec_ts;
#if IS_ENABLED(CONFIG_MPTCP)
	bool				drop_req;
#endif
	u32				txhash;
	u32				rcv_isn;
	u32				snt_isn;
	u32				ts_off;
	u32				snt_tsval_first;
	u32				snt_tsval_last;
	u32				last_oow_ack_time; /* last SYNACK */
	u32				rcv_nxt; /* the ack # by SYNACK. For
						  * FastOpen it's the seq#
						  * after data-in-SYN.
						  */
	u8				syn_tos;
	bool				accecn_ok;
	u8				syn_ect_snt: 2,
					syn_ect_rcv: 2,
					accecn_fail_mode:4;
	u8				saw_accecn_opt :2;
#ifdef CONFIG_TCP_AO
	u8				ao_keyid;
	u8				ao_rcv_next;
	bool				used_tcp_ao;
#endif
};

static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
{
	return (struct tcp_request_sock *)req;
}

static inline bool tcp_rsk_used_ao(const struct request_sock *req)
{
#ifndef CONFIG_TCP_AO
	return false;
#else
	return tcp_rsk(req)->used_tcp_ao;
#endif
}

#define TCP_RMEM_TO_WIN_SCALE 8

struct tcp_sock {
	/* Cacheline organization can be found documented in
	 * Documentation/networking/net_cachelines/tcp_sock.rst.
	 * Please update the document when adding new fields.
	 */

	/* inet_connection_sock has to be the first member of tcp_sock */
	struct inet_connection_sock	inet_conn;

	/* TX read-mostly hotpath cache lines */
	__cacheline_group_begin(tcp_sock_read_tx);
	u32	max_window;	/* Maximal window ever seen from peer	*/
	u32	rcv_ssthresh;	/* Current window clamp			*/
	u32	reordering;	/* Packet reordering metric.		*/
	u32	notsent_lowat;	/* TCP_NOTSENT_LOWAT */
	u16	gso_segs;	/* Max number of segs per GSO packet	*/
	/* from STCP, retrans queue hinting */
	struct sk_buff *retransmit_skb_hint;
#if defined(CONFIG_TLS_DEVICE)
	void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
#endif
	__cacheline_group_end(tcp_sock_read_tx);

	/* TXRX read-mostly hotpath cache lines */
	__cacheline_group_begin(tcp_sock_read_txrx);
	u32	tsoffset;	/* timestamp offset */
	u32	snd_wnd;	/* The window we expect to receive	*/
	u32	mss_cache;	/* Cached effective mss, not including SACKS */
	u32	snd_cwnd;	/* Sending congestion window		*/
	u32	prr_out;	/* Total number of pkts sent during Recovery. */
	u32	lost_out;	/* Lost packets			*/
	u32	sacked_out;	/* SACK'd packets			*/
	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
	u8	scaling_ratio;	/* see tcp_win_from_space() */
	u8	chrono_type : 2,	/* current chronograph type */
		repair      : 1,
		tcp_usec_ts : 1, /* TSval values in usec */
		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
		is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
		recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
	__cacheline_group_end(tcp_sock_read_txrx);

	/* RX read-mostly hotpath cache lines */
	__cacheline_group_begin(tcp_sock_read_rx);
	u32	copied_seq;	/* Head of yet unread data */
	u32	snd_wl1;	/* Sequence for window update		*/
	u32	tlp_high_seq;	/* snd_nxt at the time of TLP */
	u32	rttvar_us;	/* smoothed mdev_max			*/
	u32	retrans_out;	/* Retransmitted packets out		*/
	u16	advmss;		/* Advertised MSS			*/
	u16	urg_data;	/* Saved octet of OOB data and control flags */
	u32	lost;		/* Total data packets lost incl. rexmits */
	u32	snd_ssthresh;	/* Slow start size threshold		*/
	struct  minmax rtt_min;
	/* OOO segments go in this rbtree. Socket lock must be held. */
	struct rb_root	out_of_order_queue;
	__cacheline_group_end(tcp_sock_read_rx);

	/* TX read-write hotpath cache lines */
	__cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned;
	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut
				 * The total number of segments sent.
				 */
	u32	data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
				 * total number of data segments sent.
				 */
	u64	bytes_sent;	/* RFC4898 tcpEStatsPerfHCDataOctetsOut
				 * total number of data bytes sent.
				 */
	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
	u32	mdev_us;	/* medium deviation			*/
	u32	rtt_seq;	/* sequence number to update rttvar	*/
	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
	u64	accecn_opt_tstamp;	/* Last AccECN option sent timestamp */
	struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */
	struct sk_buff *highest_sack;   /* skb just after the highest
					 * skb with SACKed bit set
					 * (validity guaranteed only if
					 * sacked_out > 0)
					 */
	u8	ecn_flags;	/* ECN status bits.			*/
	__cacheline_group_end(tcp_sock_write_tx);

	/* TXRX read-write hotpath cache lines */
	__cacheline_group_begin(tcp_sock_write_txrx);
	/*
	 *	Header prediction flags
	 *	0x5?10 << 16 + snd_wnd in net byte order
	 */
	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
		rate_app_limited:1;  /* rate_{delivered,interval_us} limited? */
	u8	received_ce_pending:4, /* Not yet transmit cnt of received_ce */
		unused2:4;
	u8	accecn_minlen:2,/* Minimum length of AccECN option sent */
		est_ecnfield:2,/* ECN field for AccECN delivered estimates */
		accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
		prev_ecnfield:2; /* ECN bits from the previous segment */
	__be32	pred_flags;
	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
	u64	tcp_mstamp;	/* most recent packet received/sent */
	u32	rcv_nxt;	/* What we want to receive next		*/
	u32	snd_nxt;	/* Next sequence we send		*/
	u32	snd_una;	/* First byte we want an ack for	*/
	u32	window_clamp;	/* Maximal window to advertise		*/
	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
	u32	packets_out;	/* Packets which are "in flight"	*/
	u32	snd_up;		/* Urgent pointer		*/
	u32	delivered;	/* Total data packets delivered incl. rexmits */
	u32	delivered_ce;	/* Like the above but only ECE marked packets */
	u32	received_ce;	/* Like the above but for rcvd CE marked pkts */
	u32	received_ecn_bytes[3]; /* received byte counters for three ECN
					* types: INET_ECN_ECT_1, INET_ECN_ECT_0,
					* and INET_ECN_CE
					*/
	u32	app_limited;	/* limited until "delivered" reaches this val */
	u32	rcv_wnd;	/* Current receiver window		*/
	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
	/*
	 *      Options received (usually on last packet, some only on SYN packets).
	 */
	struct tcp_options_received rx_opt;
	__cacheline_group_end(tcp_sock_write_txrx);

	/* RX read-write hotpath cache lines */
	__cacheline_group_begin(tcp_sock_write_rx) __aligned(8);
	u64	bytes_received;
				/* RFC4898 tcpEStatsAppHCThruOctetsReceived
				 * sum(delta(rcv_nxt)), or how many bytes
				 * were acked.
				 */
	u32	segs_in;	/* RFC4898 tcpEStatsPerfSegsIn
				 * total number of segments in.
				 */
	u32	data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
				 * total number of data segments in.
				 */
	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
	u32	max_packets_out;  /* max packets_out in last window */
	u32	cwnd_usage_seq;  /* right edge of cwnd usage tracking flight */
	u32	rate_delivered;    /* saved rate sample: packets delivered */
	u32	rate_interval_us;  /* saved rate sample: time elapsed */
	u32	rcv_rtt_last_tsecr;
	u32	delivered_ecn_bytes[3];
	u64	first_tx_mstamp;  /* start of window send phase */
	u64	delivered_mstamp; /* time we reached "delivered" */
	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
				 * sum(delta(snd_una)), or how many bytes
				 * were acked.
				 */
	struct {
		u32	rtt_us;
		u32	seq;
		u64	time;
	} rcv_rtt_est;
	/* Receiver queue space */
	struct {
		int	space;
		u32	seq;
		u64	time;
	} rcvq_space;
	__cacheline_group_end(tcp_sock_write_rx);
	/* End of Hot Path */

	/*
	 *	RFC793 variables by their proper names. This means you can
	 *	read the code and the spec side by side (and laugh ...)
	 *	See RFC793 and RFC1122. The RFC writes these in capitals.
	 */
	u32	dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
				 * total number of DSACK blocks received
				 */
	u32	compressed_ack_rcv_nxt;
	struct list_head tsq_node; /* anchor in tsq_tasklet.head list */

	/* Information of the most recently (s)acked skb */
	struct tcp_rack {
		u64 mstamp; /* (Re)sent time of the skb */
		u32 rtt_us;  /* Associated RTT */
		u32 end_seq; /* Ending TCP sequence of the skb */
		u32 last_delivered; /* tp->delivered at last reo_wnd adj */
		u8 reo_wnd_steps;   /* Allowed reordering window */
#define TCP_RACK_RECOVERY_THRESH 16
		u8 reo_wnd_persist:5, /* No. of recovery since last adj */
		   dsack_seen:1, /* Whether DSACK seen after last adj */
		   advanced:1;	 /* mstamp advanced since last lost marking */
	} rack;
	u8	compressed_ack;
	u8	dup_ack_counter:2,
		tlp_retrans:1,	/* TLP is a retransmission */
		syn_ect_snt:2,	/* AccECN ECT memory, only */
		syn_ect_rcv:2;	/* ... needed during 3WHS + first seqno */
	u8	thin_lto    : 1,/* Use linear timeouts for thin streams */
		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
		fastopen_client_fail:2, /* reason why fastopen failed */
		frto        : 1;/* F-RTO (RFC5682) activated in CA_Loss */
	u8	repair_queue;
	u8	save_syn:2,	/* Save headers of SYN packet */
		syn_data:1,	/* SYN includes data */
		syn_fastopen:1,	/* SYN includes Fast Open option */
		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
		syn_fastopen_ch:1, /* Active TFO re-enabling probe */
		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
		syn_fastopen_child:1; /* created TFO passive child socket */

	u8	keepalive_probes; /* num of allowed keep alive probes	*/
	u8	accecn_fail_mode:4,	/* AccECN failure handling */
		saw_accecn_opt:2;	/* An AccECN option was seen */
	u32	tcp_tx_delay;	/* delay (in usec) added to TX packets */

	/* RTT measurement */
	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/

	u32	reord_seen;	/* number of data packet reordering events */

	/*
	 *	Slow start and congestion control (see also Nagle, and Karn & Partridge)
	 */
	u32	snd_cwnd_cnt;	/* Linear increase counter		*/
	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
	u32	snd_cwnd_used;
	u32	snd_cwnd_stamp;
	u32	prior_cwnd;	/* cwnd right before starting loss recovery */
	u32	prr_delivered;	/* Number of newly delivered packets to
				 * receiver in Recovery. */
	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */

	struct hrtimer	pacing_timer;
	struct hrtimer	compressed_ack_timer;

	struct sk_buff	*ooo_last_skb; /* cache rb_last(out_of_order_queue) */

	/* SACKs data, these 2 need to be together (see tcp_options_write) */
	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
	struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/

	struct tcp_sack_block recv_sack_cache[4];

	u32	prior_ssthresh; /* ssthresh saved at recovery start	*/
	u32	high_seq;	/* snd_nxt at onset of congestion	*/

	u32	retrans_stamp;	/* Timestamp of the last retransmit,
				 * also used in SYN-SENT to remember stamp of
				 * the first SYN. */
	u32	undo_marker;	/* snd_una upon a new recovery episode. */
	int	undo_retrans;	/* number of undoable retransmissions. */
	u32	mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
			   * while socket was owned by user.
			   */
	u64	bytes_retrans;	/* RFC4898 tcpEStatsPerfOctetsRetrans
				 * Total data bytes retransmitted
				 */
	u32	total_retrans;	/* Total retransmits for entire connection */
	u32	rto_stamp;	/* Start time (ms) of last CA_Loss recovery */
	u16	total_rto;	/* Total number of RTO timeouts, including
				 * SYN/SYN-ACK and recurring timeouts.
				 */
	u16	total_rto_recoveries;	/* Total number of RTO recoveries,
					 * including any unfinished recovery.
					 */
	u32	total_rto_time;	/* ms spent in (completed) RTO recoveries. */

	u32	urg_seq;	/* Seq of received urgent pointer */
	unsigned int		keepalive_time;	  /* time before keep alive takes place */
	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */

	int			linger2;

	/* Sock_ops bpf program related variables */
#ifdef CONFIG_BPF
	u8	bpf_sock_ops_cb_flags;  /* Control calling BPF programs
					 * values defined in uapi/linux/tcp.h
					 */
	u8	bpf_chg_cc_inprogress:1; /* In the middle of
					  * bpf_setsockopt(TCP_CONGESTION),
					  * it is to avoid the bpf_tcp_cc->init()
					  * to recur itself by calling
					  * bpf_setsockopt(TCP_CONGESTION, "itself").
					  */
#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
#else
#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
#endif

	u16 timeout_rehash;	/* Timeout-triggered rehash attempts */

	u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */

	/* TCP-specific MTU probe information. */
	struct {
		u32		  probe_seq_start;
		u32		  probe_seq_end;
	} mtu_probe;
	u32     plb_rehash;     /* PLB-triggered rehash attempts */
#if IS_ENABLED(CONFIG_MPTCP)
	bool	is_mptcp;
#endif
#if IS_ENABLED(CONFIG_SMC)
	bool	syn_smc;	/* SYN includes SMC */
	bool	(*smc_hs_congested)(const struct sock *sk);
#endif

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* TCP AF-Specific parts; only used by TCP-AO/MD5 Signature support so far */
	const struct tcp_sock_af_ops	*af_specific;

#ifdef CONFIG_TCP_MD5SIG
	/* TCP MD5 Signature Option information */
	struct tcp_md5sig_info	__rcu *md5sig_info;
#endif
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info	__rcu *ao_info;
#endif
#endif

	/* TCP fastopen related information */
	struct tcp_fastopen_request *fastopen_req;
	/* fastopen_rsk points to request_sock that resulted in this big
	 * socket. Used to retransmit SYNACKs etc.
	 */
	struct request_sock __rcu *fastopen_rsk;
	struct saved_syn *saved_syn;
};

enum tsq_enum {
	TSQ_THROTTLED,
	TSQ_QUEUED,
	TCP_TSQ_DEFERRED,	   /* tcp_tasklet_func() found socket was owned */
	TCP_WRITE_TIMER_DEFERRED,  /* tcp_write_timer() found socket was owned */
	TCP_DELACK_TIMER_DEFERRED, /* tcp_delack_timer() found socket was owned */
	TCP_MTU_REDUCED_DEFERRED,  /* tcp_v{4|6}_err() could not call
				    * tcp_v{4|6}_mtu_reduced()
				    */
	TCP_ACK_DEFERRED,	   /* TX pure ack is deferred */
};

enum tsq_flags {
	TSQF_THROTTLED			= BIT(TSQ_THROTTLED),
	TSQF_QUEUED			= BIT(TSQ_QUEUED),
	TCPF_TSQ_DEFERRED		= BIT(TCP_TSQ_DEFERRED),
	TCPF_WRITE_TIMER_DEFERRED	= BIT(TCP_WRITE_TIMER_DEFERRED),
	TCPF_DELACK_TIMER_DEFERRED	= BIT(TCP_DELACK_TIMER_DEFERRED),
	TCPF_MTU_REDUCED_DEFERRED	= BIT(TCP_MTU_REDUCED_DEFERRED),
	TCPF_ACK_DEFERRED		= BIT(TCP_ACK_DEFERRED),
};

#define tcp_sk(ptr) container_of_const(ptr, struct tcp_sock, inet_conn.icsk_inet.sk)

/* Variant of tcp_sk() upgrading a const sock to a read/write tcp socket.
 * Used in context of (lockless) tcp listeners.
 */
#define tcp_sk_rw(ptr) container_of(ptr, struct tcp_sock, inet_conn.icsk_inet.sk)

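/* Example (illustrative): with a const struct sock *sk, tcp_sk() yields a
 * const tcp_sock while tcp_sk_rw() drops the const for the rare spots that
 * must write through it; the READ_ONCE()/WRITE_ONCE() lines are only a
 * sketch of lockless-listener style accesses.
 *
 *	const struct tcp_sock *tp = tcp_sk(sk);
 *	u32 copied = READ_ONCE(tp->copied_seq);
 *
 *	WRITE_ONCE(tcp_sk_rw(sk)->notsent_lowat, 16 * 1024);
 */
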
struct tcp_timewait_sock {
	struct inet_timewait_sock tw_sk;
#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt
#define tw_snd_nxt tw_sk.__tw_common.skc_tw_snd_nxt
	u32			  tw_rcv_wnd;
	u32			  tw_ts_offset;
	u32			  tw_ts_recent;

	/* The time we sent the last out-of-window ACK: */
	u32			  tw_last_oow_ack_time;

	int			  tw_ts_recent_stamp;
	u32			  tw_tx_delay;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key	  *tw_md5_key;
#endif
#ifdef CONFIG_TCP_AO
	struct tcp_ao_info	__rcu *ao_info;
#endif
};

static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
{
	return (struct tcp_timewait_sock *)sk;
}

static inline bool tcp_passive_fastopen(const struct sock *sk)
{
	return sk->sk_state == TCP_SYN_RECV &&
	       rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL;
}

static inline void fastopen_queue_tune(struct sock *sk, int backlog)
{
	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
	int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);

	WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
}

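/* Worked example for the clamp above (values are only illustrative): with
 * listen(fd, 1024) and net.core.somaxconn = 128, fastopen_queue_tune()
 * stores min(1024, 128) = 128 in fastopenq.max_qlen, so the TFO queue can
 * never outgrow the per-netns somaxconn limit.
 */
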
static inline void tcp_move_syn(struct tcp_sock *tp,
				struct request_sock *req)
{
	tp->saved_syn = req->saved_syn;
	req->saved_syn = NULL;
}

static inline void tcp_saved_syn_free(struct tcp_sock *tp)
{
	kfree(tp->saved_syn);
	tp->saved_syn = NULL;
}

static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn)
{
	return saved_syn->mac_hdrlen + saved_syn->network_hdrlen +
	       saved_syn->tcp_hdrlen;
}

struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
					       const struct sk_buff *orig_skb,
					       const struct sk_buff *ack_skb);

static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
{
	/* We use READ_ONCE() here because socket might not be locked.
	 * This happens for listeners.
	 */
	u16 user_mss = READ_ONCE(tp->rx_opt.user_mss);

	return (user_mss && user_mss < mss) ? user_mss : mss;
}

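/* Example (illustrative): tcp_mss_clamp() only ever lowers the value it is
 * given.  With a path-derived mss of 1460 and a user_mss of 1200 (set via
 * TCP_MAXSEG), tcp_mss_clamp(tp, 1460) returns 1200; with user_mss == 0
 * (unset) it returns 1460 unchanged.
 */
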
int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
		  int shiftlen);

void __tcp_sock_set_cork(struct sock *sk, bool on);
void tcp_sock_set_cork(struct sock *sk, bool on);
int tcp_sock_set_keepcnt(struct sock *sk, int val);
int tcp_sock_set_keepidle_locked(struct sock *sk, int val);
int tcp_sock_set_keepidle(struct sock *sk, int val);
int tcp_sock_set_keepintvl(struct sock *sk, int val);
void __tcp_sock_set_nodelay(struct sock *sk, bool on);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
int tcp_sock_set_user_timeout(struct sock *sk, int val);
int tcp_sock_set_maxseg(struct sock *sk, int val);

static inline bool dst_tcp_usec_ts(const struct dst_entry *dst)
{
	return dst_feature(dst, RTAX_FEATURE_TCP_USEC_TS);
}

#endif	/* _LINUX_TCP_H */