// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/memblock.h>
#include <linux/socket.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/flow.h>
#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>

#include "fib_lookup.h"

#define RT_GC_TIMEOUT (300*HZ)

#define DEFAULT_MIN_PMTU (512 + 20 + 20)
#define DEFAULT_MTU_EXPIRES (10 * 60 * HZ)
#define DEFAULT_MIN_ADVMSS 256
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;

static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;

/*
 *	Interface to generic destination cache.
 */

INDIRECT_CALLABLE_SCOPE
struct dst_entry	*ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int		ipv4_mtu(const struct dst_entry *dst);
static void		ipv4_negative_advice(struct sock *sk,
					     struct dst_entry *dst);
static void		 ipv4_link_failure(struct sk_buff *skb);
static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu,
					   bool confirm_neigh);
static void		 ip_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static void		ipv4_dst_destroy(struct dst_entry *dst);

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);
static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
	.confirm_neigh =	ipv4_confirm_neigh,
};

#define ECN_OR_COST(class)	TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#ifndef CONFIG_PREEMPT_RT
#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
#else
#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field)
#endif

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	(*pos)++;
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "entries  in_hit   in_slow_tot in_slow_mc in_no_route in_brd   in_martian_dst in_martian_src out_hit  out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq, "%08x %08x %08x    %08x   %08x    %08x %08x       "
		   "%08x       %08x %08x     %08x    %08x %08x   "
		   "%08x     %08x        %08x        %08x\n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create_seq("rt_cache", 0444, net->proc_net,
			      &rt_cache_seq_ops);
	if (!pde)
		goto err1;

	pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat,
			      &rt_cpu_seq_ops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create_single("rt_acct", 0, net->proc_net,
				 rt_acct_proc_show);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

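/* Cached routes are invalidated in bulk: rt_cache_flush() bumps the
 * per-netns IPv4 generation id, and rt_is_expired() reports any rtable
 * whose stored rt_genid no longer matches the current one.
 */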
static inline bool rt_is_expired(const struct rtable *rth)
{
	bool res;

	rcu_read_lock();
	res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev));
	rcu_read_unlock();

	return res;
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}

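/* Resolve the neighbour used to reach this route: prefer the gateway
 * recorded in the rtable (IPv4 or IPv6), otherwise fall back to the
 * packet's or caller-supplied destination address. A reference is taken
 * for the caller; NULL is returned if it cannot be.
 */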
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev;
	struct neighbour *n;

	rcu_read_lock();
	dev = dst_dev_rcu(dst);
	if (likely(rt->rt_gw_family == AF_INET)) {
		n = ip_neigh_gw4(dev, rt->rt_gw4);
	} else if (rt->rt_gw_family == AF_INET6) {
		n = ip_neigh_gw6(dev, &rt->rt_gw6);
	} else {
		__be32 pkey;

		pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
		n = ip_neigh_gw4(dev, pkey);
	}

	if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
		n = NULL;

	rcu_read_unlock();

	return n;
}

static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	const struct rtable *rt = container_of(dst, struct rtable, dst);
	struct net_device *dev = dst_dev(dst);
	const __be32 *pkey = daddr;

	if (rt->rt_gw_family == AF_INET) {
		pkey = (const __be32 *)&rt->rt_gw4;
	} else if (rt->rt_gw_family == AF_INET6) {
		return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
	} else if (!daddr ||
		   (rt->rt_flags &
		    (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
		return;
	}
	__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}

/* Hash tables of size 2048..262144 depending on RAM size.
 * Each bucket uses 8 bytes.
 */
static u32 ip_idents_mask __read_mostly;
static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
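/* Concretely: if a bucket was last touched "now - old" jiffies ago, the
 * reserved range is additionally advanced by a random delta drawn from
 * [0, now - old), so an idle bucket does not leak an exact packet count.
 */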
static u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 bucket, old, now = (u32)jiffies;
	atomic_t *p_id;
	u32 *p_tstamp;
	u32 delta = 0;

	bucket = hash & ip_idents_mask;
	p_tstamp = ip_tstamps + bucket;
	p_id = ip_idents + bucket;
	old = READ_ONCE(*p_tstamp);

	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = get_random_u32_below(now - old);

	/* If UBSAN reports an error there, please make sure your compiler
	 * supports -fno-strict-overflow before reporting it: that was a bug
	 * in UBSAN, and it has been fixed in GCC-8.
	 */
	return atomic_add_return(segs + delta, p_id) - segs;
}

void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	u32 hash, id;

	/* Note the following code is not safe, but this is okay. */
	if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
		get_random_bytes(&net->ipv4.ip_id_key,
				 sizeof(net->ipv4.ip_id_key));

	hash = siphash_3u32((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol,
			    &net->ipv4.ip_id_key);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk, const struct iphdr *iph,
			     int oif, __u8 tos, u8 prot, u32 mark,
			     int flow_flags)
{
	__u8 scope = RT_SCOPE_UNIVERSE;

	if (sk) {
		oif = sk->sk_bound_dev_if;
		mark = READ_ONCE(sk->sk_mark);
		tos = ip_sock_rt_tos(sk);
		scope = ip_sock_rt_scope(sk);
		prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW :
						    sk->sk_protocol;
	}

	flowi4_init_output(fl4, oif, mark, tos & INET_DSCP_MASK, scope,
			   prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
			   sock_net_uid(net, sk));
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct net *net = dev_net(skb->dev);
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 prot = iph->protocol;
	u32 mark = skb->mark;
	__u8 tos = iph->tos;

	__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
			   ip_sock_rt_tos(sk),
			   ip_sock_rt_scope(sk),
			   inet_test_bit(HDRINCL, sk) ?
				IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0,
			   sk_uid(sk));
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static DEFINE_SPINLOCK(fnhe_lock);

static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		dst_dev_put(&rt->dst);
		dst_release(&rt->dst);
	}
}

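/* Evict the least recently stamped exception from a bucket chain; called
 * with fnhe_lock held once a chain grows beyond its randomized depth limit.
 */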
static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception __rcu **fnhe_p, **oldest_p;
	struct fib_nh_exception *fnhe, *oldest = NULL;

	for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) {
		fnhe = rcu_dereference_protected(*fnhe_p,
						 lockdep_is_held(&fnhe_lock));
		if (!fnhe)
			break;
		if (!oldest ||
		    time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) {
			oldest = fnhe;
			oldest_p = fnhe_p;
		}
	}
	fnhe_flush_routes(oldest);
	*oldest_p = oldest->fnhe_next;
	kfree_rcu(oldest, rcu);
}

static u32 fnhe_hashfun(__be32 daddr)
{
	static siphash_aligned_key_t fnhe_hash_key;
	u64 hval;

	net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key));
	hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key);
	return hash_64(hval, FNHE_HASH_SHIFT);
}

static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_uses_gateway = 1;
		rt->rt_gw_family = AF_INET;
		rt->rt_gw4 = fnhe->fnhe_gw;
	}
}

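/* Record (or refresh) a per-nexthop exception for @daddr: a gateway learned
 * from an ICMP redirect and/or a discovered path MTU, with an expiry time.
 * Existing cached dsts are updated or marked obsolete so later lookups pick
 * the exception up.
 */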
static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
				  __be32 gw, u32 pmtu, bool lock,
				  unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	u32 genid, hval;
	unsigned int i;
	int depth;

	genid = fnhe_genid(dev_net(nhc->nhc_dev));
	hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference(nhc->nhc_exceptions);
	if (!hash) {
		hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nhc->nhc_exceptions, hash);
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (fnhe->fnhe_genid != genid)
			fnhe->fnhe_genid = genid;
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_mtu_locked = lock;
		}
		fnhe->fnhe_expires = max(1UL, expires);
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		/* Randomize max depth to avoid some side channels attacks. */
		int max_depth = FNHE_RECLAIM_DEPTH +
				get_random_u32_below(FNHE_RECLAIM_DEPTH);

		while (depth > max_depth) {
			fnhe_remove_oldest(hash);
			depth--;
		}

		fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
		if (!fnhe)
			goto out_unlock;

		fnhe->fnhe_next = hash->chain;

		fnhe->fnhe_genid = genid;
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_mtu_locked = lock;
		fnhe->fnhe_expires = max(1UL, expires);

		rcu_assign_pointer(hash->chain, fnhe);

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nhc->nhc_rth_input);
		if (rt)
			WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL);

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;

			prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL);
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(READ_ONCE(n->nud_state) & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh_common *nhc;

				fib_select_path(net, &res, fl4, skb);
				nhc = FIB_RES_NHC(res);
				update_or_create_fnhe(nhc, fl4->daddr, new_gw,
						0, false,
						jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				WRITE_ONCE(rt->dst.obsolete, DST_OBSOLETE_KILL);
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct net *net = dev_net(skb->dev);
	int oif = skb->dev->ifindex;
	u8 prot = iph->protocol;
	u32 mark = skb->mark;
	__u8 tos = iph->tos;

	rt = dst_rtable(dst);

	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static void ipv4_negative_advice(struct sock *sk,
				 struct dst_entry *dst)
{
	struct rtable *rt = dst_rtable(dst);

	if ((READ_ONCE(dst->obsolete) > 0) ||
	    (rt->rt_flags & RTCF_REDIRECTED) ||
	    READ_ONCE(rt->dst.expires))
		sk_dst_reset(sk);
}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   forgot redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
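/* With the defaults above (ip_rt_redirect_number = 9, ip_rt_redirect_load =
 * HZ / 50), each subsequent redirect to a peer is held back for at least
 * ip_rt_redirect_load << n_redirects jiffies, and after 9 redirects we stay
 * silent until the peer has been quiet for ip_rt_redirect_silence
 * ((HZ / 50) << 10 jiffies), which resets the counters.
 */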

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
	if (!peer) {
		rcu_read_unlock();
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
		peer->rate_tokens = 0;
		peer->n_redirects = 0;
	}

	/* Too many ignored redirects; do not send anything and
	 * set rate_last to the last seen redirected packet.
	 */
	if (peer->n_redirects >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_unlock;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->n_redirects == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->n_redirects)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->n_redirects;
		if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
		    peer->n_redirects == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
	}
out_unlock:
	rcu_read_unlock();
}

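/* Handle a packet that hit an error route: account the failure and, subject
 * to a per-peer token bucket (one token per jiffy, capped at
 * ip_rt_error_burst, each message costing ip_rt_error_cost), send the
 * matching ICMP destination unreachable.
 */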
static int ip_error(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	SKB_DR(reason);
	bool send;
	int code;

	if (netif_is_l3_master(skb->dev)) {
		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
		if (!dev)
			goto out;
	}

	in_dev = __in_dev_get_rcu(dev);

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			SKB_DR_SET(reason, IP_INADDRERRORS);
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			SKB_DR_SET(reason, IP_INNOROUTES);
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		SKB_DR_SET(reason, IP_INNOROUTES);
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	rcu_read_lock();
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex_rcu(skb->dev));
	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
	}
	rcu_read_unlock();

	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb_reason(skb, reason);
	return 0;
}

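/* Learn a reduced path MTU for this flow as a nexthop exception that expires
 * after ip_rt_mtu_expires. Values below the ip_rt_min_pmtu sysctl are clamped
 * to it and the exception is locked against further PMTU updates.
 */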
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;
	bool lock = false;
	struct net *net;
	u32 old_mtu;

	if (ip_mtu_locked(dst))
		return;

	old_mtu = ipv4_mtu(dst);
	if (old_mtu < mtu)
		return;

	rcu_read_lock();
	net = dst_dev_net_rcu(dst);
	if (mtu < net->ipv4.ip_rt_min_pmtu) {
		lock = true;
		mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
	}

	if (rt->rt_pmtu == mtu && !lock &&
	    time_before(jiffies, READ_ONCE(dst->expires) -
				 net->ipv4.ip_rt_mtu_expires / 2))
		goto out;

	if (fib_lookup(net, fl4, &res, 0) == 0) {
		struct fib_nh_common *nhc;

		fib_select_path(net, &res, fl4, NULL);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		if (fib_info_num_path(res.fi) > 1) {
			int nhsel;

			for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) {
				nhc = fib_info_nhc(res.fi, nhsel);
				update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
						      jiffies + net->ipv4.ip_rt_mtu_expires);
			}
			goto out;
		}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
		nhc = FIB_RES_NHC(res);
		update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
				      jiffies + net->ipv4.ip_rt_mtu_expires);
	}
out:
	rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu,
			      bool confirm_neigh)
{
	struct rtable *rt = dst_rtable(dst);
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);

	/* Don't make lookup fail for bridged encapsulations */
	if (skb && netif_is_any_bridge_port(skb->dev))
		fl4.flowi4_oif = 0;

	__ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u8 protocol)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	u32 mark = IP4_REPLY_MARK(net, skb->mark);

	__build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark,
			 0);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);

	if (!fl4.flowi4_mark)
		fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);

	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = dst_rtable(odst);
	if (READ_ONCE(odst->obsolete) && !odst->ops->check(odst, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u8 protocol)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct net *net = sock_net(sk);

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);

INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
							 u32 cookie)
{
	struct rtable *rt = dst_rtable(dst);

	/* All IPV4 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 *
	 * When a PMTU/redirect information update invalidates a route,
	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
	 * DST_OBSOLETE_DEAD.
	 */
	if (READ_ONCE(dst->obsolete) != DST_OBSOLETE_FORCE_CHK ||
	    rt_is_expired(rt))
		return NULL;
	return dst;
}
EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);

static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
	struct inet_skb_parm parm;
	struct net_device *dev;
	int res;

	/* Recompile ip options since IPCB may not be valid anymore.
	 * Also check we have a reasonable ipv4 header.
	 */
	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
	    ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
		return;

	memset(&parm, 0, sizeof(parm));
	if (ip_hdr(skb)->ihl > 5) {
		if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
			return;
		parm.opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);

		rcu_read_lock();
		dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
		res = __ip_options_compile(dev_net(dev), &parm.opt, skb, NULL);
		rcu_read_unlock();

		if (res)
			return;
	}
	__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &parm);
}

static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	ipv4_send_dest_unreach(skb);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}

/*
 * We do not cache source address of outgoing interface,
 * because it is used only by IP RR, TS and SRR options,
 * so it stays out of the fast path.
 *
 * BTW remember: "addr" is allowed to be not aligned
 * in IP options!
 */

void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct iphdr *iph = ip_hdr(skb);
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_dscp = ip4h_dscp(iph),
			.flowi4_oif = rt->dst.dev->ifindex,
			.flowi4_iif = skb->dev->ifindex,
			.flowi4_mark = skb->mark,
		};

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}

#ifdef CONFIG_IP_ROUTE_CLASSID
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif

static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
	unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
	unsigned int advmss;
	struct net *net;

	rcu_read_lock();
	net = dst_dev_net_rcu(dst);
	advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
		       net->ipv4.ip_rt_min_advmss);
	rcu_read_unlock();

	return min(advmss, IPV4_MAX_PMTU - header_size);
}

| 1338 | INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) | 
|---|
| 1339 | { | 
|---|
| 1340 | return ip_dst_mtu_maybe_forward(dst, forwarding: false); | 
|---|
| 1341 | } | 
|---|
| 1342 | EXPORT_INDIRECT_CALLABLE(ipv4_mtu); | 
|---|
| 1343 |  | 
|---|
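|  | /* Unlink and free the nexthop exception for @daddr under fnhe_lock; fnhe_daddr is cleared first so rt_bind_exception() cannot attach new dsts to the dying entry. */ | 
|---|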
| 1344 | static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) | 
|---|
| 1345 | { | 
|---|
| 1346 | struct fnhe_hash_bucket *hash; | 
|---|
| 1347 | struct fib_nh_exception *fnhe, __rcu **fnhe_p; | 
|---|
| 1348 | u32 hval = fnhe_hashfun(daddr); | 
|---|
| 1349 |  | 
|---|
| 1350 | spin_lock_bh(&fnhe_lock); | 
|---|
| 1351 |  | 
|---|
| 1352 | hash = rcu_dereference_protected(nhc->nhc_exceptions, | 
|---|
| 1353 | lockdep_is_held(&fnhe_lock)); | 
|---|
| 1354 | hash += hval; | 
|---|
| 1355 |  | 
|---|
| 1356 | fnhe_p = &hash->chain; | 
|---|
| 1357 | fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); | 
|---|
| 1358 | while (fnhe) { | 
|---|
| 1359 | if (fnhe->fnhe_daddr == daddr) { | 
|---|
| 1360 | rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( | 
|---|
| 1361 | fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); | 
|---|
| 1362 | /* set fnhe_daddr to 0 to ensure it won't bind with | 
|---|
| 1363 | * new dsts in rt_bind_exception(). | 
|---|
| 1364 | */ | 
|---|
| 1365 | fnhe->fnhe_daddr = 0; | 
|---|
| 1366 | fnhe_flush_routes(fnhe); | 
|---|
| 1367 | kfree_rcu(fnhe, rcu); | 
|---|
| 1368 | break; | 
|---|
| 1369 | } | 
|---|
| 1370 | fnhe_p = &fnhe->fnhe_next; | 
|---|
| 1371 | fnhe = rcu_dereference_protected(fnhe->fnhe_next, | 
|---|
| 1372 | lockdep_is_held(&fnhe_lock)); | 
|---|
| 1373 | } | 
|---|
| 1374 |  | 
|---|
| 1375 | spin_unlock_bh(&fnhe_lock); | 
|---|
| 1376 | } | 
|---|
| 1377 |  | 
|---|
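|  | /* Look up the nexthop exception for @daddr in the per-nexthop hash; entries whose fnhe_expires has passed are removed lazily here and NULL is returned instead. */ | 
|---|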
| 1378 | static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, | 
|---|
| 1379 | __be32 daddr) | 
|---|
| 1380 | { | 
|---|
| 1381 | struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); | 
|---|
| 1382 | struct fib_nh_exception *fnhe; | 
|---|
| 1383 | u32 hval; | 
|---|
| 1384 |  | 
|---|
| 1385 | if (!hash) | 
|---|
| 1386 | return NULL; | 
|---|
| 1387 |  | 
|---|
| 1388 | hval = fnhe_hashfun(daddr); | 
|---|
| 1389 |  | 
|---|
| 1390 | for (fnhe = rcu_dereference(hash[hval].chain); fnhe; | 
|---|
| 1391 | fnhe = rcu_dereference(fnhe->fnhe_next)) { | 
|---|
| 1392 | if (fnhe->fnhe_daddr == daddr) { | 
|---|
| 1393 | if (fnhe->fnhe_expires && | 
|---|
| 1394 | time_after(jiffies, fnhe->fnhe_expires)) { | 
|---|
| 1395 | ip_del_fnhe(nhc, daddr); | 
|---|
| 1396 | break; | 
|---|
| 1397 | } | 
|---|
| 1398 | return fnhe; | 
|---|
| 1399 | } | 
|---|
| 1400 | } | 
|---|
| 1401 | return NULL; | 
|---|
| 1402 | } | 
|---|
| 1403 |  | 
|---|
| 1404 | /* MTU selection: | 
|---|
| 1405 | * 1. mtu on route is locked - use it | 
|---|
| 1406 | * 2. mtu from nexthop exception | 
|---|
| 1407 | * 3. mtu from egress device | 
|---|
| 1408 | */ | 
|---|
| 1409 |  | 
|---|
| 1410 | u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) | 
|---|
| 1411 | { | 
|---|
| 1412 | struct fib_nh_common *nhc = res->nhc; | 
|---|
| 1413 | struct net_device *dev = nhc->nhc_dev; | 
|---|
| 1414 | struct fib_info *fi = res->fi; | 
|---|
| 1415 | u32 mtu = 0; | 
|---|
| 1416 |  | 
|---|
| 1417 | if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || | 
|---|
| 1418 | fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) | 
|---|
| 1419 | mtu = fi->fib_mtu; | 
|---|
| 1420 |  | 
|---|
| 1421 | if (likely(!mtu)) { | 
|---|
| 1422 | struct fib_nh_exception *fnhe; | 
|---|
| 1423 |  | 
|---|
| 1424 | fnhe = find_exception(nhc, daddr); | 
|---|
| 1425 | if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) | 
|---|
| 1426 | mtu = fnhe->fnhe_pmtu; | 
|---|
| 1427 | } | 
|---|
| 1428 |  | 
|---|
| 1429 | if (likely(!mtu)) | 
|---|
| 1430 | mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); | 
|---|
| 1431 |  | 
|---|
| 1432 | return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); | 
|---|
| 1433 | } | 
|---|
| 1434 |  | 
|---|
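|  | /* Bind @rt to a nexthop exception: copy the exception's gateway/PMTU state into the route and, if @do_cache, store @rt in the exception's input or output slot. Returns true only when the route was actually cached. */ | 
|---|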
| 1435 | static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | 
|---|
| 1436 | __be32 daddr, const bool do_cache) | 
|---|
| 1437 | { | 
|---|
| 1438 | bool ret = false; | 
|---|
| 1439 |  | 
|---|
| 1440 | spin_lock_bh(&fnhe_lock); | 
|---|
| 1441 |  | 
|---|
| 1442 | if (daddr == fnhe->fnhe_daddr) { | 
|---|
| 1443 | struct rtable __rcu **porig; | 
|---|
| 1444 | struct rtable *orig; | 
|---|
| 1445 | int genid = fnhe_genid(dev_net(rt->dst.dev)); | 
|---|
| 1446 |  | 
|---|
| 1447 | if (rt_is_input_route(rt)) | 
|---|
| 1448 | porig = &fnhe->fnhe_rth_input; | 
|---|
| 1449 | else | 
|---|
| 1450 | porig = &fnhe->fnhe_rth_output; | 
|---|
| 1451 | orig = rcu_dereference(*porig); | 
|---|
| 1452 |  | 
|---|
| 1453 | if (fnhe->fnhe_genid != genid) { | 
|---|
| 1454 | fnhe->fnhe_genid = genid; | 
|---|
| 1455 | fnhe->fnhe_gw = 0; | 
|---|
| 1456 | fnhe->fnhe_pmtu = 0; | 
|---|
| 1457 | fnhe->fnhe_expires = 0; | 
|---|
| 1458 | fnhe->fnhe_mtu_locked = false; | 
|---|
| 1459 | fnhe_flush_routes(fnhe); | 
|---|
| 1460 | orig = NULL; | 
|---|
| 1461 | } | 
|---|
| 1462 | fill_route_from_fnhe(rt, fnhe); | 
|---|
| 1463 | if (!rt->rt_gw4) { | 
|---|
| 1464 | rt->rt_gw4 = daddr; | 
|---|
| 1465 | rt->rt_gw_family = AF_INET; | 
|---|
| 1466 | } | 
|---|
| 1467 |  | 
|---|
| 1468 | if (do_cache) { | 
|---|
| 1469 | dst_hold(&rt->dst); | 
|---|
| 1470 | rcu_assign_pointer(*porig, rt); | 
|---|
| 1471 | if (orig) { | 
|---|
| 1472 | dst_dev_put(&orig->dst); | 
|---|
| 1473 | dst_release(&orig->dst); | 
|---|
| 1474 | } | 
|---|
| 1475 | ret = true; | 
|---|
| 1476 | } | 
|---|
| 1477 |  | 
|---|
| 1478 | fnhe->fnhe_stamp = jiffies; | 
|---|
| 1479 | } | 
|---|
| 1480 | spin_unlock_bh(&fnhe_lock); | 
|---|
| 1481 |  | 
|---|
| 1482 | return ret; | 
|---|
| 1483 | } | 
|---|
| 1484 |  | 
|---|
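|  | /* Cache @rt in the FIB nexthop: input routes go into nhc_rth_input, output routes into the per-CPU nhc_pcpu_rth_output slot. The swap uses cmpxchg(); if another CPU won the race, the new dst is released and false is returned. */ | 
|---|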
| 1485 | static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) | 
|---|
| 1486 | { | 
|---|
| 1487 | struct rtable *orig, *prev, **p; | 
|---|
| 1488 | bool ret = true; | 
|---|
| 1489 |  | 
|---|
| 1490 | if (rt_is_input_route(rt)) { | 
|---|
| 1491 | p = (struct rtable **)&nhc->nhc_rth_input; | 
|---|
| 1492 | } else { | 
|---|
| 1493 | p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); | 
|---|
| 1494 | } | 
|---|
| 1495 | orig = *p; | 
|---|
| 1496 |  | 
|---|
| 1497 | /* hold dst before doing cmpxchg() to avoid race condition | 
|---|
| 1498 | * on this dst | 
|---|
| 1499 | */ | 
|---|
| 1500 | dst_hold(&rt->dst); | 
|---|
| 1501 | prev = cmpxchg(p, orig, rt); | 
|---|
| 1502 | if (prev == orig) { | 
|---|
| 1503 | if (orig) { | 
|---|
| 1504 | rt_add_uncached_list(orig); | 
|---|
| 1505 | dst_release(&orig->dst); | 
|---|
| 1506 | } | 
|---|
| 1507 | } else { | 
|---|
| 1508 | dst_release(&rt->dst); | 
|---|
| 1509 | ret = false; | 
|---|
| 1510 | } | 
|---|
| 1511 |  | 
|---|
| 1512 | return ret; | 
|---|
| 1513 | } | 
|---|
| 1514 |  | 
|---|
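|  | /* Per-CPU list of dsts that are not cached in a FIB nexthop; rt_flush_dev() walks these lists so device references can be dropped when an interface goes away. */ | 
|---|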
| 1515 | struct uncached_list { | 
|---|
| 1516 | spinlock_t		lock; | 
|---|
| 1517 | struct list_head	head; | 
|---|
| 1518 | }; | 
|---|
| 1519 |  | 
|---|
| 1520 | static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); | 
|---|
| 1521 |  | 
|---|
| 1522 | void rt_add_uncached_list(struct rtable *rt) | 
|---|
| 1523 | { | 
|---|
| 1524 | struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); | 
|---|
| 1525 |  | 
|---|
| 1526 | rt->dst.rt_uncached_list = ul; | 
|---|
| 1527 |  | 
|---|
| 1528 | spin_lock_bh(&ul->lock); | 
|---|
| 1529 | list_add_tail(&rt->dst.rt_uncached, &ul->head); | 
|---|
| 1530 | spin_unlock_bh(&ul->lock); | 
|---|
| 1531 | } | 
|---|
| 1532 |  | 
|---|
| 1533 | void rt_del_uncached_list(struct rtable *rt) | 
|---|
| 1534 | { | 
|---|
| 1535 | if (!list_empty(&rt->dst.rt_uncached)) { | 
|---|
| 1536 | struct uncached_list *ul = rt->dst.rt_uncached_list; | 
|---|
| 1537 |  | 
|---|
| 1538 | spin_lock_bh(&ul->lock); | 
|---|
| 1539 | list_del_init(&rt->dst.rt_uncached); | 
|---|
| 1540 | spin_unlock_bh(&ul->lock); | 
|---|
| 1541 | } | 
|---|
| 1542 | } | 
|---|
| 1543 |  | 
|---|
| 1544 | static void ipv4_dst_destroy(struct dst_entry *dst) | 
|---|
| 1545 | { | 
|---|
| 1546 | ip_dst_metrics_put(dst); | 
|---|
| 1547 | rt_del_uncached_list(dst_rtable(dst)); | 
|---|
| 1548 | } | 
|---|
| 1549 |  | 
|---|
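|  | /* On device teardown, repoint every uncached dst still referencing @dev at blackhole_netdev and transfer the device reference so the real netdev can be freed. */ | 
|---|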
| 1550 | void rt_flush_dev(struct net_device *dev) | 
|---|
| 1551 | { | 
|---|
| 1552 | struct rtable *rt, *safe; | 
|---|
| 1553 | int cpu; | 
|---|
| 1554 |  | 
|---|
| 1555 | for_each_possible_cpu(cpu) { | 
|---|
| 1556 | struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); | 
|---|
| 1557 |  | 
|---|
| 1558 | if (list_empty(&ul->head)) | 
|---|
| 1559 | continue; | 
|---|
| 1560 |  | 
|---|
| 1561 | spin_lock_bh(&ul->lock); | 
|---|
| 1562 | list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { | 
|---|
| 1563 | if (rt->dst.dev != dev) | 
|---|
| 1564 | continue; | 
|---|
| 1565 | rt->dst.dev = blackhole_netdev; | 
|---|
| 1566 | netdev_ref_replace(dev, blackhole_netdev, | 
|---|
| 1567 | &rt->dst.dev_tracker, GFP_ATOMIC); | 
|---|
| 1568 | list_del_init(&rt->dst.rt_uncached); | 
|---|
| 1569 | } | 
|---|
| 1570 | spin_unlock_bh(&ul->lock); | 
|---|
| 1571 | } | 
|---|
| 1572 | } | 
|---|
| 1573 |  | 
|---|
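|  | /* A cached route is usable only while it is still marked DST_OBSOLETE_FORCE_CHK and its rt_genid matches the current per-netns generation. */ | 
|---|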
| 1574 | static bool rt_cache_valid(const struct rtable *rt) | 
|---|
| 1575 | { | 
|---|
| 1576 | return	rt && | 
|---|
| 1577 | READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK && | 
|---|
| 1578 | !rt_is_expired(rt); | 
|---|
| 1579 | } | 
|---|
| 1580 |  | 
|---|
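|  | /* Fill @rt from the FIB result: gateway, metrics, classid and lwtunnel state, then try to cache it in the nexthop exception or the nexthop itself; routes that cannot be cached are tracked on the uncached list instead. */ | 
|---|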
| 1581 | static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | 
|---|
| 1582 | const struct fib_result *res, | 
|---|
| 1583 | struct fib_nh_exception *fnhe, | 
|---|
| 1584 | struct fib_info *fi, u16 type, u32 itag, | 
|---|
| 1585 | const bool do_cache) | 
|---|
| 1586 | { | 
|---|
| 1587 | bool cached = false; | 
|---|
| 1588 |  | 
|---|
| 1589 | if (fi) { | 
|---|
| 1590 | struct fib_nh_common *nhc = FIB_RES_NHC(*res); | 
|---|
| 1591 |  | 
|---|
| 1592 | if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { | 
|---|
| 1593 | rt->rt_uses_gateway = 1; | 
|---|
| 1594 | rt->rt_gw_family = nhc->nhc_gw_family; | 
|---|
| 1595 | /* only INET and INET6 are supported */ | 
|---|
| 1596 | if (likely(nhc->nhc_gw_family == AF_INET)) | 
|---|
| 1597 | rt->rt_gw4 = nhc->nhc_gw.ipv4; | 
|---|
| 1598 | else | 
|---|
| 1599 | rt->rt_gw6 = nhc->nhc_gw.ipv6; | 
|---|
| 1600 | } | 
|---|
| 1601 |  | 
|---|
| 1602 | ip_dst_init_metrics(&rt->dst, fi->fib_metrics); | 
|---|
| 1603 |  | 
|---|
| 1604 | #ifdef CONFIG_IP_ROUTE_CLASSID | 
|---|
| 1605 | if (nhc->nhc_family == AF_INET) { | 
|---|
| 1606 | struct fib_nh *nh; | 
|---|
| 1607 |  | 
|---|
| 1608 | nh = container_of(nhc, struct fib_nh, nh_common); | 
|---|
| 1609 | rt->dst.tclassid = nh->nh_tclassid; | 
|---|
| 1610 | } | 
|---|
| 1611 | #endif | 
|---|
| 1612 | rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); | 
|---|
| 1613 | if (unlikely(fnhe)) | 
|---|
| 1614 | cached = rt_bind_exception(rt, fnhe, daddr, do_cache); | 
|---|
| 1615 | else if (do_cache) | 
|---|
| 1616 | cached = rt_cache_route(nhc, rt); | 
|---|
| 1617 | if (unlikely(!cached)) { | 
|---|
| 1618 | /* Routes we intend to cache in nexthop exception or | 
|---|
| 1619 | * FIB nexthop have the DST_NOCACHE bit clear. | 
|---|
| 1620 | * However, if we are unsuccessful at storing this | 
|---|
| 1621 | * route into the cache we really need to set it. | 
|---|
| 1622 | */ | 
|---|
| 1623 | if (!rt->rt_gw4) { | 
|---|
| 1624 | rt->rt_gw_family = AF_INET; | 
|---|
| 1625 | rt->rt_gw4 = daddr; | 
|---|
| 1626 | } | 
|---|
| 1627 | rt_add_uncached_list(rt); | 
|---|
| 1628 | } | 
|---|
| 1629 | } else | 
|---|
| 1630 | rt_add_uncached_list(rt); | 
|---|
| 1631 |  | 
|---|
| 1632 | #ifdef CONFIG_IP_ROUTE_CLASSID | 
|---|
| 1633 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 
|---|
| 1634 | set_class_tag(rt, res->tclassid); | 
|---|
| 1635 | #endif | 
|---|
| 1636 | set_class_tag(rt, itag); | 
|---|
| 1637 | #endif | 
|---|
| 1638 | } | 
|---|
| 1639 |  | 
|---|
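|  | /* Allocate and minimally initialise an IPv4 rtable; callers fill in the input/output handlers and nexthop details afterwards. */ | 
|---|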
| 1640 | struct rtable *rt_dst_alloc(struct net_device *dev, | 
|---|
| 1641 | unsigned int flags, u16 type, | 
|---|
| 1642 | bool noxfrm) | 
|---|
| 1643 | { | 
|---|
| 1644 | struct rtable *rt; | 
|---|
| 1645 |  | 
|---|
| 1646 | rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, | 
|---|
| 1647 | (noxfrm ? DST_NOXFRM : 0)); | 
|---|
| 1648 |  | 
|---|
| 1649 | if (rt) { | 
|---|
| 1650 | rt->rt_genid = rt_genid_ipv4(dev_net(dev)); | 
|---|
| 1651 | rt->rt_flags = flags; | 
|---|
| 1652 | rt->rt_type = type; | 
|---|
| 1653 | rt->rt_is_input = 0; | 
|---|
| 1654 | rt->rt_iif = 0; | 
|---|
| 1655 | rt->rt_pmtu = 0; | 
|---|
| 1656 | rt->rt_mtu_locked = 0; | 
|---|
| 1657 | rt->rt_uses_gateway = 0; | 
|---|
| 1658 | rt->rt_gw_family = 0; | 
|---|
| 1659 | rt->rt_gw4 = 0; | 
|---|
| 1660 |  | 
|---|
| 1661 | rt->dst.output = ip_output; | 
|---|
| 1662 | if (flags & RTCF_LOCAL) | 
|---|
| 1663 | rt->dst.input = ip_local_deliver; | 
|---|
| 1664 | } | 
|---|
| 1665 |  | 
|---|
| 1666 | return rt; | 
|---|
| 1667 | } | 
|---|
| 1668 | EXPORT_SYMBOL(rt_dst_alloc); | 
|---|
| 1669 |  | 
|---|
| 1670 | struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) | 
|---|
| 1671 | { | 
|---|
| 1672 | struct rtable *new_rt; | 
|---|
| 1673 |  | 
|---|
| 1674 | new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, | 
|---|
| 1675 | rt->dst.flags); | 
|---|
| 1676 |  | 
|---|
| 1677 | if (new_rt) { | 
|---|
| 1678 | new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); | 
|---|
| 1679 | new_rt->rt_flags = rt->rt_flags; | 
|---|
| 1680 | new_rt->rt_type = rt->rt_type; | 
|---|
| 1681 | new_rt->rt_is_input = rt->rt_is_input; | 
|---|
| 1682 | new_rt->rt_iif = rt->rt_iif; | 
|---|
| 1683 | new_rt->rt_pmtu = rt->rt_pmtu; | 
|---|
| 1684 | new_rt->rt_mtu_locked = rt->rt_mtu_locked; | 
|---|
| 1685 | new_rt->rt_gw_family = rt->rt_gw_family; | 
|---|
| 1686 | if (rt->rt_gw_family == AF_INET) | 
|---|
| 1687 | new_rt->rt_gw4 = rt->rt_gw4; | 
|---|
| 1688 | else if (rt->rt_gw_family == AF_INET6) | 
|---|
| 1689 | new_rt->rt_gw6 = rt->rt_gw6; | 
|---|
| 1690 |  | 
|---|
| 1691 | new_rt->dst.input = READ_ONCE(rt->dst.input); | 
|---|
| 1692 | new_rt->dst.output = READ_ONCE(rt->dst.output); | 
|---|
| 1693 | new_rt->dst.error = rt->dst.error; | 
|---|
| 1694 | new_rt->dst.lastuse = jiffies; | 
|---|
| 1695 | new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); | 
|---|
| 1696 | } | 
|---|
| 1697 | return new_rt; | 
|---|
| 1698 | } | 
|---|
| 1699 | EXPORT_SYMBOL(rt_dst_clone); | 
|---|
| 1700 |  | 
|---|
| 1701 | /* called in rcu_read_lock() section */ | 
|---|
| 1702 | enum skb_drop_reason | 
|---|
| 1703 | ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 
|---|
| 1704 | dscp_t dscp, struct net_device *dev, | 
|---|
| 1705 | struct in_device *in_dev, u32 *itag) | 
|---|
| 1706 | { | 
|---|
| 1707 | enum skb_drop_reason reason; | 
|---|
| 1708 |  | 
|---|
| 1709 | /* Primary sanity checks. */ | 
|---|
| 1710 | if (!in_dev) | 
|---|
| 1711 | return SKB_DROP_REASON_NOT_SPECIFIED; | 
|---|
| 1712 |  | 
|---|
| 1713 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) | 
|---|
| 1714 | return SKB_DROP_REASON_IP_INVALID_SOURCE; | 
|---|
| 1715 |  | 
|---|
| 1716 | if (skb->protocol != htons(ETH_P_IP)) | 
|---|
| 1717 | return SKB_DROP_REASON_INVALID_PROTO; | 
|---|
| 1718 |  | 
|---|
| 1719 | if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) | 
|---|
| 1720 | return SKB_DROP_REASON_IP_LOCALNET; | 
|---|
| 1721 |  | 
|---|
| 1722 | if (ipv4_is_zeronet(saddr)) { | 
|---|
| 1723 | if (!ipv4_is_local_multicast(daddr) && | 
|---|
| 1724 | ip_hdr(skb)->protocol != IPPROTO_IGMP) | 
|---|
| 1725 | return SKB_DROP_REASON_IP_INVALID_SOURCE; | 
|---|
| 1726 | } else { | 
|---|
| 1727 | reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, | 
|---|
| 1728 | dev, in_dev, itag); | 
|---|
| 1729 | if (reason) | 
|---|
| 1730 | return reason; | 
|---|
| 1731 | } | 
|---|
| 1732 | return SKB_NOT_DROPPED_YET; | 
|---|
| 1733 | } | 
|---|
| 1734 |  | 
|---|
| 1735 | /* called in rcu_read_lock() section */ | 
|---|
| 1736 | static enum skb_drop_reason | 
|---|
| 1737 | ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 
|---|
| 1738 | dscp_t dscp, struct net_device *dev, int our) | 
|---|
| 1739 | { | 
|---|
| 1740 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 
|---|
| 1741 | unsigned int flags = RTCF_MULTICAST; | 
|---|
| 1742 | enum skb_drop_reason reason; | 
|---|
| 1743 | struct rtable *rth; | 
|---|
| 1744 | u32 itag = 0; | 
|---|
| 1745 |  | 
|---|
| 1746 | reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev, | 
|---|
| 1747 | &itag); | 
|---|
| 1748 | if (reason) | 
|---|
| 1749 | return reason; | 
|---|
| 1750 |  | 
|---|
| 1751 | if (our) | 
|---|
| 1752 | flags |= RTCF_LOCAL; | 
|---|
| 1753 |  | 
|---|
| 1754 | if (IN_DEV_ORCONF(in_dev, NOPOLICY)) | 
|---|
| 1755 | IPCB(skb)->flags |= IPSKB_NOPOLICY; | 
|---|
| 1756 |  | 
|---|
| 1757 | rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, | 
|---|
| 1758 | false); | 
|---|
| 1759 | if (!rth) | 
|---|
| 1760 | return SKB_DROP_REASON_NOMEM; | 
|---|
| 1761 |  | 
|---|
| 1762 | #ifdef CONFIG_IP_ROUTE_CLASSID | 
|---|
| 1763 | rth->dst.tclassid = itag; | 
|---|
| 1764 | #endif | 
|---|
| 1765 | rth->dst.output = ip_rt_bug; | 
|---|
| 1766 | rth->rt_is_input= 1; | 
|---|
| 1767 |  | 
|---|
| 1768 | #ifdef CONFIG_IP_MROUTE | 
|---|
| 1769 | if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) | 
|---|
| 1770 | rth->dst.input = ip_mr_input; | 
|---|
| 1771 | #endif | 
|---|
| 1772 | RT_CACHE_STAT_INC(in_slow_mc); | 
|---|
| 1773 |  | 
|---|
| 1774 | skb_dst_drop(skb); | 
|---|
| 1775 | skb_dst_set(skb, &rth->dst); | 
|---|
| 1776 | return SKB_NOT_DROPPED_YET; | 
|---|
| 1777 | } | 
|---|
| 1778 |  | 
|---|
| 1779 |  | 
|---|
| 1780 | static void ip_handle_martian_source(struct net_device *dev, | 
|---|
| 1781 | struct in_device *in_dev, | 
|---|
| 1782 | struct sk_buff *skb, | 
|---|
| 1783 | __be32 daddr, | 
|---|
| 1784 | __be32 saddr) | 
|---|
| 1785 | { | 
|---|
| 1786 | RT_CACHE_STAT_INC(in_martian_src); | 
|---|
| 1787 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 
|---|
| 1788 | if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { | 
|---|
| 1789 | /* | 
|---|
| 1790 | *	RFC1812 recommendation, if source is martian, | 
|---|
| 1791 | *	the only hint is MAC header. | 
|---|
| 1792 | */ | 
|---|
| 1793 | pr_warn("martian source %pI4 from %pI4, on dev %s\n", | 
|---|
| 1794 | &daddr, &saddr, dev->name); | 
|---|
| 1795 | if (dev->hard_header_len && skb_mac_header_was_set(skb)) { | 
|---|
| 1796 | print_hex_dump(KERN_WARNING, "ll header: ", | 
|---|
| 1797 | DUMP_PREFIX_OFFSET, 16, 1, | 
|---|
| 1798 | skb_mac_header(skb), | 
|---|
| 1799 | dev->hard_header_len, false); | 
|---|
| 1800 | } | 
|---|
| 1801 | } | 
|---|
| 1802 | #endif | 
|---|
| 1803 | } | 
|---|
| 1804 |  | 
|---|
| 1805 | /* called in rcu_read_lock() section */ | 
|---|
| 1806 | static enum skb_drop_reason | 
|---|
| 1807 | __mkroute_input(struct sk_buff *skb, const struct fib_result *res, | 
|---|
| 1808 | struct in_device *in_dev, __be32 daddr, | 
|---|
| 1809 | __be32 saddr, dscp_t dscp) | 
|---|
| 1810 | { | 
|---|
| 1811 | enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; | 
|---|
| 1812 | struct fib_nh_common *nhc = FIB_RES_NHC(*res); | 
|---|
| 1813 | struct net_device *dev = nhc->nhc_dev; | 
|---|
| 1814 | struct fib_nh_exception *fnhe; | 
|---|
| 1815 | struct rtable *rth; | 
|---|
| 1816 | int err; | 
|---|
| 1817 | struct in_device *out_dev; | 
|---|
| 1818 | bool do_cache; | 
|---|
| 1819 | u32 itag = 0; | 
|---|
| 1820 |  | 
|---|
| 1821 | /* get a working reference to the output device */ | 
|---|
| 1822 | out_dev = __in_dev_get_rcu(dev); | 
|---|
| 1823 | if (!out_dev) { | 
|---|
| 1824 | net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); | 
|---|
| 1825 | return reason; | 
|---|
| 1826 | } | 
|---|
| 1827 |  | 
|---|
| 1828 | err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), | 
|---|
| 1829 | in_dev->dev, in_dev, &itag); | 
|---|
| 1830 | if (err < 0) { | 
|---|
| 1831 | reason = -err; | 
|---|
| 1832 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 
|---|
| 1833 | saddr); | 
|---|
| 1834 |  | 
|---|
| 1835 | goto cleanup; | 
|---|
| 1836 | } | 
|---|
| 1837 |  | 
|---|
| 1838 | do_cache = res->fi && !itag; | 
|---|
| 1839 | if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && | 
|---|
| 1840 | skb->protocol == htons(ETH_P_IP)) { | 
|---|
| 1841 | __be32 gw; | 
|---|
| 1842 |  | 
|---|
| 1843 | gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; | 
|---|
| 1844 | if (IN_DEV_SHARED_MEDIA(out_dev) || | 
|---|
| 1845 | inet_addr_onlink(out_dev, saddr, gw)) | 
|---|
| 1846 | IPCB(skb)->flags |= IPSKB_DOREDIRECT; | 
|---|
| 1847 | } | 
|---|
| 1848 |  | 
|---|
| 1849 | if (skb->protocol != htons(ETH_P_IP)) { | 
|---|
| 1850 | /* Not IP (i.e. ARP). Do not create route, if it is | 
|---|
| 1851 | * invalid for proxy arp. DNAT routes are always valid. | 
|---|
| 1852 | * | 
|---|
| 1853 | * The proxy arp feature has been extended to allow ARP | 
|---|
| 1854 | * replies back to the same interface, to support | 
|---|
| 1855 | * Private VLAN switch technologies. See arp.c. | 
|---|
| 1856 | */ | 
|---|
| 1857 | if (out_dev == in_dev && | 
|---|
| 1858 | IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { | 
|---|
| 1859 | reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE; | 
|---|
| 1860 | goto cleanup; | 
|---|
| 1861 | } | 
|---|
| 1862 | } | 
|---|
| 1863 |  | 
|---|
| 1864 | if (IN_DEV_ORCONF(in_dev, NOPOLICY)) | 
|---|
| 1865 | IPCB(skb)->flags |= IPSKB_NOPOLICY; | 
|---|
| 1866 |  | 
|---|
| 1867 | fnhe = find_exception(nhc, daddr); | 
|---|
| 1868 | if (do_cache) { | 
|---|
| 1869 | if (fnhe) | 
|---|
| 1870 | rth = rcu_dereference(fnhe->fnhe_rth_input); | 
|---|
| 1871 | else | 
|---|
| 1872 | rth = rcu_dereference(nhc->nhc_rth_input); | 
|---|
| 1873 | if (rt_cache_valid(rth)) { | 
|---|
| 1874 | skb_dst_set_noref(skb, &rth->dst); | 
|---|
| 1875 | goto out; | 
|---|
| 1876 | } | 
|---|
| 1877 | } | 
|---|
| 1878 |  | 
|---|
| 1879 | rth = rt_dst_alloc(out_dev->dev, 0, res->type, | 
|---|
| 1880 | IN_DEV_ORCONF(out_dev, NOXFRM)); | 
|---|
| 1881 | if (!rth) { | 
|---|
| 1882 | reason = SKB_DROP_REASON_NOMEM; | 
|---|
| 1883 | goto cleanup; | 
|---|
| 1884 | } | 
|---|
| 1885 |  | 
|---|
| 1886 | rth->rt_is_input = 1; | 
|---|
| 1887 | RT_CACHE_STAT_INC(in_slow_tot); | 
|---|
| 1888 |  | 
|---|
| 1889 | rth->dst.input = ip_forward; | 
|---|
| 1890 |  | 
|---|
| 1891 | rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, | 
|---|
| 1892 | do_cache); | 
|---|
| 1893 | lwtunnel_set_redirect(&rth->dst); | 
|---|
| 1894 | skb_dst_set(skb, &rth->dst); | 
|---|
| 1895 | out: | 
|---|
| 1896 | reason = SKB_NOT_DROPPED_YET; | 
|---|
| 1897 | cleanup: | 
|---|
| 1898 | return reason; | 
|---|
| 1899 | } | 
|---|
| 1900 |  | 
|---|
| 1901 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 
|---|
| 1902 | /* To make ICMP packets follow the right flow, the multipath hash is | 
|---|
| 1903 | * calculated from the inner IP addresses. | 
|---|
| 1904 | */ | 
|---|
| 1905 | static void ip_multipath_l3_keys(const struct sk_buff *skb, | 
|---|
| 1906 | struct flow_keys *hash_keys) | 
|---|
| 1907 | { | 
|---|
| 1908 | const struct iphdr *outer_iph = ip_hdr(skb); | 
|---|
| 1909 | const struct iphdr *key_iph = outer_iph; | 
|---|
| 1910 | const struct iphdr *inner_iph; | 
|---|
| 1911 | const struct icmphdr *icmph; | 
|---|
| 1912 | struct iphdr _inner_iph; | 
|---|
| 1913 | struct icmphdr _icmph; | 
|---|
| 1914 |  | 
|---|
| 1915 | if (likely(outer_iph->protocol != IPPROTO_ICMP)) | 
|---|
| 1916 | goto out; | 
|---|
| 1917 |  | 
|---|
| 1918 | if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) | 
|---|
| 1919 | goto out; | 
|---|
| 1920 |  | 
|---|
| 1921 | icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), | 
|---|
| 1922 | &_icmph); | 
|---|
| 1923 | if (!icmph) | 
|---|
| 1924 | goto out; | 
|---|
| 1925 |  | 
|---|
| 1926 | if (!icmp_is_err(icmph->type)) | 
|---|
| 1927 | goto out; | 
|---|
| 1928 |  | 
|---|
| 1929 | inner_iph = skb_header_pointer(skb, | 
|---|
| 1930 | outer_iph->ihl * 4 + sizeof(_icmph), | 
|---|
| 1931 | sizeof(_inner_iph), &_inner_iph); | 
|---|
| 1932 | if (!inner_iph) | 
|---|
| 1933 | goto out; | 
|---|
| 1934 |  | 
|---|
| 1935 | key_iph = inner_iph; | 
|---|
| 1936 | out: | 
|---|
| 1937 | hash_keys->addrs.v4addrs.src = key_iph->saddr; | 
|---|
| 1938 | hash_keys->addrs.v4addrs.dst = key_iph->daddr; | 
|---|
| 1939 | } | 
|---|
| 1940 |  | 
|---|
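|  | /* Custom (policy 3) hashing: hash only the outer-header fields enabled in the fib_multipath_hash_fields sysctl and report whether an encapsulated inner packet was seen. */ | 
|---|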
| 1941 | static u32 fib_multipath_custom_hash_outer(const struct net *net, | 
|---|
| 1942 | const struct sk_buff *skb, | 
|---|
| 1943 | bool *p_has_inner) | 
|---|
| 1944 | { | 
|---|
| 1945 | u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); | 
|---|
| 1946 | struct flow_keys keys, hash_keys; | 
|---|
| 1947 |  | 
|---|
| 1948 | if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) | 
|---|
| 1949 | return 0; | 
|---|
| 1950 |  | 
|---|
| 1951 | memset(&hash_keys, 0, sizeof(hash_keys)); | 
|---|
| 1952 | skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); | 
|---|
| 1953 |  | 
|---|
| 1954 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 1955 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) | 
|---|
| 1956 | hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; | 
|---|
| 1957 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) | 
|---|
| 1958 | hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; | 
|---|
| 1959 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) | 
|---|
| 1960 | hash_keys.basic.ip_proto = keys.basic.ip_proto; | 
|---|
| 1961 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) | 
|---|
| 1962 | hash_keys.ports.src = keys.ports.src; | 
|---|
| 1963 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) | 
|---|
| 1964 | hash_keys.ports.dst = keys.ports.dst; | 
|---|
| 1965 |  | 
|---|
| 1966 | *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); | 
|---|
| 1967 | return fib_multipath_hash_from_keys(net, &hash_keys); | 
|---|
| 1968 | } | 
|---|
| 1969 |  | 
|---|
| 1970 | static u32 fib_multipath_custom_hash_inner(const struct net *net, | 
|---|
| 1971 | const struct sk_buff *skb, | 
|---|
| 1972 | bool has_inner) | 
|---|
| 1973 | { | 
|---|
| 1974 | u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); | 
|---|
| 1975 | struct flow_keys keys, hash_keys; | 
|---|
| 1976 |  | 
|---|
| 1977 | /* We assume the packet carries an encapsulation, but if none was | 
|---|
| 1978 | * encountered during dissection of the outer flow, then there is no | 
|---|
| 1979 | * point in calling the flow dissector again. | 
|---|
| 1980 | */ | 
|---|
| 1981 | if (!has_inner) | 
|---|
| 1982 | return 0; | 
|---|
| 1983 |  | 
|---|
| 1984 | if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) | 
|---|
| 1985 | return 0; | 
|---|
| 1986 |  | 
|---|
| 1987 | memset(&hash_keys, 0, sizeof(hash_keys)); | 
|---|
| 1988 | skb_flow_dissect_flow_keys(skb, &keys, 0); | 
|---|
| 1989 |  | 
|---|
| 1990 | if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) | 
|---|
| 1991 | return 0; | 
|---|
| 1992 |  | 
|---|
| 1993 | if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { | 
|---|
| 1994 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 1995 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) | 
|---|
| 1996 | hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; | 
|---|
| 1997 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) | 
|---|
| 1998 | hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; | 
|---|
| 1999 | } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { | 
|---|
| 2000 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; | 
|---|
| 2001 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) | 
|---|
| 2002 | hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; | 
|---|
| 2003 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) | 
|---|
| 2004 | hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; | 
|---|
| 2005 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) | 
|---|
| 2006 | hash_keys.tags.flow_label = keys.tags.flow_label; | 
|---|
| 2007 | } | 
|---|
| 2008 |  | 
|---|
| 2009 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) | 
|---|
| 2010 | hash_keys.basic.ip_proto = keys.basic.ip_proto; | 
|---|
| 2011 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) | 
|---|
| 2012 | hash_keys.ports.src = keys.ports.src; | 
|---|
| 2013 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) | 
|---|
| 2014 | hash_keys.ports.dst = keys.ports.dst; | 
|---|
| 2015 |  | 
|---|
| 2016 | return fib_multipath_hash_from_keys(net, &hash_keys); | 
|---|
| 2017 | } | 
|---|
| 2018 |  | 
|---|
| 2019 | static u32 fib_multipath_custom_hash_skb(const struct net *net, | 
|---|
| 2020 | const struct sk_buff *skb) | 
|---|
| 2021 | { | 
|---|
| 2022 | u32 mhash, mhash_inner; | 
|---|
| 2023 | bool has_inner = true; | 
|---|
| 2024 |  | 
|---|
| 2025 | mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); | 
|---|
| 2026 | mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); | 
|---|
| 2027 |  | 
|---|
| 2028 | return jhash_2words(mhash, mhash_inner, 0); | 
|---|
| 2029 | } | 
|---|
| 2030 |  | 
|---|
| 2031 | static u32 fib_multipath_custom_hash_fl4(const struct net *net, | 
|---|
| 2032 | const struct flowi4 *fl4) | 
|---|
| 2033 | { | 
|---|
| 2034 | u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); | 
|---|
| 2035 | struct flow_keys hash_keys; | 
|---|
| 2036 |  | 
|---|
| 2037 | if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) | 
|---|
| 2038 | return 0; | 
|---|
| 2039 |  | 
|---|
| 2040 | memset(&hash_keys, 0, sizeof(hash_keys)); | 
|---|
| 2041 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 2042 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) | 
|---|
| 2043 | hash_keys.addrs.v4addrs.src = fl4->saddr; | 
|---|
| 2044 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) | 
|---|
| 2045 | hash_keys.addrs.v4addrs.dst = fl4->daddr; | 
|---|
| 2046 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) | 
|---|
| 2047 | hash_keys.basic.ip_proto = fl4->flowi4_proto; | 
|---|
| 2048 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) { | 
|---|
| 2049 | if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) | 
|---|
| 2050 | hash_keys.ports.src = (__force __be16)get_random_u16(); | 
|---|
| 2051 | else | 
|---|
| 2052 | hash_keys.ports.src = fl4->fl4_sport; | 
|---|
| 2053 | } | 
|---|
| 2054 | if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) | 
|---|
| 2055 | hash_keys.ports.dst = fl4->fl4_dport; | 
|---|
| 2056 |  | 
|---|
| 2057 | return fib_multipath_hash_from_keys(net, &hash_keys); | 
|---|
| 2058 | } | 
|---|
| 2059 |  | 
|---|
| 2060 | /* if skb is set it will be used and fl4 can be NULL */ | 
|---|
| 2061 | int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, | 
|---|
| 2062 | const struct sk_buff *skb, struct flow_keys *flkeys) | 
|---|
| 2063 | { | 
|---|
| 2064 | u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; | 
|---|
| 2065 | struct flow_keys hash_keys; | 
|---|
| 2066 | u32 mhash = 0; | 
|---|
| 2067 |  | 
|---|
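|  | /* Multipath hash policy (fib_multipath_hash_policy sysctl), roughly: 0 = L3 addresses (ICMP errors hash on the embedded inner header), 1 = L4 five-tuple, 2 = L3 of the inner packet when one is present, 3 = the custom field set from fib_multipath_hash_fields. */ | 
|---|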
| 2068 | switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { | 
|---|
| 2069 | case 0: | 
|---|
| 2070 | memset(&hash_keys, 0, sizeof(hash_keys)); | 
|---|
| 2071 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 2072 | if (skb) { | 
|---|
| 2073 | ip_multipath_l3_keys(skb, &hash_keys); | 
|---|
| 2074 | } else { | 
|---|
| 2075 | hash_keys.addrs.v4addrs.src = fl4->saddr; | 
|---|
| 2076 | hash_keys.addrs.v4addrs.dst = fl4->daddr; | 
|---|
| 2077 | } | 
|---|
| 2078 | mhash = fib_multipath_hash_from_keys(net, &hash_keys); | 
|---|
| 2079 | break; | 
|---|
| 2080 | case 1: | 
|---|
| 2081 | /* skb is currently provided only when forwarding */ | 
|---|
| 2082 | if (skb) { | 
|---|
| 2083 | unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; | 
|---|
| 2084 | struct flow_keys keys; | 
|---|
| 2085 |  | 
|---|
| 2086 | /* short-circuit if we already have L4 hash present */ | 
|---|
| 2087 | if (skb->l4_hash) | 
|---|
| 2088 | return skb_get_hash_raw(skb) >> 1; | 
|---|
| 2089 |  | 
|---|
| 2090 | memset(&hash_keys, 0, sizeof(hash_keys)); | 
|---|
| 2091 |  | 
|---|
| 2092 | if (!flkeys) { | 
|---|
| 2093 | skb_flow_dissect_flow_keys(skb, &keys, flag); | 
|---|
| 2094 | flkeys = &keys; | 
|---|
| 2095 | } | 
|---|
| 2096 |  | 
|---|
| 2097 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 2098 | hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; | 
|---|
| 2099 | hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; | 
|---|
| 2100 | hash_keys.ports.src = flkeys->ports.src; | 
|---|
| 2101 | hash_keys.ports.dst = flkeys->ports.dst; | 
|---|
| 2102 | hash_keys.basic.ip_proto = flkeys->basic.ip_proto; | 
|---|
| 2103 | } else { | 
|---|
| 2104 | memset(&hash_keys, 0, sizeof(hash_keys)); | 
|---|
| 2105 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 2106 | hash_keys.addrs.v4addrs.src = fl4->saddr; | 
|---|
| 2107 | hash_keys.addrs.v4addrs.dst = fl4->daddr; | 
|---|
| 2108 | if (fl4->flowi4_flags & FLOWI_FLAG_ANY_SPORT) | 
|---|
| 2109 | hash_keys.ports.src = (__force __be16)get_random_u16(); | 
|---|
| 2110 | else | 
|---|
| 2111 | hash_keys.ports.src = fl4->fl4_sport; | 
|---|
| 2112 | hash_keys.ports.dst = fl4->fl4_dport; | 
|---|
| 2113 | hash_keys.basic.ip_proto = fl4->flowi4_proto; | 
|---|
| 2114 | } | 
|---|
| 2115 | mhash = fib_multipath_hash_from_keys(net, &hash_keys); | 
|---|
| 2116 | break; | 
|---|
| 2117 | case 2: | 
|---|
| 2118 | memset(&hash_keys, 0, sizeof(hash_keys)); | 
|---|
| 2119 | /* skb is currently provided only when forwarding */ | 
|---|
| 2120 | if (skb) { | 
|---|
| 2121 | struct flow_keys keys; | 
|---|
| 2122 |  | 
|---|
| 2123 | skb_flow_dissect_flow_keys(skb, &keys, 0); | 
|---|
| 2124 | /* Inner can be v4 or v6 */ | 
|---|
| 2125 | if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { | 
|---|
| 2126 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 2127 | hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; | 
|---|
| 2128 | hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; | 
|---|
| 2129 | } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { | 
|---|
| 2130 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; | 
|---|
| 2131 | hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; | 
|---|
| 2132 | hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; | 
|---|
| 2133 | hash_keys.tags.flow_label = keys.tags.flow_label; | 
|---|
| 2134 | hash_keys.basic.ip_proto = keys.basic.ip_proto; | 
|---|
| 2135 | } else { | 
|---|
| 2136 | /* Same as case 0 */ | 
|---|
| 2137 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 2138 | ip_multipath_l3_keys(skb, &hash_keys); | 
|---|
| 2139 | } | 
|---|
| 2140 | } else { | 
|---|
| 2141 | /* Same as case 0 */ | 
|---|
| 2142 | hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; | 
|---|
| 2143 | hash_keys.addrs.v4addrs.src = fl4->saddr; | 
|---|
| 2144 | hash_keys.addrs.v4addrs.dst = fl4->daddr; | 
|---|
| 2145 | } | 
|---|
| 2146 | mhash = fib_multipath_hash_from_keys(net, &hash_keys); | 
|---|
| 2147 | break; | 
|---|
| 2148 | case 3: | 
|---|
| 2149 | if (skb) | 
|---|
| 2150 | mhash = fib_multipath_custom_hash_skb(net, skb); | 
|---|
| 2151 | else | 
|---|
| 2152 | mhash = fib_multipath_custom_hash_fl4(net, fl4); | 
|---|
| 2153 | break; | 
|---|
| 2154 | } | 
|---|
| 2155 |  | 
|---|
| 2156 | if (multipath_hash) | 
|---|
| 2157 | mhash = jhash_2words(mhash, multipath_hash, 0); | 
|---|
| 2158 |  | 
|---|
| 2159 | return mhash >> 1; | 
|---|
| 2160 | } | 
|---|
| 2161 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ | 
|---|
| 2162 |  | 
|---|
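|  | /* Pick a multipath nexthop for the incoming packet (when CONFIG_IP_ROUTE_MULTIPATH is enabled), then build and attach the forwarding route via __mkroute_input(). */ | 
|---|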
| 2163 | static enum skb_drop_reason | 
|---|
| 2164 | ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, | 
|---|
| 2165 | struct in_device *in_dev, __be32 daddr, | 
|---|
| 2166 | __be32 saddr, dscp_t dscp, struct flow_keys *hkeys) | 
|---|
| 2167 | { | 
|---|
| 2168 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 
|---|
| 2169 | if (res->fi && fib_info_num_path(res->fi) > 1) { | 
|---|
| 2170 | int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); | 
|---|
| 2171 |  | 
|---|
| 2172 | fib_select_multipath(res, h, NULL); | 
|---|
| 2173 | IPCB(skb)->flags |= IPSKB_MULTIPATH; | 
|---|
| 2174 | } | 
|---|
| 2175 | #endif | 
|---|
| 2176 |  | 
|---|
| 2177 | /* create a routing cache entry */ | 
|---|
| 2178 | return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp); | 
|---|
| 2179 | } | 
|---|
| 2180 |  | 
|---|
| 2181 | /* Implements all the saddr-related checks as ip_route_input_slow(), | 
|---|
| 2182 | * assuming daddr is valid and the destination is not a local broadcast one. | 
|---|
| 2183 | * Uses the provided hint instead of performing a route lookup. | 
|---|
| 2184 | */ | 
|---|
| 2185 | enum skb_drop_reason | 
|---|
| 2186 | ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 
|---|
| 2187 | dscp_t dscp, struct net_device *dev, | 
|---|
| 2188 | const struct sk_buff *hint) | 
|---|
| 2189 | { | 
|---|
| 2190 | enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; | 
|---|
| 2191 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 
|---|
| 2192 | struct rtable *rt = skb_rtable(hint); | 
|---|
| 2193 | struct net *net = dev_net(dev); | 
|---|
| 2194 | u32 tag = 0; | 
|---|
| 2195 |  | 
|---|
| 2196 | if (!in_dev) | 
|---|
| 2197 | return reason; | 
|---|
| 2198 |  | 
|---|
| 2199 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { | 
|---|
| 2200 | reason = SKB_DROP_REASON_IP_INVALID_SOURCE; | 
|---|
| 2201 | goto martian_source; | 
|---|
| 2202 | } | 
|---|
| 2203 |  | 
|---|
| 2204 | if (ipv4_is_zeronet(saddr)) { | 
|---|
| 2205 | reason = SKB_DROP_REASON_IP_INVALID_SOURCE; | 
|---|
| 2206 | goto martian_source; | 
|---|
| 2207 | } | 
|---|
| 2208 |  | 
|---|
| 2209 | if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { | 
|---|
| 2210 | reason = SKB_DROP_REASON_IP_LOCALNET; | 
|---|
| 2211 | goto martian_source; | 
|---|
| 2212 | } | 
|---|
| 2213 |  | 
|---|
| 2214 | if (!(rt->rt_flags & RTCF_LOCAL)) | 
|---|
| 2215 | goto skip_validate_source; | 
|---|
| 2216 |  | 
|---|
| 2217 | reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev, | 
|---|
| 2218 | in_dev, &tag); | 
|---|
| 2219 | if (reason) | 
|---|
| 2220 | goto martian_source; | 
|---|
| 2221 |  | 
|---|
| 2222 | skip_validate_source: | 
|---|
| 2223 | skb_dst_copy(skb, hint); | 
|---|
| 2224 | return SKB_NOT_DROPPED_YET; | 
|---|
| 2225 |  | 
|---|
| 2226 | martian_source: | 
|---|
| 2227 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); | 
|---|
| 2228 | return reason; | 
|---|
| 2229 | } | 
|---|
| 2230 |  | 
|---|
| 2231 | /* get device for dst_alloc with local routes */ | 
|---|
| 2232 | static struct net_device *ip_rt_get_dev(struct net *net, | 
|---|
| 2233 | const struct fib_result *res) | 
|---|
| 2234 | { | 
|---|
| 2235 | struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; | 
|---|
| 2236 | struct net_device *dev = NULL; | 
|---|
| 2237 |  | 
|---|
| 2238 | if (nhc) | 
|---|
| 2239 | dev = l3mdev_master_dev_rcu(nhc->nhc_dev); | 
|---|
| 2240 |  | 
|---|
| 2241 | return dev ? : net->loopback_dev; | 
|---|
| 2242 | } | 
|---|
| 2243 |  | 
|---|
| 2244 | /* | 
|---|
| 2245 | *	NOTE. We drop all the packets that have local source | 
|---|
| 2246 | *	addresses, because every properly looped back packet | 
|---|
| 2247 | *	must have correct destination already attached by output routine. | 
|---|
| 2248 | *	Changes in the enforced policies must be applied also to | 
|---|
| 2249 | *	ip_route_use_hint(). | 
|---|
| 2250 | * | 
|---|
| 2251 | *	Such approach solves two big problems: | 
|---|
| 2252 | *	1. Non-simplex devices are handled properly. | 
|---|
| 2253 | *	2. IP spoofing attempts are filtered with a 100% guarantee. | 
|---|
| 2254 | *	called with rcu_read_lock() | 
|---|
| 2255 | */ | 
|---|
| 2256 |  | 
|---|
| 2257 | static enum skb_drop_reason | 
|---|
| 2258 | ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 
|---|
| 2259 | dscp_t dscp, struct net_device *dev, | 
|---|
| 2260 | struct fib_result *res) | 
|---|
| 2261 | { | 
|---|
| 2262 | enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; | 
|---|
| 2263 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 
|---|
| 2264 | struct flow_keys *flkeys = NULL, _flkeys; | 
|---|
| 2265 | struct net    *net = dev_net(dev); | 
|---|
| 2266 | struct ip_tunnel_info *tun_info; | 
|---|
| 2267 | int		err = -EINVAL; | 
|---|
| 2268 | unsigned int	flags = 0; | 
|---|
| 2269 | u32		itag = 0; | 
|---|
| 2270 | struct rtable	*rth; | 
|---|
| 2271 | struct flowi4	fl4; | 
|---|
| 2272 | bool do_cache = true; | 
|---|
| 2273 |  | 
|---|
| 2274 | /* IP on this device is disabled. */ | 
|---|
| 2275 |  | 
|---|
| 2276 | if (!in_dev) | 
|---|
| 2277 | goto out; | 
|---|
| 2278 |  | 
|---|
| 2279 | /* Check for the weirdest martians, which may not be detected | 
|---|
| 2280 | * by fib_lookup. | 
|---|
| 2281 | */ | 
|---|
| 2282 |  | 
|---|
| 2283 | tun_info = skb_tunnel_info(skb); | 
|---|
| 2284 | if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) | 
|---|
| 2285 | fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id; | 
|---|
| 2286 | else | 
|---|
| 2287 | fl4.flowi4_tun_key.tun_id = 0; | 
|---|
| 2288 | skb_dst_drop(skb); | 
|---|
| 2289 |  | 
|---|
| 2290 | if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { | 
|---|
| 2291 | reason = SKB_DROP_REASON_IP_INVALID_SOURCE; | 
|---|
| 2292 | goto martian_source; | 
|---|
| 2293 | } | 
|---|
| 2294 |  | 
|---|
| 2295 | res->fi = NULL; | 
|---|
| 2296 | res->table = NULL; | 
|---|
| 2297 | if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) | 
|---|
| 2298 | goto brd_input; | 
|---|
| 2299 |  | 
|---|
| 2300 | /* Accept zero addresses only to limited broadcast; | 
|---|
| 2301 | * I do not even know whether to fix it or not. Waiting for complaints :-) | 
|---|
| 2302 | */ | 
|---|
| 2303 | if (ipv4_is_zeronet(saddr)) { | 
|---|
| 2304 | reason = SKB_DROP_REASON_IP_INVALID_SOURCE; | 
|---|
| 2305 | goto martian_source; | 
|---|
| 2306 | } | 
|---|
| 2307 |  | 
|---|
| 2308 | if (ipv4_is_zeronet(daddr)) { | 
|---|
| 2309 | reason = SKB_DROP_REASON_IP_INVALID_DEST; | 
|---|
| 2310 | goto martian_destination; | 
|---|
| 2311 | } | 
|---|
| 2312 |  | 
|---|
| 2313 | /* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), | 
|---|
| 2314 | * and calls it at most once when daddr and/or saddr are loopback addresses | 
|---|
| 2315 | */ | 
|---|
| 2316 | if (ipv4_is_loopback(daddr)) { | 
|---|
| 2317 | if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { | 
|---|
| 2318 | reason = SKB_DROP_REASON_IP_LOCALNET; | 
|---|
| 2319 | goto martian_destination; | 
|---|
| 2320 | } | 
|---|
| 2321 | } else if (ipv4_is_loopback(saddr)) { | 
|---|
| 2322 | if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { | 
|---|
| 2323 | reason = SKB_DROP_REASON_IP_LOCALNET; | 
|---|
| 2324 | goto martian_source; | 
|---|
| 2325 | } | 
|---|
| 2326 | } | 
|---|
| 2327 |  | 
|---|
| 2328 | /* | 
|---|
| 2329 | *	Now we are ready to route packet. | 
|---|
| 2330 | */ | 
|---|
| 2331 | fl4.flowi4_l3mdev = 0; | 
|---|
| 2332 | fl4.flowi4_oif = 0; | 
|---|
| 2333 | fl4.flowi4_iif = dev->ifindex; | 
|---|
| 2334 | fl4.flowi4_mark = skb->mark; | 
|---|
| 2335 | fl4.flowi4_dscp = dscp; | 
|---|
| 2336 | fl4.flowi4_scope = RT_SCOPE_UNIVERSE; | 
|---|
| 2337 | fl4.flowi4_flags = 0; | 
|---|
| 2338 | fl4.daddr = daddr; | 
|---|
| 2339 | fl4.saddr = saddr; | 
|---|
| 2340 | fl4.flowi4_uid = sock_net_uid(net, NULL); | 
|---|
| 2341 | fl4.flowi4_multipath_hash = 0; | 
|---|
| 2342 |  | 
|---|
| 2343 | if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { | 
|---|
| 2344 | flkeys = &_flkeys; | 
|---|
| 2345 | } else { | 
|---|
| 2346 | fl4.flowi4_proto = 0; | 
|---|
| 2347 | fl4.fl4_sport = 0; | 
|---|
| 2348 | fl4.fl4_dport = 0; | 
|---|
| 2349 | } | 
|---|
| 2350 |  | 
|---|
| 2351 | err = fib_lookup(net, &fl4, res, 0); | 
|---|
| 2352 | if (err != 0) { | 
|---|
| 2353 | if (!IN_DEV_FORWARD(in_dev)) | 
|---|
| 2354 | err = -EHOSTUNREACH; | 
|---|
| 2355 | goto no_route; | 
|---|
| 2356 | } | 
|---|
| 2357 |  | 
|---|
| 2358 | if (res->type == RTN_BROADCAST) { | 
|---|
| 2359 | if (IN_DEV_BFORWARD(in_dev)) | 
|---|
| 2360 | goto make_route; | 
|---|
| 2361 | /* do not cache if bc_forwarding is enabled */ | 
|---|
| 2362 | if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING)) | 
|---|
| 2363 | do_cache = false; | 
|---|
| 2364 | goto brd_input; | 
|---|
| 2365 | } | 
|---|
| 2366 |  | 
|---|
| 2367 | err = -EINVAL; | 
|---|
| 2368 | if (res->type == RTN_LOCAL) { | 
|---|
| 2369 | reason = fib_validate_source_reason(skb, saddr, daddr, dscp, | 
|---|
| 2370 | 0, dev, in_dev, &itag); | 
|---|
| 2371 | if (reason) | 
|---|
| 2372 | goto martian_source; | 
|---|
| 2373 | goto local_input; | 
|---|
| 2374 | } | 
|---|
| 2375 |  | 
|---|
| 2376 | if (!IN_DEV_FORWARD(in_dev)) { | 
|---|
| 2377 | err = -EHOSTUNREACH; | 
|---|
| 2378 | goto no_route; | 
|---|
| 2379 | } | 
|---|
| 2380 | if (res->type != RTN_UNICAST) { | 
|---|
| 2381 | reason = SKB_DROP_REASON_IP_INVALID_DEST; | 
|---|
| 2382 | goto martian_destination; | 
|---|
| 2383 | } | 
|---|
| 2384 |  | 
|---|
| 2385 | make_route: | 
|---|
| 2386 | reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, | 
|---|
| 2387 | flkeys); | 
|---|
| 2388 |  | 
|---|
| 2389 | out: | 
|---|
| 2390 | return reason; | 
|---|
| 2391 |  | 
|---|
| 2392 | brd_input: | 
|---|
| 2393 | if (skb->protocol != htons(ETH_P_IP)) { | 
|---|
| 2394 | reason = SKB_DROP_REASON_INVALID_PROTO; | 
|---|
| 2395 | goto out; | 
|---|
| 2396 | } | 
|---|
| 2397 |  | 
|---|
| 2398 | if (!ipv4_is_zeronet(saddr)) { | 
|---|
| 2399 | reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, | 
|---|
| 2400 | dev, in_dev, &itag); | 
|---|
| 2401 | if (reason) | 
|---|
| 2402 | goto martian_source; | 
|---|
| 2403 | } | 
|---|
| 2404 | flags |= RTCF_BROADCAST; | 
|---|
| 2405 | res->type = RTN_BROADCAST; | 
|---|
| 2406 | RT_CACHE_STAT_INC(in_brd); | 
|---|
| 2407 |  | 
|---|
| 2408 | local_input: | 
|---|
| 2409 | if (IN_DEV_ORCONF(in_dev, NOPOLICY)) | 
|---|
| 2410 | IPCB(skb)->flags |= IPSKB_NOPOLICY; | 
|---|
| 2411 |  | 
|---|
| 2412 | do_cache &= res->fi && !itag; | 
|---|
| 2413 | if (do_cache) { | 
|---|
| 2414 | struct fib_nh_common *nhc = FIB_RES_NHC(*res); | 
|---|
| 2415 |  | 
|---|
| 2416 | rth = rcu_dereference(nhc->nhc_rth_input); | 
|---|
| 2417 | if (rt_cache_valid(rth)) { | 
|---|
| 2418 | skb_dst_set_noref(skb, &rth->dst); | 
|---|
| 2419 | reason = SKB_NOT_DROPPED_YET; | 
|---|
| 2420 | goto out; | 
|---|
| 2421 | } | 
|---|
| 2422 | } | 
|---|
| 2423 |  | 
|---|
| 2424 | rth = rt_dst_alloc(ip_rt_get_dev(net, res), | 
|---|
| 2425 | flags | RTCF_LOCAL, res->type, false); | 
|---|
| 2426 | if (!rth) | 
|---|
| 2427 | goto e_nobufs; | 
|---|
| 2428 |  | 
|---|
| 2429 | rth->dst.output= ip_rt_bug; | 
|---|
| 2430 | #ifdef CONFIG_IP_ROUTE_CLASSID | 
|---|
| 2431 | rth->dst.tclassid = itag; | 
|---|
| 2432 | #endif | 
|---|
| 2433 | rth->rt_is_input = 1; | 
|---|
| 2434 |  | 
|---|
| 2435 | RT_CACHE_STAT_INC(in_slow_tot); | 
|---|
| 2436 | if (res->type == RTN_UNREACHABLE) { | 
|---|
| 2437 | rth->dst.input= ip_error; | 
|---|
| 2438 | rth->dst.error= -err; | 
|---|
| 2439 | rth->rt_flags	&= ~RTCF_LOCAL; | 
|---|
| 2440 | } | 
|---|
| 2441 |  | 
|---|
| 2442 | if (do_cache) { | 
|---|
| 2443 | struct fib_nh_common *nhc = FIB_RES_NHC(*res); | 
|---|
| 2444 |  | 
|---|
| 2445 | rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); | 
|---|
| 2446 | if (lwtunnel_input_redirect(rth->dst.lwtstate)) { | 
|---|
| 2447 | WARN_ON(rth->dst.input == lwtunnel_input); | 
|---|
| 2448 | rth->dst.lwtstate->orig_input = rth->dst.input; | 
|---|
| 2449 | rth->dst.input = lwtunnel_input; | 
|---|
| 2450 | } | 
|---|
| 2451 |  | 
|---|
| 2452 | if (unlikely(!rt_cache_route(nhc, rth))) | 
|---|
| 2453 | rt_add_uncached_list(rth); | 
|---|
| 2454 | } | 
|---|
| 2455 | skb_dst_set(skb, &rth->dst); | 
|---|
| 2456 | reason = SKB_NOT_DROPPED_YET; | 
|---|
| 2457 | goto out; | 
|---|
| 2458 |  | 
|---|
| 2459 | no_route: | 
|---|
| 2460 | RT_CACHE_STAT_INC(in_no_route); | 
|---|
| 2461 | res->type = RTN_UNREACHABLE; | 
|---|
| 2462 | res->fi = NULL; | 
|---|
| 2463 | res->table = NULL; | 
|---|
| 2464 | goto local_input; | 
|---|
| 2465 |  | 
|---|
| 2466 | /* | 
|---|
| 2467 | *	Do not cache martian addresses: they should be logged (RFC1812) | 
|---|
| 2468 | */ | 
|---|
| 2469 | martian_destination: | 
|---|
| 2470 | RT_CACHE_STAT_INC(in_martian_dst); | 
|---|
| 2471 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 
|---|
| 2472 | if (IN_DEV_LOG_MARTIANS(in_dev)) | 
|---|
| 2473 | net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", | 
|---|
| 2474 | &daddr, &saddr, dev->name); | 
|---|
| 2475 | #endif | 
|---|
| 2476 | goto out; | 
|---|
| 2477 |  | 
|---|
| 2478 | e_nobufs: | 
|---|
| 2479 | reason = SKB_DROP_REASON_NOMEM; | 
|---|
| 2480 | goto out; | 
|---|
| 2481 |  | 
|---|
| 2482 | martian_source: | 
|---|
| 2483 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); | 
|---|
| 2484 | goto out; | 
|---|
| 2485 | } | 
|---|
| 2486 |  | 
|---|
| 2487 | /* called with rcu_read_lock held */ | 
|---|
| 2488 | static enum skb_drop_reason | 
|---|
| 2489 | ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 
|---|
| 2490 | dscp_t dscp, struct net_device *dev, | 
|---|
| 2491 | struct fib_result *res) | 
|---|
| 2492 | { | 
|---|
| 2493 | /* Multicast recognition logic is moved from route cache to here. | 
|---|
| 2494 | * The problem was that too many Ethernet cards have broken/missing | 
|---|
| 2495 | * hardware multicast filters :-( As a result, a host on a multicast | 
|---|
| 2496 | * network acquires a lot of useless route cache entries, sort of | 
|---|
| 2497 | * SDR messages from all the world. Now we try to get rid of them. | 
|---|
| 2498 | * Really, provided software IP multicast filter is organized | 
|---|
| 2499 | * reasonably (at least, hashed), it does not result in a slowdown | 
|---|
| 2500 | * compared with route cache reject entries. | 
|---|
| 2501 | * Note, that multicast routers are not affected, because | 
|---|
| 2502 | * route cache entry is created eventually. | 
|---|
| 2503 | */ | 
|---|
| 2504 | if (ipv4_is_multicast(daddr)) { | 
|---|
| 2505 | enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; | 
|---|
| 2506 | struct in_device *in_dev = __in_dev_get_rcu(dev); | 
|---|
| 2507 | int our = 0; | 
|---|
| 2508 |  | 
|---|
| 2509 | if (!in_dev) | 
|---|
| 2510 | return reason; | 
|---|
| 2511 |  | 
|---|
| 2512 | our = ip_check_mc_rcu(in_dev, daddr, saddr, | 
|---|
| 2513 | ip_hdr(skb)->protocol); | 
|---|
| 2514 |  | 
|---|
| 2515 | /* check l3 master if no match yet */ | 
|---|
| 2516 | if (!our && netif_is_l3_slave(dev)) { | 
|---|
| 2517 | struct in_device *l3_in_dev; | 
|---|
| 2518 |  | 
|---|
| 2519 | l3_in_dev = __in_dev_get_rcu(skb->dev); | 
|---|
| 2520 | if (l3_in_dev) | 
|---|
| 2521 | our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, | 
|---|
| 2522 | ip_hdr(skb)->protocol); | 
|---|
| 2523 | } | 
|---|
| 2524 |  | 
|---|
| 2525 | if (our | 
|---|
| 2526 | #ifdef CONFIG_IP_MROUTE | 
|---|
| 2527 | || | 
|---|
| 2528 | (!ipv4_is_local_multicast(daddr) && | 
|---|
| 2529 | IN_DEV_MFORWARD(in_dev)) | 
|---|
| 2530 | #endif | 
|---|
| 2531 | ) { | 
|---|
| 2532 | reason = ip_route_input_mc(skb, daddr, saddr, dscp, | 
|---|
| 2533 | dev, our); | 
|---|
| 2534 | } | 
|---|
| 2535 | return reason; | 
|---|
| 2536 | } | 
|---|
| 2537 |  | 
|---|
| 2538 | return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res); | 
|---|
| 2539 | } | 
|---|
| 2540 |  | 
|---|
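|  | /* Input-route lookup entry point: runs ip_route_input_rcu() under rcu_read_lock() and returns a drop reason (SKB_NOT_DROPPED_YET on success). */ | 
|---|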
| 2541 | enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, | 
|---|
| 2542 | __be32 saddr, dscp_t dscp, | 
|---|
| 2543 | struct net_device *dev) | 
|---|
| 2544 | { | 
|---|
| 2545 | enum skb_drop_reason reason; | 
|---|
| 2546 | struct fib_result res; | 
|---|
| 2547 |  | 
|---|
| 2548 | rcu_read_lock(); | 
|---|
| 2549 | reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res); | 
|---|
| 2550 | rcu_read_unlock(); | 
|---|
| 2551 |  | 
|---|
| 2552 | return reason; | 
|---|
| 2553 | } | 
|---|
| 2554 | EXPORT_SYMBOL(ip_route_input_noref); | 
|---|
| 2555 |  | 
|---|
| 2556 | /* called with rcu_read_lock() */ | 
|---|
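|  | /* Build the output route for @fl4 once the FIB lookup has resolved @res, reusing a cached nexthop or exception route when one is still valid. */ | 
|---|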
| 2557 | static struct rtable *__mkroute_output(const struct fib_result *res, | 
|---|
| 2558 | const struct flowi4 *fl4, int orig_oif, | 
|---|
| 2559 | struct net_device *dev_out, | 
|---|
| 2560 | unsigned int flags) | 
|---|
| 2561 | { | 
|---|
| 2562 | struct fib_info *fi = res->fi; | 
|---|
| 2563 | struct fib_nh_exception *fnhe; | 
|---|
| 2564 | struct in_device *in_dev; | 
|---|
| 2565 | u16 type = res->type; | 
|---|
| 2566 | struct rtable *rth; | 
|---|
| 2567 | bool do_cache; | 
|---|
| 2568 |  | 
|---|
| 2569 | in_dev = __in_dev_get_rcu(dev_out); | 
|---|
| 2570 | if (!in_dev) | 
|---|
| 2571 | return ERR_PTR(-EINVAL); | 
|---|
| 2572 |  | 
|---|
| 2573 | if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) | 
|---|
| 2574 | if (ipv4_is_loopback(fl4->saddr) && | 
|---|
| 2575 | !(dev_out->flags & IFF_LOOPBACK) && | 
|---|
| 2576 | !netif_is_l3_master(dev_out)) | 
|---|
| 2577 | return ERR_PTR(-EINVAL); | 
|---|
| 2578 |  | 
|---|
| 2579 | if (ipv4_is_lbcast(fl4->daddr)) { | 
|---|
| 2580 | type = RTN_BROADCAST; | 
|---|
| 2581 |  | 
|---|
| 2582 | /* reset fi to prevent gateway resolution */ | 
|---|
| 2583 | fi = NULL; | 
|---|
| 2584 | } else if (ipv4_is_multicast(fl4->daddr)) { | 
|---|
| 2585 | type = RTN_MULTICAST; | 
|---|
| 2586 | } else if (ipv4_is_zeronet(fl4->daddr)) { | 
|---|
| 2587 | return ERR_PTR(-EINVAL); | 
|---|
| 2588 | } | 
|---|
| 2589 |  | 
|---|
| 2590 | if (dev_out->flags & IFF_LOOPBACK) | 
|---|
| 2591 | flags |= RTCF_LOCAL; | 
|---|
| 2592 |  | 
|---|
| 2593 | do_cache = true; | 
|---|
| 2594 | if (type == RTN_BROADCAST) { | 
|---|
| 2595 | flags |= RTCF_BROADCAST | RTCF_LOCAL; | 
|---|
| 2596 | } else if (type == RTN_MULTICAST) { | 
|---|
| 2597 | flags |= RTCF_MULTICAST | RTCF_LOCAL; | 
|---|
| 2598 | if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, | 
|---|
| 2599 | fl4->flowi4_proto)) | 
|---|
| 2600 | flags &= ~RTCF_LOCAL; | 
|---|
| 2601 | else | 
|---|
| 2602 | do_cache = false; | 
|---|
| 2603 | /* If a multicast route does not exist, use the | 
|---|
| 2604 | * default one, but do not gateway in this case. | 
|---|
| 2605 | * Yes, it is a hack. | 
|---|
| 2606 | */ | 
|---|
| 2607 | if (fi && res->prefixlen < 4) | 
|---|
| 2608 | fi = NULL; | 
|---|
| 2609 | } else if ((type == RTN_LOCAL) && (orig_oif != 0) && | 
|---|
| 2610 | (orig_oif != dev_out->ifindex)) { | 
|---|
| 2611 | /* For local routes that require a particular output interface | 
|---|
| 2612 | * we do not want to cache the result.  Caching the result | 
|---|
| 2613 | * causes incorrect behaviour when there are multiple source | 
|---|
| 2614 | * addresses on the interface, the end result being that if the | 
|---|
| 2615 | * intended recipient is waiting on that interface for the | 
|---|
| 2616 | * packet he won't receive it because it will be delivered on | 
|---|
| 2617 | * the loopback interface and the IP_PKTINFO ipi_ifindex will | 
|---|
| 2618 | * be set to the loopback interface as well. | 
|---|
| 2619 | */ | 
|---|
| 2620 | do_cache = false; | 
|---|
| 2621 | } | 
|---|
| 2622 |  | 
|---|
| 2623 | fnhe = NULL; | 
|---|
| 2624 | do_cache &= fi != NULL; | 
|---|
| 2625 | if (fi) { | 
|---|
| 2626 | struct fib_nh_common *nhc = FIB_RES_NHC(*res); | 
|---|
| 2627 | struct rtable __rcu **prth; | 
|---|
| 2628 |  | 
|---|
| 2629 | fnhe = find_exception(nhc, fl4->daddr); | 
|---|
| 2630 | if (!do_cache) | 
|---|
| 2631 | goto add; | 
|---|
| 2632 | if (fnhe) { | 
|---|
| 2633 | prth = &fnhe->fnhe_rth_output; | 
|---|
| 2634 | } else { | 
|---|
| 2635 | if (unlikely(fl4->flowi4_flags & | 
|---|
| 2636 | FLOWI_FLAG_KNOWN_NH && | 
|---|
| 2637 | !(nhc->nhc_gw_family && | 
|---|
| 2638 | nhc->nhc_scope == RT_SCOPE_LINK))) { | 
|---|
| 2639 | do_cache = false; | 
|---|
| 2640 | goto add; | 
|---|
| 2641 | } | 
|---|
| 2642 | prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); | 
|---|
| 2643 | } | 
|---|
| 2644 | rth = rcu_dereference(*prth); | 
|---|
| 2645 | if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) | 
|---|
| 2646 | return rth; | 
|---|
| 2647 | } | 
|---|
| 2648 |  | 
|---|
| 2649 | add: | 
|---|
| 2650 | rth = rt_dst_alloc(dev_out, flags, type, | 
|---|
| 2651 | IN_DEV_ORCONF(in_dev, NOXFRM)); | 
|---|
| 2652 | if (!rth) | 
|---|
| 2653 | return ERR_PTR(-ENOBUFS); | 
|---|
| 2654 |  | 
|---|
| 2655 | rth->rt_iif = orig_oif; | 
|---|
| 2656 |  | 
|---|
| 2657 | RT_CACHE_STAT_INC(out_slow_tot); | 
|---|
| 2658 |  | 
|---|
| 2659 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 
|---|
| 2660 | if (flags & RTCF_LOCAL && | 
|---|
| 2661 | !(dev_out->flags & IFF_LOOPBACK)) { | 
|---|
| 2662 | rth->dst.output = ip_mc_output; | 
|---|
| 2663 | RT_CACHE_STAT_INC(out_slow_mc); | 
|---|
| 2664 | } | 
|---|
| 2665 | #ifdef CONFIG_IP_MROUTE | 
|---|
| 2666 | if (type == RTN_MULTICAST) { | 
|---|
| 2667 | if (IN_DEV_MFORWARD(in_dev) && | 
|---|
| 2668 | !ipv4_is_local_multicast(fl4->daddr)) { | 
|---|
| 2669 | rth->dst.input = ip_mr_input; | 
|---|
| 2670 | rth->dst.output = ip_mr_output; | 
|---|
| 2671 | } | 
|---|
| 2672 | } | 
|---|
| 2673 | #endif | 
|---|
| 2674 | } | 
|---|
| 2675 |  | 
|---|
| 2676 | rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); | 
|---|
| 2677 | lwtunnel_set_redirect(&rth->dst); | 
|---|
| 2678 |  | 
|---|
| 2679 | return rth; | 
|---|
| 2680 | } | 
|---|
| 2681 |  | 
|---|
| 2682 | /* | 
|---|
| 2683 | * Major route resolver routine. | 
|---|
| 2684 | */ | 
|---|
| 2685 |  | 
|---|
| 2686 | struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, | 
|---|
| 2687 | const struct sk_buff *skb) | 
|---|
| 2688 | { | 
|---|
| 2689 | struct fib_result res = { | 
|---|
| 2690 | .type		= RTN_UNSPEC, | 
|---|
| 2691 | .fi		= NULL, | 
|---|
| 2692 | .table		= NULL, | 
|---|
| 2693 | .tclassid	= 0, | 
|---|
| 2694 | }; | 
|---|
| 2695 | struct rtable *rth; | 
|---|
| 2696 |  | 
|---|
| 2697 | fl4->flowi4_iif = LOOPBACK_IFINDEX; | 
|---|
| 2698 |  | 
|---|
| 2699 | rcu_read_lock(); | 
|---|
| 2700 | rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); | 
|---|
| 2701 | rcu_read_unlock(); | 
|---|
| 2702 |  | 
|---|
| 2703 | return rth; | 
|---|
| 2704 | } | 
|---|
| 2705 | EXPORT_SYMBOL_GPL(ip_route_output_key_hash); | 
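|---|
|  | /* Note: most callers reach ip_route_output_key_hash() indirectly through | 
|---|
|  |  * the __ip_route_output_key() and ip_route_output_flow() wrappers rather | 
|---|
|  |  * than calling it directly; it performs the full slow-path output lookup. | 
|---|
|  |  */ | 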
|---|
| 2706 |  | 
|---|
| 2707 | struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, | 
|---|
| 2708 | struct fib_result *res, | 
|---|
| 2709 | const struct sk_buff *skb) | 
|---|
| 2710 | { | 
|---|
| 2711 | struct net_device *dev_out = NULL; | 
|---|
| 2712 | int orig_oif = fl4->flowi4_oif; | 
|---|
| 2713 | unsigned int flags = 0; | 
|---|
| 2714 | struct rtable *rth; | 
|---|
| 2715 | int err; | 
|---|
| 2716 |  | 
|---|
| 2717 | if (fl4->saddr) { | 
|---|
| 2718 | if (ipv4_is_multicast(fl4->saddr) || | 
|---|
| 2719 | ipv4_is_lbcast(fl4->saddr)) { | 
|---|
| 2720 | rth = ERR_PTR(-EINVAL); | 
|---|
| 2721 | goto out; | 
|---|
| 2722 | } | 
|---|
| 2723 |  | 
|---|
| 2724 | rth = ERR_PTR(-ENETUNREACH); | 
|---|
| 2725 |  | 
|---|
| 2726 | /* I removed check for oif == dev_out->oif here. | 
|---|
| 2727 | * It was wrong for two reasons: | 
|---|
| 2728 | * 1. ip_dev_find(net, saddr) can return the wrong iface if saddr | 
|---|
| 2729 | *    is assigned to multiple interfaces. | 
|---|
| 2730 | * 2. Moreover, we are allowed to send packets with saddr | 
|---|
| 2731 | *    of another iface. --ANK | 
|---|
| 2732 | */ | 
|---|
| 2733 |  | 
|---|
| 2734 | if (fl4->flowi4_oif == 0 && | 
|---|
| 2735 | (ipv4_is_multicast(fl4->daddr) || | 
|---|
| 2736 | ipv4_is_lbcast(fl4->daddr))) { | 
|---|
| 2737 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 
|---|
| 2738 | dev_out = __ip_dev_find(net, fl4->saddr, false); | 
|---|
| 2739 | if (!dev_out) | 
|---|
| 2740 | goto out; | 
|---|
| 2741 |  | 
|---|
| 2742 | /* Special hack: user can direct multicasts | 
|---|
| 2743 | * and limited broadcast via necessary interface | 
|---|
| 2744 | * without fiddling with IP_MULTICAST_IF or IP_PKTINFO. | 
|---|
| 2745 | * This hack is not just for fun, it allows | 
|---|
| 2746 | * vic, vat and friends to work. | 
|---|
| 2747 | * They bind socket to loopback, set ttl to zero | 
|---|
| 2748 | * and expect that it will work. | 
|---|
| 2749 | * From the viewpoint of routing cache they are broken, | 
|---|
| 2750 | * because we are not allowed to build multicast path | 
|---|
| 2751 | * with loopback source addr (look, routing cache | 
|---|
| 2752 | * cannot know, that ttl is zero, so that packet | 
|---|
| 2753 | * will not leave this host and route is valid). | 
|---|
| 2754 | * Luckily, this hack is good workaround. | 
|---|
| 2755 | */ | 
|---|
| 2756 |  | 
|---|
| 2757 | fl4->flowi4_oif = dev_out->ifindex; | 
|---|
| 2758 | goto make_route; | 
|---|
| 2759 | } | 
|---|
| 2760 |  | 
|---|
| 2761 | if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { | 
|---|
| 2762 | /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ | 
|---|
| 2763 | if (!__ip_dev_find(net, fl4->saddr, false)) | 
|---|
| 2764 | goto out; | 
|---|
| 2765 | } | 
|---|
| 2766 | } | 
|---|
| 2767 |  | 
|---|
| 2768 |  | 
|---|
| 2769 | if (fl4->flowi4_oif) { | 
|---|
| 2770 | dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); | 
|---|
| 2771 | rth = ERR_PTR(-ENODEV); | 
|---|
| 2772 | if (!dev_out) | 
|---|
| 2773 | goto out; | 
|---|
| 2774 |  | 
|---|
| 2775 | /* RACE: Check return value of inet_select_addr instead. */ | 
|---|
| 2776 | if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { | 
|---|
| 2777 | rth = ERR_PTR(-ENETUNREACH); | 
|---|
| 2778 | goto out; | 
|---|
| 2779 | } | 
|---|
| 2780 | if (ipv4_is_local_multicast(fl4->daddr) || | 
|---|
| 2781 | ipv4_is_lbcast(fl4->daddr) || | 
|---|
| 2782 | fl4->flowi4_proto == IPPROTO_IGMP) { | 
|---|
| 2783 | if (!fl4->saddr) | 
|---|
| 2784 | fl4->saddr = inet_select_addr(dev_out, 0, | 
|---|
| 2785 | RT_SCOPE_LINK); | 
|---|
| 2786 | goto make_route; | 
|---|
| 2787 | } | 
|---|
| 2788 | if (!fl4->saddr) { | 
|---|
| 2789 | if (ipv4_is_multicast(fl4->daddr)) | 
|---|
| 2790 | fl4->saddr = inet_select_addr(dev_out, 0, | 
|---|
| 2791 | fl4->flowi4_scope); | 
|---|
| 2792 | else if (!fl4->daddr) | 
|---|
| 2793 | fl4->saddr = inet_select_addr(dev_out, 0, | 
|---|
| 2794 | RT_SCOPE_HOST); | 
|---|
| 2795 | } | 
|---|
| 2796 | } | 
|---|
| 2797 |  | 
|---|
| 2798 | if (!fl4->daddr) { | 
|---|
| 2799 | fl4->daddr = fl4->saddr; | 
|---|
| 2800 | if (!fl4->daddr) | 
|---|
| 2801 | fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); | 
|---|
| 2802 | dev_out = net->loopback_dev; | 
|---|
| 2803 | fl4->flowi4_oif = LOOPBACK_IFINDEX; | 
|---|
| 2804 | res->type = RTN_LOCAL; | 
|---|
| 2805 | flags |= RTCF_LOCAL; | 
|---|
| 2806 | goto make_route; | 
|---|
| 2807 | } | 
|---|
| 2808 |  | 
|---|
| 2809 | err = fib_lookup(net, fl4, res, 0); | 
|---|
| 2810 | if (err) { | 
|---|
| 2811 | res->fi = NULL; | 
|---|
| 2812 | res->table = NULL; | 
|---|
| 2813 | if (fl4->flowi4_oif && | 
|---|
| 2814 | (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { | 
|---|
| 2815 | /* Apparently, the routing tables are wrong. Assume | 
|---|
| 2816 | * that the destination is on link. | 
|---|
| 2817 | * | 
|---|
| 2818 | * WHY? DW. | 
|---|
| 2819 | * Because we are allowed to send to iface | 
|---|
| 2820 | * even if it has NO routes and NO assigned | 
|---|
| 2821 | * addresses. When oif is specified, routing | 
|---|
| 2822 | * tables are looked up with only one purpose: | 
|---|
| 2823 | * to catch if destination is gatewayed, rather than | 
|---|
| 2824 | * direct. Moreover, if MSG_DONTROUTE is set, | 
|---|
| 2825 | * we send packet, ignoring both routing tables | 
|---|
| 2826 | * and ifaddr state. --ANK | 
|---|
| 2827 | * | 
|---|
| 2828 | * | 
|---|
| 2829 | * We could make it even if oif is unknown, | 
|---|
| 2830 | * likely IPv6, but we do not. | 
|---|
| 2831 | */ | 
|---|
| 2832 |  | 
|---|
| 2833 | if (fl4->saddr == 0) | 
|---|
| 2834 | fl4->saddr = inet_select_addr(dev_out, 0, | 
|---|
| 2835 | RT_SCOPE_LINK); | 
|---|
| 2836 | res->type = RTN_UNICAST; | 
|---|
| 2837 | goto make_route; | 
|---|
| 2838 | } | 
|---|
| 2839 | rth = ERR_PTR(err); | 
|---|
| 2840 | goto out; | 
|---|
| 2841 | } | 
|---|
| 2842 |  | 
|---|
| 2843 | if (res->type == RTN_LOCAL) { | 
|---|
| 2844 | if (!fl4->saddr) { | 
|---|
| 2845 | if (res->fi->fib_prefsrc) | 
|---|
| 2846 | fl4->saddr = res->fi->fib_prefsrc; | 
|---|
| 2847 | else | 
|---|
| 2848 | fl4->saddr = fl4->daddr; | 
|---|
| 2849 | } | 
|---|
| 2850 |  | 
|---|
| 2851 | /* L3 master device is the loopback for that domain */ | 
|---|
| 2852 | dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? : | 
|---|
| 2853 | net->loopback_dev; | 
|---|
| 2854 |  | 
|---|
| 2855 | /* make sure orig_oif points to fib result device even | 
|---|
| 2856 | * though packet rx/tx happens over loopback or l3mdev | 
|---|
| 2857 | */ | 
|---|
| 2858 | orig_oif = FIB_RES_OIF(*res); | 
|---|
| 2859 |  | 
|---|
| 2860 | fl4->flowi4_oif = dev_out->ifindex; | 
|---|
| 2861 | flags |= RTCF_LOCAL; | 
|---|
| 2862 | goto make_route; | 
|---|
| 2863 | } | 
|---|
| 2864 |  | 
|---|
| 2865 | fib_select_path(net, res, fl4, skb); | 
|---|
| 2866 |  | 
|---|
| 2867 | dev_out = FIB_RES_DEV(*res); | 
|---|
| 2868 |  | 
|---|
| 2869 | make_route: | 
|---|
| 2870 | rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); | 
|---|
| 2871 |  | 
|---|
| 2872 | out: | 
|---|
| 2873 | return rth; | 
|---|
| 2874 | } | 
|---|
| 2875 |  | 
|---|
| 2876 | static struct dst_ops ipv4_dst_blackhole_ops = { | 
|---|
| 2877 | .family			= AF_INET, | 
|---|
| 2878 | .default_advmss		= ipv4_default_advmss, | 
|---|
| 2879 | .neigh_lookup		= ipv4_neigh_lookup, | 
|---|
| 2880 | .check			= dst_blackhole_check, | 
|---|
| 2881 | .cow_metrics		= dst_blackhole_cow_metrics, | 
|---|
| 2882 | .update_pmtu		= dst_blackhole_update_pmtu, | 
|---|
| 2883 | .redirect		= dst_blackhole_redirect, | 
|---|
| 2884 | .mtu			= dst_blackhole_mtu, | 
|---|
| 2885 | }; | 
|---|
| 2886 |  | 
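|---|
|  | /* Turn dst_orig into a "blackhole" dst that silently discards all traffic | 
|---|
|  |  * while keeping the route's flow and gateway information, so a caller | 
|---|
|  |  * (e.g. the XFRM lookup path) can keep holding a valid but inert route. | 
|---|
|  |  */ | 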
|---|
| 2887 | struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) | 
|---|
| 2888 | { | 
|---|
| 2889 | struct rtable *ort = dst_rtable(dst_orig); | 
|---|
| 2890 | struct rtable *rt; | 
|---|
| 2891 |  | 
|---|
| 2892 | rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); | 
|---|
| 2893 | if (rt) { | 
|---|
| 2894 | struct dst_entry *new = &rt->dst; | 
|---|
| 2895 |  | 
|---|
| 2896 | new->__use = 1; | 
|---|
| 2897 | new->input = dst_discard; | 
|---|
| 2898 | new->output = dst_discard_out; | 
|---|
| 2899 |  | 
|---|
| 2900 | new->dev = net->loopback_dev; | 
|---|
| 2901 | netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC); | 
|---|
| 2902 |  | 
|---|
| 2903 | rt->rt_is_input = ort->rt_is_input; | 
|---|
| 2904 | rt->rt_iif = ort->rt_iif; | 
|---|
| 2905 | rt->rt_pmtu = ort->rt_pmtu; | 
|---|
| 2906 | rt->rt_mtu_locked = ort->rt_mtu_locked; | 
|---|
| 2907 |  | 
|---|
| 2908 | rt->rt_genid = rt_genid_ipv4(net); | 
|---|
| 2909 | rt->rt_flags = ort->rt_flags; | 
|---|
| 2910 | rt->rt_type = ort->rt_type; | 
|---|
| 2911 | rt->rt_uses_gateway = ort->rt_uses_gateway; | 
|---|
| 2912 | rt->rt_gw_family = ort->rt_gw_family; | 
|---|
| 2913 | if (rt->rt_gw_family == AF_INET) | 
|---|
| 2914 | rt->rt_gw4 = ort->rt_gw4; | 
|---|
| 2915 | else if (rt->rt_gw_family == AF_INET6) | 
|---|
| 2916 | rt->rt_gw6 = ort->rt_gw6; | 
|---|
| 2917 | } | 
|---|
| 2918 |  | 
|---|
| 2919 | dst_release(dst_orig); | 
|---|
| 2920 |  | 
|---|
| 2921 | return rt ? &rt->dst : ERR_PTR(-ENOMEM); | 
|---|
| 2922 | } | 
|---|
| 2923 |  | 
|---|
| 2924 | struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, | 
|---|
| 2925 | const struct sock *sk) | 
|---|
| 2926 | { | 
|---|
| 2927 | struct rtable *rt = __ip_route_output_key(net, flp4); | 
|---|
| 2928 |  | 
|---|
| 2929 | if (IS_ERR(rt)) | 
|---|
| 2930 | return rt; | 
|---|
| 2931 |  | 
|---|
| 2932 | if (flp4->flowi4_proto) { | 
|---|
| 2933 | flp4->flowi4_oif = rt->dst.dev->ifindex; | 
|---|
| 2934 | rt = dst_rtable(xfrm_lookup_route(net, &rt->dst, | 
|---|
| 2935 | flowi4_to_flowi(flp4), | 
|---|
| 2936 | sk, 0)); | 
|---|
| 2937 | } | 
|---|
| 2938 |  | 
|---|
| 2939 | return rt; | 
|---|
| 2940 | } | 
|---|
| 2941 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 
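|---|
|  | /* Example (sketch, not from this file): a typical user fills a flowi4, | 
|---|
|  |  * checks for an error pointer and releases the route when done: | 
|---|
|  |  * | 
|---|
|  |  *	struct flowi4 fl4 = { .daddr = daddr, .saddr = saddr, | 
|---|
|  |  *			      .flowi4_proto = IPPROTO_UDP }; | 
|---|
|  |  *	struct rtable *rt = ip_route_output_flow(net, &fl4, sk); | 
|---|
|  |  * | 
|---|
|  |  *	if (IS_ERR(rt)) | 
|---|
|  |  *		return PTR_ERR(rt); | 
|---|
|  |  *	... | 
|---|
|  |  *	ip_rt_put(rt); | 
|---|
|  |  */ | 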
|---|
| 2942 |  | 
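|---|
|  | /* Fill one RTM_NEWROUTE message describing a resolved route: the rtmsg | 
|---|
|  |  * header, RTA_DST/RTA_SRC/RTA_OIF/RTA_GATEWAY (or RTA_VIA) attributes, | 
|---|
|  |  * metrics and cache info. Returns 0, or -EMSGSIZE if skb runs out of room. | 
|---|
|  |  */ | 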
|---|
| 2943 | /* called with rcu_read_lock held */ | 
|---|
| 2944 | static int rt_fill_info(struct net *net, __be32 dst, __be32 src, | 
|---|
| 2945 | struct rtable *rt, u32 table_id, dscp_t dscp, | 
|---|
| 2946 | struct flowi4 *fl4, struct sk_buff *skb, u32 portid, | 
|---|
| 2947 | u32 seq, unsigned int flags) | 
|---|
| 2948 | { | 
|---|
| 2949 | struct rtmsg *r; | 
|---|
| 2950 | struct nlmsghdr *nlh; | 
|---|
| 2951 | unsigned long expires = 0; | 
|---|
| 2952 | u32 error; | 
|---|
| 2953 | u32 metrics[RTAX_MAX]; | 
|---|
| 2954 |  | 
|---|
| 2955 | nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags); | 
|---|
| 2956 | if (!nlh) | 
|---|
| 2957 | return -EMSGSIZE; | 
|---|
| 2958 |  | 
|---|
| 2959 | r = nlmsg_data(nlh); | 
|---|
| 2960 | r->rtm_family	 = AF_INET; | 
|---|
| 2961 | r->rtm_dst_len	= 32; | 
|---|
| 2962 | r->rtm_src_len	= 0; | 
|---|
| 2963 | r->rtm_tos	= inet_dscp_to_dsfield(dscp); | 
|---|
| 2964 | r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT; | 
|---|
| 2965 | if (nla_put_u32(skb, RTA_TABLE, table_id)) | 
|---|
| 2966 | goto nla_put_failure; | 
|---|
| 2967 | r->rtm_type	= rt->rt_type; | 
|---|
| 2968 | r->rtm_scope	= RT_SCOPE_UNIVERSE; | 
|---|
| 2969 | r->rtm_protocol = RTPROT_UNSPEC; | 
|---|
| 2970 | r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; | 
|---|
| 2971 | if (rt->rt_flags & RTCF_NOTIFY) | 
|---|
| 2972 | r->rtm_flags |= RTM_F_NOTIFY; | 
|---|
| 2973 | if (IPCB(skb)->flags & IPSKB_DOREDIRECT) | 
|---|
| 2974 | r->rtm_flags |= RTCF_DOREDIRECT; | 
|---|
| 2975 |  | 
|---|
| 2976 | if (nla_put_in_addr(skb, RTA_DST, dst)) | 
|---|
| 2977 | goto nla_put_failure; | 
|---|
| 2978 | if (src) { | 
|---|
| 2979 | r->rtm_src_len = 32; | 
|---|
| 2980 | if (nla_put_in_addr(skb, RTA_SRC, src)) | 
|---|
| 2981 | goto nla_put_failure; | 
|---|
| 2982 | } | 
|---|
| 2983 | if (rt->dst.dev && | 
|---|
| 2984 | nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) | 
|---|
| 2985 | goto nla_put_failure; | 
|---|
| 2986 | if (lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) | 
|---|
| 2987 | goto nla_put_failure; | 
|---|
| 2988 | #ifdef CONFIG_IP_ROUTE_CLASSID | 
|---|
| 2989 | if (rt->dst.tclassid && | 
|---|
| 2990 | nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) | 
|---|
| 2991 | goto nla_put_failure; | 
|---|
| 2992 | #endif | 
|---|
| 2993 | if (fl4 && !rt_is_input_route(rt) && | 
|---|
| 2994 | fl4->saddr != src) { | 
|---|
| 2995 | if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) | 
|---|
| 2996 | goto nla_put_failure; | 
|---|
| 2997 | } | 
|---|
| 2998 | if (rt->rt_uses_gateway) { | 
|---|
| 2999 | if (rt->rt_gw_family == AF_INET && | 
|---|
| 3000 | nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) { | 
|---|
| 3001 | goto nla_put_failure; | 
|---|
| 3002 | } else if (rt->rt_gw_family == AF_INET6) { | 
|---|
| 3003 | int alen = sizeof(struct in6_addr); | 
|---|
| 3004 | struct nlattr *nla; | 
|---|
| 3005 | struct rtvia *via; | 
|---|
| 3006 |  | 
|---|
| 3007 | nla = nla_reserve(skb, RTA_VIA, alen + 2); | 
|---|
| 3008 | if (!nla) | 
|---|
| 3009 | goto nla_put_failure; | 
|---|
| 3010 |  | 
|---|
| 3011 | via = nla_data(nla); | 
|---|
| 3012 | via->rtvia_family = AF_INET6; | 
|---|
| 3013 | memcpy(via->rtvia_addr, &rt->rt_gw6, alen); | 
|---|
| 3014 | } | 
|---|
| 3015 | } | 
|---|
| 3016 |  | 
|---|
| 3017 | expires = READ_ONCE(rt->dst.expires); | 
|---|
| 3018 | if (expires) { | 
|---|
| 3019 | unsigned long now = jiffies; | 
|---|
| 3020 |  | 
|---|
| 3021 | if (time_before(now, expires)) | 
|---|
| 3022 | expires -= now; | 
|---|
| 3023 | else | 
|---|
| 3024 | expires = 0; | 
|---|
| 3025 | } | 
|---|
| 3026 |  | 
|---|
| 3027 | memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); | 
|---|
| 3028 | if (rt->rt_pmtu && expires) | 
|---|
| 3029 | metrics[RTAX_MTU - 1] = rt->rt_pmtu; | 
|---|
| 3030 | if (rt->rt_mtu_locked && expires) | 
|---|
| 3031 | metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); | 
|---|
| 3032 | if (rtnetlink_put_metrics(skb, metrics) < 0) | 
|---|
| 3033 | goto nla_put_failure; | 
|---|
| 3034 |  | 
|---|
| 3035 | if (fl4) { | 
|---|
| 3036 | if (fl4->flowi4_mark && | 
|---|
| 3037 | nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) | 
|---|
| 3038 | goto nla_put_failure; | 
|---|
| 3039 |  | 
|---|
| 3040 | if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && | 
|---|
| 3041 | nla_put_u32(skb, RTA_UID, | 
|---|
| 3042 | from_kuid_munged(current_user_ns(), | 
|---|
| 3043 | fl4->flowi4_uid))) | 
|---|
| 3044 | goto nla_put_failure; | 
|---|
| 3045 |  | 
|---|
| 3046 | if (rt_is_input_route(rt)) { | 
|---|
| 3047 | #ifdef CONFIG_IP_MROUTE | 
|---|
| 3048 | if (ipv4_is_multicast(dst) && | 
|---|
| 3049 | !ipv4_is_local_multicast(dst) && | 
|---|
| 3050 | IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) { | 
|---|
| 3051 | int err = ipmr_get_route(net, skb, | 
|---|
| 3052 | fl4->saddr, fl4->daddr, | 
|---|
| 3053 | r, portid); | 
|---|
| 3054 |  | 
|---|
| 3055 | if (err <= 0) { | 
|---|
| 3056 | if (err == 0) | 
|---|
| 3057 | return 0; | 
|---|
| 3058 | goto nla_put_failure; | 
|---|
| 3059 | } | 
|---|
| 3060 | } else | 
|---|
| 3061 | #endif | 
|---|
| 3062 | if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) | 
|---|
| 3063 | goto nla_put_failure; | 
|---|
| 3064 | } | 
|---|
| 3065 | } | 
|---|
| 3066 |  | 
|---|
| 3067 | error = rt->dst.error; | 
|---|
| 3068 |  | 
|---|
| 3069 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) | 
|---|
| 3070 | goto nla_put_failure; | 
|---|
| 3071 |  | 
|---|
| 3072 | nlmsg_end(skb, nlh); | 
|---|
| 3073 | return 0; | 
|---|
| 3074 |  | 
|---|
| 3075 | nla_put_failure: | 
|---|
| 3076 | nlmsg_cancel(skb, nlh); | 
|---|
| 3077 | return -EMSGSIZE; | 
|---|
| 3078 | } | 
|---|
| 3079 |  | 
|---|
| 3080 | static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, | 
|---|
| 3081 | struct netlink_callback *cb, u32 table_id, | 
|---|
| 3082 | struct fnhe_hash_bucket *bucket, int genid, | 
|---|
| 3083 | int *fa_index, int fa_start, unsigned int flags) | 
|---|
| 3084 | { | 
|---|
| 3085 | int i; | 
|---|
| 3086 |  | 
|---|
| 3087 | for (i = 0; i < FNHE_HASH_SIZE; i++) { | 
|---|
| 3088 | struct fib_nh_exception *fnhe; | 
|---|
| 3089 |  | 
|---|
| 3090 | for (fnhe = rcu_dereference(bucket[i].chain); fnhe; | 
|---|
| 3091 | fnhe = rcu_dereference(fnhe->fnhe_next)) { | 
|---|
| 3092 | struct rtable *rt; | 
|---|
| 3093 | int err; | 
|---|
| 3094 |  | 
|---|
| 3095 | if (*fa_index < fa_start) | 
|---|
| 3096 | goto next; | 
|---|
| 3097 |  | 
|---|
| 3098 | if (fnhe->fnhe_genid != genid) | 
|---|
| 3099 | goto next; | 
|---|
| 3100 |  | 
|---|
| 3101 | if (fnhe->fnhe_expires && | 
|---|
| 3102 | time_after(jiffies, fnhe->fnhe_expires)) | 
|---|
| 3103 | goto next; | 
|---|
| 3104 |  | 
|---|
| 3105 | rt = rcu_dereference(fnhe->fnhe_rth_input); | 
|---|
| 3106 | if (!rt) | 
|---|
| 3107 | rt = rcu_dereference(fnhe->fnhe_rth_output); | 
|---|
| 3108 | if (!rt) | 
|---|
| 3109 | goto next; | 
|---|
| 3110 |  | 
|---|
| 3111 | err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt, | 
|---|
| 3112 | table_id, 0, NULL, skb, | 
|---|
| 3113 | NETLINK_CB(cb->skb).portid, | 
|---|
| 3114 | cb->nlh->nlmsg_seq, flags); | 
|---|
| 3115 | if (err) | 
|---|
| 3116 | return err; | 
|---|
| 3117 | next: | 
|---|
| 3118 | (*fa_index)++; | 
|---|
| 3119 | } | 
|---|
| 3120 | } | 
|---|
| 3121 |  | 
|---|
| 3122 | return 0; | 
|---|
| 3123 | } | 
|---|
| 3124 |  | 
|---|
| 3125 | int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, | 
|---|
| 3126 | u32 table_id, struct fib_info *fi, | 
|---|
| 3127 | int *fa_index, int fa_start, unsigned int flags) | 
|---|
| 3128 | { | 
|---|
| 3129 | struct net *net = sock_net(cb->skb->sk); | 
|---|
| 3130 | int nhsel, genid = fnhe_genid(net); | 
|---|
| 3131 |  | 
|---|
| 3132 | for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { | 
|---|
| 3133 | struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); | 
|---|
| 3134 | struct fnhe_hash_bucket *bucket; | 
|---|
| 3135 | int err; | 
|---|
| 3136 |  | 
|---|
| 3137 | if (nhc->nhc_flags & RTNH_F_DEAD) | 
|---|
| 3138 | continue; | 
|---|
| 3139 |  | 
|---|
| 3140 | rcu_read_lock(); | 
|---|
| 3141 | bucket = rcu_dereference(nhc->nhc_exceptions); | 
|---|
| 3142 | err = 0; | 
|---|
| 3143 | if (bucket) | 
|---|
| 3144 | err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, | 
|---|
| 3145 | genid, fa_index, fa_start, | 
|---|
| 3146 | flags); | 
|---|
| 3147 | rcu_read_unlock(); | 
|---|
| 3148 | if (err) | 
|---|
| 3149 | return err; | 
|---|
| 3150 | } | 
|---|
| 3151 |  | 
|---|
| 3152 | return 0; | 
|---|
| 3153 | } | 
|---|
| 3154 |  | 
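|---|
|  | /* Build a minimal dummy skb (an IPv4 header plus a bare UDP, TCP or ICMP | 
|---|
|  |  * header) so that an RTM_GETROUTE request can be pushed through the real | 
|---|
|  |  * input/output routing path below. | 
|---|
|  |  */ | 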
|---|
| 3155 | static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, | 
|---|
| 3156 | u8 ip_proto, __be16 sport, | 
|---|
| 3157 | __be16 dport) | 
|---|
| 3158 | { | 
|---|
| 3159 | struct sk_buff *skb; | 
|---|
| 3160 | struct iphdr *iph; | 
|---|
| 3161 |  | 
|---|
| 3162 | skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); | 
|---|
| 3163 | if (!skb) | 
|---|
| 3164 | return NULL; | 
|---|
| 3165 |  | 
|---|
| 3166 | /* Reserve room for dummy headers, this skb can pass | 
|---|
| 3167 | * through good chunk of routing engine. | 
|---|
| 3168 | */ | 
|---|
| 3169 | skb_reset_mac_header(skb); | 
|---|
| 3170 | skb_reset_network_header(skb); | 
|---|
| 3171 | skb->protocol = htons(ETH_P_IP); | 
|---|
| 3172 | iph = skb_put(skb, sizeof(struct iphdr)); | 
|---|
| 3173 | iph->protocol = ip_proto; | 
|---|
| 3174 | iph->saddr = src; | 
|---|
| 3175 | iph->daddr = dst; | 
|---|
| 3176 | iph->version = 0x4; | 
|---|
| 3177 | iph->frag_off = 0; | 
|---|
| 3178 | iph->ihl = 0x5; | 
|---|
| 3179 | skb_set_transport_header(skb, skb->len); | 
|---|
| 3180 |  | 
|---|
| 3181 | switch (iph->protocol) { | 
|---|
| 3182 | case IPPROTO_UDP: { | 
|---|
| 3183 | struct udphdr *udph; | 
|---|
| 3184 |  | 
|---|
| 3185 | udph = skb_put_zero(skb, sizeof(struct udphdr)); | 
|---|
| 3186 | udph->source = sport; | 
|---|
| 3187 | udph->dest = dport; | 
|---|
| 3188 | udph->len = htons(sizeof(struct udphdr)); | 
|---|
| 3189 | udph->check = 0; | 
|---|
| 3190 | break; | 
|---|
| 3191 | } | 
|---|
| 3192 | case IPPROTO_TCP: { | 
|---|
| 3193 | struct tcphdr *tcph; | 
|---|
| 3194 |  | 
|---|
| 3195 | tcph = skb_put_zero(skb, sizeof(struct tcphdr)); | 
|---|
| 3196 | tcph->source	= sport; | 
|---|
| 3197 | tcph->dest	= dport; | 
|---|
| 3198 | tcph->doff	= sizeof(struct tcphdr) / 4; | 
|---|
| 3199 | tcph->rst = 1; | 
|---|
| 3200 | tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), | 
|---|
| 3201 | src, dst, 0); | 
|---|
| 3202 | break; | 
|---|
| 3203 | } | 
|---|
| 3204 | case IPPROTO_ICMP: { | 
|---|
| 3205 | struct icmphdr *icmph; | 
|---|
| 3206 |  | 
|---|
| 3207 | icmph = skb_put_zero(skb, sizeof(struct icmphdr)); | 
|---|
| 3208 | icmph->type = ICMP_ECHO; | 
|---|
| 3209 | icmph->code = 0; | 
|---|
| 3210 | } | 
|---|
| 3211 | } | 
|---|
| 3212 |  | 
|---|
| 3213 | return skb; | 
|---|
| 3214 | } | 
|---|
| 3215 |  | 
|---|
| 3216 | static int inet_rtm_valid_getroute_req(struct sk_buff *skb, | 
|---|
| 3217 | const struct nlmsghdr *nlh, | 
|---|
| 3218 | struct nlattr **tb, | 
|---|
| 3219 | struct netlink_ext_ack *extack) | 
|---|
| 3220 | { | 
|---|
| 3221 | struct rtmsg *rtm; | 
|---|
| 3222 | int i, err; | 
|---|
| 3223 |  | 
|---|
| 3224 | rtm = nlmsg_payload(nlh, sizeof(*rtm)); | 
|---|
| 3225 | if (!rtm) { | 
|---|
| 3226 | NL_SET_ERR_MSG(extack, | 
|---|
| 3227 | "ipv4: Invalid header for route get request"); | 
|---|
| 3228 | return -EINVAL; | 
|---|
| 3229 | } | 
|---|
| 3230 |  | 
|---|
| 3231 | if (!netlink_strict_get_check(skb)) | 
|---|
| 3232 | return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, | 
|---|
| 3233 | rtm_ipv4_policy, extack); | 
|---|
| 3234 |  | 
|---|
| 3235 | if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || | 
|---|
| 3236 | (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || | 
|---|
| 3237 | rtm->rtm_table || rtm->rtm_protocol || | 
|---|
| 3238 | rtm->rtm_scope || rtm->rtm_type) { | 
|---|
| 3239 | NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); | 
|---|
| 3240 | return -EINVAL; | 
|---|
| 3241 | } | 
|---|
| 3242 |  | 
|---|
| 3243 | if (rtm->rtm_flags & ~(RTM_F_NOTIFY | | 
|---|
| 3244 | RTM_F_LOOKUP_TABLE | | 
|---|
| 3245 | RTM_F_FIB_MATCH)) { | 
|---|
| 3246 | NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); | 
|---|
| 3247 | return -EINVAL; | 
|---|
| 3248 | } | 
|---|
| 3249 |  | 
|---|
| 3250 | err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, | 
|---|
| 3251 | rtm_ipv4_policy, extack); | 
|---|
| 3252 | if (err) | 
|---|
| 3253 | return err; | 
|---|
| 3254 |  | 
|---|
| 3255 | if ((tb[RTA_SRC] && !rtm->rtm_src_len) || | 
|---|
| 3256 | (tb[RTA_DST] && !rtm->rtm_dst_len)) { | 
|---|
| 3257 | NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); | 
|---|
| 3258 | return -EINVAL; | 
|---|
| 3259 | } | 
|---|
| 3260 |  | 
|---|
| 3261 | for (i = 0; i <= RTA_MAX; i++) { | 
|---|
| 3262 | if (!tb[i]) | 
|---|
| 3263 | continue; | 
|---|
| 3264 |  | 
|---|
| 3265 | switch (i) { | 
|---|
| 3266 | case RTA_IIF: | 
|---|
| 3267 | case RTA_OIF: | 
|---|
| 3268 | case RTA_SRC: | 
|---|
| 3269 | case RTA_DST: | 
|---|
| 3270 | case RTA_IP_PROTO: | 
|---|
| 3271 | case RTA_SPORT: | 
|---|
| 3272 | case RTA_DPORT: | 
|---|
| 3273 | case RTA_MARK: | 
|---|
| 3274 | case RTA_UID: | 
|---|
| 3275 | break; | 
|---|
| 3276 | default: | 
|---|
| 3277 | NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); | 
|---|
| 3278 | return -EINVAL; | 
|---|
| 3279 | } | 
|---|
| 3280 | } | 
|---|
| 3281 |  | 
|---|
| 3282 | return 0; | 
|---|
| 3283 | } | 
|---|
| 3284 |  | 
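|---|
|  | /* RTM_GETROUTE handler. Example (sketch): iproute2's "ip route get" issues | 
|---|
|  |  * these requests, e.g. "ip route get 192.0.2.1" for an output lookup, or | 
|---|
|  |  * "ip route get 192.0.2.1 iif eth0 from 198.51.100.1" to exercise the | 
|---|
|  |  * input path; the exact attributes sent depend on the tool version. | 
|---|
|  |  */ | 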
|---|
| 3285 | static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, | 
|---|
| 3286 | struct netlink_ext_ack *extack) | 
|---|
| 3287 | { | 
|---|
| 3288 | struct net *net = sock_net(in_skb->sk); | 
|---|
| 3289 | struct nlattr *tb[RTA_MAX+1]; | 
|---|
| 3290 | u32 table_id = RT_TABLE_MAIN; | 
|---|
| 3291 | __be16 sport = 0, dport = 0; | 
|---|
| 3292 | struct fib_result res = {}; | 
|---|
| 3293 | u8 ip_proto = IPPROTO_UDP; | 
|---|
| 3294 | struct rtable *rt = NULL; | 
|---|
| 3295 | struct sk_buff *skb; | 
|---|
| 3296 | struct rtmsg *rtm; | 
|---|
| 3297 | struct flowi4 fl4 = {}; | 
|---|
| 3298 | __be32 dst = 0; | 
|---|
| 3299 | __be32 src = 0; | 
|---|
| 3300 | dscp_t dscp; | 
|---|
| 3301 | kuid_t uid; | 
|---|
| 3302 | u32 iif; | 
|---|
| 3303 | int err; | 
|---|
| 3304 | int mark; | 
|---|
| 3305 |  | 
|---|
| 3306 | err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); | 
|---|
| 3307 | if (err < 0) | 
|---|
| 3308 | return err; | 
|---|
| 3309 |  | 
|---|
| 3310 | rtm = nlmsg_data(nlh); | 
|---|
| 3311 | src = nla_get_in_addr_default(tb[RTA_SRC], 0); | 
|---|
| 3312 | dst = nla_get_in_addr_default(tb[RTA_DST], 0); | 
|---|
| 3313 | iif = nla_get_u32_default(tb[RTA_IIF], 0); | 
|---|
| 3314 | mark = nla_get_u32_default(tb[RTA_MARK], 0); | 
|---|
| 3315 | dscp = inet_dsfield_to_dscp(rtm->rtm_tos); | 
|---|
| 3316 | if (tb[RTA_UID]) | 
|---|
| 3317 | uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); | 
|---|
| 3318 | else | 
|---|
| 3319 | uid = (iif ? INVALID_UID : current_uid()); | 
|---|
| 3320 |  | 
|---|
| 3321 | if (tb[RTA_IP_PROTO]) { | 
|---|
| 3322 | err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], | 
|---|
| 3323 | &ip_proto, AF_INET, extack); | 
|---|
| 3324 | if (err) | 
|---|
| 3325 | return err; | 
|---|
| 3326 | } | 
|---|
| 3327 |  | 
|---|
| 3328 | if (tb[RTA_SPORT]) | 
|---|
| 3329 | sport = nla_get_be16(tb[RTA_SPORT]); | 
|---|
| 3330 |  | 
|---|
| 3331 | if (tb[RTA_DPORT]) | 
|---|
| 3332 | dport = nla_get_be16(tb[RTA_DPORT]); | 
|---|
| 3333 |  | 
|---|
| 3334 | skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); | 
|---|
| 3335 | if (!skb) | 
|---|
| 3336 | return -ENOBUFS; | 
|---|
| 3337 |  | 
|---|
| 3338 | fl4.daddr = dst; | 
|---|
| 3339 | fl4.saddr = src; | 
|---|
| 3340 | fl4.flowi4_dscp = dscp; | 
|---|
| 3341 | fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0); | 
|---|
| 3342 | fl4.flowi4_mark = mark; | 
|---|
| 3343 | fl4.flowi4_uid = uid; | 
|---|
| 3344 | if (sport) | 
|---|
| 3345 | fl4.fl4_sport = sport; | 
|---|
| 3346 | if (dport) | 
|---|
| 3347 | fl4.fl4_dport = dport; | 
|---|
| 3348 | fl4.flowi4_proto = ip_proto; | 
|---|
| 3349 |  | 
|---|
| 3350 | rcu_read_lock(); | 
|---|
| 3351 |  | 
|---|
| 3352 | if (iif) { | 
|---|
| 3353 | struct net_device *dev; | 
|---|
| 3354 |  | 
|---|
| 3355 | dev = dev_get_by_index_rcu(net, iif); | 
|---|
| 3356 | if (!dev) { | 
|---|
| 3357 | err = -ENODEV; | 
|---|
| 3358 | goto errout_rcu; | 
|---|
| 3359 | } | 
|---|
| 3360 |  | 
|---|
| 3361 | fl4.flowi4_iif = iif; /* for rt_fill_info */ | 
|---|
| 3362 | skb->dev	= dev; | 
|---|
| 3363 | skb->mark	= mark; | 
|---|
| 3364 | err = ip_route_input_rcu(skb, dst, src, dscp, dev, | 
|---|
| 3365 | &res) ? -EINVAL : 0; | 
|---|
| 3366 |  | 
|---|
| 3367 | rt = skb_rtable(skb); | 
|---|
| 3368 | if (err == 0 && rt->dst.error) | 
|---|
| 3369 | err = -rt->dst.error; | 
|---|
| 3370 | } else { | 
|---|
| 3371 | fl4.flowi4_iif = LOOPBACK_IFINDEX; | 
|---|
| 3372 | skb->dev = net->loopback_dev; | 
|---|
| 3373 | rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); | 
|---|
| 3374 | err = 0; | 
|---|
| 3375 | if (IS_ERR(rt)) | 
|---|
| 3376 | err = PTR_ERR(rt); | 
|---|
| 3377 | else | 
|---|
| 3378 | skb_dst_set(skb, &rt->dst); | 
|---|
| 3379 | } | 
|---|
| 3380 |  | 
|---|
| 3381 | if (err) | 
|---|
| 3382 | goto errout_rcu; | 
|---|
| 3383 |  | 
|---|
| 3384 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 
|---|
| 3385 | rt->rt_flags |= RTCF_NOTIFY; | 
|---|
| 3386 |  | 
|---|
| 3387 | if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) | 
|---|
| 3388 | table_id = res.table ? res.table->tb_id : 0; | 
|---|
| 3389 |  | 
|---|
| 3390 | /* reset skb for netlink reply msg */ | 
|---|
| 3391 | skb_trim(skb, 0); | 
|---|
| 3392 | skb_reset_network_header(skb); | 
|---|
| 3393 | skb_reset_transport_header(skb); | 
|---|
| 3394 | skb_reset_mac_header(skb); | 
|---|
| 3395 |  | 
|---|
| 3396 | if (rtm->rtm_flags & RTM_F_FIB_MATCH) { | 
|---|
| 3397 | struct fib_rt_info fri; | 
|---|
| 3398 |  | 
|---|
| 3399 | if (!res.fi) { | 
|---|
| 3400 | err = fib_props[res.type].error; | 
|---|
| 3401 | if (!err) | 
|---|
| 3402 | err = -EHOSTUNREACH; | 
|---|
| 3403 | goto errout_rcu; | 
|---|
| 3404 | } | 
|---|
| 3405 | fri.fi = res.fi; | 
|---|
| 3406 | fri.tb_id = table_id; | 
|---|
| 3407 | fri.dst = res.prefix; | 
|---|
| 3408 | fri.dst_len = res.prefixlen; | 
|---|
| 3409 | fri.dscp = res.dscp; | 
|---|
| 3410 | fri.type = rt->rt_type; | 
|---|
| 3411 | fri.offload = 0; | 
|---|
| 3412 | fri.trap = 0; | 
|---|
| 3413 | fri.offload_failed = 0; | 
|---|
| 3414 | if (res.fa_head) { | 
|---|
| 3415 | struct fib_alias *fa; | 
|---|
| 3416 |  | 
|---|
| 3417 | hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) { | 
|---|
| 3418 | u8 slen = 32 - fri.dst_len; | 
|---|
| 3419 |  | 
|---|
| 3420 | if (fa->fa_slen == slen && | 
|---|
| 3421 | fa->tb_id == fri.tb_id && | 
|---|
| 3422 | fa->fa_dscp == fri.dscp && | 
|---|
| 3423 | fa->fa_info == res.fi && | 
|---|
| 3424 | fa->fa_type == fri.type) { | 
|---|
| 3425 | fri.offload = READ_ONCE(fa->offload); | 
|---|
| 3426 | fri.trap = READ_ONCE(fa->trap); | 
|---|
| 3427 | fri.offload_failed = | 
|---|
| 3428 | READ_ONCE(fa->offload_failed); | 
|---|
| 3429 | break; | 
|---|
| 3430 | } | 
|---|
| 3431 | } | 
|---|
| 3432 | } | 
|---|
| 3433 | err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, | 
|---|
| 3434 | nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0); | 
|---|
| 3435 | } else { | 
|---|
| 3436 | err = rt_fill_info(net, dst, src, rt, table_id, res.dscp, &fl4, | 
|---|
| 3437 | skb, NETLINK_CB(in_skb).portid, | 
|---|
| 3438 | nlh->nlmsg_seq, 0); | 
|---|
| 3439 | } | 
|---|
| 3440 | if (err < 0) | 
|---|
| 3441 | goto errout_rcu; | 
|---|
| 3442 |  | 
|---|
| 3443 | rcu_read_unlock(); | 
|---|
| 3444 |  | 
|---|
| 3445 | err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); | 
|---|
| 3446 |  | 
|---|
| 3447 | errout_free: | 
|---|
| 3448 | return err; | 
|---|
| 3449 | errout_rcu: | 
|---|
| 3450 | rcu_read_unlock(); | 
|---|
| 3451 | kfree_skb(skb); | 
|---|
| 3452 | goto errout_free; | 
|---|
| 3453 | } | 
|---|
| 3454 |  | 
|---|
| 3455 | void ip_rt_multicast_event(struct in_device *in_dev) | 
|---|
| 3456 | { | 
|---|
| 3457 | rt_cache_flush(dev_net(in_dev->dev)); | 
|---|
| 3458 | } | 
|---|
| 3459 |  | 
|---|
| 3460 | #ifdef CONFIG_SYSCTL | 
|---|
| 3461 | static int ip_rt_gc_interval __read_mostly  = 60 * HZ; | 
|---|
| 3462 | static int ip_rt_gc_min_interval __read_mostly	= HZ / 2; | 
|---|
| 3463 | static int ip_rt_gc_elasticity __read_mostly	= 8; | 
|---|
| 3464 | static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU; | 
|---|
| 3465 |  | 
|---|
| 3466 | static int ipv4_sysctl_rtcache_flush(const struct ctl_table *__ctl, int write, | 
|---|
| 3467 | void *buffer, size_t *lenp, loff_t *ppos) | 
|---|
| 3468 | { | 
|---|
| 3469 | struct net *net = (struct net *)__ctl->extra1; | 
|---|
| 3470 |  | 
|---|
| 3471 | if (write) { | 
|---|
| 3472 | rt_cache_flush(net); | 
|---|
| 3473 | fnhe_genid_bump(net); | 
|---|
| 3474 | return 0; | 
|---|
| 3475 | } | 
|---|
| 3476 |  | 
|---|
| 3477 | return -EINVAL; | 
|---|
| 3478 | } | 
|---|
| 3479 |  | 
|---|
| 3480 | static struct ctl_table ipv4_route_table[] = { | 
|---|
| 3481 | { | 
|---|
| 3482 | .procname	= "gc_thresh", | 
|---|
| 3483 | .data		= &ipv4_dst_ops.gc_thresh, | 
|---|
| 3484 | .maxlen		= sizeof(int), | 
|---|
| 3485 | .mode		= 0644, | 
|---|
| 3486 | .proc_handler	= proc_dointvec, | 
|---|
| 3487 | }, | 
|---|
| 3488 | { | 
|---|
| 3489 | .procname	= "max_size", | 
|---|
| 3490 | .data		= &ip_rt_max_size, | 
|---|
| 3491 | .maxlen		= sizeof(int), | 
|---|
| 3492 | .mode		= 0644, | 
|---|
| 3493 | .proc_handler	= proc_dointvec, | 
|---|
| 3494 | }, | 
|---|
| 3495 | { | 
|---|
| 3496 | /*  Deprecated. Use gc_min_interval_ms */ | 
|---|
| 3497 |  | 
|---|
| 3498 | .procname	= "gc_min_interval", | 
|---|
| 3499 | .data		= &ip_rt_gc_min_interval, | 
|---|
| 3500 | .maxlen		= sizeof(int), | 
|---|
| 3501 | .mode		= 0644, | 
|---|
| 3502 | .proc_handler	= proc_dointvec_jiffies, | 
|---|
| 3503 | }, | 
|---|
| 3504 | { | 
|---|
| 3505 | .procname	= "gc_min_interval_ms", | 
|---|
| 3506 | .data		= &ip_rt_gc_min_interval, | 
|---|
| 3507 | .maxlen		= sizeof(int), | 
|---|
| 3508 | .mode		= 0644, | 
|---|
| 3509 | .proc_handler	= proc_dointvec_ms_jiffies, | 
|---|
| 3510 | }, | 
|---|
| 3511 | { | 
|---|
| 3512 | .procname	= "gc_timeout", | 
|---|
| 3513 | .data		= &ip_rt_gc_timeout, | 
|---|
| 3514 | .maxlen		= sizeof(int), | 
|---|
| 3515 | .mode		= 0644, | 
|---|
| 3516 | .proc_handler	= proc_dointvec_jiffies, | 
|---|
| 3517 | }, | 
|---|
| 3518 | { | 
|---|
| 3519 | .procname	= "gc_interval", | 
|---|
| 3520 | .data		= &ip_rt_gc_interval, | 
|---|
| 3521 | .maxlen		= sizeof(int), | 
|---|
| 3522 | .mode		= 0644, | 
|---|
| 3523 | .proc_handler	= proc_dointvec_jiffies, | 
|---|
| 3524 | }, | 
|---|
| 3525 | { | 
|---|
| 3526 | .procname	= "redirect_load", | 
|---|
| 3527 | .data		= &ip_rt_redirect_load, | 
|---|
| 3528 | .maxlen		= sizeof(int), | 
|---|
| 3529 | .mode		= 0644, | 
|---|
| 3530 | .proc_handler	= proc_dointvec, | 
|---|
| 3531 | }, | 
|---|
| 3532 | { | 
|---|
| 3533 | .procname	= "redirect_number", | 
|---|
| 3534 | .data		= &ip_rt_redirect_number, | 
|---|
| 3535 | .maxlen		= sizeof(int), | 
|---|
| 3536 | .mode		= 0644, | 
|---|
| 3537 | .proc_handler	= proc_dointvec, | 
|---|
| 3538 | }, | 
|---|
| 3539 | { | 
|---|
| 3540 | .procname	= "redirect_silence", | 
|---|
| 3541 | .data		= &ip_rt_redirect_silence, | 
|---|
| 3542 | .maxlen		= sizeof(int), | 
|---|
| 3543 | .mode		= 0644, | 
|---|
| 3544 | .proc_handler	= proc_dointvec, | 
|---|
| 3545 | }, | 
|---|
| 3546 | { | 
|---|
| 3547 | .procname	= "error_cost", | 
|---|
| 3548 | .data		= &ip_rt_error_cost, | 
|---|
| 3549 | .maxlen		= sizeof(int), | 
|---|
| 3550 | .mode		= 0644, | 
|---|
| 3551 | .proc_handler	= proc_dointvec, | 
|---|
| 3552 | }, | 
|---|
| 3553 | { | 
|---|
| 3554 | .procname	= "error_burst", | 
|---|
| 3555 | .data		= &ip_rt_error_burst, | 
|---|
| 3556 | .maxlen		= sizeof(int), | 
|---|
| 3557 | .mode		= 0644, | 
|---|
| 3558 | .proc_handler	= proc_dointvec, | 
|---|
| 3559 | }, | 
|---|
| 3560 | { | 
|---|
| 3561 | .procname	= "gc_elasticity", | 
|---|
| 3562 | .data		= &ip_rt_gc_elasticity, | 
|---|
| 3563 | .maxlen		= sizeof(int), | 
|---|
| 3564 | .mode		= 0644, | 
|---|
| 3565 | .proc_handler	= proc_dointvec, | 
|---|
| 3566 | }, | 
|---|
| 3567 | }; | 
|---|
| 3568 |  | 
|---|
| 3569 | static const char ipv4_route_flush_procname[] = "flush"; | 
|---|
| 3570 |  | 
|---|
| 3571 | static struct ctl_table ipv4_route_netns_table[] = { | 
|---|
| 3572 | { | 
|---|
| 3573 | .procname	= ipv4_route_flush_procname, | 
|---|
| 3574 | .maxlen		= sizeof(int), | 
|---|
| 3575 | .mode		= 0200, | 
|---|
| 3576 | .proc_handler	= ipv4_sysctl_rtcache_flush, | 
|---|
| 3577 | }, | 
|---|
| 3578 | { | 
|---|
| 3579 | .procname       = "min_pmtu", | 
|---|
| 3580 | .data           = &init_net.ipv4.ip_rt_min_pmtu, | 
|---|
| 3581 | .maxlen         = sizeof(int), | 
|---|
| 3582 | .mode           = 0644, | 
|---|
| 3583 | .proc_handler   = proc_dointvec_minmax, | 
|---|
| 3584 | .extra1         = &ip_min_valid_pmtu, | 
|---|
| 3585 | }, | 
|---|
| 3586 | { | 
|---|
| 3587 | .procname       = "mtu_expires", | 
|---|
| 3588 | .data           = &init_net.ipv4.ip_rt_mtu_expires, | 
|---|
| 3589 | .maxlen         = sizeof(int), | 
|---|
| 3590 | .mode           = 0644, | 
|---|
| 3591 | .proc_handler   = proc_dointvec_jiffies, | 
|---|
| 3592 | }, | 
|---|
| 3593 | { | 
|---|
| 3594 | .procname   = "min_adv_mss", | 
|---|
| 3595 | .data       = &init_net.ipv4.ip_rt_min_advmss, | 
|---|
| 3596 | .maxlen     = sizeof(int), | 
|---|
| 3597 | .mode       = 0644, | 
|---|
| 3598 | .proc_handler   = proc_dointvec, | 
|---|
| 3599 | }, | 
|---|
| 3600 | }; | 
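|---|
|  | /* These entries are registered under "net/ipv4/route", i.e. they appear as | 
|---|
|  |  * /proc/sys/net/ipv4/route/{flush,min_pmtu,mtu_expires,min_adv_mss} in each | 
|---|
|  |  * namespace; for example, "echo 1 > /proc/sys/net/ipv4/route/flush" ends up | 
|---|
|  |  * in ipv4_sysctl_rtcache_flush() above. | 
|---|
|  |  */ | 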
|---|
| 3601 |  | 
|---|
| 3602 | static __net_init int sysctl_route_net_init(struct net *net) | 
|---|
| 3603 | { | 
|---|
| 3604 | struct ctl_table *tbl; | 
|---|
| 3605 | size_t table_size = ARRAY_SIZE(ipv4_route_netns_table); | 
|---|
| 3606 |  | 
|---|
| 3607 | tbl = ipv4_route_netns_table; | 
|---|
| 3608 | if (!net_eq(net, &init_net)) { | 
|---|
| 3609 | int i; | 
|---|
| 3610 |  | 
|---|
| 3611 | tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL); | 
|---|
| 3612 | if (!tbl) | 
|---|
| 3613 | goto err_dup; | 
|---|
| 3614 |  | 
|---|
| 3615 | /* Don't export non-whitelisted sysctls to unprivileged users */ | 
|---|
| 3616 | if (net->user_ns != &init_user_ns) { | 
|---|
| 3617 | if (tbl[0].procname != ipv4_route_flush_procname) | 
|---|
| 3618 | table_size = 0; | 
|---|
| 3619 | } | 
|---|
| 3620 |  | 
|---|
| 3621 | /* Update the variables to point into the current struct net | 
|---|
| 3622 | * except for the first element flush | 
|---|
| 3623 | */ | 
|---|
| 3624 | for (i = 1; i < table_size; i++) | 
|---|
| 3625 | tbl[i].data += (void *)net - (void *)&init_net; | 
|---|
| 3626 | } | 
|---|
| 3627 | tbl[0].extra1 = net; | 
|---|
| 3628 |  | 
|---|
| 3629 | net->ipv4.route_hdr = register_net_sysctl_sz(net, "net/ipv4/route", | 
|---|
| 3630 | tbl, table_size); | 
|---|
| 3631 | if (!net->ipv4.route_hdr) | 
|---|
| 3632 | goto err_reg; | 
|---|
| 3633 | return 0; | 
|---|
| 3634 |  | 
|---|
| 3635 | err_reg: | 
|---|
| 3636 | if (tbl != ipv4_route_netns_table) | 
|---|
| 3637 | kfree(tbl); | 
|---|
| 3638 | err_dup: | 
|---|
| 3639 | return -ENOMEM; | 
|---|
| 3640 | } | 
|---|
| 3641 |  | 
|---|
| 3642 | static __net_exit void sysctl_route_net_exit(struct net *net) | 
|---|
| 3643 | { | 
|---|
| 3644 | const struct ctl_table *tbl; | 
|---|
| 3645 |  | 
|---|
| 3646 | tbl = net->ipv4.route_hdr->ctl_table_arg; | 
|---|
| 3647 | unregister_net_sysctl_table(net->ipv4.route_hdr); | 
|---|
| 3648 | BUG_ON(tbl == ipv4_route_netns_table); | 
|---|
| 3649 | kfree(tbl); | 
|---|
| 3650 | } | 
|---|
| 3651 |  | 
|---|
| 3652 | static __net_initdata struct pernet_operations sysctl_route_ops = { | 
|---|
| 3653 | .init = sysctl_route_net_init, | 
|---|
| 3654 | .exit = sysctl_route_net_exit, | 
|---|
| 3655 | }; | 
|---|
| 3656 | #endif | 
|---|
| 3657 |  | 
|---|
| 3658 | static __net_init int netns_ip_rt_init(struct net *net) | 
|---|
| 3659 | { | 
|---|
| 3660 | /* Set default value for namespaceified sysctls */ | 
|---|
| 3661 | net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; | 
|---|
| 3662 | net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; | 
|---|
| 3663 | net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS; | 
|---|
| 3664 | return 0; | 
|---|
| 3665 | } | 
|---|
| 3666 |  | 
|---|
| 3667 | static struct pernet_operations __net_initdata ip_rt_ops = { | 
|---|
| 3668 | .init = netns_ip_rt_init, | 
|---|
| 3669 | }; | 
|---|
| 3670 |  | 
|---|
| 3671 | static __net_init int rt_genid_init(struct net *net) | 
|---|
| 3672 | { | 
|---|
| 3673 | atomic_set(&net->ipv4.rt_genid, 0); | 
|---|
| 3674 | atomic_set(&net->fnhe_genid, 0); | 
|---|
| 3675 | atomic_set(&net->ipv4.dev_addr_genid, get_random_u32()); | 
|---|
| 3676 | return 0; | 
|---|
| 3677 | } | 
|---|
| 3678 |  | 
|---|
| 3679 | static __net_initdata struct pernet_operations rt_genid_ops = { | 
|---|
| 3680 | .init = rt_genid_init, | 
|---|
| 3681 | }; | 
|---|
| 3682 |  | 
|---|
| 3683 | static int __net_init ipv4_inetpeer_init(struct net *net) | 
|---|
| 3684 | { | 
|---|
| 3685 | struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); | 
|---|
| 3686 |  | 
|---|
| 3687 | if (!bp) | 
|---|
| 3688 | return -ENOMEM; | 
|---|
| 3689 | inet_peer_base_init(bp); | 
|---|
| 3690 | net->ipv4.peers = bp; | 
|---|
| 3691 | return 0; | 
|---|
| 3692 | } | 
|---|
| 3693 |  | 
|---|
| 3694 | static void __net_exit ipv4_inetpeer_exit(struct net *net) | 
|---|
| 3695 | { | 
|---|
| 3696 | struct inet_peer_base *bp = net->ipv4.peers; | 
|---|
| 3697 |  | 
|---|
| 3698 | net->ipv4.peers = NULL; | 
|---|
| 3699 | inetpeer_invalidate_tree(bp); | 
|---|
| 3700 | kfree(bp); | 
|---|
| 3701 | } | 
|---|
| 3702 |  | 
|---|
| 3703 | static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { | 
|---|
| 3704 | .init	=	ipv4_inetpeer_init, | 
|---|
| 3705 | .exit	=	ipv4_inetpeer_exit, | 
|---|
| 3706 | }; | 
|---|
| 3707 |  | 
|---|
| 3708 | #ifdef CONFIG_IP_ROUTE_CLASSID | 
|---|
| 3709 | struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; | 
|---|
| 3710 | #endif /* CONFIG_IP_ROUTE_CLASSID */ | 
|---|
| 3711 |  | 
|---|
| 3712 | static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = { | 
|---|
| 3713 | {.protocol = PF_INET, .msgtype = RTM_GETROUTE, | 
|---|
| 3714 | .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, | 
|---|
| 3715 | }; | 
|---|
| 3716 |  | 
|---|
| 3717 | int __init ip_rt_init(void) | 
|---|
| 3718 | { | 
|---|
| 3719 | void *idents_hash; | 
|---|
| 3720 | int cpu; | 
|---|
| 3721 |  | 
|---|
| 3722 | /* For modern hosts, this will use 2 MB of memory */ | 
|---|
| 3723 | idents_hash = alloc_large_system_hash("IP idents", | 
|---|
| 3724 | sizeof(*ip_idents) + sizeof(*ip_tstamps), | 
|---|
| 3725 | 0, | 
|---|
| 3726 | 16, /* one bucket per 64 KB */ | 
|---|
| 3727 | HASH_ZERO, | 
|---|
| 3728 | NULL, | 
|---|
| 3729 | &ip_idents_mask, | 
|---|
| 3730 | 2048, | 
|---|
| 3731 | 256*1024); | 
|---|
| 3732 |  | 
|---|
| 3733 | ip_idents = idents_hash; | 
|---|
| 3734 |  | 
|---|
| 3735 | get_random_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents)); | 
|---|
| 3736 |  | 
|---|
| 3737 | ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents); | 
|---|
| 3738 |  | 
|---|
| 3739 | for_each_possible_cpu(cpu) { | 
|---|
| 3740 | struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); | 
|---|
| 3741 |  | 
|---|
| 3742 | INIT_LIST_HEAD(&ul->head); | 
|---|
| 3743 | spin_lock_init(&ul->lock); | 
|---|
| 3744 | } | 
|---|
| 3745 | #ifdef CONFIG_IP_ROUTE_CLASSID | 
|---|
| 3746 | ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); | 
|---|
| 3747 | if (!ip_rt_acct) | 
|---|
| 3748 | panic("IP: failed to allocate ip_rt_acct\n"); | 
|---|
| 3749 | #endif | 
|---|
| 3750 |  | 
|---|
| 3751 | ipv4_dst_ops.kmem_cachep = KMEM_CACHE(rtable, | 
|---|
| 3752 | SLAB_HWCACHE_ALIGN | SLAB_PANIC); | 
|---|
| 3753 |  | 
|---|
| 3754 | ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; | 
|---|
| 3755 |  | 
|---|
| 3756 | if (dst_entries_init(&ipv4_dst_ops) < 0) | 
|---|
| 3757 | panic("IP: failed to allocate ipv4_dst_ops counter\n"); | 
|---|
| 3758 |  | 
|---|
| 3759 | if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) | 
|---|
| 3760 | panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); | 
|---|
| 3761 |  | 
|---|
| 3762 | ipv4_dst_ops.gc_thresh = ~0; | 
|---|
| 3763 | ip_rt_max_size = INT_MAX; | 
|---|
| 3764 |  | 
|---|
| 3765 | devinet_init(); | 
|---|
| 3766 | ip_fib_init(); | 
|---|
| 3767 |  | 
|---|
| 3768 | if (ip_rt_proc_init()) | 
|---|
| 3769 | pr_err("Unable to create route proc files\n"); | 
|---|
| 3770 | #ifdef CONFIG_XFRM | 
|---|
| 3771 | xfrm_init(); | 
|---|
| 3772 | xfrm4_init(); | 
|---|
| 3773 | #endif | 
|---|
| 3774 | rtnl_register_many(ip_rt_rtnl_msg_handlers); | 
|---|
| 3775 |  | 
|---|
| 3776 | #ifdef CONFIG_SYSCTL | 
|---|
| 3777 | register_pernet_subsys(&sysctl_route_ops); | 
|---|
| 3778 | #endif | 
|---|
| 3779 | register_pernet_subsys(&ip_rt_ops); | 
|---|
| 3780 | register_pernet_subsys(&rt_genid_ops); | 
|---|
| 3781 | register_pernet_subsys(&ipv4_inetpeer_ops); | 
|---|
| 3782 | return 0; | 
|---|
| 3783 | } | 
|---|
| 3784 |  | 
|---|
| 3785 | #ifdef CONFIG_SYSCTL | 
|---|
| 3786 | /* | 
|---|
| 3787 | * We really need to sanitize the damn ipv4 init order, then all | 
|---|
| 3788 | * this nonsense will go away. | 
|---|
| 3789 | */ | 
|---|
| 3790 | void __init ip_static_sysctl_init(void) | 
|---|
| 3791 | { | 
|---|
| 3792 | register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); | 
|---|
| 3793 | } | 
|---|
| 3794 | #endif | 
|---|
| 3795 |  | 
|---|