1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Internet Control Message Protocol (ICMPv6)
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on net/ipv4/icmp.c
10 *
11 * RFC 1885
12 */
13
14/*
15 * Changes:
16 *
17 * Andi Kleen : exception handling
18 * Andi Kleen add rate limits. never reply to a icmp.
19 * add more length checks and other fixes.
20 * yoshfuji : ensure to sent parameter problem for
21 * fragments.
22 * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
23 * Randy Dunlap and
24 * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
25 * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
26 */
27
28#define pr_fmt(fmt) "IPv6: " fmt
29
30#include <linux/module.h>
31#include <linux/errno.h>
32#include <linux/types.h>
33#include <linux/socket.h>
34#include <linux/in.h>
35#include <linux/kernel.h>
36#include <linux/sockios.h>
37#include <linux/net.h>
38#include <linux/skbuff.h>
39#include <linux/init.h>
40#include <linux/netfilter.h>
41#include <linux/slab.h>
42
43#ifdef CONFIG_SYSCTL
44#include <linux/sysctl.h>
45#endif
46
47#include <linux/inet.h>
48#include <linux/netdevice.h>
49#include <linux/icmpv6.h>
50
51#include <net/ip.h>
52#include <net/sock.h>
53
54#include <net/ipv6.h>
55#include <net/ip6_checksum.h>
56#include <net/ping.h>
57#include <net/protocol.h>
58#include <net/raw.h>
59#include <net/rawv6.h>
60#include <net/seg6.h>
61#include <net/transp_v6.h>
62#include <net/ip6_route.h>
63#include <net/addrconf.h>
64#include <net/icmp.h>
65#include <net/xfrm.h>
66#include <net/inet_common.h>
67#include <net/dsfield.h>
68#include <net/l3mdev.h>
69
70#include <linux/uaccess.h>
71
72static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73
/* Error handler for the ICMPv6 protocol itself: called when an ICMPv6
 * error arrives whose embedded (offending) packet was ICMPv6 - e.g. an
 * echo request we sent.  Updates PMTU / redirect state and notifies the
 * ping socket layer.  Always returns 0.
 */
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
	struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
	struct net *net = dev_net_rcu(dev: skb->dev);

	if (type == ICMPV6_PKT_TOOBIG)
		ip6_update_pmtu(skb, net, mtu: info, oif: skb->dev->ifindex, mark: 0, uid: sock_net_uid(net, NULL));
	else if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, oif: skb->dev->ifindex, mark: 0,
			     uid: sock_net_uid(net, NULL));

	/* Only real errors (not informational messages) concerning one of
	 * our echo requests are propagated to the ping sockets.
	 */
	if (!(type & ICMPV6_INFOMSG_MASK))
		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
			ping_err(skb, offset, ntohl(info));

	return 0;
}
93
94static int icmpv6_rcv(struct sk_buff *skb);
95
/* Registration record for IPPROTO_ICMPV6: NOPOLICY because xfrm policy
 * is checked inside icmpv6_rcv() itself, FINAL because no other protocol
 * may follow ICMPv6 in the header chain.
 */
static const struct inet6_protocol icmpv6_protocol = {
	.handler	=	icmpv6_rcv,
	.err_handler	=	icmpv6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
101
/* Called with BH disabled */
static struct sock *icmpv6_xmit_lock(struct net *net)
{
	struct sock *sk;

	/* Grab this CPU's ICMPv6 control socket.  trylock (not lock):
	 * we may already hold the lock on this CPU if the output path
	 * re-enters ICMPv6, so blocking would deadlock.
	 */
	sk = this_cpu_read(ipv6_icmp_sk);
	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
		/* This can happen if the output path (f.e. SIT or
		 * ip6ip6 tunnel) signals dst_link_failure() for an
		 * outgoing ICMP6 packet.
		 */
		return NULL;
	}
	/* The per-cpu socket is shared between netns; pin it to ours
	 * for the duration of the transmit.
	 */
	sock_net_set(sk, net);
	return sk;
}
118
/* Release the per-cpu ICMPv6 socket taken by icmpv6_xmit_lock(),
 * re-parenting it to init_net before dropping the lock.
 */
static void icmpv6_xmit_unlock(struct sock *sk)
{
	sock_net_set(sk, net: &init_net);
	spin_unlock(lock: &sk->sk_lock.slock);
}
124
125/*
126 * Figure out, may we reply to this packet with icmp error.
127 *
128 * We do not reply, if:
129 * - it was icmp error message.
130 * - it is truncated, so that it is known, that protocol is ICMPV6
131 * (i.e. in the middle of some exthdr)
132 *
133 * --ANK (980726)
134 */
135
/* Return true when we must NOT send an ICMPv6 error in response to @skb:
 * the packet is an ICMPv6 error itself, or it is truncated inside the
 * extension-header chain so its final protocol cannot be determined.
 */
static bool is_ineligible(const struct sk_buff *skb)
{
	/* Offset of the first byte after the fixed IPv6 header. */
	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
	int len = skb->len - ptr;
	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	__be16 frag_off;

	if (len < 0)
		return true;

	ptr = ipv6_skip_exthdr(skb, start: ptr, nexthdrp: &nexthdr, frag_offp: &frag_off);
	if (ptr < 0)
		return false;
	if (nexthdr == IPPROTO_ICMPV6) {
		u8 _type, *tp;
		tp = skb_header_pointer(skb,
			offset: ptr+offsetof(struct icmp6hdr, icmp6_type),
			len: sizeof(_type), buffer: &_type);

		/* Based on RFC 8200, Section 4.5 Fragment Header, return
		 * false if this is a fragment packet with no icmp header info.
		 */
		if (!tp && frag_off != 0)
			return false;
		else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
			return true;
	}
	return false;
}
165
166static bool icmpv6_mask_allow(struct net *net, int type)
167{
168 if (type > ICMPV6_MSG_MAX)
169 return true;
170
171 /* Limit if icmp type is set in ratemask. */
172 if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173 return true;
174
175 return false;
176}
177
/* Global (per-netns) ICMPv6 output rate limiter.  Returns true when the
 * message may be sent.  On success for a rate-limited type, sets
 * *@apply_ratelimit so the caller also runs the per-host limiter and
 * later consumes a token via icmp_global_consume().
 */
static bool icmpv6_global_allow(struct net *net, int type,
				bool *apply_ratelimit)
{
	/* Types not covered by the ratemask bypass limiting entirely. */
	if (icmpv6_mask_allow(net, type))
		return true;

	if (icmp_global_allow(net)) {
		*apply_ratelimit = true;
		return true;
	}
	__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
	return false;
}
191
/*
 *	Check the ICMP output rate limit
 */
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
			       struct flowi6 *fl6, bool apply_ratelimit)
{
	struct net *net = sock_net(sk);
	struct net_device *dev;
	struct dst_entry *dst;
	bool res = false;

	/* Caller (icmpv6_global_allow) already decided whether per-host
	 * limiting applies to this message type.
	 */
	if (!apply_ratelimit)
		return true;

	/*
	 * Look up the output route.
	 * XXX: perhaps the expire for routing entries cloned by
	 * this lookup should be more aggressive (not longer than timeout).
	 */
	dst = ip6_route_output(net, sk, fl6);
	rcu_read_lock();
	dev = dst_dev_rcu(dst);
	if (dst->error) {
		IP6_INC_STATS(net, ip6_dst_idev(dst),
			      IPSTATS_MIB_OUTNOROUTES);
	} else if (dev && (dev->flags & IFF_LOOPBACK)) {
		/* Loopback traffic is never rate limited. */
		res = true;
	} else {
		struct rt6_info *rt = dst_rt6_info(dst);
		int tmo = net->ipv6.sysctl.icmpv6_time;
		struct inet_peer *peer;

		/* Give more bandwidth to wider prefixes. */
		if (rt->rt6i_dst.plen < 128)
			tmo >>= ((128 - rt->rt6i_dst.plen)>>5);

		/* Per-destination token bucket keyed by inet_peer. */
		peer = inet_getpeer_v6(base: net->ipv6.peers, v6daddr: &fl6->daddr);
		res = inet_peer_xrlim_allow(peer, timeout: tmo);
	}
	rcu_read_unlock();
	if (!res)
		__ICMP6_INC_STATS(net, NULL, ICMP6_MIB_RATELIMITHOST);
	else
		/* Message will be sent: consume a global-bucket token. */
		icmp_global_consume(net);
	dst_release(dst);
	return res;
}
239
/* Return true when the route that would carry this ICMPv6 error already
 * has a preferred source address configured; if not, the caller picks a
 * more meaningful saddr from the input interface instead.
 */
static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
				  struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
	struct dst_entry *dst;
	bool res = false;

	dst = ip6_route_output(net, sk, fl6);
	if (!dst->error) {
		struct rt6_info *rt = dst_rt6_info(dst);
		struct in6_addr prefsrc;

		rt6_get_prefsrc(rt, addr: &prefsrc);
		res = !ipv6_addr_any(a: &prefsrc);
	}
	dst_release(dst);
	return res;
}
258
259/*
260 * an inline helper for the "simple" if statement below
261 * checks if parameter problem report is caused by an
262 * unrecognized IPv6 option that has the Option Type
263 * highest-order two bits set to 10
264 */
265
266static bool opt_unrec(struct sk_buff *skb, __u32 offset)
267{
268 u8 _optval, *op;
269
270 offset += skb_network_offset(skb);
271 op = skb_header_pointer(skb, offset, len: sizeof(_optval), buffer: &_optval);
272 if (!op)
273 return true;
274 return (*op & 0xC0) == 0x80;
275}
276
/* Finalize and transmit the ICMPv6 message queued on @sk's write queue:
 * copy the header template @thdr into the first skb, compute the ICMPv6
 * checksum over all queued fragments (len bytes total), then push the
 * pending frames out.  Must be called with the queue already populated
 * by ip6_append_data().
 */
void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
				struct icmp6hdr *thdr, int len)
{
	struct sk_buff *skb;
	struct icmp6hdr *icmp6h;

	skb = skb_peek(list_: &sk->sk_write_queue);
	if (!skb)
		return;

	icmp6h = icmp6_hdr(skb);
	memcpy(to: icmp6h, from: thdr, len: sizeof(struct icmp6hdr));
	/* Checksum field must be zero while the sum is computed. */
	icmp6h->icmp6_cksum = 0;

	if (skb_queue_len(list_: &sk->sk_write_queue) == 1) {
		/* Single skb: fold the header into its partial csum. */
		skb->csum = csum_partial(buff: icmp6h,
					len: sizeof(struct icmp6hdr), sum: skb->csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(saddr: &fl6->saddr,
						      daddr: &fl6->daddr,
						      len, proto: fl6->flowi6_proto,
						      sum: skb->csum);
	} else {
		__wsum tmp_csum = 0;

		/* Multiple fragments: accumulate each skb's csum first. */
		skb_queue_walk(&sk->sk_write_queue, skb) {
			tmp_csum = csum_add(csum: tmp_csum, addend: skb->csum);
		}

		tmp_csum = csum_partial(buff: icmp6h,
					len: sizeof(struct icmp6hdr), sum: tmp_csum);
		icmp6h->icmp6_cksum = csum_ipv6_magic(saddr: &fl6->saddr,
						      daddr: &fl6->daddr,
						      len, proto: fl6->flowi6_proto,
						      sum: tmp_csum);
	}
	ip6_push_pending_frames(sk);
}
314
/* Copy-source descriptor handed to ip6_append_data() via icmpv6_getfrag:
 * the original packet being quoted, the offset to start copying from,
 * and the outgoing ICMPv6 message type.
 */
struct icmpv6_msg {
	struct sk_buff	*skb;	/* packet whose payload we echo back */
	int		offset;	/* byte offset into skb to copy from */
	uint8_t		type;	/* outgoing icmp6_type */
};
320
/* getfrag callback for ip6_append_data(): copy @len bytes of the quoted
 * packet into the output skb while accumulating the checksum.  For error
 * messages (not info messages) the conntrack entry of the original skb
 * is attached to the reply.  Always returns 0.
 */
static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
	struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
	struct sk_buff *org_skb = msg->skb;
	__wsum csum;

	csum = skb_copy_and_csum_bits(skb: org_skb, offset: msg->offset + offset,
				      to, len);
	skb->csum = csum_block_add(csum: skb->csum, csum2: csum, offset: odd);
	if (!(msg->type & ICMPV6_INFOMSG_MASK))
		nf_ct_attach(skb, org_skb);
	return 0;
}
334
#if IS_ENABLED(CONFIG_IPV6_MIP6)
/* Mobile IPv6 (RFC 6275): if the offending packet carries a Home Address
 * destination option, swap the care-of source address with the home
 * address so the ICMPv6 error is built against the home address.
 */
static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
	struct ipv6hdr *iph = ipv6_hdr(skb);
	struct ipv6_destopt_hao *hao;
	int off;

	if (opt->dsthao) {
		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
		if (likely(off >= 0)) {
			hao = (struct ipv6_destopt_hao *)
					(skb_network_header(skb) + off);
			swap(iph->saddr, hao->addr);
		}
	}
}
#else
/* No Mobile IPv6 support: nothing to swap. */
static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
354
/* Route lookup for an outgoing ICMPv6 error, including the IPsec (xfrm)
 * policy pass.  If the forward xfrm lookup is denied with -EPERM, retry
 * with a flow decoded in the reverse direction of the offending packet
 * (RFC 4301-style ICMP handling).  Returns a held dst_entry or ERR_PTR.
 */
static struct dst_entry *icmpv6_route_lookup(struct net *net,
					     struct sk_buff *skb,
					     struct sock *sk,
					     struct flowi6 *fl6)
{
	struct dst_entry *dst, *dst2;
	struct flowi6 fl2;
	int err;

	err = ip6_dst_lookup(net, sk, dst: &dst, fl6);
	if (err)
		return ERR_PTR(error: err);

	/*
	 * We won't send icmp if the destination is known
	 * anycast unless we need to treat anycast as unicast.
	 */
	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
	    ipv6_anycast_destination(dst, daddr: &fl6->daddr)) {
		net_dbg_ratelimited("icmp6_send: acast source\n");
		dst_release(dst);
		return ERR_PTR(error: -EINVAL);
	}

	/* No need to clone since we're just using its address. */
	dst2 = dst;

	dst = xfrm_lookup(net, dst_orig: dst, fl: flowi6_to_flowi(fl6), sk, flags: 0);
	if (!IS_ERR(ptr: dst)) {
		/* Policy allowed the plain route (possibly transformed). */
		if (dst != dst2)
			return dst;
	} else {
		/* -EPERM: fall through to the reverse-flow relookup;
		 * any other error is final.
		 */
		if (PTR_ERR(ptr: dst) == -EPERM)
			dst = NULL;
		else
			return dst;
	}

	err = xfrm_decode_session_reverse(net, skb, fl: flowi6_to_flowi(fl6: &fl2), AF_INET6);
	if (err)
		goto relookup_failed;

	err = ip6_dst_lookup(net, sk, dst: &dst2, fl6: &fl2);
	if (err)
		goto relookup_failed;

	dst2 = xfrm_lookup(net, dst_orig: dst2, fl: flowi6_to_flowi(fl6: &fl2), sk, flags: XFRM_LOOKUP_ICMP);
	if (!IS_ERR(ptr: dst2)) {
		dst_release(dst);
		dst = dst2;
	} else {
		err = PTR_ERR(ptr: dst2);
		if (err == -EPERM) {
			dst_release(dst);
			return dst2;
		} else
			goto relookup_failed;
	}

relookup_failed:
	/* If the first lookup produced a usable dst, fall back to it. */
	if (dst)
		return dst;
	return ERR_PTR(error: err);
}
419
/* Resolve the device an ICMPv6 reply should be attributed to,
 * unwrapping loopback / L3 master devices via the skb's cached route.
 */
static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	/* for local traffic to local address, skb dev is the loopback
	 * device. Check if there is a dst attached to the skb and if so
	 * get the real device index. Same is needed for replies to a link
	 * local address on a device enslaved to an L3 master device
	 */
	if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
		const struct rt6_info *rt6 = skb_rt6_info(skb);

		/* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
		 * and ip6_null_entry could be set to skb if no route is found.
		 */
		if (rt6 && rt6->rt6i_idev)
			dev = rt6->rt6i_idev->dev;
	}

	return dev;
}
441
442static int icmp6_iif(const struct sk_buff *skb)
443{
444 return icmp6_dev(skb)->ifindex;
445}
446
447/*
448 * Send an ICMP message in response to a packet in error
449 */
/**
 * icmp6_send - generate an ICMPv6 error in response to a faulty packet
 * @skb:	offending packet the error refers to
 * @type:	ICMPv6 type (e.g. ICMPV6_DEST_UNREACH)
 * @code:	ICMPv6 code within @type
 * @info:	type-dependent datum (MTU, pointer offset, ...)
 * @force_saddr: if non-NULL, source address to use instead of auto-select
 * @parm:	IP6CB of the offending packet (MIP6 options, input iif)
 *
 * Enforces RFC 4443 eligibility rules (no errors about errors, about
 * multicast/anycast destinations except the allowed cases, or about
 * non-unique sources), applies global and per-host rate limits, then
 * builds and transmits the error quoting up to the minimum-MTU worth
 * of the offending packet.
 */
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
		const struct in6_addr *force_saddr,
		const struct inet6_skb_parm *parm)
{
	struct inet6_dev *idev = NULL;
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct sock *sk;
	struct net *net;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	bool apply_ratelimit = false;
	struct dst_entry *dst;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct ipcm6_cookie ipc6;
	int iif = 0;
	int addr_type = 0;
	int len;
	u32 mark;

	/* Sanity: the IPv6 header must lie fully inside the skb. */
	if ((u8 *)hdr < skb->head ||
	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
		return;

	if (!skb->dev)
		return;

	rcu_read_lock();

	net = dev_net_rcu(dev: skb->dev);
	mark = IP6_REPLY_MARK(net, skb->mark);
	/*
	 *	Make sure we respect the rules
	 *	i.e. RFC 1885 2.4(e)
	 *	Rule (e.1) is enforced by not using icmp6_send
	 *	in any code that processes icmp errors.
	 */
	addr_type = ipv6_addr_type(addr: &hdr->daddr);

	/* If the offending packet was addressed to one of our own
	 * (or anycast) addresses, reply from that address.
	 */
	if (ipv6_chk_addr(net, addr: &hdr->daddr, dev: skb->dev, strict: 0) ||
	    ipv6_chk_acast_addr_src(net, dev: skb->dev, addr: &hdr->daddr))
		saddr = &hdr->daddr;

	/*
	 *	Dest addr check
	 */

	if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
		/* Multicast / non-host packets only earn an error for
		 * packet-too-big or unrecognized-option cases (RFC 4443).
		 */
		if (type != ICMPV6_PKT_TOOBIG &&
		    !(type == ICMPV6_PARAMPROB &&
		      code == ICMPV6_UNK_OPTION &&
		      (opt_unrec(skb, offset: info))))
			goto out;

		saddr = NULL;
	}

	addr_type = ipv6_addr_type(addr: &hdr->saddr);

	/*
	 *	Source addr check
	 */

	if (__ipv6_addr_needs_scope_id(type: addr_type)) {
		iif = icmp6_iif(skb);
	} else {
		/*
		 * The source device is used for looking up which routing table
		 * to use for sending an ICMP error.
		 */
		iif = l3mdev_master_ifindex(dev: skb->dev);
	}

	/*
	 *	Must not send error if the source does not uniquely
	 *	identify a single node (RFC2463 Section 2.4).
	 *	We check unspecified / multicast addresses here,
	 *	and anycast addresses will be checked later.
	 */
	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
		net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/*
	 *	Never answer to a ICMP packet.
	 */
	if (is_ineligible(skb)) {
		net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out;
	}

	/* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
	local_bh_disable();

	/* Check global sysctl_icmp_msgs_per_sec ratelimit */
	if (!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, type, apply_ratelimit: &apply_ratelimit))
		goto out_bh_enable;

	mip6_addr_swap(skb, opt: parm);

	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;

	/* Build the flow: destination is the offender's source. */
	memset(s: &fl6, c: 0, n: sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = hdr->saddr;
	if (force_saddr)
		saddr = force_saddr;
	if (saddr) {
		fl6.saddr = *saddr;
	} else if (!icmpv6_rt_has_prefsrc(sk, type, fl6: &fl6)) {
		/* select a more meaningful saddr from input if */
		struct net_device *in_netdev;

		in_netdev = dev_get_by_index(net, ifindex: parm->iif);
		if (in_netdev) {
			ipv6_dev_get_saddr(net, dev: in_netdev, daddr: &fl6.daddr,
					   srcprefs: inet6_sk(sk: sk)->srcprefs,
					   saddr: &fl6.saddr);
			dev_put(dev: in_netdev);
		}
	}
	fl6.flowi6_mark = mark;
	fl6.flowi6_oif = iif;
	fl6.fl6_icmp_type = type;
	fl6.fl6_icmp_code = code;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	fl6.mp_hash = rt6_multipath_hash(net, fl6: &fl6, skb, NULL);
	security_skb_classify_flow(skb, flic: flowi6_to_flowi_common(fl6: &fl6));

	np = inet6_sk(sk: sk);

	/* Per-destination rate limit (token bucket per inet_peer). */
	if (!icmpv6_xrlim_allow(sk, type, fl6: &fl6, apply_ratelimit))
		goto out_unlock;

	tmp_hdr.icmp6_type = type;
	tmp_hdr.icmp6_code = code;
	tmp_hdr.icmp6_cksum = 0;
	tmp_hdr.icmp6_pointer = htonl(info);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(addr: &fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	ipcm6_init_sk(ipc6: &ipc6, sk);
	ipc6.sockc.mark = mark;
	fl6.flowlabel = ip6_make_flowinfo(tclass: ipc6.tclass, flowlabel: fl6.flowlabel);

	dst = icmpv6_route_lookup(net, skb, sk, fl6: &fl6);
	if (IS_ERR(ptr: dst))
		goto out_unlock;

	ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6: &fl6, dst);

	msg.skb = skb;
	msg.offset = skb_network_offset(skb);
	msg.type = type;

	/* Quote as much of the offending packet as fits in the minimum
	 * IPv6 MTU after our own IPv6 + ICMPv6 headers (RFC 4443 2.4(c)).
	 */
	len = skb->len - msg.offset;
	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
	if (len < 0) {
		net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
				    &hdr->saddr, &hdr->daddr);
		goto out_dst_release;
	}

	idev = __in6_dev_get(dev: skb->dev);

	if (ip6_append_data(sk, getfrag: icmpv6_getfrag, from: &msg,
			    length: len + sizeof(struct icmp6hdr),
			    transhdrlen: sizeof(struct icmp6hdr),
			    ipc6: &ipc6, fl6: &fl6, dst_rt6_info(dst),
			    MSG_DONTWAIT)) {
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, fl6: &fl6, thdr: &tmp_hdr,
					   len: len + sizeof(struct icmp6hdr));
	}

out_dst_release:
	dst_release(dst);
out_unlock:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
out:
	rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);
647
/* Slightly more convenient version of icmp6_send with drop reasons.
 * Emits a Parameter Problem error pointing at byte @pos of @skb, then
 * frees the skb with the given drop @reason.  Consumes @skb.
 */
void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
			      enum skb_drop_reason reason)
{
	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
	kfree_skb_reason(skb, reason);
}
656
/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
 * if sufficient data bytes are available
 * @nhs is the size of the tunnel header(s) :
 *  Either an IPv4 header for SIT encap
 *         an IPv4 header + GRE header for GRE encap
 *
 * Returns 0 when an error was generated, 1 when the packet was too
 * short to quote.  Used by IPv6-in-IPv4 tunnels to translate an inner
 * IPv4 ICMP error into an ICMPv6 one.
 */
int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
			       unsigned int data_len)
{
	struct in6_addr temp_saddr;
	struct rt6_info *rt;
	struct sk_buff *skb2;
	u32 info = 0;

	/* Need tunnel header + inner IPv6 header + 8 bytes of payload. */
	if (!pskb_may_pull(skb, len: nhs + sizeof(struct ipv6hdr) + 8))
		return 1;

	/* RFC 4884 (partial) support for ICMP extensions */
	if (data_len < 128 || (data_len & 7) || skb->len < data_len)
		data_len = 0;

	/* skb_copy when we will rewrite the payload in place (RFC 4884
	 * padding below), a cheap clone otherwise.
	 */
	skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);

	if (!skb2)
		return 1;

	skb_dst_drop(skb: skb2);
	skb_pull(skb: skb2, len: nhs);
	skb_reset_network_header(skb: skb2);

	rt = rt6_lookup(net: dev_net_rcu(dev: skb->dev), daddr: &ipv6_hdr(skb: skb2)->saddr,
			NULL, oif: 0, skb, flags: 0);

	if (rt && rt->dst.dev)
		skb2->dev = rt->dst.dev;

	/* Source the error from the v4-mapped form of the outer saddr. */
	ipv6_addr_set_v4mapped(addr: ip_hdr(skb)->saddr, v4mapped: &temp_saddr);

	if (data_len) {
		/* RFC 4884 (partial) support :
		 * insert 0 padding at the end, before the extensions
		 */
		__skb_push(skb: skb2, len: nhs);
		skb_reset_network_header(skb: skb2);
		memmove(dest: skb2->data, src: skb2->data + nhs, count: data_len - nhs);
		memset(s: skb2->data + data_len - nhs, c: 0, n: nhs);
		/* RFC 4884 4.5 : Length is measured in 64-bit words,
		 * and stored in reserved[0]
		 */
		info = (data_len/8) << 24;
	}
	if (type == ICMP_TIME_EXCEEDED)
		icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
			   info, &temp_saddr, IP6CB(skb2));
	else
		icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
			   info, &temp_saddr, IP6CB(skb2));
	if (rt)
		ip6_rt_put(rt);

	kfree_skb(skb: skb2);

	return 0;
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
722
/* Answer an ICMPv6 echo (or RFC 8335 extended echo) request.  Honors the
 * icmpv6_echo_ignore_{all,multicast,anycast} sysctls and both the global
 * and per-host rate limits.  Returns the skb drop reason for the caller
 * (SKB_CONSUMED when a reply was queued).
 */
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
	struct net *net = dev_net_rcu(dev: skb->dev);
	struct sock *sk;
	struct inet6_dev *idev;
	struct ipv6_pinfo *np;
	const struct in6_addr *saddr = NULL;
	struct icmp6hdr *icmph = icmp6_hdr(skb);
	bool apply_ratelimit = false;
	struct icmp6hdr tmp_hdr;
	struct flowi6 fl6;
	struct icmpv6_msg msg;
	struct dst_entry *dst;
	struct ipcm6_cookie ipc6;
	u32 mark = IP6_REPLY_MARK(net, skb->mark);
	SKB_DR(reason);
	bool acast;
	u8 type;

	if (ipv6_addr_is_multicast(addr: &ipv6_hdr(skb)->daddr) &&
	    net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
		return reason;

	saddr = &ipv6_hdr(skb)->daddr;

	acast = ipv6_anycast_destination(dst: skb_dst(skb), daddr: saddr);
	if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
		return reason;

	/* Reply from the request's destination address only when it
	 * uniquely identifies us (or anycast replies are allowed).
	 */
	if (!ipv6_unicast_destination(skb) &&
	    !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
		saddr = NULL;

	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		type = ICMPV6_EXT_ECHO_REPLY;
	else
		type = ICMPV6_ECHO_REPLY;

	/* Start from the request header, flip the type. */
	memcpy(to: &tmp_hdr, from: icmph, len: sizeof(tmp_hdr));
	tmp_hdr.icmp6_type = type;

	memset(s: &fl6, c: 0, n: sizeof(fl6));
	if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
		fl6.flowlabel = ip6_flowlabel(hdr: ipv6_hdr(skb));

	fl6.flowi6_proto = IPPROTO_ICMPV6;
	fl6.daddr = ipv6_hdr(skb)->saddr;
	if (saddr)
		fl6.saddr = *saddr;
	fl6.flowi6_oif = icmp6_iif(skb);
	fl6.fl6_icmp_type = type;
	fl6.flowi6_mark = mark;
	fl6.flowi6_uid = sock_net_uid(net, NULL);
	security_skb_classify_flow(skb, flic: flowi6_to_flowi_common(fl6: &fl6));

	local_bh_disable();
	sk = icmpv6_xmit_lock(net);
	if (!sk)
		goto out_bh_enable;
	np = inet6_sk(sk: sk);

	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(addr: &fl6.daddr))
		fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = READ_ONCE(np->ucast_oif);

	if (ip6_dst_lookup(net, sk, dst: &dst, fl6: &fl6))
		goto out;
	dst = xfrm_lookup(net, dst_orig: dst, fl: flowi6_to_flowi(fl6: &fl6), sk, flags: 0);
	if (IS_ERR(ptr: dst))
		goto out;

	/* Check the ratelimit */
	if ((!(skb->dev->flags & IFF_LOOPBACK) &&
	    !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, apply_ratelimit: &apply_ratelimit)) ||
	    !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, fl6: &fl6, apply_ratelimit))
		goto out_dst_release;

	idev = __in6_dev_get(dev: skb->dev);

	msg.skb = skb;
	msg.offset = 0;
	msg.type = type;

	ipcm6_init_sk(ipc6: &ipc6, sk);
	ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6: &fl6, dst);
	ipc6.tclass = ipv6_get_dsfield(ipv6h: ipv6_hdr(skb));
	ipc6.sockc.mark = mark;

	/* Extended echo (PROBE): fill in the probe reply payload. */
	if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
		if (!icmp_build_probe(skb, icmphdr: (struct icmphdr *)&tmp_hdr))
			goto out_dst_release;

	if (ip6_append_data(sk, getfrag: icmpv6_getfrag, from: &msg,
			    length: skb->len + sizeof(struct icmp6hdr),
			    transhdrlen: sizeof(struct icmp6hdr), ipc6: &ipc6, fl6: &fl6,
			    dst_rt6_info(dst), MSG_DONTWAIT)) {
		__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
		ip6_flush_pending_frames(sk);
	} else {
		icmpv6_push_pending_frames(sk, fl6: &fl6, thdr: &tmp_hdr,
					   len: skb->len + sizeof(struct icmp6hdr));
		reason = SKB_CONSUMED;
	}
out_dst_release:
	dst_release(dst);
out:
	icmpv6_xmit_unlock(sk);
out_bh_enable:
	local_bh_enable();
	return reason;
}
835
/* Dispatch a received ICMPv6 error to the upper-layer protocol of the
 * embedded (offending) packet: skip the quoted IPv6 extension headers,
 * then invoke the inner protocol's err_handler and the raw-socket error
 * path.  Returns SKB_CONSUMED on success, a drop reason otherwise.
 */
enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
				   u8 code, __be32 info)
{
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net_rcu(dev: skb->dev);
	const struct inet6_protocol *ipprot;
	enum skb_drop_reason reason;
	int inner_offset;
	__be16 frag_off;
	u8 nexthdr;

	/* The quoted packet must at least contain its IPv6 header. */
	reason = pskb_may_pull_reason(skb, len: sizeof(struct ipv6hdr));
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	seg6_icmp_srh(skb, opt);

	nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
	if (ipv6_ext_hdr(nexthdr)) {
		/* now skip over extension headers */
		inner_offset = ipv6_skip_exthdr(skb, start: sizeof(struct ipv6hdr),
						nexthdrp: &nexthdr, frag_offp: &frag_off);
		if (inner_offset < 0) {
			SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
			goto out;
		}
	} else {
		inner_offset = sizeof(struct ipv6hdr);
	}

	/* Checkin header including 8 bytes of inner protocol header. */
	reason = pskb_may_pull_reason(skb, len: inner_offset + 8);
	if (reason != SKB_NOT_DROPPED_YET)
		goto out;

	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
	   Without this we will not able f.e. to make source routed
	   pmtu discovery.
	   Corresponding argument (opt) to notifiers is already added.
	   --ANK (980726)
	 */

	ipprot = rcu_dereference(inet6_protos[nexthdr]);
	if (ipprot && ipprot->err_handler)
		ipprot->err_handler(skb, opt, type, code, inner_offset, info);

	raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
	return SKB_CONSUMED;

out:
	__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
	return reason;
}
889
890/*
891 * Handle icmp messages
892 */
893
/* Main ICMPv6 input handler, registered as the IPPROTO_ICMPV6 handler.
 * Performs the xfrm policy check (forward and reverse around the inner
 * header), validates the checksum, then dispatches by message type to
 * echo reply / ping / ndisc / mld / error notification.  Always returns
 * 0; the skb is consumed or freed with an appropriate drop reason.
 */
static int icmpv6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	struct net *net = dev_net_rcu(dev: skb->dev);
	struct net_device *dev = icmp6_dev(skb);
	struct inet6_dev *idev = __in6_dev_get(dev);
	const struct in6_addr *saddr, *daddr;
	struct icmp6hdr *hdr;
	u8 type;

	if (!xfrm6_policy_check(NULL, dir: XFRM_POLICY_IN, skb)) {
		struct sec_path *sp = skb_sec_path(skb);
		int nh;

		/* Forward policy denied: only acceptable if the last xfrm
		 * state allows ICMP and the reverse-direction check passes.
		 */
		if (!(sp && sp->xvec[sp->len - 1]->props.flags &
				 XFRM_STATE_ICMP)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		if (!pskb_may_pull(skb, len: sizeof(*hdr) + sizeof(struct ipv6hdr)))
			goto drop_no_count;

		/* Temporarily point the network header at the inner packet
		 * for the reverse policy check, then restore it.
		 */
		nh = skb_network_offset(skb);
		skb_set_network_header(skb, offset: sizeof(*hdr));

		if (!xfrm6_policy_check_reverse(NULL, dir: XFRM_POLICY_IN,
						skb)) {
			reason = SKB_DROP_REASON_XFRM_POLICY;
			goto drop_no_count;
		}

		skb_set_network_header(skb, offset: nh);
	}

	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;

	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
		net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
				    saddr, daddr);
		goto csum_error;
	}

	if (!pskb_pull(skb, len: sizeof(*hdr)))
		goto discard_it;

	hdr = icmp6_hdr(skb);

	type = hdr->icmp6_type;

	ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);

	switch (type) {
	case ICMPV6_ECHO_REQUEST:
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
			reason = icmpv6_echo_reply(skb);
		break;
	case ICMPV6_EXT_ECHO_REQUEST:
		/* RFC 8335 PROBE: gated behind the v4 probe sysctl too. */
		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
			reason = icmpv6_echo_reply(skb);
		break;

	case ICMPV6_ECHO_REPLY:
	case ICMPV6_EXT_ECHO_REPLY:
		/* ping sockets take ownership of the skb. */
		ping_rcv(skb);
		return 0;

	case ICMPV6_PKT_TOOBIG:
		/* BUGGG_FUTURE: if packet contains rthdr, we cannot update
		   standard destination cache. Seems, only "advanced"
		   destination cache will allow to solve this problem
		   --ANK (980726)
		 */
		if (!pskb_may_pull(skb, len: sizeof(struct ipv6hdr)))
			goto discard_it;
		hdr = icmp6_hdr(skb);

		/* to notify */
		fallthrough;
	case ICMPV6_DEST_UNREACH:
	case ICMPV6_TIME_EXCEED:
	case ICMPV6_PARAMPROB:
		reason = icmpv6_notify(skb, type, code: hdr->icmp6_code,
				       info: hdr->icmp6_mtu);
		break;

	case NDISC_ROUTER_SOLICITATION:
	case NDISC_ROUTER_ADVERTISEMENT:
	case NDISC_NEIGHBOUR_SOLICITATION:
	case NDISC_NEIGHBOUR_ADVERTISEMENT:
	case NDISC_REDIRECT:
		reason = ndisc_rcv(skb);
		break;

	case ICMPV6_MGM_QUERY:
		igmp6_event_query(skb);
		return 0;

	case ICMPV6_MGM_REPORT:
		igmp6_event_report(skb);
		return 0;

	case ICMPV6_MGM_REDUCTION:
	case ICMPV6_NI_QUERY:
	case ICMPV6_NI_REPLY:
	case ICMPV6_MLD2_REPORT:
	case ICMPV6_DHAAD_REQUEST:
	case ICMPV6_DHAAD_REPLY:
	case ICMPV6_MOBILE_PREFIX_SOL:
	case ICMPV6_MOBILE_PREFIX_ADV:
		/* Known but unhandled here; silently consumed/dropped. */
		break;

	default:
		/* informational */
		if (type & ICMPV6_INFOMSG_MASK)
			break;

		net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
				    saddr, daddr);

		/*
		 * error of unknown type.
		 * must pass to upper level
		 */

		reason = icmpv6_notify(skb, type, code: hdr->icmp6_code,
				       info: hdr->icmp6_mtu);
	}

	/* until the v6 path can be better sorted assume failure and
	 * preserve the status quo behaviour for the rest of the paths to here
	 */
	if (reason)
		kfree_skb_reason(skb, reason);
	else
		consume_skb(skb);

	return 0;

csum_error:
	reason = SKB_DROP_REASON_ICMP_CSUM;
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
	__ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
	kfree_skb_reason(skb, reason);
	return 0;
}
1046
/* Initialize a flowi6 for an outgoing ICMPv6 message of @type from
 * @saddr to @daddr on interface @oif, and run LSM flow classification.
 * memset (not a struct literal) is used deliberately so padding is
 * zeroed as well.
 */
void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
		      const struct in6_addr *saddr,
		      const struct in6_addr *daddr, int oif)
{
	memset(s: fl6, c: 0, n: sizeof(*fl6));
	fl6->saddr = *saddr;
	fl6->daddr = *daddr;
	fl6->flowi6_proto	= IPPROTO_ICMPV6;
	fl6->fl6_icmp_type	= type;
	fl6->fl6_icmp_code	= 0;
	fl6->flowi6_oif		= oif;
	security_sk_classify_flow(sk, flic: flowi6_to_flowi_common(fl6));
}
1060
/* Boot-time setup: create one raw ICMPv6 control socket per possible
 * CPU, register the IPPROTO_ICMPV6 handler and the icmp sender hook.
 * Returns 0 on success or a negative errno.
 */
int __init icmpv6_init(void)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(sk: &sk, PF_INET6,
					   type: SOCK_RAW, IPPROTO_ICMPV6, net: &init_net);
		if (err < 0) {
			pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
			       err);
			return err;
		}

		per_cpu(ipv6_icmp_sk, i) = sk;

		/* Enough space for 2 64K ICMP packets, including
		 * sk_buff struct overhead.
		 */
		sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
	}

	err = -EAGAIN;
	if (inet6_add_protocol(prot: &icmpv6_protocol, IPPROTO_ICMPV6) < 0)
		goto fail;

	err = inet6_register_icmp_sender(fn: icmp6_send);
	if (err)
		goto sender_reg_err;
	return 0;

sender_reg_err:
	inet6_del_protocol(prot: &icmpv6_protocol, IPPROTO_ICMPV6);
fail:
	pr_err("Failed to register ICMP6 protocol\n");
	return err;
}
1098
/* Teardown counterpart of icmpv6_init(): unregister the sender hook
 * and the protocol handler (reverse registration order).
 */
void icmpv6_cleanup(void)
{
	inet6_unregister_icmp_sender(fn: icmp6_send);
	inet6_del_protocol(prot: &icmpv6_protocol, IPPROTO_ICMPV6);
}
1104
1105
/* Mapping of ICMPV6_DEST_UNREACH codes (array index) to the errno
 * reported to sockets and whether the error is fatal for the transport.
 */
static const struct icmp6_err {
	int err;	/* errno delivered to the socket */
	int fatal;	/* non-zero: hard error (aborts e.g. TCP connect) */
} tab_unreach[] = {
	{	/* NOROUTE */
		.err	= ENETUNREACH,
		.fatal	= 0,
	},
	{	/* ADM_PROHIBITED */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* Was NOT_NEIGHBOUR, now reserved */
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* ADDR_UNREACH	*/
		.err	= EHOSTUNREACH,
		.fatal	= 0,
	},
	{	/* PORT_UNREACH	*/
		.err	= ECONNREFUSED,
		.fatal	= 1,
	},
	{	/* POLICY_FAIL */
		.err	= EACCES,
		.fatal	= 1,
	},
	{	/* REJECT_ROUTE	*/
		.err	= EACCES,
		.fatal	= 1,
	},
};
1139
1140int icmpv6_err_convert(u8 type, u8 code, int *err)
1141{
1142 int fatal = 0;
1143
1144 *err = EPROTO;
1145
1146 switch (type) {
1147 case ICMPV6_DEST_UNREACH:
1148 fatal = 1;
1149 if (code < ARRAY_SIZE(tab_unreach)) {
1150 *err = tab_unreach[code].err;
1151 fatal = tab_unreach[code].fatal;
1152 }
1153 break;
1154
1155 case ICMPV6_PKT_TOOBIG:
1156 *err = EMSGSIZE;
1157 break;
1158
1159 case ICMPV6_PARAMPROB:
1160 *err = EPROTO;
1161 fatal = 1;
1162 break;
1163
1164 case ICMPV6_TIME_EXCEED:
1165 *err = EHOSTUNREACH;
1166 break;
1167 }
1168
1169 return fatal;
1170}
1171EXPORT_SYMBOL(icmpv6_err_convert);
1172
1173#ifdef CONFIG_SYSCTL
/* Template for the per-netns net.ipv6.icmp sysctl table; the .data
 * pointers (which reference init_net here) are rewritten per namespace
 * in ipv6_icmp_sysctl_init().
 */
static struct ctl_table ipv6_icmp_table_template[] = {
	{
		/* Minimum interval (ms) between rate-limited replies. */
		.procname	= "ratelimit",
		.data		= &init_net.ipv6.sysctl.icmpv6_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "echo_ignore_all",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_multicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "echo_ignore_anycast",
		.data		= &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		/* Bitmap of message types subject to rate limiting. */
		.procname	= "ratemask",
		.data		= &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
		.maxlen		= ICMPV6_MSG_MAX + 1,
		.mode		= 0644,
		.proc_handler	= proc_do_large_bitmap,
	},
	{
		.procname	= "error_anycast_as_unicast",
		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};
1220
/* Duplicate the sysctl template for namespace @net and retarget each
 * entry's .data at that namespace's fields.  The index-based rewrites
 * below must stay in sync with the template's entry order.  Returns
 * NULL on allocation failure.
 */
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_icmp_table_template,
			sizeof(ipv6_icmp_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.icmpv6_time;
		table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
	}
	return table;
}
1239
/* Number of entries in the per-netns icmp sysctl table (used by the
 * sysctl registration code).
 */
size_t ipv6_icmp_sysctl_table_size(void)
{
	return ARRAY_SIZE(ipv6_icmp_table_template);
}
1244#endif
1245