/*
 * IPv6 specific functions of netfilter core
 *
 * Rusty Russell (C) 2000 -- This code is GPL.
 * Patrick McHardy (C) 2006-2012
 */
7#include <linux/kernel.h>
8#include <linux/init.h>
9#include <linux/ipv6.h>
10#include <linux/netfilter.h>
11#include <linux/netfilter_ipv6.h>
12#include <linux/export.h>
13#include <net/addrconf.h>
14#include <net/dst.h>
15#include <net/ipv6.h>
16#include <net/ip6_route.h>
17#include <net/xfrm.h>
18#include <net/netfilter/nf_queue.h>
19#include <net/netfilter/nf_conntrack_bridge.h>
20#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
21#include "../bridge/br_private.h"
22
23int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
24{
25 const struct ipv6hdr *iph = ipv6_hdr(skb);
26 struct sock *sk = sk_to_full_sk(sk: sk_partial);
27 struct net_device *dev = skb_dst_dev(skb);
28 struct flow_keys flkeys;
29 unsigned int hh_len;
30 struct dst_entry *dst;
31 int strict = (ipv6_addr_type(addr: &iph->daddr) &
32 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
33 struct flowi6 fl6 = {
34 .flowi6_l3mdev = l3mdev_master_ifindex(dev),
35 .flowi6_mark = skb->mark,
36 .flowi6_uid = sock_net_uid(net, sk),
37 .daddr = iph->daddr,
38 .saddr = iph->saddr,
39 .flowlabel = ip6_flowinfo(hdr: iph),
40 };
41 int err;
42
43 if (sk && sk->sk_bound_dev_if)
44 fl6.flowi6_oif = sk->sk_bound_dev_if;
45 else if (strict)
46 fl6.flowi6_oif = dev->ifindex;
47
48 fib6_rules_early_flow_dissect(net, skb, fl6: &fl6, flkeys: &flkeys);
49 dst = ip6_route_output(net, sk, fl6: &fl6);
50 err = dst->error;
51 if (err) {
52 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
53 net_dbg_ratelimited("ip6_route_me_harder: No more route\n");
54 dst_release(dst);
55 return err;
56 }
57
58 /* Drop old route. */
59 skb_dst_drop(skb);
60
61 skb_dst_set(skb, dst);
62
63#ifdef CONFIG_XFRM
64 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
65 xfrm_decode_session(net, skb, fl: flowi6_to_flowi(fl6: &fl6), AF_INET6) == 0) {
66 /* ignore return value from skb_dstref_steal, xfrm_lookup takes
67 * care of dropping the refcnt if needed.
68 */
69 skb_dstref_steal(skb);
70 dst = xfrm_lookup(net, dst_orig: dst, fl: flowi6_to_flowi(fl6: &fl6), sk, flags: 0);
71 if (IS_ERR(ptr: dst))
72 return PTR_ERR(ptr: dst);
73 skb_dst_set(skb, dst);
74 }
75#endif
76
77 /* Change in oif may mean change in hh_len. */
78 hh_len = skb_dst_dev(skb)->hard_header_len;
79 if (skb_headroom(skb) < hh_len &&
80 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
81 ntail: 0, GFP_ATOMIC))
82 return -ENOMEM;
83
84 return 0;
85}
86EXPORT_SYMBOL(ip6_route_me_harder);
87
88static int nf_ip6_reroute(struct sk_buff *skb,
89 const struct nf_queue_entry *entry)
90{
91 struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
92
93 if (entry->state.hook == NF_INET_LOCAL_OUT) {
94 const struct ipv6hdr *iph = ipv6_hdr(skb);
95 if (!ipv6_addr_equal(a1: &iph->daddr, a2: &rt_info->daddr) ||
96 !ipv6_addr_equal(a1: &iph->saddr, a2: &rt_info->saddr) ||
97 skb->mark != rt_info->mark)
98 return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
99 }
100 return 0;
101}
102
103int __nf_ip6_route(struct net *net, struct dst_entry **dst,
104 struct flowi *fl, bool strict)
105{
106 static const struct ipv6_pinfo fake_pinfo;
107 static const struct inet_sock fake_sk = {
108 /* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
109 .sk.sk_bound_dev_if = 1,
110 .pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
111 };
112 const void *sk = strict ? &fake_sk : NULL;
113 struct dst_entry *result;
114 int err;
115
116 result = ip6_route_output(net, sk, fl6: &fl->u.ip6);
117 err = result->error;
118 if (err)
119 dst_release(dst: result);
120 else
121 *dst = result;
122 return err;
123}
124EXPORT_SYMBOL_GPL(__nf_ip6_route);
125
126int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
127 struct nf_bridge_frag_data *data,
128 int (*output)(struct net *, struct sock *sk,
129 const struct nf_bridge_frag_data *data,
130 struct sk_buff *))
131{
132 int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
133 u8 tstamp_type = skb->tstamp_type;
134 ktime_t tstamp = skb->tstamp;
135 struct ip6_frag_state state;
136 u8 *prevhdr, nexthdr = 0;
137 unsigned int mtu, hlen;
138 int hroom, err = 0;
139 __be32 frag_id;
140
141 err = ip6_find_1stfragopt(skb, nexthdr: &prevhdr);
142 if (err < 0)
143 goto blackhole;
144 hlen = err;
145 nexthdr = *prevhdr;
146
147 mtu = skb->dev->mtu;
148 if (frag_max_size > mtu ||
149 frag_max_size < IPV6_MIN_MTU)
150 goto blackhole;
151
152 mtu = frag_max_size;
153 if (mtu < hlen + sizeof(struct frag_hdr) + 8)
154 goto blackhole;
155 mtu -= hlen + sizeof(struct frag_hdr);
156
157 frag_id = ipv6_select_ident(net, daddr: &ipv6_hdr(skb)->daddr,
158 saddr: &ipv6_hdr(skb)->saddr);
159
160 if (skb->ip_summed == CHECKSUM_PARTIAL &&
161 (err = skb_checksum_help(skb)))
162 goto blackhole;
163
164 hroom = LL_RESERVED_SPACE(skb->dev);
165 if (skb_has_frag_list(skb)) {
166 unsigned int first_len = skb_pagelen(skb);
167 struct ip6_fraglist_iter iter;
168 struct sk_buff *frag2;
169
170 if (first_len - hlen > mtu)
171 goto blackhole;
172
173 if (skb_cloned(skb) ||
174 skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
175 goto slow_path;
176
177 skb_walk_frags(skb, frag2) {
178 if (frag2->len > mtu)
179 goto blackhole;
180
181 /* Partially cloned skb? */
182 if (skb_shared(skb: frag2) ||
183 skb_headroom(skb: frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
184 goto slow_path;
185 }
186
187 err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
188 iter: &iter);
189 if (err < 0)
190 goto blackhole;
191
192 for (;;) {
193 /* Prepare header of the next frame,
194 * before previous one went down.
195 */
196 if (iter.frag)
197 ip6_fraglist_prepare(skb, iter: &iter);
198
199 skb_set_delivery_time(skb, kt: tstamp, tstamp_type);
200 err = output(net, sk, data, skb);
201 if (err || !iter.frag)
202 break;
203
204 skb = ip6_fraglist_next(iter: &iter);
205 }
206
207 kfree(objp: iter.tmp_hdr);
208 if (!err)
209 return 0;
210
211 kfree_skb_list(segs: iter.frag);
212 return err;
213 }
214slow_path:
215 /* This is a linearized skbuff, the original geometry is lost for us.
216 * This may also be a clone skbuff, we could preserve the geometry for
217 * the copies but probably not worth the effort.
218 */
219 ip6_frag_init(skb, hlen, mtu, needed_tailroom: skb->dev->needed_tailroom,
220 LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
221 state: &state);
222
223 while (state.left > 0) {
224 struct sk_buff *skb2;
225
226 skb2 = ip6_frag_next(skb, state: &state);
227 if (IS_ERR(ptr: skb2)) {
228 err = PTR_ERR(ptr: skb2);
229 goto blackhole;
230 }
231
232 skb_set_delivery_time(skb: skb2, kt: tstamp, tstamp_type);
233 err = output(net, sk, data, skb2);
234 if (err)
235 goto blackhole;
236 }
237 consume_skb(skb);
238 return err;
239
240blackhole:
241 kfree_skb(skb);
242 return 0;
243}
244EXPORT_SYMBOL_GPL(br_ip6_fragment);
245
246static const struct nf_ipv6_ops ipv6ops = {
247#if IS_MODULE(CONFIG_IPV6)
248 .chk_addr = ipv6_chk_addr,
249 .route_me_harder = ip6_route_me_harder,
250 .dev_get_saddr = ipv6_dev_get_saddr,
251 .route = __nf_ip6_route,
252#if IS_ENABLED(CONFIG_SYN_COOKIES)
253 .cookie_init_sequence = __cookie_v6_init_sequence,
254 .cookie_v6_check = __cookie_v6_check,
255#endif
256#endif
257 .route_input = ip6_route_input,
258 .fragment = ip6_fragment,
259 .reroute = nf_ip6_reroute,
260#if IS_MODULE(CONFIG_IPV6)
261 .br_fragment = br_ip6_fragment,
262#endif
263};
264
265int __init ipv6_netfilter_init(void)
266{
267 RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
268 return 0;
269}
270
271/* This can be called from inet6_init() on errors, so it cannot
272 * be marked __exit. -DaveM
273 */
274void ipv6_netfilter_fini(void)
275{
276 RCU_INIT_POINTER(nf_ipv6_ops, NULL);
277}
278