// SPDX-License-Identifier: GPL-2.0-only
/*
 * xfrm_nat_keepalive.c
 *
 * (c) 2024 Eyal Birger <eyal.birger@gmail.com>
 */

#include <net/inet_common.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>

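/*
 * Per-CPU control sockets used to transmit keepalive packets. The sockets
 * are created in init_net and each transmission is guarded by the per-CPU
 * BH lock embedded in sock_bh_locked.
 */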
static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv4) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
#if IS_ENABLED(CONFIG_IPV6)
static DEFINE_PER_CPU(struct sock_bh_locked, nat_keepalive_sk_ipv6) = {
	.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
#endif

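/*
 * Snapshot of the xfrm_state fields needed to build one keepalive packet.
 * It is filled under x->lock so the packet can be routed and sent after the
 * lock has been dropped.
 */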
struct nat_keepalive {
	struct net *net;
	u16 family;
	xfrm_address_t saddr;
	xfrm_address_t daddr;
	__be16 encap_sport;
	__be16 encap_dport;
	__u32 smark;
};

static void nat_keepalive_init(struct nat_keepalive *ka, struct xfrm_state *x)
{
	ka->net = xs_net(x);
	ka->family = x->props.family;
	ka->saddr = x->props.saddr;
	ka->daddr = x->id.daddr;
	ka->encap_sport = x->encap->encap_sport;
	ka->encap_dport = x->encap->encap_dport;
	ka->smark = xfrm_smark_get(0, x);
}

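/*
 * Route the keepalive using the state's addresses, encapsulation ports and
 * mark, then transmit it from this CPU's IPv4 control socket. The socket is
 * temporarily moved into the state's netns around the send and returned to
 * init_net afterwards.
 */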
static int nat_keepalive_send_ipv4(struct sk_buff *skb,
				   struct nat_keepalive *ka)
{
	struct net *net = ka->net;
	struct flowi4 fl4;
	struct rtable *rt;
	struct sock *sk;
	__u8 tos = 0;
	int err;

	flowi4_init_output(&fl4, 0 /* oif */, skb->mark, tos,
			   RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0,
			   ka->daddr.a4, ka->saddr.a4, ka->encap_dport,
			   ka->encap_sport, sock_net_uid(net, NULL));

	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	skb_dst_set(skb, &rt->dst);

	local_lock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock);
	sk = this_cpu_read(nat_keepalive_sk_ipv4.sock);
	sock_net_set(sk, net);
	err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos);
	sock_net_set(sk, &init_net);
	local_unlock_nested_bh(&nat_keepalive_sk_ipv4.bh_lock);
	return err;
}

#if IS_ENABLED(CONFIG_IPV6)
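/*
 * IPv6 counterpart of nat_keepalive_send_ipv4(). Unlike IPv4, the UDP
 * checksum is mandatory over IPv6, so it is computed here before the packet
 * is handed to ipv6_stub->ip6_xmit().
 */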
static int nat_keepalive_send_ipv6(struct sk_buff *skb,
				   struct nat_keepalive *ka,
				   struct udphdr *uh)
{
	struct net *net = ka->net;
	struct dst_entry *dst;
	struct flowi6 fl6;
	struct sock *sk;
	__wsum csum;
	int err;

	csum = skb_checksum(skb, 0, skb->len, 0);
	uh->check = csum_ipv6_magic(&ka->saddr.in6, &ka->daddr.in6,
				    skb->len, IPPROTO_UDP, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.saddr = ka->saddr.in6;
	fl6.daddr = ka->daddr.in6;
	fl6.flowi6_proto = IPPROTO_UDP;
	fl6.fl6_sport = ka->encap_sport;
	fl6.fl6_dport = ka->encap_dport;

	local_lock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
	sk = this_cpu_read(nat_keepalive_sk_ipv6.sock);
	sock_net_set(sk, net);
	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL);
	if (IS_ERR(dst)) {
		local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
		return PTR_ERR(dst);
	}

	skb_dst_set(skb, dst);
	err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0);
	sock_net_set(sk, &init_net);
	local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock);
	return err;
}
#endif

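/*
 * Build a NAT keepalive packet: a UDP datagram on the state's encapsulation
 * ports carrying the single 0xFF byte defined by RFC 3948, and hand it to
 * the per-family transmit routine. The UDP checksum is left at zero here;
 * only the IPv6 path fills it in, since it is mandatory there.
 */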
static void nat_keepalive_send(struct nat_keepalive *ka)
{
	const int nat_ka_hdrs_len = max(sizeof(struct iphdr),
					sizeof(struct ipv6hdr)) +
				    sizeof(struct udphdr);
	const u8 nat_ka_payload = 0xFF;
	int err = -EAFNOSUPPORT;
	struct sk_buff *skb;
	struct udphdr *uh;

	skb = alloc_skb(nat_ka_hdrs_len + sizeof(nat_ka_payload), GFP_ATOMIC);
	if (unlikely(!skb))
		return;

	skb_reserve(skb, nat_ka_hdrs_len);

	skb_put_u8(skb, nat_ka_payload);

	uh = skb_push(skb, sizeof(*uh));
	uh->source = ka->encap_sport;
	uh->dest = ka->encap_dport;
	uh->len = htons(skb->len);
	uh->check = 0;

	skb->mark = ka->smark;

	switch (ka->family) {
	case AF_INET:
		err = nat_keepalive_send_ipv4(skb, ka);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		err = nat_keepalive_send_ipv6(skb, ka, uh);
		break;
#endif
	}
	if (err)
		kfree_skb(skb);
}

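/*
 * Shared context for one pass of the keepalive worker: 'now' is the wall
 * clock at the start of the walk, 'next_run' accumulates the earliest time
 * any walked state needs to be revisited (0 if none does).
 */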
struct nat_keepalive_work_ctx {
	time64_t next_run;
	time64_t now;
};

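/*
 * Walk callback, invoked for each ESP state. States that carried traffic
 * within the keepalive interval only have their expiration pushed out;
 * expired states get a keepalive sent once x->lock has been released.
 */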
static int nat_keepalive_work_single(struct xfrm_state *x, int count, void *ptr)
{
	struct nat_keepalive_work_ctx *ctx = ptr;
	bool send_keepalive = false;
	struct nat_keepalive ka;
	time64_t next_run;
	u32 interval;
	int delta;

	interval = x->nat_keepalive_interval;
	if (!interval)
		return 0;

	spin_lock(&x->lock);

	delta = (int)(ctx->now - x->lastused);
	if (delta < interval) {
		x->nat_keepalive_expiration = ctx->now + interval - delta;
		next_run = x->nat_keepalive_expiration;
	} else if (x->nat_keepalive_expiration > ctx->now) {
		next_run = x->nat_keepalive_expiration;
	} else {
		next_run = ctx->now + interval;
		nat_keepalive_init(&ka, x);
		send_keepalive = true;
	}

	spin_unlock(&x->lock);

	if (send_keepalive)
		nat_keepalive_send(&ka);

	if (!ctx->next_run || next_run < ctx->next_run)
		ctx->next_run = next_run;
	return 0;
}

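/*
 * Delayed work handler: walk every ESP state in the netns and, if any state
 * still needs keepalives, reschedule the work for the earliest expiration.
 */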
static void nat_keepalive_work(struct work_struct *work)
{
	struct nat_keepalive_work_ctx ctx;
	struct xfrm_state_walk walk;
	struct net *net;

	ctx.next_run = 0;
	ctx.now = ktime_get_real_seconds();

	net = container_of(work, struct net, xfrm.nat_keepalive_work.work);
	xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL);
	xfrm_state_walk(net, &walk, nat_keepalive_work_single, &ctx);
	xfrm_state_walk_done(&walk, net);
	if (ctx.next_run)
		schedule_delayed_work(&net->xfrm.nat_keepalive_work,
				      (ctx.next_run - ctx.now) * HZ);
}

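/*
 * Create one raw UDP control socket per possible CPU in init_net. On failure
 * the sockets created so far are released; inet_ctl_sock_destroy() tolerates
 * the still-NULL per-CPU slots.
 */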
static int nat_keepalive_sk_init(struct sock_bh_locked __percpu *socks,
				 unsigned short family)
{
	struct sock *sk;
	int err, i;

	for_each_possible_cpu(i) {
		err = inet_ctl_sock_create(&sk, family, SOCK_RAW, IPPROTO_UDP,
					   &init_net);
		if (err < 0)
			goto err;

		per_cpu_ptr(socks, i)->sock = sk;
	}

	return 0;
err:
	for_each_possible_cpu(i)
		inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock);
	return err;
}

static void nat_keepalive_sk_fini(struct sock_bh_locked __percpu *socks)
{
	int i;

	for_each_possible_cpu(i)
		inet_ctl_sock_destroy(per_cpu_ptr(socks, i)->sock);
}

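/*
 * Kick the per-netns keepalive work as soon as a state that requests NAT
 * keepalives is installed or updated, so the first probe is not delayed.
 */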
void xfrm_nat_keepalive_state_updated(struct xfrm_state *x)
{
	struct net *net;

	if (!x->nat_keepalive_interval)
		return;

	net = xs_net(x);
	schedule_delayed_work(&net->xfrm.nat_keepalive_work, 0);
}

int __net_init xfrm_nat_keepalive_net_init(struct net *net)
{
	INIT_DELAYED_WORK(&net->xfrm.nat_keepalive_work, nat_keepalive_work);
	return 0;
}

int xfrm_nat_keepalive_net_fini(struct net *net)
{
	cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work);
	return 0;
}

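/*
 * Create the per-CPU transmit sockets for one address family; the matching
 * xfrm_nat_keepalive_fini() below releases them.
 */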
int xfrm_nat_keepalive_init(unsigned short family)
{
	int err = -EAFNOSUPPORT;

	switch (family) {
	case AF_INET:
		err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv6, PF_INET6);
		break;
#endif
	}

	if (err)
		pr_err("xfrm nat keepalive init: failed to init err:%d\n", err);
	return err;
}
EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_init);

void xfrm_nat_keepalive_fini(unsigned short family)
{
	switch (family) {
	case AF_INET:
		nat_keepalive_sk_fini(&nat_keepalive_sk_ipv4);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		nat_keepalive_sk_fini(&nat_keepalive_sk_ipv6);
		break;
#endif
	}
}
EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_fini);