1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */
21da177e4SLinus Torvalds #ifndef _INET_ECN_H_
31da177e4SLinus Torvalds #define _INET_ECN_H_
41da177e4SLinus Torvalds
51da177e4SLinus Torvalds #include <linux/ip.h>
62566a509SThomas Graf #include <linux/skbuff.h>
7d7bf2ebeSToke Høiland-Jørgensen #include <linux/if_vlan.h>
814c85021SArnaldo Carvalho de Melo
914c85021SArnaldo Carvalho de Melo #include <net/inet_sock.h>
101da177e4SLinus Torvalds #include <net/dsfield.h>
110780b414SToke Høiland-Jørgensen #include <net/checksum.h>
121da177e4SLinus Torvalds
/*
 * ECN codepoints. They occupy the two least-significant bits of a
 * dsfield byte (IPv4 TOS / IPv6 traffic class), see RFC 3168.
 */
enum {
	INET_ECN_NOT_ECT	= 0x0,	/* 00: Not-ECT */
	INET_ECN_ECT_1		= 0x1,	/* 01: ECT(1) */
	INET_ECN_ECT_0		= 0x2,	/* 10: ECT(0) */
	INET_ECN_CE		= 0x3,	/* 11: CE */
	INET_ECN_MASK		= 0x3,	/* selects the ECN bits of a dsfield */
};
201da177e4SLinus Torvalds
21eccc1bb8Sstephen hemminger extern int sysctl_tunnel_ecn_log;
22eccc1bb8Sstephen hemminger
/* Return 1 when the ECN bits of @dsfield hold the CE codepoint, else 0. */
static inline int INET_ECN_is_ce(__u8 dsfield)
{
	const __u8 ecn = dsfield & INET_ECN_MASK;

	return ecn == INET_ECN_CE;
}
271da177e4SLinus Torvalds
/* Return 1 when the ECN bits of @dsfield hold the Not-ECT codepoint, else 0. */
static inline int INET_ECN_is_not_ect(__u8 dsfield)
{
	const __u8 ecn = dsfield & INET_ECN_MASK;

	return ecn == INET_ECN_NOT_ECT;
}
321da177e4SLinus Torvalds
/*
 * Test the INET_ECN_ECT_0 bit of @dsfield.
 *
 * The result is the masked bit itself (0 or INET_ECN_ECT_0), not a
 * normalised boolean. Because only that single bit is examined, ECT(0)
 * and CE give a nonzero result while Not-ECT and ECT(1) give 0.
 */
static inline int INET_ECN_is_capable(__u8 dsfield)
{
	const int ect0_bit = dsfield & INET_ECN_ECT_0;

	return ect0_bit;
}
371da177e4SLinus Torvalds
38b5d9c9c2SEric Dumazet /*
39b5d9c9c2SEric Dumazet * RFC 3168 9.1.1
40b5d9c9c2SEric Dumazet * The full-functionality option for ECN encapsulation is to copy the
41b5d9c9c2SEric Dumazet * ECN codepoint of the inside header to the outside header on
42b5d9c9c2SEric Dumazet * encapsulation if the inside header is not-ECT or ECT, and to set the
43b5d9c9c2SEric Dumazet * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
44b5d9c9c2SEric Dumazet * the inside header is CE.
45b5d9c9c2SEric Dumazet */
INET_ECN_encapsulate(__u8 outer,__u8 inner)461da177e4SLinus Torvalds static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
471da177e4SLinus Torvalds {
481da177e4SLinus Torvalds outer &= ~INET_ECN_MASK;
491da177e4SLinus Torvalds outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) :
501da177e4SLinus Torvalds INET_ECN_ECT_0;
511da177e4SLinus Torvalds return outer;
521da177e4SLinus Torvalds }
531da177e4SLinus Torvalds
INET_ECN_xmit(struct sock * sk)54ca067070SSteinar H. Gunderson static inline void INET_ECN_xmit(struct sock *sk)
55ca067070SSteinar H. Gunderson {
56ca067070SSteinar H. Gunderson inet_sk(sk)->tos |= INET_ECN_ECT_0;
57ca067070SSteinar H. Gunderson if (inet6_sk(sk) != NULL)
58ca067070SSteinar H. Gunderson inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
59ca067070SSteinar H. Gunderson }
60ca067070SSteinar H. Gunderson
INET_ECN_dontxmit(struct sock * sk)61ca067070SSteinar H. Gunderson static inline void INET_ECN_dontxmit(struct sock *sk)
62ca067070SSteinar H. Gunderson {
63ca067070SSteinar H. Gunderson inet_sk(sk)->tos &= ~INET_ECN_MASK;
64ca067070SSteinar H. Gunderson if (inet6_sk(sk) != NULL)
65ca067070SSteinar H. Gunderson inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
66ca067070SSteinar H. Gunderson }
671da177e4SLinus Torvalds
/*
 * Clear the two ECN bits in @label. The mask is INET_ECN_MASK << 20
 * converted with htonl(), so @label is expected to be a network-order
 * 32-bit value. NOTE(review): presumably the first word of an IPv6
 * header (version / traffic class / flow label) - confirm at call sites.
 * @label is evaluated once and modified in place.
 */
#define IP6_ECN_flow_init(label)				\
	do {							\
		(label) &= ~htonl(INET_ECN_MASK << 20);		\
	} while (0)
711da177e4SLinus Torvalds
/*
 * If INET_ECN_is_capable() accepts the tclass of @sk's IPv6 state, OR
 * the ECT(0) codepoint (INET_ECN_ECT_0 << 20, network order) into @label.
 *
 * inet6_sk(sk) is dereferenced without a NULL check, so the caller must
 * only use this on sockets for which inet6_sk() is non-NULL.
 */
#define IP6_ECN_flow_xmit(sk, label)					\
	do {								\
		if (INET_ECN_is_capable(inet6_sk(sk)->tclass))		\
			(label) |= htonl(INET_ECN_ECT_0 << 20);		\
	} while (0)
761da177e4SLinus Torvalds
IP_ECN_set_ce(struct iphdr * iph)772566a509SThomas Graf static inline int IP_ECN_set_ce(struct iphdr *iph)
781da177e4SLinus Torvalds {
791da177e4SLinus Torvalds u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
800780b414SToke Høiland-Jørgensen __be16 check_add;
811da177e4SLinus Torvalds
821da177e4SLinus Torvalds /*
831da177e4SLinus Torvalds * After the last operation we have (in binary):
841da177e4SLinus Torvalds * INET_ECN_NOT_ECT => 01
851da177e4SLinus Torvalds * INET_ECN_ECT_1 => 10
861da177e4SLinus Torvalds * INET_ECN_ECT_0 => 11
871da177e4SLinus Torvalds * INET_ECN_CE => 00
881da177e4SLinus Torvalds */
891da177e4SLinus Torvalds if (!(ecn & 2))
902566a509SThomas Graf return !ecn;
911da177e4SLinus Torvalds
921da177e4SLinus Torvalds /*
931da177e4SLinus Torvalds * The following gives us:
941da177e4SLinus Torvalds * INET_ECN_ECT_1 => check += htons(0xFFFD)
951da177e4SLinus Torvalds * INET_ECN_ECT_0 => check += htons(0xFFFE)
961da177e4SLinus Torvalds */
970780b414SToke Høiland-Jørgensen check_add = (__force __be16)((__force u16)htons(0xFFFB) +
980780b414SToke Høiland-Jørgensen (__force u16)htons(ecn));
991da177e4SLinus Torvalds
1000780b414SToke Høiland-Jørgensen iph->check = csum16_add(iph->check, check_add);
1011da177e4SLinus Torvalds iph->tos |= INET_ECN_CE;
1022566a509SThomas Graf return 1;
1031da177e4SLinus Torvalds }
1041da177e4SLinus Torvalds
IP_ECN_set_ect1(struct iphdr * iph)105b7237487SToke Høiland-Jørgensen static inline int IP_ECN_set_ect1(struct iphdr *iph)
106b7237487SToke Høiland-Jørgensen {
107b7237487SToke Høiland-Jørgensen if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
108b7237487SToke Høiland-Jørgensen return 0;
109b7237487SToke Høiland-Jørgensen
1100780b414SToke Høiland-Jørgensen iph->check = csum16_add(iph->check, htons(0x1));
111b7237487SToke Høiland-Jørgensen iph->tos ^= INET_ECN_MASK;
112b7237487SToke Høiland-Jørgensen return 1;
113b7237487SToke Høiland-Jørgensen }
114b7237487SToke Høiland-Jørgensen
IP_ECN_clear(struct iphdr * iph)1151da177e4SLinus Torvalds static inline void IP_ECN_clear(struct iphdr *iph)
1161da177e4SLinus Torvalds {
1171da177e4SLinus Torvalds iph->tos &= ~INET_ECN_MASK;
1181da177e4SLinus Torvalds }
1191da177e4SLinus Torvalds
ipv4_copy_dscp(unsigned int dscp,struct iphdr * inner)12029bb43b4SHerbert Xu static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner)
1211da177e4SLinus Torvalds {
12229bb43b4SHerbert Xu dscp &= ~INET_ECN_MASK;
1231da177e4SLinus Torvalds ipv4_change_dsfield(inner, INET_ECN_MASK, dscp);
1241da177e4SLinus Torvalds }
1251da177e4SLinus Torvalds
1261da177e4SLinus Torvalds struct ipv6hdr;
1271da177e4SLinus Torvalds
12834ae6a1aSEric Dumazet /* Note:
12934ae6a1aSEric Dumazet * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE,
13034ae6a1aSEric Dumazet * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE
13134ae6a1aSEric Dumazet * In IPv6 case, no checksum compensates the change in IPv6 header,
13234ae6a1aSEric Dumazet * so we have to update skb->csum.
13334ae6a1aSEric Dumazet */
IP6_ECN_set_ce(struct sk_buff * skb,struct ipv6hdr * iph)13434ae6a1aSEric Dumazet static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
1351da177e4SLinus Torvalds {
13634ae6a1aSEric Dumazet __be32 from, to;
13734ae6a1aSEric Dumazet
1381da177e4SLinus Torvalds if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
1392566a509SThomas Graf return 0;
14034ae6a1aSEric Dumazet
14134ae6a1aSEric Dumazet from = *(__be32 *)iph;
14234ae6a1aSEric Dumazet to = from | htonl(INET_ECN_CE << 20);
14334ae6a1aSEric Dumazet *(__be32 *)iph = to;
14434ae6a1aSEric Dumazet if (skb->ip_summed == CHECKSUM_COMPLETE)
145c15c0ab1SJohannes Berg skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
146c15c0ab1SJohannes Berg (__force __wsum)to);
1472566a509SThomas Graf return 1;
1481da177e4SLinus Torvalds }
1491da177e4SLinus Torvalds
IP6_ECN_set_ect1(struct sk_buff * skb,struct ipv6hdr * iph)150b7237487SToke Høiland-Jørgensen static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
151b7237487SToke Høiland-Jørgensen {
152b7237487SToke Høiland-Jørgensen __be32 from, to;
153b7237487SToke Høiland-Jørgensen
154b7237487SToke Høiland-Jørgensen if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
155b7237487SToke Høiland-Jørgensen return 0;
156b7237487SToke Høiland-Jørgensen
157b7237487SToke Høiland-Jørgensen from = *(__be32 *)iph;
158b7237487SToke Høiland-Jørgensen to = from ^ htonl(INET_ECN_MASK << 20);
159b7237487SToke Høiland-Jørgensen *(__be32 *)iph = to;
160b7237487SToke Høiland-Jørgensen if (skb->ip_summed == CHECKSUM_COMPLETE)
161b7237487SToke Høiland-Jørgensen skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
162b7237487SToke Høiland-Jørgensen (__force __wsum)to);
163b7237487SToke Høiland-Jørgensen return 1;
164b7237487SToke Høiland-Jørgensen }
165b7237487SToke Høiland-Jørgensen
ipv6_copy_dscp(unsigned int dscp,struct ipv6hdr * inner)16629bb43b4SHerbert Xu static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
1671da177e4SLinus Torvalds {
16829bb43b4SHerbert Xu dscp &= ~INET_ECN_MASK;
1691da177e4SLinus Torvalds ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
1701da177e4SLinus Torvalds }
1711da177e4SLinus Torvalds
INET_ECN_set_ce(struct sk_buff * skb)1722566a509SThomas Graf static inline int INET_ECN_set_ce(struct sk_buff *skb)
1732566a509SThomas Graf {
174d7bf2ebeSToke Høiland-Jørgensen switch (skb_protocol(skb, true)) {
175f3a7c66bSHarvey Harrison case cpu_to_be16(ETH_P_IP):
176ced14f68SSimon Horman if (skb_network_header(skb) + sizeof(struct iphdr) <=
177ced14f68SSimon Horman skb_tail_pointer(skb))
178eddc9ec5SArnaldo Carvalho de Melo return IP_ECN_set_ce(ip_hdr(skb));
1792566a509SThomas Graf break;
1802566a509SThomas Graf
181f3a7c66bSHarvey Harrison case cpu_to_be16(ETH_P_IPV6):
182ced14f68SSimon Horman if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
183ced14f68SSimon Horman skb_tail_pointer(skb))
18434ae6a1aSEric Dumazet return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
1852566a509SThomas Graf break;
1862566a509SThomas Graf }
1872566a509SThomas Graf
1882566a509SThomas Graf return 0;
1892566a509SThomas Graf }
1902566a509SThomas Graf
skb_get_dsfield(struct sk_buff * skb)191*70e939ddSEric Dumazet static inline int skb_get_dsfield(struct sk_buff *skb)
192*70e939ddSEric Dumazet {
193*70e939ddSEric Dumazet switch (skb_protocol(skb, true)) {
194*70e939ddSEric Dumazet case cpu_to_be16(ETH_P_IP):
195*70e939ddSEric Dumazet if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
196*70e939ddSEric Dumazet break;
197*70e939ddSEric Dumazet return ipv4_get_dsfield(ip_hdr(skb));
198*70e939ddSEric Dumazet
199*70e939ddSEric Dumazet case cpu_to_be16(ETH_P_IPV6):
200*70e939ddSEric Dumazet if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
201*70e939ddSEric Dumazet break;
202*70e939ddSEric Dumazet return ipv6_get_dsfield(ipv6_hdr(skb));
203*70e939ddSEric Dumazet }
204*70e939ddSEric Dumazet
205*70e939ddSEric Dumazet return -1;
206*70e939ddSEric Dumazet }
207*70e939ddSEric Dumazet
INET_ECN_set_ect1(struct sk_buff * skb)208b7237487SToke Høiland-Jørgensen static inline int INET_ECN_set_ect1(struct sk_buff *skb)
209b7237487SToke Høiland-Jørgensen {
210d7bf2ebeSToke Høiland-Jørgensen switch (skb_protocol(skb, true)) {
211b7237487SToke Høiland-Jørgensen case cpu_to_be16(ETH_P_IP):
212b7237487SToke Høiland-Jørgensen if (skb_network_header(skb) + sizeof(struct iphdr) <=
213b7237487SToke Høiland-Jørgensen skb_tail_pointer(skb))
214b7237487SToke Høiland-Jørgensen return IP_ECN_set_ect1(ip_hdr(skb));
215b7237487SToke Høiland-Jørgensen break;
216b7237487SToke Høiland-Jørgensen
217b7237487SToke Høiland-Jørgensen case cpu_to_be16(ETH_P_IPV6):
218b7237487SToke Høiland-Jørgensen if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
219b7237487SToke Høiland-Jørgensen skb_tail_pointer(skb))
220b7237487SToke Høiland-Jørgensen return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
221b7237487SToke Høiland-Jørgensen break;
222b7237487SToke Høiland-Jørgensen }
223b7237487SToke Høiland-Jørgensen
224b7237487SToke Høiland-Jørgensen return 0;
225b7237487SToke Høiland-Jørgensen }
226b7237487SToke Høiland-Jørgensen
227eccc1bb8Sstephen hemminger /*
228d28071d1SNeal Cardwell * RFC 6040 4.2
229eccc1bb8Sstephen hemminger * To decapsulate the inner header at the tunnel egress, a compliant
230eccc1bb8Sstephen hemminger * tunnel egress MUST set the outgoing ECN field to the codepoint at the
231eccc1bb8Sstephen hemminger * intersection of the appropriate arriving inner header (row) and outer
232eccc1bb8Sstephen hemminger * header (column) in Figure 4
233eccc1bb8Sstephen hemminger *
234eccc1bb8Sstephen hemminger * +---------+------------------------------------------------+
235eccc1bb8Sstephen hemminger * |Arriving | Arriving Outer Header |
236eccc1bb8Sstephen hemminger * | Inner +---------+------------+------------+------------+
237eccc1bb8Sstephen hemminger * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
238eccc1bb8Sstephen hemminger * +---------+---------+------------+------------+------------+
239eccc1bb8Sstephen hemminger * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
240eccc1bb8Sstephen hemminger * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE |
241eccc1bb8Sstephen hemminger * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE |
242eccc1bb8Sstephen hemminger * | CE | CE | CE | CE(!!!)| CE |
243eccc1bb8Sstephen hemminger * +---------+---------+------------+------------+------------+
244eccc1bb8Sstephen hemminger *
245eccc1bb8Sstephen hemminger * Figure 4: New IP in IP Decapsulation Behaviour
246eccc1bb8Sstephen hemminger *
247eccc1bb8Sstephen hemminger * returns 0 on success
248eccc1bb8Sstephen hemminger * 1 if something is broken and should be logged (!!! above)
249eccc1bb8Sstephen hemminger * 2 if packet should be dropped
250eccc1bb8Sstephen hemminger */
__INET_ECN_decapsulate(__u8 outer,__u8 inner,bool * set_ce)25128e45033SIdo Schimmel static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
252eccc1bb8Sstephen hemminger {
253eccc1bb8Sstephen hemminger if (INET_ECN_is_not_ect(inner)) {
254eccc1bb8Sstephen hemminger switch (outer & INET_ECN_MASK) {
255eccc1bb8Sstephen hemminger case INET_ECN_NOT_ECT:
256eccc1bb8Sstephen hemminger return 0;
257eccc1bb8Sstephen hemminger case INET_ECN_ECT_0:
258eccc1bb8Sstephen hemminger case INET_ECN_ECT_1:
259eccc1bb8Sstephen hemminger return 1;
260eccc1bb8Sstephen hemminger case INET_ECN_CE:
261eccc1bb8Sstephen hemminger return 2;
262eccc1bb8Sstephen hemminger }
263eccc1bb8Sstephen hemminger }
264eccc1bb8Sstephen hemminger
26528e45033SIdo Schimmel *set_ce = INET_ECN_is_ce(outer);
26628e45033SIdo Schimmel return 0;
26728e45033SIdo Schimmel }
26828e45033SIdo Schimmel
/*
 * Apply the tunnel-egress ECN decision for @outer / @inner to the
 * (inner) header of @skb.
 *
 * When __INET_ECN_decapsulate() reports success (0), the inner header is
 * marked CE if the outer header was CE, or rewritten to ECT(1) if the
 * outer header was ECT(1). A nonzero verdict is returned unchanged and
 * @skb is not modified.
 */
static inline int INET_ECN_decapsulate(struct sk_buff *skb,
				       __u8 outer, __u8 inner)
{
	bool set_ce = false;
	const int rc = __INET_ECN_decapsulate(outer, inner, &set_ce);

	if (rc)
		return rc;

	if (set_ce)
		INET_ECN_set_ce(skb);
	else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
		INET_ECN_set_ect1(skb);

	return 0;
}
285eccc1bb8Sstephen hemminger
IP_ECN_decapsulate(const struct iphdr * oiph,struct sk_buff * skb)286eccc1bb8Sstephen hemminger static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
287eccc1bb8Sstephen hemminger struct sk_buff *skb)
288eccc1bb8Sstephen hemminger {
289eccc1bb8Sstephen hemminger __u8 inner;
290eccc1bb8Sstephen hemminger
291d7bf2ebeSToke Høiland-Jørgensen switch (skb_protocol(skb, true)) {
292d7bf2ebeSToke Høiland-Jørgensen case htons(ETH_P_IP):
293eccc1bb8Sstephen hemminger inner = ip_hdr(skb)->tos;
294d7bf2ebeSToke Høiland-Jørgensen break;
295d7bf2ebeSToke Høiland-Jørgensen case htons(ETH_P_IPV6):
296eccc1bb8Sstephen hemminger inner = ipv6_get_dsfield(ipv6_hdr(skb));
297d7bf2ebeSToke Høiland-Jørgensen break;
298d7bf2ebeSToke Høiland-Jørgensen default:
299eccc1bb8Sstephen hemminger return 0;
300d7bf2ebeSToke Høiland-Jørgensen }
301eccc1bb8Sstephen hemminger
302eccc1bb8Sstephen hemminger return INET_ECN_decapsulate(skb, oiph->tos, inner);
303eccc1bb8Sstephen hemminger }
304eccc1bb8Sstephen hemminger
IP6_ECN_decapsulate(const struct ipv6hdr * oipv6h,struct sk_buff * skb)305eccc1bb8Sstephen hemminger static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
306eccc1bb8Sstephen hemminger struct sk_buff *skb)
307eccc1bb8Sstephen hemminger {
308eccc1bb8Sstephen hemminger __u8 inner;
309eccc1bb8Sstephen hemminger
310d7bf2ebeSToke Høiland-Jørgensen switch (skb_protocol(skb, true)) {
311d7bf2ebeSToke Høiland-Jørgensen case htons(ETH_P_IP):
312eccc1bb8Sstephen hemminger inner = ip_hdr(skb)->tos;
313d7bf2ebeSToke Høiland-Jørgensen break;
314d7bf2ebeSToke Høiland-Jørgensen case htons(ETH_P_IPV6):
315eccc1bb8Sstephen hemminger inner = ipv6_get_dsfield(ipv6_hdr(skb));
316d7bf2ebeSToke Høiland-Jørgensen break;
317d7bf2ebeSToke Høiland-Jørgensen default:
318eccc1bb8Sstephen hemminger return 0;
319d7bf2ebeSToke Høiland-Jørgensen }
320eccc1bb8Sstephen hemminger
321eccc1bb8Sstephen hemminger return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
322eccc1bb8Sstephen hemminger }
3231da177e4SLinus Torvalds #endif
324