/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright (c) 2019 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * Include file for sample Host Bandwidth Manager (HBM) BPF programs
 */
11187d0738Sbrakmo #define KBUILD_MODNAME "foo"
12187d0738Sbrakmo #include <uapi/linux/bpf.h>
13187d0738Sbrakmo #include <uapi/linux/if_ether.h>
14187d0738Sbrakmo #include <uapi/linux/if_packet.h>
15187d0738Sbrakmo #include <uapi/linux/ip.h>
16187d0738Sbrakmo #include <uapi/linux/ipv6.h>
17187d0738Sbrakmo #include <uapi/linux/in.h>
18187d0738Sbrakmo #include <uapi/linux/tcp.h>
19187d0738Sbrakmo #include <uapi/linux/filter.h>
20187d0738Sbrakmo #include <uapi/linux/pkt_cls.h>
21187d0738Sbrakmo #include <net/ipv6.h>
22187d0738Sbrakmo #include <net/inet_ecn.h>
237cf245a3SToke Høiland-Jørgensen #include <bpf/bpf_endian.h>
247cf245a3SToke Høiland-Jørgensen #include <bpf/bpf_helpers.h>
25187d0738Sbrakmo #include "hbm.h"
26187d0738Sbrakmo
/*
 * Verdict values used by the HBM programs.
 *
 * DROP_PKT and ALLOW_PKT are the two base verdicts; CWR is an extra bit
 * that hbm_update_stats() ORs into the verdict when its cwr_flag is set,
 * giving the four values 0..3 tallied in returnValCount[].
 * TCP_ECN_OK is not referenced in this header.
 */
#define DROP_PKT	0
#define ALLOW_PKT	1
#define TCP_ECN_OK	1
#define CWR		2
31187d0738Sbrakmo
/*
 * Debug tracing is opt-in.  Unless HBM_DEBUG is defined before this header
 * is included, any existing bpf_printk definition is dropped and replaced
 * by a macro that expands to nothing, so its arguments are never evaluated.
 */
#if !defined(HBM_DEBUG)
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
36187d0738Sbrakmo
/*
 * Byte-based credit accounting.  Every threshold is expressed as a number
 * of maximum-sized packets (MAX_BYTES_PER_PACKET bytes each).
 * NOTE(review): the consumers of these thresholds live in the programs that
 * include this header - confirm the exact comparisons there.
 */
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500

/* 40 full-sized packets */
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
/* 400 full-sized packets */
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
/* Drop threshold for large packets: 15 full-sized packets below DROP_THRESH */
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
/* Span between the mark threshold and the large-packet drop threshold */
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
/* Size boundary in bytes; packets below it are the "small packets" */
#define LARGE_PKT_THRESH	120

/* Credit limits: both evaluate to 100 full-sized packets */
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
46187d0738Sbrakmo
/*
 * Time-based accounting for fq's EDT.
 * All values are in nanoseconds.
 */
#define BURST_SIZE_NS		100000	// 100us
#define MARK_THRESH_NS		50000	// 50us
#define DROP_THRESH_NS		500000	// 500us

/* Reserve 20us of queuing for small packets (less than 120 bytes) */
#define LARGE_PKT_DROP_THRESH_NS	(DROP_THRESH_NS - 20000)
/* Span between the mark threshold and the large-packet drop threshold */
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
5471634d7fSbrakmo
/*
 * Rate conversions.  'rate' is in bytes per nanosecond, scaled up by 2^20
 * (i.e. shifted left by 20 bits).
 *
 * CREDIT_PER_NS / BYTES_PER_NS: bytes that correspond to 'delta' ns at 'rate'.
 * BYTES_TO_NS: nanoseconds needed to send 'bytes' at 'rate'; 'rate' is the
 * divisor, so it must be non-zero.
 */
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) CREDIT_PER_NS(delta, rate)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
59187d0738Sbrakmo
6036b5d471SAndrii Nakryiko struct {
6136b5d471SAndrii Nakryiko __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
6236b5d471SAndrii Nakryiko __type(key, struct bpf_cgroup_storage_key);
6336b5d471SAndrii Nakryiko __type(value, struct hbm_vqueue);
6436b5d471SAndrii Nakryiko } queue_state SEC(".maps");
65187d0738Sbrakmo
6636b5d471SAndrii Nakryiko struct {
6736b5d471SAndrii Nakryiko __uint(type, BPF_MAP_TYPE_ARRAY);
6836b5d471SAndrii Nakryiko __uint(max_entries, 1);
6936b5d471SAndrii Nakryiko __type(key, u32);
70*c5815ac7SDaniel T. Lee __type(value, struct hbm_queue_stats);
7136b5d471SAndrii Nakryiko } queue_stats SEC(".maps");
72187d0738Sbrakmo
/*
 * Per-packet information gathered by hbm_get_pkt_info().
 * The three TCP fields are zero when no TCP socket state was available.
 */
struct hbm_pkt_info {
	int	cwnd;		/* copied from the TCP socket's snd_cwnd */
	int	rtt;		/* TCP socket's srtt_us >> 3 */
	int	packets_out;	/* copied from the TCP socket's packets_out */
	bool	is_ip;		/* IP version field was 4 or 6 */
	bool	is_tcp;		/* protocol / next-header field was TCP */
	short	ecn;		/* ECN bits, masked with INET_ECN_MASK */
};
81187d0738Sbrakmo
get_tcp_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)82d58c6f72Sbrakmo static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
83d58c6f72Sbrakmo {
84d58c6f72Sbrakmo struct bpf_sock *sk;
85d58c6f72Sbrakmo struct bpf_tcp_sock *tp;
86d58c6f72Sbrakmo
87d58c6f72Sbrakmo sk = skb->sk;
88d58c6f72Sbrakmo if (sk) {
89d58c6f72Sbrakmo sk = bpf_sk_fullsock(sk);
90d58c6f72Sbrakmo if (sk) {
91d58c6f72Sbrakmo if (sk->protocol == IPPROTO_TCP) {
92d58c6f72Sbrakmo tp = bpf_tcp_sock(sk);
93d58c6f72Sbrakmo if (tp) {
94d58c6f72Sbrakmo pkti->cwnd = tp->snd_cwnd;
95d58c6f72Sbrakmo pkti->rtt = tp->srtt_us >> 3;
9671634d7fSbrakmo pkti->packets_out = tp->packets_out;
97d58c6f72Sbrakmo return 0;
98d58c6f72Sbrakmo }
99d58c6f72Sbrakmo }
100d58c6f72Sbrakmo }
101d58c6f72Sbrakmo }
10271634d7fSbrakmo pkti->cwnd = 0;
10371634d7fSbrakmo pkti->rtt = 0;
10471634d7fSbrakmo pkti->packets_out = 0;
105d58c6f72Sbrakmo return 1;
106d58c6f72Sbrakmo }
107d58c6f72Sbrakmo
hbm_get_pkt_info(struct __sk_buff * skb,struct hbm_pkt_info * pkti)10871634d7fSbrakmo static void hbm_get_pkt_info(struct __sk_buff *skb,
109187d0738Sbrakmo struct hbm_pkt_info *pkti)
110187d0738Sbrakmo {
111187d0738Sbrakmo struct iphdr iph;
112187d0738Sbrakmo struct ipv6hdr *ip6h;
113187d0738Sbrakmo
114d58c6f72Sbrakmo pkti->cwnd = 0;
115d58c6f72Sbrakmo pkti->rtt = 0;
116187d0738Sbrakmo bpf_skb_load_bytes(skb, 0, &iph, 12);
117187d0738Sbrakmo if (iph.version == 6) {
118187d0738Sbrakmo ip6h = (struct ipv6hdr *)&iph;
119187d0738Sbrakmo pkti->is_ip = true;
120187d0738Sbrakmo pkti->is_tcp = (ip6h->nexthdr == 6);
121187d0738Sbrakmo pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
122187d0738Sbrakmo } else if (iph.version == 4) {
123187d0738Sbrakmo pkti->is_ip = true;
124187d0738Sbrakmo pkti->is_tcp = (iph.protocol == 6);
125187d0738Sbrakmo pkti->ecn = iph.tos & INET_ECN_MASK;
126187d0738Sbrakmo } else {
127187d0738Sbrakmo pkti->is_ip = false;
128187d0738Sbrakmo pkti->is_tcp = false;
129187d0738Sbrakmo pkti->ecn = 0;
130187d0738Sbrakmo }
131d58c6f72Sbrakmo if (pkti->is_tcp)
132d58c6f72Sbrakmo get_tcp_info(skb, pkti);
133187d0738Sbrakmo }
134187d0738Sbrakmo
hbm_init_vqueue(struct hbm_vqueue * qdp,int rate)135187d0738Sbrakmo static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
136187d0738Sbrakmo {
137187d0738Sbrakmo bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
138187d0738Sbrakmo qdp->lasttime = bpf_ktime_get_ns();
139187d0738Sbrakmo qdp->credit = INIT_CREDIT;
140187d0738Sbrakmo qdp->rate = rate * 128;
141187d0738Sbrakmo }
142187d0738Sbrakmo
hbm_init_edt_vqueue(struct hbm_vqueue * qdp,int rate)14371634d7fSbrakmo static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
14471634d7fSbrakmo int rate)
14571634d7fSbrakmo {
14671634d7fSbrakmo unsigned long long curtime;
14771634d7fSbrakmo
14871634d7fSbrakmo curtime = bpf_ktime_get_ns();
14971634d7fSbrakmo bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
15071634d7fSbrakmo qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst
15171634d7fSbrakmo qdp->credit = 0; // not used
15271634d7fSbrakmo qdp->rate = rate * 128;
15371634d7fSbrakmo }
15471634d7fSbrakmo
hbm_update_stats(struct hbm_queue_stats * qsp,int len,unsigned long long curtime,bool congestion_flag,bool drop_flag,bool cwr_flag,bool ecn_ce_flag,struct hbm_pkt_info * pkti,int credit)155187d0738Sbrakmo static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
156187d0738Sbrakmo int len,
157187d0738Sbrakmo unsigned long long curtime,
158187d0738Sbrakmo bool congestion_flag,
159d58c6f72Sbrakmo bool drop_flag,
160d58c6f72Sbrakmo bool cwr_flag,
161d58c6f72Sbrakmo bool ecn_ce_flag,
162d58c6f72Sbrakmo struct hbm_pkt_info *pkti,
163d58c6f72Sbrakmo int credit)
164187d0738Sbrakmo {
165d58c6f72Sbrakmo int rv = ALLOW_PKT;
166d58c6f72Sbrakmo
167187d0738Sbrakmo if (qsp != NULL) {
168187d0738Sbrakmo // Following is needed for work conserving
169187d0738Sbrakmo __sync_add_and_fetch(&(qsp->bytes_total), len);
170187d0738Sbrakmo if (qsp->stats) {
171187d0738Sbrakmo // Optionally update statistics
172187d0738Sbrakmo if (qsp->firstPacketTime == 0)
173187d0738Sbrakmo qsp->firstPacketTime = curtime;
174187d0738Sbrakmo qsp->lastPacketTime = curtime;
175187d0738Sbrakmo __sync_add_and_fetch(&(qsp->pkts_total), 1);
176d58c6f72Sbrakmo if (congestion_flag) {
177187d0738Sbrakmo __sync_add_and_fetch(&(qsp->pkts_marked), 1);
178187d0738Sbrakmo __sync_add_and_fetch(&(qsp->bytes_marked), len);
179187d0738Sbrakmo }
180187d0738Sbrakmo if (drop_flag) {
181187d0738Sbrakmo __sync_add_and_fetch(&(qsp->pkts_dropped), 1);
182187d0738Sbrakmo __sync_add_and_fetch(&(qsp->bytes_dropped),
183187d0738Sbrakmo len);
184187d0738Sbrakmo }
185d58c6f72Sbrakmo if (ecn_ce_flag)
186d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
187d58c6f72Sbrakmo if (pkti->cwnd) {
188d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->sum_cwnd),
189d58c6f72Sbrakmo pkti->cwnd);
190d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
191d58c6f72Sbrakmo }
192d58c6f72Sbrakmo if (pkti->rtt)
193d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->sum_rtt),
194d58c6f72Sbrakmo pkti->rtt);
195d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->sum_credit), credit);
196d58c6f72Sbrakmo
197d58c6f72Sbrakmo if (drop_flag)
198d58c6f72Sbrakmo rv = DROP_PKT;
199d58c6f72Sbrakmo if (cwr_flag)
200d58c6f72Sbrakmo rv |= 2;
201d58c6f72Sbrakmo if (rv == DROP_PKT)
202d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->returnValCount[0]),
203d58c6f72Sbrakmo 1);
204d58c6f72Sbrakmo else if (rv == ALLOW_PKT)
205d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->returnValCount[1]),
206d58c6f72Sbrakmo 1);
207d58c6f72Sbrakmo else if (rv == 2)
208d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->returnValCount[2]),
209d58c6f72Sbrakmo 1);
210d58c6f72Sbrakmo else if (rv == 3)
211d58c6f72Sbrakmo __sync_add_and_fetch(&(qsp->returnValCount[3]),
212d58c6f72Sbrakmo 1);
213187d0738Sbrakmo }
214187d0738Sbrakmo }
215187d0738Sbrakmo }
216