12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 2cb7f6a7bSJulius Volz /* 3cb7f6a7bSJulius Volz * ip_vs_est.c: simple rate estimator for IPVS 4cb7f6a7bSJulius Volz * 5cb7f6a7bSJulius Volz * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 6cb7f6a7bSJulius Volz * 729c2026fSHans Schillstrom * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> 829c2026fSHans Schillstrom * Network name space (netns) aware. 929c2026fSHans Schillstrom * Global data moved to netns i.e struct netns_ipvs 1029c2026fSHans Schillstrom * Affected data: est_list and est_lock. 1129c2026fSHans Schillstrom * estimation_timer() runs with timer per netns. 1229c2026fSHans Schillstrom * get_stats()) do the per cpu summing. 13cb7f6a7bSJulius Volz */ 149aada7acSHannes Eder 159aada7acSHannes Eder #define KMSG_COMPONENT "IPVS" 169aada7acSHannes Eder #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 179aada7acSHannes Eder 18cb7f6a7bSJulius Volz #include <linux/kernel.h> 19cb7f6a7bSJulius Volz #include <linux/jiffies.h> 20cb7f6a7bSJulius Volz #include <linux/types.h> 21cb7f6a7bSJulius Volz #include <linux/interrupt.h> 22cb7f6a7bSJulius Volz #include <linux/sysctl.h> 23cb7f6a7bSJulius Volz #include <linux/list.h> 24cb7f6a7bSJulius Volz 25cb7f6a7bSJulius Volz #include <net/ip_vs.h> 26cb7f6a7bSJulius Volz 27cb7f6a7bSJulius Volz /* 28cb7f6a7bSJulius Volz This code is to estimate rate in a shorter interval (such as 8 29cb7f6a7bSJulius Volz seconds) for virtual services and real servers. For measure rate in a 30cb7f6a7bSJulius Volz long interval, it is easy to implement a user level daemon which 31cb7f6a7bSJulius Volz periodically reads those statistical counters and measure rate. 32cb7f6a7bSJulius Volz 33cb7f6a7bSJulius Volz Currently, the measurement is activated by slow timer handler. Hope 34cb7f6a7bSJulius Volz this measurement will not introduce too much load. 35cb7f6a7bSJulius Volz 36cb7f6a7bSJulius Volz We measure rate during the last 8 seconds every 2 seconds: 37cb7f6a7bSJulius Volz 38cb7f6a7bSJulius Volz avgrate = avgrate*(1-W) + rate*W 39cb7f6a7bSJulius Volz 40cb7f6a7bSJulius Volz where W = 2^(-2) 41cb7f6a7bSJulius Volz 42cb7f6a7bSJulius Volz NOTES. 43cb7f6a7bSJulius Volz 44cd67cd5eSJulian Anastasov * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10. 45cb7f6a7bSJulius Volz 46cd67cd5eSJulian Anastasov * Netlink users can see 64-bit values but sockopt users are restricted 47cd67cd5eSJulian Anastasov to 32-bit values for conns, packets, bps, cps and pps. 48cd67cd5eSJulian Anastasov 49cd67cd5eSJulian Anastasov * A lot of code is taken from net/core/gen_estimator.c 50cb7f6a7bSJulius Volz */ 51cb7f6a7bSJulius Volz 52cb7f6a7bSJulius Volz 53b17fc996SHans Schillstrom /* 54b17fc996SHans Schillstrom * Make a summary from each cpu 55b17fc996SHans Schillstrom */ 56cd67cd5eSJulian Anastasov static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, 57b962abdcSJulian Anastasov struct ip_vs_cpu_stats __percpu *stats) 58b17fc996SHans Schillstrom { 59b17fc996SHans Schillstrom int i; 60d1ee4feaSJulian Anastasov bool add = false; 61b17fc996SHans Schillstrom 62b17fc996SHans Schillstrom for_each_possible_cpu(i) { 63b17fc996SHans Schillstrom struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); 64b17fc996SHans Schillstrom unsigned int start; 65cd67cd5eSJulian Anastasov u64 conns, inpkts, outpkts, inbytes, outbytes; 66cd67cd5eSJulian Anastasov 67d1ee4feaSJulian Anastasov if (add) { 68b17fc996SHans Schillstrom do { 694a569c0cSJulian Anastasov start = u64_stats_fetch_begin(&s->syncp); 70cd67cd5eSJulian Anastasov conns = s->cnt.conns; 71cd67cd5eSJulian Anastasov inpkts = s->cnt.inpkts; 72cd67cd5eSJulian Anastasov outpkts = s->cnt.outpkts; 73cd67cd5eSJulian Anastasov inbytes = s->cnt.inbytes; 74cd67cd5eSJulian Anastasov outbytes = s->cnt.outbytes; 754a569c0cSJulian Anastasov } while (u64_stats_fetch_retry(&s->syncp, start)); 76cd67cd5eSJulian Anastasov sum->conns += conns; 77cd67cd5eSJulian Anastasov sum->inpkts += inpkts; 78cd67cd5eSJulian Anastasov sum->outpkts += outpkts; 79b17fc996SHans Schillstrom sum->inbytes += inbytes; 80b17fc996SHans Schillstrom sum->outbytes += outbytes; 81b17fc996SHans Schillstrom } else { 82d1ee4feaSJulian Anastasov add = true; 83b17fc996SHans Schillstrom do { 844a569c0cSJulian Anastasov start = u64_stats_fetch_begin(&s->syncp); 85cd67cd5eSJulian Anastasov sum->conns = s->cnt.conns; 86cd67cd5eSJulian Anastasov sum->inpkts = s->cnt.inpkts; 87cd67cd5eSJulian Anastasov sum->outpkts = s->cnt.outpkts; 88cd67cd5eSJulian Anastasov sum->inbytes = s->cnt.inbytes; 89cd67cd5eSJulian Anastasov sum->outbytes = s->cnt.outbytes; 904a569c0cSJulian Anastasov } while (u64_stats_fetch_retry(&s->syncp, start)); 91b17fc996SHans Schillstrom } 92b17fc996SHans Schillstrom } 93b17fc996SHans Schillstrom } 94b17fc996SHans Schillstrom 95b17fc996SHans Schillstrom 968ef81c65SKees Cook static void estimation_timer(struct timer_list *t) 97cb7f6a7bSJulius Volz { 98cb7f6a7bSJulius Volz struct ip_vs_estimator *e; 99cb7f6a7bSJulius Volz struct ip_vs_stats *s; 100cd67cd5eSJulian Anastasov u64 rate; 1018ef81c65SKees Cook struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); 102cb7f6a7bSJulius Volz 103*2232642eSDust Li if (!sysctl_run_estimation(ipvs)) 104*2232642eSDust Li goto skip; 105*2232642eSDust Li 10629c2026fSHans Schillstrom spin_lock(&ipvs->est_lock); 10729c2026fSHans Schillstrom list_for_each_entry(e, &ipvs->est_list, list) { 108cb7f6a7bSJulius Volz s = container_of(e, struct ip_vs_stats, est); 109cb7f6a7bSJulius Volz 110cb7f6a7bSJulius Volz spin_lock(&s->lock); 111cd67cd5eSJulian Anastasov ip_vs_read_cpu_stats(&s->kstats, s->cpustats); 112cb7f6a7bSJulius Volz 113cb7f6a7bSJulius Volz /* scaled by 2^10, but divided 2 seconds */ 114cd67cd5eSJulian Anastasov rate = (s->kstats.conns - e->last_conns) << 9; 115cd67cd5eSJulian Anastasov e->last_conns = s->kstats.conns; 116cd67cd5eSJulian Anastasov e->cps += ((s64)rate - (s64)e->cps) >> 2; 117cb7f6a7bSJulius Volz 118cd67cd5eSJulian Anastasov rate = (s->kstats.inpkts - e->last_inpkts) << 9; 119cd67cd5eSJulian Anastasov e->last_inpkts = s->kstats.inpkts; 120cd67cd5eSJulian Anastasov e->inpps += ((s64)rate - (s64)e->inpps) >> 2; 121cb7f6a7bSJulius Volz 122cd67cd5eSJulian Anastasov rate = (s->kstats.outpkts - e->last_outpkts) << 9; 123cd67cd5eSJulian Anastasov e->last_outpkts = s->kstats.outpkts; 124cd67cd5eSJulian Anastasov e->outpps += ((s64)rate - (s64)e->outpps) >> 2; 125cb7f6a7bSJulius Volz 126cd67cd5eSJulian Anastasov /* scaled by 2^5, but divided 2 seconds */ 127cd67cd5eSJulian Anastasov rate = (s->kstats.inbytes - e->last_inbytes) << 4; 128cd67cd5eSJulian Anastasov e->last_inbytes = s->kstats.inbytes; 129cd67cd5eSJulian Anastasov e->inbps += ((s64)rate - (s64)e->inbps) >> 2; 130cb7f6a7bSJulius Volz 131cd67cd5eSJulian Anastasov rate = (s->kstats.outbytes - e->last_outbytes) << 4; 132cd67cd5eSJulian Anastasov e->last_outbytes = s->kstats.outbytes; 133cd67cd5eSJulian Anastasov e->outbps += ((s64)rate - (s64)e->outbps) >> 2; 134cb7f6a7bSJulius Volz spin_unlock(&s->lock); 135cb7f6a7bSJulius Volz } 13629c2026fSHans Schillstrom spin_unlock(&ipvs->est_lock); 137*2232642eSDust Li 138*2232642eSDust Li skip: 13929c2026fSHans Schillstrom mod_timer(&ipvs->est_timer, jiffies + 2*HZ); 140cb7f6a7bSJulius Volz } 141cb7f6a7bSJulius Volz 1420f34d54bSEric W. Biederman void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) 143cb7f6a7bSJulius Volz { 144cb7f6a7bSJulius Volz struct ip_vs_estimator *est = &stats->est; 145cb7f6a7bSJulius Volz 146cb7f6a7bSJulius Volz INIT_LIST_HEAD(&est->list); 147cb7f6a7bSJulius Volz 14829c2026fSHans Schillstrom spin_lock_bh(&ipvs->est_lock); 14929c2026fSHans Schillstrom list_add(&est->list, &ipvs->est_list); 15029c2026fSHans Schillstrom spin_unlock_bh(&ipvs->est_lock); 151cb7f6a7bSJulius Volz } 152cb7f6a7bSJulius Volz 1530f34d54bSEric W. Biederman void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) 154cb7f6a7bSJulius Volz { 155cb7f6a7bSJulius Volz struct ip_vs_estimator *est = &stats->est; 156cb7f6a7bSJulius Volz 15729c2026fSHans Schillstrom spin_lock_bh(&ipvs->est_lock); 158cb7f6a7bSJulius Volz list_del(&est->list); 15929c2026fSHans Schillstrom spin_unlock_bh(&ipvs->est_lock); 160cb7f6a7bSJulius Volz } 161cb7f6a7bSJulius Volz 162cb7f6a7bSJulius Volz void ip_vs_zero_estimator(struct ip_vs_stats *stats) 163cb7f6a7bSJulius Volz { 164cb7f6a7bSJulius Volz struct ip_vs_estimator *est = &stats->est; 165cd67cd5eSJulian Anastasov struct ip_vs_kstats *k = &stats->kstats; 166cb7f6a7bSJulius Volz 16755a3d4e1SJulian Anastasov /* reset counters, caller must hold the stats->lock lock */ 168cd67cd5eSJulian Anastasov est->last_inbytes = k->inbytes; 169cd67cd5eSJulian Anastasov est->last_outbytes = k->outbytes; 170cd67cd5eSJulian Anastasov est->last_conns = k->conns; 171cd67cd5eSJulian Anastasov est->last_inpkts = k->inpkts; 172cd67cd5eSJulian Anastasov est->last_outpkts = k->outpkts; 173cb7f6a7bSJulius Volz est->cps = 0; 174cb7f6a7bSJulius Volz est->inpps = 0; 175cb7f6a7bSJulius Volz est->outpps = 0; 176cb7f6a7bSJulius Volz est->inbps = 0; 177cb7f6a7bSJulius Volz est->outbps = 0; 178cb7f6a7bSJulius Volz } 179cb7f6a7bSJulius Volz 180ea9f22ccSJulian Anastasov /* Get decoded rates */ 181cd67cd5eSJulian Anastasov void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) 182ea9f22ccSJulian Anastasov { 183ea9f22ccSJulian Anastasov struct ip_vs_estimator *e = &stats->est; 184ea9f22ccSJulian Anastasov 185ea9f22ccSJulian Anastasov dst->cps = (e->cps + 0x1FF) >> 10; 186ea9f22ccSJulian Anastasov dst->inpps = (e->inpps + 0x1FF) >> 10; 187ea9f22ccSJulian Anastasov dst->outpps = (e->outpps + 0x1FF) >> 10; 188ea9f22ccSJulian Anastasov dst->inbps = (e->inbps + 0xF) >> 5; 189ea9f22ccSJulian Anastasov dst->outbps = (e->outbps + 0xF) >> 5; 190ea9f22ccSJulian Anastasov } 191ea9f22ccSJulian Anastasov 192a4dd0360SEric W. Biederman int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) 19361b1ab45SHans Schillstrom { 19429c2026fSHans Schillstrom INIT_LIST_HEAD(&ipvs->est_list); 19529c2026fSHans Schillstrom spin_lock_init(&ipvs->est_lock); 1968ef81c65SKees Cook timer_setup(&ipvs->est_timer, estimation_timer, 0); 19729c2026fSHans Schillstrom mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); 19861b1ab45SHans Schillstrom return 0; 19961b1ab45SHans Schillstrom } 20061b1ab45SHans Schillstrom 201a4dd0360SEric W. Biederman void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs) 20229c2026fSHans Schillstrom { 203a4dd0360SEric W. Biederman del_timer_sync(&ipvs->est_timer); 20429c2026fSHans Schillstrom } 205