1cb7f6a7bSJulius Volz /* 2cb7f6a7bSJulius Volz * ip_vs_est.c: simple rate estimator for IPVS 3cb7f6a7bSJulius Volz * 4cb7f6a7bSJulius Volz * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5cb7f6a7bSJulius Volz * 6cb7f6a7bSJulius Volz * This program is free software; you can redistribute it and/or 7cb7f6a7bSJulius Volz * modify it under the terms of the GNU General Public License 8cb7f6a7bSJulius Volz * as published by the Free Software Foundation; either version 9cb7f6a7bSJulius Volz * 2 of the License, or (at your option) any later version. 10cb7f6a7bSJulius Volz * 1129c2026fSHans Schillstrom * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> 1229c2026fSHans Schillstrom * Network name space (netns) aware. 1329c2026fSHans Schillstrom * Global data moved to netns i.e struct netns_ipvs 1429c2026fSHans Schillstrom * Affected data: est_list and est_lock. 1529c2026fSHans Schillstrom * estimation_timer() runs with timer per netns. 1629c2026fSHans Schillstrom * get_stats()) do the per cpu summing. 17cb7f6a7bSJulius Volz */ 189aada7acSHannes Eder 199aada7acSHannes Eder #define KMSG_COMPONENT "IPVS" 209aada7acSHannes Eder #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 219aada7acSHannes Eder 22cb7f6a7bSJulius Volz #include <linux/kernel.h> 23cb7f6a7bSJulius Volz #include <linux/jiffies.h> 24cb7f6a7bSJulius Volz #include <linux/types.h> 25cb7f6a7bSJulius Volz #include <linux/interrupt.h> 26cb7f6a7bSJulius Volz #include <linux/sysctl.h> 27cb7f6a7bSJulius Volz #include <linux/list.h> 28cb7f6a7bSJulius Volz 29cb7f6a7bSJulius Volz #include <net/ip_vs.h> 30cb7f6a7bSJulius Volz 31cb7f6a7bSJulius Volz /* 32cb7f6a7bSJulius Volz This code is to estimate rate in a shorter interval (such as 8 33cb7f6a7bSJulius Volz seconds) for virtual services and real servers. For measure rate in a 34cb7f6a7bSJulius Volz long interval, it is easy to implement a user level daemon which 35cb7f6a7bSJulius Volz periodically reads those statistical counters and measure rate. 36cb7f6a7bSJulius Volz 37cb7f6a7bSJulius Volz Currently, the measurement is activated by slow timer handler. Hope 38cb7f6a7bSJulius Volz this measurement will not introduce too much load. 39cb7f6a7bSJulius Volz 40cb7f6a7bSJulius Volz We measure rate during the last 8 seconds every 2 seconds: 41cb7f6a7bSJulius Volz 42cb7f6a7bSJulius Volz avgrate = avgrate*(1-W) + rate*W 43cb7f6a7bSJulius Volz 44cb7f6a7bSJulius Volz where W = 2^(-2) 45cb7f6a7bSJulius Volz 46cb7f6a7bSJulius Volz NOTES. 47cb7f6a7bSJulius Volz 48cd67cd5eSJulian Anastasov * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10. 49cb7f6a7bSJulius Volz 50cd67cd5eSJulian Anastasov * Netlink users can see 64-bit values but sockopt users are restricted 51cd67cd5eSJulian Anastasov to 32-bit values for conns, packets, bps, cps and pps. 52cd67cd5eSJulian Anastasov 53cd67cd5eSJulian Anastasov * A lot of code is taken from net/core/gen_estimator.c 54cb7f6a7bSJulius Volz */ 55cb7f6a7bSJulius Volz 56cb7f6a7bSJulius Volz 57b17fc996SHans Schillstrom /* 58b17fc996SHans Schillstrom * Make a summary from each cpu 59b17fc996SHans Schillstrom */ 60cd67cd5eSJulian Anastasov static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, 61b962abdcSJulian Anastasov struct ip_vs_cpu_stats __percpu *stats) 62b17fc996SHans Schillstrom { 63b17fc996SHans Schillstrom int i; 64d1ee4feaSJulian Anastasov bool add = false; 65b17fc996SHans Schillstrom 66b17fc996SHans Schillstrom for_each_possible_cpu(i) { 67b17fc996SHans Schillstrom struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); 68b17fc996SHans Schillstrom unsigned int start; 69cd67cd5eSJulian Anastasov u64 conns, inpkts, outpkts, inbytes, outbytes; 70cd67cd5eSJulian Anastasov 71d1ee4feaSJulian Anastasov if (add) { 72b17fc996SHans Schillstrom do { 734a569c0cSJulian Anastasov start = u64_stats_fetch_begin(&s->syncp); 74cd67cd5eSJulian Anastasov conns = s->cnt.conns; 75cd67cd5eSJulian Anastasov inpkts = s->cnt.inpkts; 76cd67cd5eSJulian Anastasov outpkts = s->cnt.outpkts; 77cd67cd5eSJulian Anastasov inbytes = s->cnt.inbytes; 78cd67cd5eSJulian Anastasov outbytes = s->cnt.outbytes; 794a569c0cSJulian Anastasov } while (u64_stats_fetch_retry(&s->syncp, start)); 80cd67cd5eSJulian Anastasov sum->conns += conns; 81cd67cd5eSJulian Anastasov sum->inpkts += inpkts; 82cd67cd5eSJulian Anastasov sum->outpkts += outpkts; 83b17fc996SHans Schillstrom sum->inbytes += inbytes; 84b17fc996SHans Schillstrom sum->outbytes += outbytes; 85b17fc996SHans Schillstrom } else { 86d1ee4feaSJulian Anastasov add = true; 87b17fc996SHans Schillstrom do { 884a569c0cSJulian Anastasov start = u64_stats_fetch_begin(&s->syncp); 89cd67cd5eSJulian Anastasov sum->conns = s->cnt.conns; 90cd67cd5eSJulian Anastasov sum->inpkts = s->cnt.inpkts; 91cd67cd5eSJulian Anastasov sum->outpkts = s->cnt.outpkts; 92cd67cd5eSJulian Anastasov sum->inbytes = s->cnt.inbytes; 93cd67cd5eSJulian Anastasov sum->outbytes = s->cnt.outbytes; 944a569c0cSJulian Anastasov } while (u64_stats_fetch_retry(&s->syncp, start)); 95b17fc996SHans Schillstrom } 96b17fc996SHans Schillstrom } 97b17fc996SHans Schillstrom } 98b17fc996SHans Schillstrom 99b17fc996SHans Schillstrom 100cb7f6a7bSJulius Volz static void estimation_timer(unsigned long arg) 101cb7f6a7bSJulius Volz { 102cb7f6a7bSJulius Volz struct ip_vs_estimator *e; 103cb7f6a7bSJulius Volz struct ip_vs_stats *s; 104cd67cd5eSJulian Anastasov u64 rate; 10529c2026fSHans Schillstrom struct net *net = (struct net *)arg; 10629c2026fSHans Schillstrom struct netns_ipvs *ipvs; 107cb7f6a7bSJulius Volz 10829c2026fSHans Schillstrom ipvs = net_ipvs(net); 10929c2026fSHans Schillstrom spin_lock(&ipvs->est_lock); 11029c2026fSHans Schillstrom list_for_each_entry(e, &ipvs->est_list, list) { 111cb7f6a7bSJulius Volz s = container_of(e, struct ip_vs_stats, est); 112cb7f6a7bSJulius Volz 113cb7f6a7bSJulius Volz spin_lock(&s->lock); 114cd67cd5eSJulian Anastasov ip_vs_read_cpu_stats(&s->kstats, s->cpustats); 115cb7f6a7bSJulius Volz 116cb7f6a7bSJulius Volz /* scaled by 2^10, but divided 2 seconds */ 117cd67cd5eSJulian Anastasov rate = (s->kstats.conns - e->last_conns) << 9; 118cd67cd5eSJulian Anastasov e->last_conns = s->kstats.conns; 119cd67cd5eSJulian Anastasov e->cps += ((s64)rate - (s64)e->cps) >> 2; 120cb7f6a7bSJulius Volz 121cd67cd5eSJulian Anastasov rate = (s->kstats.inpkts - e->last_inpkts) << 9; 122cd67cd5eSJulian Anastasov e->last_inpkts = s->kstats.inpkts; 123cd67cd5eSJulian Anastasov e->inpps += ((s64)rate - (s64)e->inpps) >> 2; 124cb7f6a7bSJulius Volz 125cd67cd5eSJulian Anastasov rate = (s->kstats.outpkts - e->last_outpkts) << 9; 126cd67cd5eSJulian Anastasov e->last_outpkts = s->kstats.outpkts; 127cd67cd5eSJulian Anastasov e->outpps += ((s64)rate - (s64)e->outpps) >> 2; 128cb7f6a7bSJulius Volz 129cd67cd5eSJulian Anastasov /* scaled by 2^5, but divided 2 seconds */ 130cd67cd5eSJulian Anastasov rate = (s->kstats.inbytes - e->last_inbytes) << 4; 131cd67cd5eSJulian Anastasov e->last_inbytes = s->kstats.inbytes; 132cd67cd5eSJulian Anastasov e->inbps += ((s64)rate - (s64)e->inbps) >> 2; 133cb7f6a7bSJulius Volz 134cd67cd5eSJulian Anastasov rate = (s->kstats.outbytes - e->last_outbytes) << 4; 135cd67cd5eSJulian Anastasov e->last_outbytes = s->kstats.outbytes; 136cd67cd5eSJulian Anastasov e->outbps += ((s64)rate - (s64)e->outbps) >> 2; 137cb7f6a7bSJulius Volz spin_unlock(&s->lock); 138cb7f6a7bSJulius Volz } 13929c2026fSHans Schillstrom spin_unlock(&ipvs->est_lock); 14029c2026fSHans Schillstrom mod_timer(&ipvs->est_timer, jiffies + 2*HZ); 141cb7f6a7bSJulius Volz } 142cb7f6a7bSJulius Volz 1430f34d54bSEric W. Biederman void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) 144cb7f6a7bSJulius Volz { 145cb7f6a7bSJulius Volz struct ip_vs_estimator *est = &stats->est; 146cb7f6a7bSJulius Volz 147cb7f6a7bSJulius Volz INIT_LIST_HEAD(&est->list); 148cb7f6a7bSJulius Volz 14929c2026fSHans Schillstrom spin_lock_bh(&ipvs->est_lock); 15029c2026fSHans Schillstrom list_add(&est->list, &ipvs->est_list); 15129c2026fSHans Schillstrom spin_unlock_bh(&ipvs->est_lock); 152cb7f6a7bSJulius Volz } 153cb7f6a7bSJulius Volz 1540f34d54bSEric W. Biederman void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) 155cb7f6a7bSJulius Volz { 156cb7f6a7bSJulius Volz struct ip_vs_estimator *est = &stats->est; 157cb7f6a7bSJulius Volz 15829c2026fSHans Schillstrom spin_lock_bh(&ipvs->est_lock); 159cb7f6a7bSJulius Volz list_del(&est->list); 16029c2026fSHans Schillstrom spin_unlock_bh(&ipvs->est_lock); 161cb7f6a7bSJulius Volz } 162cb7f6a7bSJulius Volz 163cb7f6a7bSJulius Volz void ip_vs_zero_estimator(struct ip_vs_stats *stats) 164cb7f6a7bSJulius Volz { 165cb7f6a7bSJulius Volz struct ip_vs_estimator *est = &stats->est; 166cd67cd5eSJulian Anastasov struct ip_vs_kstats *k = &stats->kstats; 167cb7f6a7bSJulius Volz 16855a3d4e1SJulian Anastasov /* reset counters, caller must hold the stats->lock lock */ 169cd67cd5eSJulian Anastasov est->last_inbytes = k->inbytes; 170cd67cd5eSJulian Anastasov est->last_outbytes = k->outbytes; 171cd67cd5eSJulian Anastasov est->last_conns = k->conns; 172cd67cd5eSJulian Anastasov est->last_inpkts = k->inpkts; 173cd67cd5eSJulian Anastasov est->last_outpkts = k->outpkts; 174cb7f6a7bSJulius Volz est->cps = 0; 175cb7f6a7bSJulius Volz est->inpps = 0; 176cb7f6a7bSJulius Volz est->outpps = 0; 177cb7f6a7bSJulius Volz est->inbps = 0; 178cb7f6a7bSJulius Volz est->outbps = 0; 179cb7f6a7bSJulius Volz } 180cb7f6a7bSJulius Volz 181ea9f22ccSJulian Anastasov /* Get decoded rates */ 182cd67cd5eSJulian Anastasov void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) 183ea9f22ccSJulian Anastasov { 184ea9f22ccSJulian Anastasov struct ip_vs_estimator *e = &stats->est; 185ea9f22ccSJulian Anastasov 186ea9f22ccSJulian Anastasov dst->cps = (e->cps + 0x1FF) >> 10; 187ea9f22ccSJulian Anastasov dst->inpps = (e->inpps + 0x1FF) >> 10; 188ea9f22ccSJulian Anastasov dst->outpps = (e->outpps + 0x1FF) >> 10; 189ea9f22ccSJulian Anastasov dst->inbps = (e->inbps + 0xF) >> 5; 190ea9f22ccSJulian Anastasov dst->outbps = (e->outbps + 0xF) >> 5; 191ea9f22ccSJulian Anastasov } 192ea9f22ccSJulian Anastasov 193503cf15aSHans Schillstrom int __net_init ip_vs_estimator_net_init(struct net *net) 19461b1ab45SHans Schillstrom { 19529c2026fSHans Schillstrom struct netns_ipvs *ipvs = net_ipvs(net); 19629c2026fSHans Schillstrom 19729c2026fSHans Schillstrom INIT_LIST_HEAD(&ipvs->est_list); 19829c2026fSHans Schillstrom spin_lock_init(&ipvs->est_lock); 19929c2026fSHans Schillstrom setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); 20029c2026fSHans Schillstrom mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); 20161b1ab45SHans Schillstrom return 0; 20261b1ab45SHans Schillstrom } 20361b1ab45SHans Schillstrom 204503cf15aSHans Schillstrom void __net_exit ip_vs_estimator_net_cleanup(struct net *net) 20529c2026fSHans Schillstrom { 20629c2026fSHans Schillstrom del_timer_sync(&net_ipvs(net)->est_timer); 20729c2026fSHans Schillstrom } 208