1 /* 2 * Monitoring code for network dropped packet alerts 3 * 4 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> 5 */ 6 7 #include <linux/netdevice.h> 8 #include <linux/etherdevice.h> 9 #include <linux/string.h> 10 #include <linux/if_arp.h> 11 #include <linux/inetdevice.h> 12 #include <linux/inet.h> 13 #include <linux/interrupt.h> 14 #include <linux/netpoll.h> 15 #include <linux/sched.h> 16 #include <linux/delay.h> 17 #include <linux/types.h> 18 #include <linux/workqueue.h> 19 #include <linux/netlink.h> 20 #include <linux/net_dropmon.h> 21 #include <linux/percpu.h> 22 #include <linux/timer.h> 23 #include <linux/bitops.h> 24 #include <linux/slab.h> 25 #include <net/genetlink.h> 26 #include <net/netevent.h> 27 28 #include <trace/events/skb.h> 29 #include <trace/events/napi.h> 30 31 #include <asm/unaligned.h> 32 33 #define TRACE_ON 1 34 #define TRACE_OFF 0 35 36 static void send_dm_alert(struct work_struct *unused); 37 38 39 /* 40 * Globals, our netlink socket pointer 41 * and the work handle that will send up 42 * netlink alerts 43 */ 44 static int trace_state = TRACE_OFF; 45 static DEFINE_MUTEX(trace_state_mutex); 46 47 struct per_cpu_dm_data { 48 struct work_struct dm_alert_work; 49 struct sk_buff __rcu *skb; 50 atomic_t dm_hit_count; 51 struct timer_list send_timer; 52 int cpu; 53 }; 54 55 struct dm_hw_stat_delta { 56 struct net_device *dev; 57 unsigned long last_rx; 58 struct list_head list; 59 struct rcu_head rcu; 60 unsigned long last_drop_val; 61 }; 62 63 static struct genl_family net_drop_monitor_family = { 64 .id = GENL_ID_GENERATE, 65 .hdrsize = 0, 66 .name = "NET_DM", 67 .version = 2, 68 .maxattr = NET_DM_CMD_MAX, 69 }; 70 71 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); 72 73 static int dm_hit_limit = 64; 74 static int dm_delay = 1; 75 static unsigned long dm_hw_check_delta = 2*HZ; 76 static LIST_HEAD(hw_stats_list); 77 78 static void reset_per_cpu_data(struct per_cpu_dm_data *data) 79 { 80 size_t al; 81 struct net_dm_alert_msg *msg; 82 struct nlattr *nla; 83 struct sk_buff *skb; 84 struct sk_buff *oskb = rcu_dereference_protected(data->skb, 1); 85 86 al = sizeof(struct net_dm_alert_msg); 87 al += dm_hit_limit * sizeof(struct net_dm_drop_point); 88 al += sizeof(struct nlattr); 89 90 skb = genlmsg_new(al, GFP_KERNEL); 91 92 if (skb) { 93 genlmsg_put(skb, 0, 0, &net_drop_monitor_family, 94 0, NET_DM_CMD_ALERT); 95 nla = nla_reserve(skb, NLA_UNSPEC, 96 sizeof(struct net_dm_alert_msg)); 97 msg = nla_data(nla); 98 memset(msg, 0, al); 99 } else 100 schedule_work_on(data->cpu, &data->dm_alert_work); 101 102 /* 103 * Don't need to lock this, since we are guaranteed to only 104 * run this on a single cpu at a time. 105 * Note also that we only update data->skb if the old and new skb 106 * pointers don't match. This ensures that we don't continually call 107 * synchornize_rcu if we repeatedly fail to alloc a new netlink message. 108 */ 109 if (skb != oskb) { 110 rcu_assign_pointer(data->skb, skb); 111 112 synchronize_rcu(); 113 114 atomic_set(&data->dm_hit_count, dm_hit_limit); 115 } 116 117 } 118 119 static void send_dm_alert(struct work_struct *unused) 120 { 121 struct sk_buff *skb; 122 struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); 123 124 WARN_ON_ONCE(data->cpu != smp_processor_id()); 125 126 /* 127 * Grab the skb we're about to send 128 */ 129 skb = rcu_dereference_protected(data->skb, 1); 130 131 /* 132 * Replace it with a new one 133 */ 134 reset_per_cpu_data(data); 135 136 /* 137 * Ship it! 138 */ 139 if (skb) 140 genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); 141 142 put_cpu_var(dm_cpu_data); 143 } 144 145 /* 146 * This is the timer function to delay the sending of an alert 147 * in the event that more drops will arrive during the 148 * hysteresis period. Note that it operates under the timer interrupt 149 * so we don't need to disable preemption here 150 */ 151 static void sched_send_work(unsigned long unused) 152 { 153 struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); 154 155 schedule_work_on(smp_processor_id(), &data->dm_alert_work); 156 157 put_cpu_var(dm_cpu_data); 158 } 159 160 static void trace_drop_common(struct sk_buff *skb, void *location) 161 { 162 struct net_dm_alert_msg *msg; 163 struct nlmsghdr *nlh; 164 struct nlattr *nla; 165 int i; 166 struct sk_buff *dskb; 167 struct per_cpu_dm_data *data = &get_cpu_var(dm_cpu_data); 168 169 170 rcu_read_lock(); 171 dskb = rcu_dereference(data->skb); 172 173 if (!dskb) 174 goto out; 175 176 if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { 177 /* 178 * we're already at zero, discard this hit 179 */ 180 goto out; 181 } 182 183 nlh = (struct nlmsghdr *)dskb->data; 184 nla = genlmsg_data(nlmsg_data(nlh)); 185 msg = nla_data(nla); 186 for (i = 0; i < msg->entries; i++) { 187 if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { 188 msg->points[i].count++; 189 atomic_inc(&data->dm_hit_count); 190 goto out; 191 } 192 } 193 194 /* 195 * We need to create a new entry 196 */ 197 __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); 198 nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); 199 memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); 200 msg->points[msg->entries].count = 1; 201 msg->entries++; 202 203 if (!timer_pending(&data->send_timer)) { 204 data->send_timer.expires = jiffies + dm_delay * HZ; 205 add_timer_on(&data->send_timer, smp_processor_id()); 206 } 207 208 out: 209 rcu_read_unlock(); 210 put_cpu_var(dm_cpu_data); 211 return; 212 } 213 214 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) 215 { 216 trace_drop_common(skb, location); 217 } 218 219 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi) 220 { 221 struct dm_hw_stat_delta *new_stat; 222 223 /* 224 * Don't check napi structures with no associated device 225 */ 226 if (!napi->dev) 227 return; 228 229 rcu_read_lock(); 230 list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { 231 /* 232 * only add a note to our monitor buffer if: 233 * 1) this is the dev we received on 234 * 2) its after the last_rx delta 235 * 3) our rx_dropped count has gone up 236 */ 237 if ((new_stat->dev == napi->dev) && 238 (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && 239 (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { 240 trace_drop_common(NULL, NULL); 241 new_stat->last_drop_val = napi->dev->stats.rx_dropped; 242 new_stat->last_rx = jiffies; 243 break; 244 } 245 } 246 rcu_read_unlock(); 247 } 248 249 static int set_all_monitor_traces(int state) 250 { 251 int rc = 0; 252 struct dm_hw_stat_delta *new_stat = NULL; 253 struct dm_hw_stat_delta *temp; 254 255 mutex_lock(&trace_state_mutex); 256 257 if (state == trace_state) { 258 rc = -EAGAIN; 259 goto out_unlock; 260 } 261 262 switch (state) { 263 case TRACE_ON: 264 rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); 265 rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); 266 break; 267 case TRACE_OFF: 268 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); 269 rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); 270 271 tracepoint_synchronize_unregister(); 272 273 /* 274 * Clean the device list 275 */ 276 list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { 277 if (new_stat->dev == NULL) { 278 list_del_rcu(&new_stat->list); 279 kfree_rcu(new_stat, rcu); 280 } 281 } 282 break; 283 default: 284 rc = 1; 285 break; 286 } 287 288 if (!rc) 289 trace_state = state; 290 else 291 rc = -EINPROGRESS; 292 293 out_unlock: 294 mutex_unlock(&trace_state_mutex); 295 296 return rc; 297 } 298 299 300 static int net_dm_cmd_config(struct sk_buff *skb, 301 struct genl_info *info) 302 { 303 return -ENOTSUPP; 304 } 305 306 static int net_dm_cmd_trace(struct sk_buff *skb, 307 struct genl_info *info) 308 { 309 switch (info->genlhdr->cmd) { 310 case NET_DM_CMD_START: 311 return set_all_monitor_traces(TRACE_ON); 312 break; 313 case NET_DM_CMD_STOP: 314 return set_all_monitor_traces(TRACE_OFF); 315 break; 316 } 317 318 return -ENOTSUPP; 319 } 320 321 static int dropmon_net_event(struct notifier_block *ev_block, 322 unsigned long event, void *ptr) 323 { 324 struct net_device *dev = ptr; 325 struct dm_hw_stat_delta *new_stat = NULL; 326 struct dm_hw_stat_delta *tmp; 327 328 switch (event) { 329 case NETDEV_REGISTER: 330 new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); 331 332 if (!new_stat) 333 goto out; 334 335 new_stat->dev = dev; 336 new_stat->last_rx = jiffies; 337 mutex_lock(&trace_state_mutex); 338 list_add_rcu(&new_stat->list, &hw_stats_list); 339 mutex_unlock(&trace_state_mutex); 340 break; 341 case NETDEV_UNREGISTER: 342 mutex_lock(&trace_state_mutex); 343 list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { 344 if (new_stat->dev == dev) { 345 new_stat->dev = NULL; 346 if (trace_state == TRACE_OFF) { 347 list_del_rcu(&new_stat->list); 348 kfree_rcu(new_stat, rcu); 349 break; 350 } 351 } 352 } 353 mutex_unlock(&trace_state_mutex); 354 break; 355 } 356 out: 357 return NOTIFY_DONE; 358 } 359 360 static struct genl_ops dropmon_ops[] = { 361 { 362 .cmd = NET_DM_CMD_CONFIG, 363 .doit = net_dm_cmd_config, 364 }, 365 { 366 .cmd = NET_DM_CMD_START, 367 .doit = net_dm_cmd_trace, 368 }, 369 { 370 .cmd = NET_DM_CMD_STOP, 371 .doit = net_dm_cmd_trace, 372 }, 373 }; 374 375 static struct notifier_block dropmon_net_notifier = { 376 .notifier_call = dropmon_net_event 377 }; 378 379 static int __init init_net_drop_monitor(void) 380 { 381 struct per_cpu_dm_data *data; 382 int cpu, rc; 383 384 printk(KERN_INFO "Initializing network drop monitor service\n"); 385 386 if (sizeof(void *) > 8) { 387 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); 388 return -ENOSPC; 389 } 390 391 rc = genl_register_family_with_ops(&net_drop_monitor_family, 392 dropmon_ops, 393 ARRAY_SIZE(dropmon_ops)); 394 if (rc) { 395 printk(KERN_ERR "Could not create drop monitor netlink family\n"); 396 return rc; 397 } 398 399 rc = register_netdevice_notifier(&dropmon_net_notifier); 400 if (rc < 0) { 401 printk(KERN_CRIT "Failed to register netdevice notifier\n"); 402 goto out_unreg; 403 } 404 405 rc = 0; 406 407 for_each_present_cpu(cpu) { 408 data = &per_cpu(dm_cpu_data, cpu); 409 data->cpu = cpu; 410 INIT_WORK(&data->dm_alert_work, send_dm_alert); 411 init_timer(&data->send_timer); 412 data->send_timer.data = cpu; 413 data->send_timer.function = sched_send_work; 414 reset_per_cpu_data(data); 415 } 416 417 418 goto out; 419 420 out_unreg: 421 genl_unregister_family(&net_drop_monitor_family); 422 out: 423 return rc; 424 } 425 426 late_initcall(init_net_drop_monitor); 427