1 /* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This Software is licensed under one of the following licenses: 8 * 9 * 1) under the terms of the "Common Public License 1.0" a copy of which is 10 * available from the Open Source Initiative, see 11 * http://www.opensource.org/licenses/cpl.php. 12 * 13 * 2) under the terms of the "The BSD License" a copy of which is 14 * available from the Open Source Initiative, see 15 * http://www.opensource.org/licenses/bsd-license.php. 16 * 17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 18 * copy of which is available from the Open Source Initiative, see 19 * http://www.opensource.org/licenses/gpl-license.php. 20 * 21 * Licensee has the right to choose one of the above licenses. 22 * 23 * Redistributions of source code must retain the above copyright 24 * notice and one of the license notices. 25 * 26 * Redistributions in binary form must reproduce both the above copyright 27 * notice, one of the license notices in the documentation 28 * and/or other materials provided with the distribution. 29 */ 30 31 #include <linux/mutex.h> 32 #include <linux/inetdevice.h> 33 #include <linux/workqueue.h> 34 #include <linux/if_arp.h> 35 #include <net/arp.h> 36 #include <net/neighbour.h> 37 #include <net/route.h> 38 #include <net/netevent.h> 39 #include <rdma/ib_addr.h> 40 41 MODULE_AUTHOR("Sean Hefty"); 42 MODULE_DESCRIPTION("IB Address Translation"); 43 MODULE_LICENSE("Dual BSD/GPL"); 44 45 struct addr_req { 46 struct list_head list; 47 struct sockaddr src_addr; 48 struct sockaddr dst_addr; 49 struct rdma_dev_addr *addr; 50 struct rdma_addr_client *client; 51 void *context; 52 void (*callback)(int status, struct sockaddr *src_addr, 53 struct rdma_dev_addr *addr, void *context); 54 unsigned long timeout; 55 int status; 56 }; 57 58 static void process_req(struct work_struct *work); 59 60 static DEFINE_MUTEX(lock); 61 static LIST_HEAD(req_list); 62 static DECLARE_DELAYED_WORK(work, process_req); 63 static struct workqueue_struct *addr_wq; 64 65 void rdma_addr_register_client(struct rdma_addr_client *client) 66 { 67 atomic_set(&client->refcount, 1); 68 init_completion(&client->comp); 69 } 70 EXPORT_SYMBOL(rdma_addr_register_client); 71 72 static inline void put_client(struct rdma_addr_client *client) 73 { 74 if (atomic_dec_and_test(&client->refcount)) 75 complete(&client->comp); 76 } 77 78 void rdma_addr_unregister_client(struct rdma_addr_client *client) 79 { 80 put_client(client); 81 wait_for_completion(&client->comp); 82 } 83 EXPORT_SYMBOL(rdma_addr_unregister_client); 84 85 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 86 const unsigned char *dst_dev_addr) 87 { 88 switch (dev->type) { 89 case ARPHRD_INFINIBAND: 90 dev_addr->dev_type = RDMA_NODE_IB_CA; 91 break; 92 case ARPHRD_ETHER: 93 dev_addr->dev_type = RDMA_NODE_RNIC; 94 break; 95 default: 96 return -EADDRNOTAVAIL; 97 } 98 99 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 100 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 101 if (dst_dev_addr) 102 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 103 return 0; 104 } 105 EXPORT_SYMBOL(rdma_copy_addr); 106 107 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 108 { 109 struct net_device *dev; 110 __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; 111 int ret; 112 113 dev = ip_dev_find(&init_net, ip); 114 if (!dev) 115 return -EADDRNOTAVAIL; 116 117 ret = rdma_copy_addr(dev_addr, dev, NULL); 118 dev_put(dev); 119 return ret; 120 } 121 EXPORT_SYMBOL(rdma_translate_ip); 122 123 static void set_timeout(unsigned long time) 124 { 125 unsigned long delay; 126 127 cancel_delayed_work(&work); 128 129 delay = time - jiffies; 130 if ((long)delay <= 0) 131 delay = 1; 132 133 queue_delayed_work(addr_wq, &work, delay); 134 } 135 136 static void queue_req(struct addr_req *req) 137 { 138 struct addr_req *temp_req; 139 140 mutex_lock(&lock); 141 list_for_each_entry_reverse(temp_req, &req_list, list) { 142 if (time_after_eq(req->timeout, temp_req->timeout)) 143 break; 144 } 145 146 list_add(&req->list, &temp_req->list); 147 148 if (req_list.next == &req->list) 149 set_timeout(req->timeout); 150 mutex_unlock(&lock); 151 } 152 153 static void addr_send_arp(struct sockaddr_in *dst_in) 154 { 155 struct rtable *rt; 156 struct flowi fl; 157 u32 dst_ip = dst_in->sin_addr.s_addr; 158 159 memset(&fl, 0, sizeof fl); 160 fl.nl_u.ip4_u.daddr = dst_ip; 161 if (ip_route_output_key(&init_net, &rt, &fl)) 162 return; 163 164 neigh_event_send(rt->u.dst.neighbour, NULL); 165 ip_rt_put(rt); 166 } 167 168 static int addr_resolve_remote(struct sockaddr_in *src_in, 169 struct sockaddr_in *dst_in, 170 struct rdma_dev_addr *addr) 171 { 172 u32 src_ip = src_in->sin_addr.s_addr; 173 u32 dst_ip = dst_in->sin_addr.s_addr; 174 struct flowi fl; 175 struct rtable *rt; 176 struct neighbour *neigh; 177 int ret; 178 179 memset(&fl, 0, sizeof fl); 180 fl.nl_u.ip4_u.daddr = dst_ip; 181 fl.nl_u.ip4_u.saddr = src_ip; 182 ret = ip_route_output_key(&init_net, &rt, &fl); 183 if (ret) 184 goto out; 185 186 /* If the device does ARP internally, return 'done' */ 187 if (rt->idev->dev->flags & IFF_NOARP) { 188 rdma_copy_addr(addr, rt->idev->dev, NULL); 189 goto put; 190 } 191 192 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 193 if (!neigh) { 194 ret = -ENODATA; 195 goto put; 196 } 197 198 if (!(neigh->nud_state & NUD_VALID)) { 199 ret = -ENODATA; 200 goto release; 201 } 202 203 if (!src_ip) { 204 src_in->sin_family = dst_in->sin_family; 205 src_in->sin_addr.s_addr = rt->rt_src; 206 } 207 208 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 209 release: 210 neigh_release(neigh); 211 put: 212 ip_rt_put(rt); 213 out: 214 return ret; 215 } 216 217 static void process_req(struct work_struct *work) 218 { 219 struct addr_req *req, *temp_req; 220 struct sockaddr_in *src_in, *dst_in; 221 struct list_head done_list; 222 223 INIT_LIST_HEAD(&done_list); 224 225 mutex_lock(&lock); 226 list_for_each_entry_safe(req, temp_req, &req_list, list) { 227 if (req->status == -ENODATA) { 228 src_in = (struct sockaddr_in *) &req->src_addr; 229 dst_in = (struct sockaddr_in *) &req->dst_addr; 230 req->status = addr_resolve_remote(src_in, dst_in, 231 req->addr); 232 if (req->status && time_after_eq(jiffies, req->timeout)) 233 req->status = -ETIMEDOUT; 234 else if (req->status == -ENODATA) 235 continue; 236 } 237 list_move_tail(&req->list, &done_list); 238 } 239 240 if (!list_empty(&req_list)) { 241 req = list_entry(req_list.next, struct addr_req, list); 242 set_timeout(req->timeout); 243 } 244 mutex_unlock(&lock); 245 246 list_for_each_entry_safe(req, temp_req, &done_list, list) { 247 list_del(&req->list); 248 req->callback(req->status, &req->src_addr, req->addr, 249 req->context); 250 put_client(req->client); 251 kfree(req); 252 } 253 } 254 255 static int addr_resolve_local(struct sockaddr_in *src_in, 256 struct sockaddr_in *dst_in, 257 struct rdma_dev_addr *addr) 258 { 259 struct net_device *dev; 260 u32 src_ip = src_in->sin_addr.s_addr; 261 __be32 dst_ip = dst_in->sin_addr.s_addr; 262 int ret; 263 264 dev = ip_dev_find(&init_net, dst_ip); 265 if (!dev) 266 return -EADDRNOTAVAIL; 267 268 if (ipv4_is_zeronet(src_ip)) { 269 src_in->sin_family = dst_in->sin_family; 270 src_in->sin_addr.s_addr = dst_ip; 271 ret = rdma_copy_addr(addr, dev, dev->dev_addr); 272 } else if (ipv4_is_loopback(src_ip)) { 273 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); 274 if (!ret) 275 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 276 } else { 277 ret = rdma_translate_ip((struct sockaddr *)src_in, addr); 278 if (!ret) 279 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 280 } 281 282 dev_put(dev); 283 return ret; 284 } 285 286 int rdma_resolve_ip(struct rdma_addr_client *client, 287 struct sockaddr *src_addr, struct sockaddr *dst_addr, 288 struct rdma_dev_addr *addr, int timeout_ms, 289 void (*callback)(int status, struct sockaddr *src_addr, 290 struct rdma_dev_addr *addr, void *context), 291 void *context) 292 { 293 struct sockaddr_in *src_in, *dst_in; 294 struct addr_req *req; 295 int ret = 0; 296 297 req = kzalloc(sizeof *req, GFP_KERNEL); 298 if (!req) 299 return -ENOMEM; 300 301 if (src_addr) 302 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); 303 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); 304 req->addr = addr; 305 req->callback = callback; 306 req->context = context; 307 req->client = client; 308 atomic_inc(&client->refcount); 309 310 src_in = (struct sockaddr_in *) &req->src_addr; 311 dst_in = (struct sockaddr_in *) &req->dst_addr; 312 313 req->status = addr_resolve_local(src_in, dst_in, addr); 314 if (req->status == -EADDRNOTAVAIL) 315 req->status = addr_resolve_remote(src_in, dst_in, addr); 316 317 switch (req->status) { 318 case 0: 319 req->timeout = jiffies; 320 queue_req(req); 321 break; 322 case -ENODATA: 323 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 324 queue_req(req); 325 addr_send_arp(dst_in); 326 break; 327 default: 328 ret = req->status; 329 atomic_dec(&client->refcount); 330 kfree(req); 331 break; 332 } 333 return ret; 334 } 335 EXPORT_SYMBOL(rdma_resolve_ip); 336 337 void rdma_addr_cancel(struct rdma_dev_addr *addr) 338 { 339 struct addr_req *req, *temp_req; 340 341 mutex_lock(&lock); 342 list_for_each_entry_safe(req, temp_req, &req_list, list) { 343 if (req->addr == addr) { 344 req->status = -ECANCELED; 345 req->timeout = jiffies; 346 list_move(&req->list, &req_list); 347 set_timeout(req->timeout); 348 break; 349 } 350 } 351 mutex_unlock(&lock); 352 } 353 EXPORT_SYMBOL(rdma_addr_cancel); 354 355 static int netevent_callback(struct notifier_block *self, unsigned long event, 356 void *ctx) 357 { 358 if (event == NETEVENT_NEIGH_UPDATE) { 359 struct neighbour *neigh = ctx; 360 361 if (neigh->nud_state & NUD_VALID) { 362 set_timeout(jiffies); 363 } 364 } 365 return 0; 366 } 367 368 static struct notifier_block nb = { 369 .notifier_call = netevent_callback 370 }; 371 372 static int addr_init(void) 373 { 374 addr_wq = create_singlethread_workqueue("ib_addr"); 375 if (!addr_wq) 376 return -ENOMEM; 377 378 register_netevent_notifier(&nb); 379 return 0; 380 } 381 382 static void addr_cleanup(void) 383 { 384 unregister_netevent_notifier(&nb); 385 destroy_workqueue(addr_wq); 386 } 387 388 module_init(addr_init); 389 module_exit(addr_cleanup); 390