1 /* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This Software is licensed under one of the following licenses: 8 * 9 * 1) under the terms of the "Common Public License 1.0" a copy of which is 10 * available from the Open Source Initiative, see 11 * http://www.opensource.org/licenses/cpl.php. 12 * 13 * 2) under the terms of the "The BSD License" a copy of which is 14 * available from the Open Source Initiative, see 15 * http://www.opensource.org/licenses/bsd-license.php. 16 * 17 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a 18 * copy of which is available from the Open Source Initiative, see 19 * http://www.opensource.org/licenses/gpl-license.php. 20 * 21 * Licensee has the right to choose one of the above licenses. 22 * 23 * Redistributions of source code must retain the above copyright 24 * notice and one of the license notices. 25 * 26 * Redistributions in binary form must reproduce both the above copyright 27 * notice, one of the license notices in the documentation 28 * and/or other materials provided with the distribution. 29 */ 30 31 #include <linux/mutex.h> 32 #include <linux/inetdevice.h> 33 #include <linux/workqueue.h> 34 #include <linux/if_arp.h> 35 #include <net/arp.h> 36 #include <net/neighbour.h> 37 #include <net/route.h> 38 #include <net/netevent.h> 39 #include <rdma/ib_addr.h> 40 41 MODULE_AUTHOR("Sean Hefty"); 42 MODULE_DESCRIPTION("IB Address Translation"); 43 MODULE_LICENSE("Dual BSD/GPL"); 44 45 struct addr_req { 46 struct list_head list; 47 struct sockaddr src_addr; 48 struct sockaddr dst_addr; 49 struct rdma_dev_addr *addr; 50 struct rdma_addr_client *client; 51 void *context; 52 void (*callback)(int status, struct sockaddr *src_addr, 53 struct rdma_dev_addr *addr, void *context); 54 unsigned long timeout; 55 int status; 56 }; 57 58 static void process_req(struct work_struct *work); 59 60 static DEFINE_MUTEX(lock); 61 static LIST_HEAD(req_list); 62 static DECLARE_DELAYED_WORK(work, process_req); 63 static struct workqueue_struct *addr_wq; 64 65 void rdma_addr_register_client(struct rdma_addr_client *client) 66 { 67 atomic_set(&client->refcount, 1); 68 init_completion(&client->comp); 69 } 70 EXPORT_SYMBOL(rdma_addr_register_client); 71 72 static inline void put_client(struct rdma_addr_client *client) 73 { 74 if (atomic_dec_and_test(&client->refcount)) 75 complete(&client->comp); 76 } 77 78 void rdma_addr_unregister_client(struct rdma_addr_client *client) 79 { 80 put_client(client); 81 wait_for_completion(&client->comp); 82 } 83 EXPORT_SYMBOL(rdma_addr_unregister_client); 84 85 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 86 const unsigned char *dst_dev_addr) 87 { 88 switch (dev->type) { 89 case ARPHRD_INFINIBAND: 90 dev_addr->dev_type = RDMA_NODE_IB_CA; 91 break; 92 case ARPHRD_ETHER: 93 dev_addr->dev_type = RDMA_NODE_RNIC; 94 break; 95 default: 96 return -EADDRNOTAVAIL; 97 } 98 99 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 100 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 101 if (dst_dev_addr) 102 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 103 return 0; 104 } 105 EXPORT_SYMBOL(rdma_copy_addr); 106 107 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 108 { 109 struct net_device *dev; 110 __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; 111 int ret; 112 113 dev = ip_dev_find(ip); 114 if (!dev) 115 return -EADDRNOTAVAIL; 116 117 ret = rdma_copy_addr(dev_addr, dev, NULL); 118 dev_put(dev); 119 return ret; 120 } 121 EXPORT_SYMBOL(rdma_translate_ip); 122 123 static void set_timeout(unsigned long time) 124 { 125 unsigned long delay; 126 127 cancel_delayed_work(&work); 128 129 delay = time - jiffies; 130 if ((long)delay <= 0) 131 delay = 1; 132 133 queue_delayed_work(addr_wq, &work, delay); 134 } 135 136 static void queue_req(struct addr_req *req) 137 { 138 struct addr_req *temp_req; 139 140 mutex_lock(&lock); 141 list_for_each_entry_reverse(temp_req, &req_list, list) { 142 if (time_after_eq(req->timeout, temp_req->timeout)) 143 break; 144 } 145 146 list_add(&req->list, &temp_req->list); 147 148 if (req_list.next == &req->list) 149 set_timeout(req->timeout); 150 mutex_unlock(&lock); 151 } 152 153 static void addr_send_arp(struct sockaddr_in *dst_in) 154 { 155 struct rtable *rt; 156 struct flowi fl; 157 u32 dst_ip = dst_in->sin_addr.s_addr; 158 159 memset(&fl, 0, sizeof fl); 160 fl.nl_u.ip4_u.daddr = dst_ip; 161 if (ip_route_output_key(&rt, &fl)) 162 return; 163 164 arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev, 165 rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL); 166 ip_rt_put(rt); 167 } 168 169 static int addr_resolve_remote(struct sockaddr_in *src_in, 170 struct sockaddr_in *dst_in, 171 struct rdma_dev_addr *addr) 172 { 173 u32 src_ip = src_in->sin_addr.s_addr; 174 u32 dst_ip = dst_in->sin_addr.s_addr; 175 struct flowi fl; 176 struct rtable *rt; 177 struct neighbour *neigh; 178 int ret; 179 180 memset(&fl, 0, sizeof fl); 181 fl.nl_u.ip4_u.daddr = dst_ip; 182 fl.nl_u.ip4_u.saddr = src_ip; 183 ret = ip_route_output_key(&rt, &fl); 184 if (ret) 185 goto out; 186 187 /* If the device does ARP internally, return 'done' */ 188 if (rt->idev->dev->flags & IFF_NOARP) { 189 rdma_copy_addr(addr, rt->idev->dev, NULL); 190 goto put; 191 } 192 193 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 194 if (!neigh) { 195 ret = -ENODATA; 196 goto put; 197 } 198 199 if (!(neigh->nud_state & NUD_VALID)) { 200 ret = -ENODATA; 201 goto release; 202 } 203 204 if (!src_ip) { 205 src_in->sin_family = dst_in->sin_family; 206 src_in->sin_addr.s_addr = rt->rt_src; 207 } 208 209 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 210 release: 211 neigh_release(neigh); 212 put: 213 ip_rt_put(rt); 214 out: 215 return ret; 216 } 217 218 static void process_req(struct work_struct *work) 219 { 220 struct addr_req *req, *temp_req; 221 struct sockaddr_in *src_in, *dst_in; 222 struct list_head done_list; 223 224 INIT_LIST_HEAD(&done_list); 225 226 mutex_lock(&lock); 227 list_for_each_entry_safe(req, temp_req, &req_list, list) { 228 if (req->status == -ENODATA) { 229 src_in = (struct sockaddr_in *) &req->src_addr; 230 dst_in = (struct sockaddr_in *) &req->dst_addr; 231 req->status = addr_resolve_remote(src_in, dst_in, 232 req->addr); 233 if (req->status && time_after_eq(jiffies, req->timeout)) 234 req->status = -ETIMEDOUT; 235 else if (req->status == -ENODATA) 236 continue; 237 } 238 list_move_tail(&req->list, &done_list); 239 } 240 241 if (!list_empty(&req_list)) { 242 req = list_entry(req_list.next, struct addr_req, list); 243 set_timeout(req->timeout); 244 } 245 mutex_unlock(&lock); 246 247 list_for_each_entry_safe(req, temp_req, &done_list, list) { 248 list_del(&req->list); 249 req->callback(req->status, &req->src_addr, req->addr, 250 req->context); 251 put_client(req->client); 252 kfree(req); 253 } 254 } 255 256 static int addr_resolve_local(struct sockaddr_in *src_in, 257 struct sockaddr_in *dst_in, 258 struct rdma_dev_addr *addr) 259 { 260 struct net_device *dev; 261 u32 src_ip = src_in->sin_addr.s_addr; 262 __be32 dst_ip = dst_in->sin_addr.s_addr; 263 int ret; 264 265 dev = ip_dev_find(dst_ip); 266 if (!dev) 267 return -EADDRNOTAVAIL; 268 269 if (ZERONET(src_ip)) { 270 src_in->sin_family = dst_in->sin_family; 271 src_in->sin_addr.s_addr = dst_ip; 272 ret = rdma_copy_addr(addr, dev, dev->dev_addr); 273 } else if (LOOPBACK(src_ip)) { 274 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); 275 if (!ret) 276 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 277 } else { 278 ret = rdma_translate_ip((struct sockaddr *)src_in, addr); 279 if (!ret) 280 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 281 } 282 283 dev_put(dev); 284 return ret; 285 } 286 287 int rdma_resolve_ip(struct rdma_addr_client *client, 288 struct sockaddr *src_addr, struct sockaddr *dst_addr, 289 struct rdma_dev_addr *addr, int timeout_ms, 290 void (*callback)(int status, struct sockaddr *src_addr, 291 struct rdma_dev_addr *addr, void *context), 292 void *context) 293 { 294 struct sockaddr_in *src_in, *dst_in; 295 struct addr_req *req; 296 int ret = 0; 297 298 req = kmalloc(sizeof *req, GFP_KERNEL); 299 if (!req) 300 return -ENOMEM; 301 memset(req, 0, sizeof *req); 302 303 if (src_addr) 304 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); 305 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); 306 req->addr = addr; 307 req->callback = callback; 308 req->context = context; 309 req->client = client; 310 atomic_inc(&client->refcount); 311 312 src_in = (struct sockaddr_in *) &req->src_addr; 313 dst_in = (struct sockaddr_in *) &req->dst_addr; 314 315 req->status = addr_resolve_local(src_in, dst_in, addr); 316 if (req->status == -EADDRNOTAVAIL) 317 req->status = addr_resolve_remote(src_in, dst_in, addr); 318 319 switch (req->status) { 320 case 0: 321 req->timeout = jiffies; 322 queue_req(req); 323 break; 324 case -ENODATA: 325 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 326 queue_req(req); 327 addr_send_arp(dst_in); 328 break; 329 default: 330 ret = req->status; 331 atomic_dec(&client->refcount); 332 kfree(req); 333 break; 334 } 335 return ret; 336 } 337 EXPORT_SYMBOL(rdma_resolve_ip); 338 339 void rdma_addr_cancel(struct rdma_dev_addr *addr) 340 { 341 struct addr_req *req, *temp_req; 342 343 mutex_lock(&lock); 344 list_for_each_entry_safe(req, temp_req, &req_list, list) { 345 if (req->addr == addr) { 346 req->status = -ECANCELED; 347 req->timeout = jiffies; 348 list_move(&req->list, &req_list); 349 set_timeout(req->timeout); 350 break; 351 } 352 } 353 mutex_unlock(&lock); 354 } 355 EXPORT_SYMBOL(rdma_addr_cancel); 356 357 static int netevent_callback(struct notifier_block *self, unsigned long event, 358 void *ctx) 359 { 360 if (event == NETEVENT_NEIGH_UPDATE) { 361 struct neighbour *neigh = ctx; 362 363 if (neigh->nud_state & NUD_VALID) { 364 set_timeout(jiffies); 365 } 366 } 367 return 0; 368 } 369 370 static struct notifier_block nb = { 371 .notifier_call = netevent_callback 372 }; 373 374 static int addr_init(void) 375 { 376 addr_wq = create_singlethread_workqueue("ib_addr"); 377 if (!addr_wq) 378 return -ENOMEM; 379 380 register_netevent_notifier(&nb); 381 return 0; 382 } 383 384 static void addr_cleanup(void) 385 { 386 unregister_netevent_notifier(&nb); 387 destroy_workqueue(addr_wq); 388 } 389 390 module_init(addr_init); 391 module_exit(addr_cleanup); 392