1 /* 2 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36 #include <linux/mutex.h> 37 #include <linux/inetdevice.h> 38 #include <linux/workqueue.h> 39 #include <linux/if_arp.h> 40 #include <net/arp.h> 41 #include <net/neighbour.h> 42 #include <net/route.h> 43 #include <net/netevent.h> 44 #include <rdma/ib_addr.h> 45 46 MODULE_AUTHOR("Sean Hefty"); 47 MODULE_DESCRIPTION("IB Address Translation"); 48 MODULE_LICENSE("Dual BSD/GPL"); 49 50 struct addr_req { 51 struct list_head list; 52 struct sockaddr src_addr; 53 struct sockaddr dst_addr; 54 struct rdma_dev_addr *addr; 55 struct rdma_addr_client *client; 56 void *context; 57 void (*callback)(int status, struct sockaddr *src_addr, 58 struct rdma_dev_addr *addr, void *context); 59 unsigned long timeout; 60 int status; 61 }; 62 63 static void process_req(struct work_struct *work); 64 65 static DEFINE_MUTEX(lock); 66 static LIST_HEAD(req_list); 67 static DECLARE_DELAYED_WORK(work, process_req); 68 static struct workqueue_struct *addr_wq; 69 70 void rdma_addr_register_client(struct rdma_addr_client *client) 71 { 72 atomic_set(&client->refcount, 1); 73 init_completion(&client->comp); 74 } 75 EXPORT_SYMBOL(rdma_addr_register_client); 76 77 static inline void put_client(struct rdma_addr_client *client) 78 { 79 if (atomic_dec_and_test(&client->refcount)) 80 complete(&client->comp); 81 } 82 83 void rdma_addr_unregister_client(struct rdma_addr_client *client) 84 { 85 put_client(client); 86 wait_for_completion(&client->comp); 87 } 88 EXPORT_SYMBOL(rdma_addr_unregister_client); 89 90 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, 91 const unsigned char *dst_dev_addr) 92 { 93 switch (dev->type) { 94 case ARPHRD_INFINIBAND: 95 dev_addr->dev_type = RDMA_NODE_IB_CA; 96 break; 97 case ARPHRD_ETHER: 98 dev_addr->dev_type = RDMA_NODE_RNIC; 99 break; 100 default: 101 return -EADDRNOTAVAIL; 102 } 103 104 memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 105 memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); 106 if (dst_dev_addr) 107 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); 108 dev_addr->src_dev = dev; 109 return 0; 110 } 111 EXPORT_SYMBOL(rdma_copy_addr); 112 113 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 114 { 115 struct net_device *dev; 116 __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; 117 int ret; 118 119 dev = ip_dev_find(&init_net, ip); 120 if (!dev) 121 return -EADDRNOTAVAIL; 122 123 ret = rdma_copy_addr(dev_addr, dev, NULL); 124 dev_put(dev); 125 return ret; 126 } 127 EXPORT_SYMBOL(rdma_translate_ip); 128 129 static void set_timeout(unsigned long time) 130 { 131 unsigned long delay; 132 133 cancel_delayed_work(&work); 134 135 delay = time - jiffies; 136 if ((long)delay <= 0) 137 delay = 1; 138 139 queue_delayed_work(addr_wq, &work, delay); 140 } 141 142 static void queue_req(struct addr_req *req) 143 { 144 struct addr_req *temp_req; 145 146 mutex_lock(&lock); 147 list_for_each_entry_reverse(temp_req, &req_list, list) { 148 if (time_after_eq(req->timeout, temp_req->timeout)) 149 break; 150 } 151 152 list_add(&req->list, &temp_req->list); 153 154 if (req_list.next == &req->list) 155 set_timeout(req->timeout); 156 mutex_unlock(&lock); 157 } 158 159 static void addr_send_arp(struct sockaddr_in *dst_in) 160 { 161 struct rtable *rt; 162 struct flowi fl; 163 __be32 dst_ip = dst_in->sin_addr.s_addr; 164 165 memset(&fl, 0, sizeof fl); 166 fl.nl_u.ip4_u.daddr = dst_ip; 167 if (ip_route_output_key(&init_net, &rt, &fl)) 168 return; 169 170 neigh_event_send(rt->u.dst.neighbour, NULL); 171 ip_rt_put(rt); 172 } 173 174 static int addr_resolve_remote(struct sockaddr_in *src_in, 175 struct sockaddr_in *dst_in, 176 struct rdma_dev_addr *addr) 177 { 178 __be32 src_ip = src_in->sin_addr.s_addr; 179 __be32 dst_ip = dst_in->sin_addr.s_addr; 180 struct flowi fl; 181 struct rtable *rt; 182 struct neighbour *neigh; 183 int ret; 184 185 memset(&fl, 0, sizeof fl); 186 fl.nl_u.ip4_u.daddr = dst_ip; 187 fl.nl_u.ip4_u.saddr = src_ip; 188 ret = ip_route_output_key(&init_net, &rt, &fl); 189 if (ret) 190 goto out; 191 192 /* If the device does ARP internally, return 'done' */ 193 if (rt->idev->dev->flags & IFF_NOARP) { 194 rdma_copy_addr(addr, rt->idev->dev, NULL); 195 goto put; 196 } 197 198 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); 199 if (!neigh) { 200 ret = -ENODATA; 201 goto put; 202 } 203 204 if (!(neigh->nud_state & NUD_VALID)) { 205 ret = -ENODATA; 206 goto release; 207 } 208 209 if (!src_ip) { 210 src_in->sin_family = dst_in->sin_family; 211 src_in->sin_addr.s_addr = rt->rt_src; 212 } 213 214 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); 215 release: 216 neigh_release(neigh); 217 put: 218 ip_rt_put(rt); 219 out: 220 return ret; 221 } 222 223 static void process_req(struct work_struct *work) 224 { 225 struct addr_req *req, *temp_req; 226 struct sockaddr_in *src_in, *dst_in; 227 struct list_head done_list; 228 229 INIT_LIST_HEAD(&done_list); 230 231 mutex_lock(&lock); 232 list_for_each_entry_safe(req, temp_req, &req_list, list) { 233 if (req->status == -ENODATA) { 234 src_in = (struct sockaddr_in *) &req->src_addr; 235 dst_in = (struct sockaddr_in *) &req->dst_addr; 236 req->status = addr_resolve_remote(src_in, dst_in, 237 req->addr); 238 if (req->status && time_after_eq(jiffies, req->timeout)) 239 req->status = -ETIMEDOUT; 240 else if (req->status == -ENODATA) 241 continue; 242 } 243 list_move_tail(&req->list, &done_list); 244 } 245 246 if (!list_empty(&req_list)) { 247 req = list_entry(req_list.next, struct addr_req, list); 248 set_timeout(req->timeout); 249 } 250 mutex_unlock(&lock); 251 252 list_for_each_entry_safe(req, temp_req, &done_list, list) { 253 list_del(&req->list); 254 req->callback(req->status, &req->src_addr, req->addr, 255 req->context); 256 put_client(req->client); 257 kfree(req); 258 } 259 } 260 261 static int addr_resolve_local(struct sockaddr_in *src_in, 262 struct sockaddr_in *dst_in, 263 struct rdma_dev_addr *addr) 264 { 265 struct net_device *dev; 266 __be32 src_ip = src_in->sin_addr.s_addr; 267 __be32 dst_ip = dst_in->sin_addr.s_addr; 268 int ret; 269 270 dev = ip_dev_find(&init_net, dst_ip); 271 if (!dev) 272 return -EADDRNOTAVAIL; 273 274 if (ipv4_is_zeronet(src_ip)) { 275 src_in->sin_family = dst_in->sin_family; 276 src_in->sin_addr.s_addr = dst_ip; 277 ret = rdma_copy_addr(addr, dev, dev->dev_addr); 278 } else if (ipv4_is_loopback(src_ip)) { 279 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); 280 if (!ret) 281 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 282 } else { 283 ret = rdma_translate_ip((struct sockaddr *)src_in, addr); 284 if (!ret) 285 memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); 286 } 287 288 dev_put(dev); 289 return ret; 290 } 291 292 int rdma_resolve_ip(struct rdma_addr_client *client, 293 struct sockaddr *src_addr, struct sockaddr *dst_addr, 294 struct rdma_dev_addr *addr, int timeout_ms, 295 void (*callback)(int status, struct sockaddr *src_addr, 296 struct rdma_dev_addr *addr, void *context), 297 void *context) 298 { 299 struct sockaddr_in *src_in, *dst_in; 300 struct addr_req *req; 301 int ret = 0; 302 303 req = kzalloc(sizeof *req, GFP_KERNEL); 304 if (!req) 305 return -ENOMEM; 306 307 if (src_addr) 308 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); 309 memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); 310 req->addr = addr; 311 req->callback = callback; 312 req->context = context; 313 req->client = client; 314 atomic_inc(&client->refcount); 315 316 src_in = (struct sockaddr_in *) &req->src_addr; 317 dst_in = (struct sockaddr_in *) &req->dst_addr; 318 319 req->status = addr_resolve_local(src_in, dst_in, addr); 320 if (req->status == -EADDRNOTAVAIL) 321 req->status = addr_resolve_remote(src_in, dst_in, addr); 322 323 switch (req->status) { 324 case 0: 325 req->timeout = jiffies; 326 queue_req(req); 327 break; 328 case -ENODATA: 329 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; 330 queue_req(req); 331 addr_send_arp(dst_in); 332 break; 333 default: 334 ret = req->status; 335 atomic_dec(&client->refcount); 336 kfree(req); 337 break; 338 } 339 return ret; 340 } 341 EXPORT_SYMBOL(rdma_resolve_ip); 342 343 void rdma_addr_cancel(struct rdma_dev_addr *addr) 344 { 345 struct addr_req *req, *temp_req; 346 347 mutex_lock(&lock); 348 list_for_each_entry_safe(req, temp_req, &req_list, list) { 349 if (req->addr == addr) { 350 req->status = -ECANCELED; 351 req->timeout = jiffies; 352 list_move(&req->list, &req_list); 353 set_timeout(req->timeout); 354 break; 355 } 356 } 357 mutex_unlock(&lock); 358 } 359 EXPORT_SYMBOL(rdma_addr_cancel); 360 361 static int netevent_callback(struct notifier_block *self, unsigned long event, 362 void *ctx) 363 { 364 if (event == NETEVENT_NEIGH_UPDATE) { 365 struct neighbour *neigh = ctx; 366 367 if (neigh->nud_state & NUD_VALID) { 368 set_timeout(jiffies); 369 } 370 } 371 return 0; 372 } 373 374 static struct notifier_block nb = { 375 .notifier_call = netevent_callback 376 }; 377 378 static int addr_init(void) 379 { 380 addr_wq = create_singlethread_workqueue("ib_addr"); 381 if (!addr_wq) 382 return -ENOMEM; 383 384 register_netevent_notifier(&nb); 385 return 0; 386 } 387 388 static void addr_cleanup(void) 389 { 390 unregister_netevent_notifier(&nb); 391 destroy_workqueue(addr_wq); 392 } 393 394 module_init(addr_init); 395 module_exit(addr_cleanup); 396