1 /* 2 * Copyright (c) 2006 Oracle. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 #include <linux/percpu.h> 34 #include <linux/seq_file.h> 35 #include <linux/slab.h> 36 #include <linux/proc_fs.h> 37 #include <linux/export.h> 38 39 #include "rds.h" 40 41 /* 42 * This file implements a getsockopt() call which copies a set of fixed 43 * sized structs into a user-specified buffer as a means of providing 44 * read-only information about RDS. 45 * 46 * For a given information source there are a given number of fixed sized 47 * structs at a given time. The structs are only copied if the user-specified 48 * buffer is big enough. The destination pages that make up the buffer 49 * are pinned for the duration of the copy. 50 * 51 * This gives us the following benefits: 52 * 53 * - simple implementation, no copy "position" across multiple calls 54 * - consistent snapshot of an info source 55 * - atomic copy works well with whatever locking info source has 56 * - one portable tool to get rds info across implementations 57 * - long-lived tool can get info without allocating 58 * 59 * at the following costs: 60 * 61 * - info source copy must be pinned, may be "large" 62 */ 63 64 struct rds_info_iterator { 65 struct page **pages; 66 void *addr; 67 unsigned long offset; 68 }; 69 70 static DEFINE_SPINLOCK(rds_info_lock); 71 static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; 72 73 void rds_info_register_func(int optname, rds_info_func func) 74 { 75 int offset = optname - RDS_INFO_FIRST; 76 77 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 78 79 spin_lock(&rds_info_lock); 80 BUG_ON(rds_info_funcs[offset]); 81 rds_info_funcs[offset] = func; 82 spin_unlock(&rds_info_lock); 83 } 84 EXPORT_SYMBOL_GPL(rds_info_register_func); 85 86 void rds_info_deregister_func(int optname, rds_info_func func) 87 { 88 int offset = optname - RDS_INFO_FIRST; 89 90 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 91 92 spin_lock(&rds_info_lock); 93 BUG_ON(rds_info_funcs[offset] != func); 94 rds_info_funcs[offset] = NULL; 95 spin_unlock(&rds_info_lock); 96 } 97 EXPORT_SYMBOL_GPL(rds_info_deregister_func); 98 99 /* 100 * Typically we hold an atomic kmap across multiple rds_info_copy() calls 101 * because the kmap is so expensive. This must be called before using blocking 102 * operations while holding the mapping and as the iterator is torn down. 103 */ 104 void rds_info_iter_unmap(struct rds_info_iterator *iter) 105 { 106 if (iter->addr) { 107 kunmap_atomic(iter->addr); 108 iter->addr = NULL; 109 } 110 } 111 112 /* 113 * get_user_pages() called flush_dcache_page() on the pages for us. 114 */ 115 void rds_info_copy(struct rds_info_iterator *iter, void *data, 116 unsigned long bytes) 117 { 118 unsigned long this; 119 120 while (bytes) { 121 if (!iter->addr) 122 iter->addr = kmap_atomic(*iter->pages); 123 124 this = min(bytes, PAGE_SIZE - iter->offset); 125 126 rdsdebug("page %p addr %p offset %lu this %lu data %p " 127 "bytes %lu\n", *iter->pages, iter->addr, 128 iter->offset, this, data, bytes); 129 130 memcpy(iter->addr + iter->offset, data, this); 131 132 data += this; 133 bytes -= this; 134 iter->offset += this; 135 136 if (iter->offset == PAGE_SIZE) { 137 kunmap_atomic(iter->addr); 138 iter->addr = NULL; 139 iter->offset = 0; 140 iter->pages++; 141 } 142 } 143 } 144 EXPORT_SYMBOL_GPL(rds_info_copy); 145 146 /* 147 * @optval points to the userspace buffer that the information snapshot 148 * will be copied into. 149 * 150 * @optlen on input is the size of the buffer in userspace. @optlen 151 * on output is the size of the requested snapshot in bytes. 152 * 153 * This function returns -errno if there is a failure, particularly -ENOSPC 154 * if the given userspace buffer was not large enough to fit the snapshot. 155 * On success it returns the positive number of bytes of each array element 156 * in the snapshot. 157 */ 158 int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, 159 int __user *optlen) 160 { 161 struct rds_info_iterator iter; 162 struct rds_info_lengths lens; 163 unsigned long nr_pages = 0; 164 unsigned long start; 165 unsigned long i; 166 rds_info_func func; 167 struct page **pages = NULL; 168 int ret; 169 int len; 170 int total; 171 172 if (get_user(len, optlen)) { 173 ret = -EFAULT; 174 goto out; 175 } 176 177 /* check for all kinds of wrapping and the like */ 178 start = (unsigned long)optval; 179 if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { 180 ret = -EINVAL; 181 goto out; 182 } 183 184 /* a 0 len call is just trying to probe its length */ 185 if (len == 0) 186 goto call_func; 187 188 nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) 189 >> PAGE_SHIFT; 190 191 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); 192 if (!pages) { 193 ret = -ENOMEM; 194 goto out; 195 } 196 ret = get_user_pages_fast(start, nr_pages, 1, pages); 197 if (ret != nr_pages) { 198 if (ret > 0) 199 nr_pages = ret; 200 else 201 nr_pages = 0; 202 ret = -EAGAIN; /* XXX ? */ 203 goto out; 204 } 205 206 rdsdebug("len %d nr_pages %lu\n", len, nr_pages); 207 208 call_func: 209 func = rds_info_funcs[optname - RDS_INFO_FIRST]; 210 if (!func) { 211 ret = -ENOPROTOOPT; 212 goto out; 213 } 214 215 iter.pages = pages; 216 iter.addr = NULL; 217 iter.offset = start & (PAGE_SIZE - 1); 218 219 func(sock, len, &iter, &lens); 220 BUG_ON(lens.each == 0); 221 222 total = lens.nr * lens.each; 223 224 rds_info_iter_unmap(&iter); 225 226 if (total > len) { 227 len = total; 228 ret = -ENOSPC; 229 } else { 230 len = total; 231 ret = lens.each; 232 } 233 234 if (put_user(len, optlen)) 235 ret = -EFAULT; 236 237 out: 238 for (i = 0; pages && i < nr_pages; i++) 239 put_page(pages[i]); 240 kfree(pages); 241 242 return ret; 243 } 244