1 /* 2 * This file contains the procedures for the handling of select and poll 3 * 4 * Created for Linux based loosely upon Mathius Lattner's minix 5 * patches by Peter MacDonald. Heavily edited by Linus. 6 * 7 * 4 February 1994 8 * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS 9 * flag set in its personality we do *not* modify the given timeout 10 * parameter to reflect time remaining. 11 * 12 * 24 January 2000 13 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 14 * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). 15 */ 16 17 #include <linux/syscalls.h> 18 #include <linux/module.h> 19 #include <linux/slab.h> 20 #include <linux/smp_lock.h> 21 #include <linux/poll.h> 22 #include <linux/personality.h> /* for STICKY_TIMEOUTS */ 23 #include <linux/file.h> 24 #include <linux/fs.h> 25 26 #include <asm/uaccess.h> 27 28 #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) 29 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) 30 31 struct poll_table_entry { 32 struct file * filp; 33 wait_queue_t wait; 34 wait_queue_head_t * wait_address; 35 }; 36 37 struct poll_table_page { 38 struct poll_table_page * next; 39 struct poll_table_entry * entry; 40 struct poll_table_entry entries[0]; 41 }; 42 43 #define POLL_TABLE_FULL(table) \ 44 ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) 45 46 /* 47 * Ok, Peter made a complicated, but straightforward multiple_wait() function. 48 * I have rewritten this, taking some shortcuts: This code may not be easy to 49 * follow, but it should be free of race-conditions, and it's practical. If you 50 * understand what I'm doing here, then you understand how the linux 51 * sleep/wakeup mechanism works. 52 * 53 * Two very simple procedures, poll_wait() and poll_freewait() make all the 54 * work. poll_wait() is an inline-function defined in <linux/poll.h>, 55 * as all select/poll functions have to call it to add an entry to the 56 * poll table. 57 */ 58 void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p); 59 60 void poll_initwait(struct poll_wqueues *pwq) 61 { 62 init_poll_funcptr(&pwq->pt, __pollwait); 63 pwq->error = 0; 64 pwq->table = NULL; 65 } 66 67 EXPORT_SYMBOL(poll_initwait); 68 69 void poll_freewait(struct poll_wqueues *pwq) 70 { 71 struct poll_table_page * p = pwq->table; 72 while (p) { 73 struct poll_table_entry * entry; 74 struct poll_table_page *old; 75 76 entry = p->entry; 77 do { 78 entry--; 79 remove_wait_queue(entry->wait_address,&entry->wait); 80 fput(entry->filp); 81 } while (entry > p->entries); 82 old = p; 83 p = p->next; 84 free_page((unsigned long) old); 85 } 86 } 87 88 EXPORT_SYMBOL(poll_freewait); 89 90 void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *_p) 91 { 92 struct poll_wqueues *p = container_of(_p, struct poll_wqueues, pt); 93 struct poll_table_page *table = p->table; 94 95 if (!table || POLL_TABLE_FULL(table)) { 96 struct poll_table_page *new_table; 97 98 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); 99 if (!new_table) { 100 p->error = -ENOMEM; 101 __set_current_state(TASK_RUNNING); 102 return; 103 } 104 new_table->entry = new_table->entries; 105 new_table->next = table; 106 p->table = new_table; 107 table = new_table; 108 } 109 110 /* Add a new entry */ 111 { 112 struct poll_table_entry * entry = table->entry; 113 table->entry = entry+1; 114 get_file(filp); 115 entry->filp = filp; 116 entry->wait_address = wait_address; 117 init_waitqueue_entry(&entry->wait, current); 118 add_wait_queue(wait_address,&entry->wait); 119 } 120 } 121 122 #define FDS_IN(fds, n) (fds->in + n) 123 #define FDS_OUT(fds, n) (fds->out + n) 124 #define FDS_EX(fds, n) (fds->ex + n) 125 126 #define BITS(fds, n) (*FDS_IN(fds, n)|*FDS_OUT(fds, n)|*FDS_EX(fds, n)) 127 128 static int max_select_fd(unsigned long n, fd_set_bits *fds) 129 { 130 unsigned long *open_fds; 131 unsigned long set; 132 int max; 133 134 /* handle last in-complete long-word first */ 135 set = ~(~0UL << (n & (__NFDBITS-1))); 136 n /= __NFDBITS; 137 open_fds = current->files->open_fds->fds_bits+n; 138 max = 0; 139 if (set) { 140 set &= BITS(fds, n); 141 if (set) { 142 if (!(set & ~*open_fds)) 143 goto get_max; 144 return -EBADF; 145 } 146 } 147 while (n) { 148 open_fds--; 149 n--; 150 set = BITS(fds, n); 151 if (!set) 152 continue; 153 if (set & ~*open_fds) 154 return -EBADF; 155 if (max) 156 continue; 157 get_max: 158 do { 159 max++; 160 set >>= 1; 161 } while (set); 162 max += n * __NFDBITS; 163 } 164 165 return max; 166 } 167 168 #define BIT(i) (1UL << ((i)&(__NFDBITS-1))) 169 #define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS) 170 #define ISSET(i,m) (((i)&*(m)) != 0) 171 #define SET(i,m) (*(m) |= (i)) 172 173 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) 174 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 175 #define POLLEX_SET (POLLPRI) 176 177 int do_select(int n, fd_set_bits *fds, long *timeout) 178 { 179 struct poll_wqueues table; 180 poll_table *wait; 181 int retval, i; 182 long __timeout = *timeout; 183 184 spin_lock(¤t->files->file_lock); 185 retval = max_select_fd(n, fds); 186 spin_unlock(¤t->files->file_lock); 187 188 if (retval < 0) 189 return retval; 190 n = retval; 191 192 poll_initwait(&table); 193 wait = &table.pt; 194 if (!__timeout) 195 wait = NULL; 196 retval = 0; 197 for (;;) { 198 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 199 200 set_current_state(TASK_INTERRUPTIBLE); 201 202 inp = fds->in; outp = fds->out; exp = fds->ex; 203 rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; 204 205 for (i = 0; i < n; ++rinp, ++routp, ++rexp) { 206 unsigned long in, out, ex, all_bits, bit = 1, mask, j; 207 unsigned long res_in = 0, res_out = 0, res_ex = 0; 208 struct file_operations *f_op = NULL; 209 struct file *file = NULL; 210 211 in = *inp++; out = *outp++; ex = *exp++; 212 all_bits = in | out | ex; 213 if (all_bits == 0) { 214 i += __NFDBITS; 215 continue; 216 } 217 218 for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) { 219 if (i >= n) 220 break; 221 if (!(bit & all_bits)) 222 continue; 223 file = fget(i); 224 if (file) { 225 f_op = file->f_op; 226 mask = DEFAULT_POLLMASK; 227 if (f_op && f_op->poll) 228 mask = (*f_op->poll)(file, retval ? NULL : wait); 229 fput(file); 230 if ((mask & POLLIN_SET) && (in & bit)) { 231 res_in |= bit; 232 retval++; 233 } 234 if ((mask & POLLOUT_SET) && (out & bit)) { 235 res_out |= bit; 236 retval++; 237 } 238 if ((mask & POLLEX_SET) && (ex & bit)) { 239 res_ex |= bit; 240 retval++; 241 } 242 } 243 cond_resched(); 244 } 245 if (res_in) 246 *rinp = res_in; 247 if (res_out) 248 *routp = res_out; 249 if (res_ex) 250 *rexp = res_ex; 251 } 252 wait = NULL; 253 if (retval || !__timeout || signal_pending(current)) 254 break; 255 if(table.error) { 256 retval = table.error; 257 break; 258 } 259 __timeout = schedule_timeout(__timeout); 260 } 261 __set_current_state(TASK_RUNNING); 262 263 poll_freewait(&table); 264 265 /* 266 * Up-to-date the caller timeout. 267 */ 268 *timeout = __timeout; 269 return retval; 270 } 271 272 static void *select_bits_alloc(int size) 273 { 274 return kmalloc(6 * size, GFP_KERNEL); 275 } 276 277 static void select_bits_free(void *bits, int size) 278 { 279 kfree(bits); 280 } 281 282 /* 283 * We can actually return ERESTARTSYS instead of EINTR, but I'd 284 * like to be certain this leads to no problems. So I return 285 * EINTR just for safety. 286 * 287 * Update: ERESTARTSYS breaks at least the xview clock binary, so 288 * I'm trying ERESTARTNOHAND which restart only when you want to. 289 */ 290 #define MAX_SELECT_SECONDS \ 291 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 292 293 asmlinkage long 294 sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) 295 { 296 fd_set_bits fds; 297 char *bits; 298 long timeout; 299 int ret, size, max_fdset; 300 301 timeout = MAX_SCHEDULE_TIMEOUT; 302 if (tvp) { 303 time_t sec, usec; 304 305 if (!access_ok(VERIFY_READ, tvp, sizeof(*tvp)) 306 || __get_user(sec, &tvp->tv_sec) 307 || __get_user(usec, &tvp->tv_usec)) { 308 ret = -EFAULT; 309 goto out_nofds; 310 } 311 312 ret = -EINVAL; 313 if (sec < 0 || usec < 0) 314 goto out_nofds; 315 316 if ((unsigned long) sec < MAX_SELECT_SECONDS) { 317 timeout = ROUND_UP(usec, 1000000/HZ); 318 timeout += sec * (unsigned long) HZ; 319 } 320 } 321 322 ret = -EINVAL; 323 if (n < 0) 324 goto out_nofds; 325 326 /* max_fdset can increase, so grab it once to avoid race */ 327 max_fdset = current->files->max_fdset; 328 if (n > max_fdset) 329 n = max_fdset; 330 331 /* 332 * We need 6 bitmaps (in/out/ex for both incoming and outgoing), 333 * since we used fdset we need to allocate memory in units of 334 * long-words. 335 */ 336 ret = -ENOMEM; 337 size = FDS_BYTES(n); 338 bits = select_bits_alloc(size); 339 if (!bits) 340 goto out_nofds; 341 fds.in = (unsigned long *) bits; 342 fds.out = (unsigned long *) (bits + size); 343 fds.ex = (unsigned long *) (bits + 2*size); 344 fds.res_in = (unsigned long *) (bits + 3*size); 345 fds.res_out = (unsigned long *) (bits + 4*size); 346 fds.res_ex = (unsigned long *) (bits + 5*size); 347 348 if ((ret = get_fd_set(n, inp, fds.in)) || 349 (ret = get_fd_set(n, outp, fds.out)) || 350 (ret = get_fd_set(n, exp, fds.ex))) 351 goto out; 352 zero_fd_set(n, fds.res_in); 353 zero_fd_set(n, fds.res_out); 354 zero_fd_set(n, fds.res_ex); 355 356 ret = do_select(n, &fds, &timeout); 357 358 if (tvp && !(current->personality & STICKY_TIMEOUTS)) { 359 time_t sec = 0, usec = 0; 360 if (timeout) { 361 sec = timeout / HZ; 362 usec = timeout % HZ; 363 usec *= (1000000/HZ); 364 } 365 put_user(sec, &tvp->tv_sec); 366 put_user(usec, &tvp->tv_usec); 367 } 368 369 if (ret < 0) 370 goto out; 371 if (!ret) { 372 ret = -ERESTARTNOHAND; 373 if (signal_pending(current)) 374 goto out; 375 ret = 0; 376 } 377 378 if (set_fd_set(n, inp, fds.res_in) || 379 set_fd_set(n, outp, fds.res_out) || 380 set_fd_set(n, exp, fds.res_ex)) 381 ret = -EFAULT; 382 383 out: 384 select_bits_free(bits, size); 385 out_nofds: 386 return ret; 387 } 388 389 struct poll_list { 390 struct poll_list *next; 391 int len; 392 struct pollfd entries[0]; 393 }; 394 395 #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) 396 397 static void do_pollfd(unsigned int num, struct pollfd * fdpage, 398 poll_table ** pwait, int *count) 399 { 400 int i; 401 402 for (i = 0; i < num; i++) { 403 int fd; 404 unsigned int mask; 405 struct pollfd *fdp; 406 407 mask = 0; 408 fdp = fdpage+i; 409 fd = fdp->fd; 410 if (fd >= 0) { 411 struct file * file = fget(fd); 412 mask = POLLNVAL; 413 if (file != NULL) { 414 mask = DEFAULT_POLLMASK; 415 if (file->f_op && file->f_op->poll) 416 mask = file->f_op->poll(file, *pwait); 417 mask &= fdp->events | POLLERR | POLLHUP; 418 fput(file); 419 } 420 if (mask) { 421 *pwait = NULL; 422 (*count)++; 423 } 424 } 425 fdp->revents = mask; 426 } 427 } 428 429 static int do_poll(unsigned int nfds, struct poll_list *list, 430 struct poll_wqueues *wait, long timeout) 431 { 432 int count = 0; 433 poll_table* pt = &wait->pt; 434 435 if (!timeout) 436 pt = NULL; 437 438 for (;;) { 439 struct poll_list *walk; 440 set_current_state(TASK_INTERRUPTIBLE); 441 walk = list; 442 while(walk != NULL) { 443 do_pollfd( walk->len, walk->entries, &pt, &count); 444 walk = walk->next; 445 } 446 pt = NULL; 447 if (count || !timeout || signal_pending(current)) 448 break; 449 count = wait->error; 450 if (count) 451 break; 452 timeout = schedule_timeout(timeout); 453 } 454 __set_current_state(TASK_RUNNING); 455 return count; 456 } 457 458 asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout) 459 { 460 struct poll_wqueues table; 461 int fdcount, err; 462 unsigned int i; 463 struct poll_list *head; 464 struct poll_list *walk; 465 466 /* Do a sanity check on nfds ... */ 467 if (nfds > current->files->max_fdset && nfds > OPEN_MAX) 468 return -EINVAL; 469 470 if (timeout) { 471 /* Careful about overflow in the intermediate values */ 472 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ) 473 timeout = (unsigned long)(timeout*HZ+999)/1000+1; 474 else /* Negative or overflow */ 475 timeout = MAX_SCHEDULE_TIMEOUT; 476 } 477 478 poll_initwait(&table); 479 480 head = NULL; 481 walk = NULL; 482 i = nfds; 483 err = -ENOMEM; 484 while(i!=0) { 485 struct poll_list *pp; 486 pp = kmalloc(sizeof(struct poll_list)+ 487 sizeof(struct pollfd)* 488 (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i), 489 GFP_KERNEL); 490 if(pp==NULL) 491 goto out_fds; 492 pp->next=NULL; 493 pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i); 494 if (head == NULL) 495 head = pp; 496 else 497 walk->next = pp; 498 499 walk = pp; 500 if (copy_from_user(pp->entries, ufds + nfds-i, 501 sizeof(struct pollfd)*pp->len)) { 502 err = -EFAULT; 503 goto out_fds; 504 } 505 i -= pp->len; 506 } 507 fdcount = do_poll(nfds, head, &table, timeout); 508 509 /* OK, now copy the revents fields back to user space. */ 510 walk = head; 511 err = -EFAULT; 512 while(walk != NULL) { 513 struct pollfd *fds = walk->entries; 514 int j; 515 516 for (j=0; j < walk->len; j++, ufds++) { 517 if(__put_user(fds[j].revents, &ufds->revents)) 518 goto out_fds; 519 } 520 walk = walk->next; 521 } 522 err = fdcount; 523 if (!fdcount && signal_pending(current)) 524 err = -EINTR; 525 out_fds: 526 walk = head; 527 while(walk!=NULL) { 528 struct poll_list *pp = walk->next; 529 kfree(walk); 530 walk = pp; 531 } 532 poll_freewait(&table); 533 return err; 534 } 535