1 /* 2 * arch/parisc/lib/io.c 3 * 4 * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard 5 * Copyright (c) Randolph Chung 2001 <tausq@debian.org> 6 * 7 * IO accessing functions which shouldn't be inlined because they're too big 8 */ 9 10 #include <linux/kernel.h> 11 #include <linux/module.h> 12 #include <asm/io.h> 13 14 /* Copies a block of memory to a device in an efficient manner. 15 * Assumes the device can cope with 32-bit transfers. If it can't, 16 * don't use this function. 17 */ 18 void memcpy_toio(volatile void __iomem *dst, const void *src, int count) 19 { 20 if (((unsigned long)dst & 3) != ((unsigned long)src & 3)) 21 goto bytecopy; 22 while ((unsigned long)dst & 3) { 23 writeb(*(char *)src, dst++); 24 src++; 25 count--; 26 } 27 while (count > 3) { 28 __raw_writel(*(u32 *)src, dst); 29 src += 4; 30 dst += 4; 31 count -= 4; 32 } 33 bytecopy: 34 while (count--) { 35 writeb(*(char *)src, dst++); 36 src++; 37 } 38 } 39 40 /* 41 ** Copies a block of memory from a device in an efficient manner. 42 ** Assumes the device can cope with 32-bit transfers. If it can't, 43 ** don't use this function. 44 ** 45 ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM: 46 ** 27341/64 = 427 cyc per int 47 ** 61311/128 = 478 cyc per short 48 ** 122637/256 = 479 cyc per byte 49 ** Ergo bus latencies dominant (not transfer size). 50 ** Minimize total number of transfers at cost of CPU cycles. 51 ** TODO: only look at src alignment and adjust the stores to dest. 52 */ 53 void memcpy_fromio(void *dst, const volatile void __iomem *src, int count) 54 { 55 /* first compare alignment of src/dst */ 56 if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) ) 57 goto bytecopy; 58 59 if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) ) 60 goto shortcopy; 61 62 /* Then check for misaligned start address */ 63 if ((unsigned long)src & 1) { 64 *(u8 *)dst = readb(src); 65 src++; 66 dst++; 67 count--; 68 if (count < 2) goto bytecopy; 69 } 70 71 if ((unsigned long)src & 2) { 72 *(u16 *)dst = __raw_readw(src); 73 src += 2; 74 dst += 2; 75 count -= 2; 76 } 77 78 while (count > 3) { 79 *(u32 *)dst = __raw_readl(src); 80 dst += 4; 81 src += 4; 82 count -= 4; 83 } 84 85 shortcopy: 86 while (count > 1) { 87 *(u16 *)dst = __raw_readw(src); 88 src += 2; 89 dst += 2; 90 count -= 2; 91 } 92 93 bytecopy: 94 while (count--) { 95 *(char *)dst = readb(src); 96 src++; 97 dst++; 98 } 99 } 100 101 /* Sets a block of memory on a device to a given value. 102 * Assumes the device can cope with 32-bit transfers. If it can't, 103 * don't use this function. 104 */ 105 void memset_io(volatile void __iomem *addr, unsigned char val, int count) 106 { 107 u32 val32 = (val << 24) | (val << 16) | (val << 8) | val; 108 while ((unsigned long)addr & 3) { 109 writeb(val, addr++); 110 count--; 111 } 112 while (count > 3) { 113 __raw_writel(val32, addr); 114 addr += 4; 115 count -= 4; 116 } 117 while (count--) { 118 writeb(val, addr++); 119 } 120 } 121 122 /* 123 * Read COUNT 8-bit bytes from port PORT into memory starting at 124 * SRC. 125 */ 126 void insb (unsigned long port, void *dst, unsigned long count) 127 { 128 unsigned char *p; 129 130 p = (unsigned char *)dst; 131 132 while (((unsigned long)p) & 0x3) { 133 if (!count) 134 return; 135 count--; 136 *p = inb(port); 137 p++; 138 } 139 140 while (count >= 4) { 141 unsigned int w; 142 count -= 4; 143 w = inb(port) << 24; 144 w |= inb(port) << 16; 145 w |= inb(port) << 8; 146 w |= inb(port); 147 *(unsigned int *) p = w; 148 p += 4; 149 } 150 151 while (count) { 152 --count; 153 *p = inb(port); 154 p++; 155 } 156 } 157 158 159 /* 160 * Read COUNT 16-bit words from port PORT into memory starting at 161 * SRC. SRC must be at least short aligned. This is used by the 162 * IDE driver to read disk sectors. Performance is important, but 163 * the interfaces seems to be slow: just using the inlined version 164 * of the inw() breaks things. 165 */ 166 void insw (unsigned long port, void *dst, unsigned long count) 167 { 168 unsigned int l = 0, l2; 169 unsigned char *p; 170 171 p = (unsigned char *)dst; 172 173 if (!count) 174 return; 175 176 switch (((unsigned long)p) & 0x3) 177 { 178 case 0x00: /* Buffer 32-bit aligned */ 179 while (count>=2) { 180 181 count -= 2; 182 l = cpu_to_le16(inw(port)) << 16; 183 l |= cpu_to_le16(inw(port)); 184 *(unsigned int *)p = l; 185 p += 4; 186 } 187 if (count) { 188 *(unsigned short *)p = cpu_to_le16(inw(port)); 189 } 190 break; 191 192 case 0x02: /* Buffer 16-bit aligned */ 193 *(unsigned short *)p = cpu_to_le16(inw(port)); 194 p += 2; 195 count--; 196 while (count>=2) { 197 198 count -= 2; 199 l = cpu_to_le16(inw(port)) << 16; 200 l |= cpu_to_le16(inw(port)); 201 *(unsigned int *)p = l; 202 p += 4; 203 } 204 if (count) { 205 *(unsigned short *)p = cpu_to_le16(inw(port)); 206 } 207 break; 208 209 case 0x01: /* Buffer 8-bit aligned */ 210 case 0x03: 211 /* I don't bother with 32bit transfers 212 * in this case, 16bit will have to do -- DE */ 213 --count; 214 215 l = cpu_to_le16(inw(port)); 216 *p = l >> 8; 217 p++; 218 while (count--) 219 { 220 l2 = cpu_to_le16(inw(port)); 221 *(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8); 222 p += 2; 223 l = l2; 224 } 225 *p = l & 0xff; 226 break; 227 } 228 } 229 230 231 232 /* 233 * Read COUNT 32-bit words from port PORT into memory starting at 234 * SRC. Now works with any alignment in SRC. Performance is important, 235 * but the interfaces seems to be slow: just using the inlined version 236 * of the inl() breaks things. 237 */ 238 void insl (unsigned long port, void *dst, unsigned long count) 239 { 240 unsigned int l = 0, l2; 241 unsigned char *p; 242 243 p = (unsigned char *)dst; 244 245 if (!count) 246 return; 247 248 switch (((unsigned long) dst) & 0x3) 249 { 250 case 0x00: /* Buffer 32-bit aligned */ 251 while (count--) 252 { 253 *(unsigned int *)p = cpu_to_le32(inl(port)); 254 p += 4; 255 } 256 break; 257 258 case 0x02: /* Buffer 16-bit aligned */ 259 --count; 260 261 l = cpu_to_le32(inl(port)); 262 *(unsigned short *)p = l >> 16; 263 p += 2; 264 265 while (count--) 266 { 267 l2 = cpu_to_le32(inl(port)); 268 *(unsigned int *)p = (l & 0xffff) << 16 | (l2 >> 16); 269 p += 4; 270 l = l2; 271 } 272 *(unsigned short *)p = l & 0xffff; 273 break; 274 case 0x01: /* Buffer 8-bit aligned */ 275 --count; 276 277 l = cpu_to_le32(inl(port)); 278 *(unsigned char *)p = l >> 24; 279 p++; 280 *(unsigned short *)p = (l >> 8) & 0xffff; 281 p += 2; 282 while (count--) 283 { 284 l2 = cpu_to_le32(inl(port)); 285 *(unsigned int *)p = (l & 0xff) << 24 | (l2 >> 8); 286 p += 4; 287 l = l2; 288 } 289 *p = l & 0xff; 290 break; 291 case 0x03: /* Buffer 8-bit aligned */ 292 --count; 293 294 l = cpu_to_le32(inl(port)); 295 *p = l >> 24; 296 p++; 297 while (count--) 298 { 299 l2 = cpu_to_le32(inl(port)); 300 *(unsigned int *)p = (l & 0xffffff) << 8 | l2 >> 24; 301 p += 4; 302 l = l2; 303 } 304 *(unsigned short *)p = (l >> 8) & 0xffff; 305 p += 2; 306 *p = l & 0xff; 307 break; 308 } 309 } 310 311 312 /* 313 * Like insb but in the opposite direction. 314 * Don't worry as much about doing aligned memory transfers: 315 * doing byte reads the "slow" way isn't nearly as slow as 316 * doing byte writes the slow way (no r-m-w cycle). 317 */ 318 void outsb(unsigned long port, const void * src, unsigned long count) 319 { 320 const unsigned char *p; 321 322 p = (const unsigned char *)src; 323 while (count) { 324 count--; 325 outb(*p, port); 326 p++; 327 } 328 } 329 330 /* 331 * Like insw but in the opposite direction. This is used by the IDE 332 * driver to write disk sectors. Performance is important, but the 333 * interfaces seems to be slow: just using the inlined version of the 334 * outw() breaks things. 335 */ 336 void outsw (unsigned long port, const void *src, unsigned long count) 337 { 338 unsigned int l = 0, l2; 339 const unsigned char *p; 340 341 p = (const unsigned char *)src; 342 343 if (!count) 344 return; 345 346 switch (((unsigned long)p) & 0x3) 347 { 348 case 0x00: /* Buffer 32-bit aligned */ 349 while (count>=2) { 350 count -= 2; 351 l = *(unsigned int *)p; 352 p += 4; 353 outw(le16_to_cpu(l >> 16), port); 354 outw(le16_to_cpu(l & 0xffff), port); 355 } 356 if (count) { 357 outw(le16_to_cpu(*(unsigned short*)p), port); 358 } 359 break; 360 361 case 0x02: /* Buffer 16-bit aligned */ 362 363 outw(le16_to_cpu(*(unsigned short*)p), port); 364 p += 2; 365 count--; 366 367 while (count>=2) { 368 count -= 2; 369 l = *(unsigned int *)p; 370 p += 4; 371 outw(le16_to_cpu(l >> 16), port); 372 outw(le16_to_cpu(l & 0xffff), port); 373 } 374 if (count) { 375 outw(le16_to_cpu(*(unsigned short *)p), port); 376 } 377 break; 378 379 case 0x01: /* Buffer 8-bit aligned */ 380 /* I don't bother with 32bit transfers 381 * in this case, 16bit will have to do -- DE */ 382 383 l = *p << 8; 384 p++; 385 count--; 386 while (count) 387 { 388 count--; 389 l2 = *(unsigned short *)p; 390 p += 2; 391 outw(le16_to_cpu(l | l2 >> 8), port); 392 l = l2 << 8; 393 } 394 l2 = *(unsigned char *)p; 395 outw (le16_to_cpu(l | l2>>8), port); 396 break; 397 398 } 399 } 400 401 402 /* 403 * Like insl but in the opposite direction. This is used by the IDE 404 * driver to write disk sectors. Works with any alignment in SRC. 405 * Performance is important, but the interfaces seems to be slow: 406 * just using the inlined version of the outl() breaks things. 407 */ 408 void outsl (unsigned long port, const void *src, unsigned long count) 409 { 410 unsigned int l = 0, l2; 411 const unsigned char *p; 412 413 p = (const unsigned char *)src; 414 415 if (!count) 416 return; 417 418 switch (((unsigned long)p) & 0x3) 419 { 420 case 0x00: /* Buffer 32-bit aligned */ 421 while (count--) 422 { 423 outl(le32_to_cpu(*(unsigned int *)p), port); 424 p += 4; 425 } 426 break; 427 428 case 0x02: /* Buffer 16-bit aligned */ 429 --count; 430 431 l = *(unsigned short *)p; 432 p += 2; 433 434 while (count--) 435 { 436 l2 = *(unsigned int *)p; 437 p += 4; 438 outl (le32_to_cpu(l << 16 | l2 >> 16), port); 439 l = l2; 440 } 441 l2 = *(unsigned short *)p; 442 outl (le32_to_cpu(l << 16 | l2), port); 443 break; 444 case 0x01: /* Buffer 8-bit aligned */ 445 --count; 446 447 l = *p << 24; 448 p++; 449 l |= *(unsigned short *)p << 8; 450 p += 2; 451 452 while (count--) 453 { 454 l2 = *(unsigned int *)p; 455 p += 4; 456 outl (le32_to_cpu(l | l2 >> 24), port); 457 l = l2 << 8; 458 } 459 l2 = *p; 460 outl (le32_to_cpu(l | l2), port); 461 break; 462 case 0x03: /* Buffer 8-bit aligned */ 463 --count; 464 465 l = *p << 24; 466 p++; 467 468 while (count--) 469 { 470 l2 = *(unsigned int *)p; 471 p += 4; 472 outl (le32_to_cpu(l | l2 >> 8), port); 473 l = l2 << 24; 474 } 475 l2 = *(unsigned short *)p << 16; 476 p += 2; 477 l2 |= *p; 478 outl (le32_to_cpu(l | l2), port); 479 break; 480 } 481 } 482 483 EXPORT_SYMBOL(insb); 484 EXPORT_SYMBOL(insw); 485 EXPORT_SYMBOL(insl); 486 EXPORT_SYMBOL(outsb); 487 EXPORT_SYMBOL(outsw); 488 EXPORT_SYMBOL(outsl); 489