/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2008  Thiemo Seufer
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prefetch.h>
#include <asm/system.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include "uasm.h"

/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31

/* Handle labels (which must be positive integers). */
enum label_id {
        label_clear_nopref = 1,
        label_clear_pref,
        label_copy_nopref,
        label_copy_pref_both,
        label_copy_pref_store,
};

UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label __cpuinitdata labels[5];
static struct uasm_reloc __cpuinitdata relocs[5];

#define cpu_is_r4600_v1_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:             0x058 bytes
 * R4600 v1.7:                          0x05c bytes
 * R4600 v2.0:                          0x060 bytes
 * With prefetching, 16 word strides    0x120 bytes
 */

static u32 clear_page_array[0x120 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
#else
void clear_page(void *page) __attribute__((alias("clear_page_array")));
#endif

EXPORT_SYMBOL(clear_page);

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:             0x11c bytes
 * R4600 v1.7:                          0x080 bytes
 * R4600 v2.0:                          0x07c bytes
 * With prefetching, 16 word strides    0x540 bytes
 */
static u32 copy_page_array[0x540 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void
copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
#else
void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
#endif

EXPORT_SYMBOL(copy_page);


static int pref_bias_clear_store __cpuinitdata;
static int pref_bias_copy_load __cpuinitdata;
static int pref_bias_copy_store __cpuinitdata;

static u32 pref_src_mode __cpuinitdata;
static u32 pref_dst_mode __cpuinitdata;

static int clear_word_size __cpuinitdata;
static int copy_word_size __cpuinitdata;

static int half_clear_loop_size __cpuinitdata;
static int half_copy_loop_size __cpuinitdata;

static int cache_line_size __cpuinitdata;
#define cache_line_mask() (cache_line_size - 1)
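
/*
 * For orientation, a sketch of what build_clear_page() below synthesizes.
 * This is illustrative only, not authoritative output: it assumes a 64-bit
 * CPU without prefetch support, 4K pages, 32-byte D-cache lines and the
 * Create_Dirty_Excl_D cache op available; the real code varies with the
 * parameters chosen in set_prefetch_parameters().
 *
 *      clear_page:                             # a0 = page
 *              daddiu  a2, a0, PAGE_SIZE       # a2 = first byte past the page
 *      1:      cache   Create_Dirty_Excl_D, 0(a0)
 *              sd      zero, 0(a0)
 *              sd      zero, 8(a0)
 *              sd      zero, 16(a0)
 *              sd      zero, 24(a0)
 *              daddiu  a0, a0, 64              # 2 * half_clear_loop_size
 *              cache   Create_Dirty_Excl_D, -32(a0)
 *              sd      zero, -32(a0)
 *              sd      zero, -24(a0)
 *              sd      zero, -16(a0)
 *              bne     a0, a2, 1b
 *               sd     zero, -8(a0)            # final store in the delay slot
 *              jr      ra
 *               nop
 */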

/*
 * Emit code to set reg1 = reg2 + off.  Handles offsets that do not fit
 * into a 16-bit immediate and, on 64-bit CPUs, avoids daddiu where the
 * R4000 daddiu erratum requires it (DADDI_WAR).
 */
static inline void __cpuinit
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
        if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                } else
                        uasm_i_addiu(buf, T9, ZERO, off);
                uasm_i_daddu(buf, reg1, reg2, T9);
        } else {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                        UASM_i_ADDU(buf, reg1, reg2, T9);
                } else
                        UASM_i_ADDIU(buf, reg1, reg2, off);
        }
}

static void __cpuinit set_prefetch_parameters(void)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
                clear_word_size = 8;
        else
                clear_word_size = 4;

        if (cpu_has_64bit_gp_regs)
                copy_word_size = 8;
        else
                copy_word_size = 4;

        /*
         * The prefs used here are "streaming" hints, which cause the
         * copied data to be kicked out of the cache sooner.  A page copy
         * often ends up copying a lot more data than is commonly used, so
         * this seems to make sense in terms of reducing cache pollution,
         * but I've no real performance data to back this up.
         */
        if (cpu_has_prefetch) {
                /*
                 * XXX: Most prefetch bias values in here are based on
                 * guesswork.
                 */
                cache_line_size = cpu_dcache_line_size();
                switch (current_cpu_type()) {
                case CPU_TX49XX:
                        /* The TX49 supports only Pref_Load. */
                        pref_bias_copy_load = 256;
                        break;

                case CPU_RM9000:
                        /*
                         * As a workaround for erratum G105, which makes the
                         * PrepareForStore hint unusable, we fall back to
                         * StoreRetained on the RM9000.  Once it is known
                         * which versions of the RM9000 are affected, we'll
                         * be able to conditionalize this.
                         */

                case CPU_R10000:
                case CPU_R12000:
                case CPU_R14000:
                        /*
                         * These values have been experimentally tuned for an
                         * Origin 200.
                         */
                        pref_bias_clear_store = 512;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 256;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_StoreStreamed;
                        break;

                case CPU_SB1:
                case CPU_SB1A:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 128;
                        pref_bias_copy_store = 128;
                        /*
                         * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
                         * hints are broken.
                         */
                        if (current_cpu_type() == CPU_SB1 &&
                            (current_cpu_data.processor_id & 0xff) < 0x02) {
                                pref_src_mode = Pref_Load;
                                pref_dst_mode = Pref_Store;
                        } else {
                                pref_src_mode = Pref_LoadStreamed;
                                pref_dst_mode = Pref_StoreStreamed;
                        }
                        break;

                default:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 128;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_PrepareForStore;
                        break;
                }
        } else {
                if (cpu_has_cache_cdex_s)
                        cache_line_size = cpu_scache_line_size();
                else if (cpu_has_cache_cdex_p)
                        cache_line_size = cpu_dcache_line_size();
        }
        /*
         * Too much unrolling will overflow the available space in
         * clear_page_array / copy_page_array.  8 words sounds generous,
         * but a R4000 with 128 byte L2 line length can exceed even that.
         */
        half_clear_loop_size = min(8 * clear_word_size,
                                   max(cache_line_size >> 1,
                                       4 * clear_word_size));
        half_copy_loop_size = min(8 * copy_word_size,
                                  max(cache_line_size >> 1,
                                      4 * copy_word_size));
}
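
/*
 * Worked example of the sizing above, for a hypothetical 64-bit CPU with
 * 32-byte D-cache lines: clear_word_size = 8, so half_clear_loop_size =
 * min(8 * 8, max(32 / 2, 4 * 8)) = min(64, 32) = 32 bytes, i.e. four sd
 * instructions per loop half.  A 128-byte R4000 S-cache line instead hits
 * the 8-word cap: min(64, max(64, 32)) = 64 bytes.
 */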

static void __cpuinit build_clear_store(u32 **buf, int off)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
                uasm_i_sd(buf, ZERO, off, A0);
        } else {
                uasm_i_sw(buf, ZERO, off, A0);
        }
}

static inline void __cpuinit build_clear_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_clear_store) {
                uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
                            A0);
        } else if (cpu_has_cache_cdex_s) {
                uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
        } else if (cpu_has_cache_cdex_p) {
                if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                        uasm_i_nop(buf);
                        uasm_i_nop(buf);
                        uasm_i_nop(buf);
                        uasm_i_nop(buf);
                }

                if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                        uasm_i_lw(buf, ZERO, ZERO, AT);

                uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
        }
}
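
/*
 * A note on the R4600 workarounds in build_clear_pref() above (and in
 * build_copy_store_pref() below), summarizing the WAR logic as used here:
 * R4600 V1.x needs the hit CACHE op padded away from neighbouring memory
 * accesses, hence the four nops; R4600 V2.x wants an uncached load issued
 * first, which is why the page builders preload AT with 0xa000 (the upper
 * half of a KSEG1 address) so that the lw $0, 0(AT) above hits uncached
 * memory.
 */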

void __cpuinit build_clear_page(void)
{
        int off;
        u32 *buf = (u32 *)&clear_page_array;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - The prefetch bias is a multiple of 2 words.
         *   - The prefetch bias is less than one page.
         */
        BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_clear_store);

        off = PAGE_SIZE - pref_bias_clear_store;
        if (off > 0xffff || !pref_bias_clear_store)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, 0xa000);

        off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
                                * cache_line_size : 0;
        while (off) {
                build_clear_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_clear_pref(&l, buf);
        do {
                build_clear_pref(&buf, off);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < half_clear_loop_size);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_clear_pref(&buf, off);
                if (off == -clear_word_size)
                        uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < 0);

        if (pref_bias_clear_store) {
                pg_addiu(&buf, A2, A0, pref_bias_clear_store);
                uasm_l_clear_nopref(&l, buf);
                off = 0;
                do {
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < half_clear_loop_size);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        if (off == -clear_word_size)
                                uasm_il_bne(&buf, &r, A0, A2,
                                            label_clear_nopref);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized clear page handler (%u instructions).\n",
                 (u32)(buf - clear_page_array));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - clear_page_array); i++)
                pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
        pr_debug("\t.set pop\n");
}

static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_ld(buf, reg, off, A1);
        } else {
                uasm_i_lw(buf, reg, off, A1);
        }
}

static void __cpuinit build_copy_store(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_sd(buf, reg, off, A0);
        } else {
                uasm_i_sw(buf, reg, off, A0);
        }
}

static inline void build_copy_load_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_load)
                uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

static inline void build_copy_store_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_store) {
                uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
                            A0);
        } else if (cpu_has_cache_cdex_s) {
                uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
        } else if (cpu_has_cache_cdex_p) {
                if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                        uasm_i_nop(buf);
                        uasm_i_nop(buf);
                        uasm_i_nop(buf);
                        uasm_i_nop(buf);
                }

                if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                        uasm_i_lw(buf, ZERO, ZERO, AT);

                uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
        }
}
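
/*
 * Worked example of the warm-up prefetches at the top of build_copy_page()
 * below, for a hypothetical configuration with pref_bias_copy_load = 256
 * and 32-byte cache lines: min(8, 256 / 32) * 32 = 256, so eight load
 * prefetches are emitted, one per line of the first 256 bytes of the
 * source page (A1 + 0 ... A1 + 224), priming the prefetch pipeline before
 * the main loop is entered.
 */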

void __cpuinit build_copy_page(void)
{
        int off;
        u32 *buf = (u32 *)&copy_page_array;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - All prefetch biases are multiples of 8 words.
         *   - The prefetch biases are less than one page.
         *   - The store prefetch bias isn't greater than the load
         *     prefetch bias.
         */
        BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
        BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_copy_load);
        BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

        off = PAGE_SIZE - pref_bias_copy_load;
        if (off > 0xffff || !pref_bias_copy_load)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, 0xa000);

        off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_load_pref(&buf, -off);
                off -= cache_line_size;
        }
        off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_store_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_copy_pref_both(&l, buf);
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < half_copy_loop_size);
        pg_addiu(&buf, A1, A1, 2 * off);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                if (off == -(4 * copy_word_size))
                        uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < 0);

        if (pref_bias_copy_load - pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0,
                         pref_bias_copy_load - pref_bias_copy_store);
                uasm_l_copy_pref_store(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_pref_store);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }
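
        /*
         * Tail handling: the main loop stops pref_bias_copy_load bytes
         * short of the end so that no prefetch reaches past the page.
         * The block above (taken when the load and store biases differ)
         * re-runs the copy with store prefetches only, and the block
         * below finishes the last pref_bias_copy_store bytes with no
         * prefetches at all.
         */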
        if (pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0, pref_bias_copy_store);
                uasm_l_copy_nopref(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_nopref);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized copy page handler (%u instructions).\n",
                 (u32)(buf - copy_page_array));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - copy_page_array); i++)
                pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
        pr_debug("\t.set pop\n");
}
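
/*
 * Usage note: build_clear_page() and build_copy_page() are expected to run
 * once during boot-time CPU setup, before the first clear_page()/copy_page()
 * call -- typically from the cache initialization code (e.g.
 * r4k_cache_init()).
 */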

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
        u64 dscr_a;
        u64 dscr_b;
        u64 pad_a;
        u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];

void sb1_dma_init(void)
{
        int i;

        for (i = 0; i < DM_NUM_CHANNELS; i++) {
                const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
                                     V_DM_DSCR_BASE_RINGSZ(1);
                void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));

                __raw_writeq(base_val, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
        }
}

void clear_page(void *page)
{
        u64 to_phys = CPHYSADDR((unsigned long)page);
        unsigned int cpu = smp_processor_id();

        /* if the page is not in KSEG0, use old way */
        if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
                return clear_page_cpu(page);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
                                 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

void copy_page(void *to, void *from)
{
        u64 from_phys = CPHYSADDR((unsigned long)from);
        u64 to_phys = CPHYSADDR((unsigned long)to);
        unsigned int cpu = smp_processor_id();

        /* if any page is not in KSEG0, use old way */
        if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
            || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
                return copy_page_cpu(to, from);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
                                 M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */