/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2008 Thiemo Seufer
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prefetch.h>
#include <asm/system.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include "uasm.h"

/*
 * Registers used in the assembled routines.
 *
 * A0 is the base register of every emitted store, A1 the base register of
 * every emitted load, A2 holds the loop end address compared by the emitted
 * bne instructions, and T9 is the scratch register used by pg_addiu().
 *
 * NOTE(review): the conventional MIPS "at" register is $1, but AT is defined
 * as register 2 here -- confirm that this is intentional.
 */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31

/* Handle labels (which must be positive integers). */
enum label_id {
	label_clear_nopref = 1,
	label_clear_pref,
	label_copy_nopref,
	label_copy_pref_both,
	label_copy_pref_store,
};

/* Instantiate the uasm_l_<name>() label helpers used by the builders below. */
UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label __cpuinitdata labels[5];
static struct uasm_reloc __cpuinitdata relocs[5];

/* Match the processor ID register against the R4600 v1.x / v2.x values. */
#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:		0x058 bytes
 * R4600 v1.7:				0x05c bytes
 * R4600 v2.0:				0x060 bytes
 * With prefetching, 16 word strides	0x120 bytes
 */

/* Buffer that build_clear_page() fills with the synthesized routine. */
static u32 clear_page_array[0x120 / 4];

/*
 * The synthesized code is called through an alias of the array.  With
 * CONFIG_SIBYTE_DMA_PAGEOPS the alias is named clear_page_cpu() and
 * clear_page() itself is a regular C function defined later in this file.
 */
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
#else
void clear_page(void *page) __attribute__((alias("clear_page_array")));
#endif

EXPORT_SYMBOL(clear_page);

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:		0x11c bytes
 * R4600 v1.7:				0x080 bytes
 * R4600 v2.0:				0x07c bytes
 * With prefetching, 16 word strides	0x540 bytes
 */
/* Buffer that build_copy_page() fills with the synthesized routine. */
static u32 copy_page_array[0x540 / 4];

/* Same aliasing scheme as for clear_page above. */
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void
copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
#else
void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
#endif

EXPORT_SYMBOL(copy_page);


/*
 * Prefetch distances in bytes, added to the offset of each emitted pref.
 * A value of zero disables the corresponding prefetch.
 */
static int pref_bias_clear_store __cpuinitdata;
static int pref_bias_copy_load __cpuinitdata;
static int pref_bias_copy_store __cpuinitdata;

/* Prefetch hints used for the source (load) and destination (store) side. */
static u32 pref_src_mode __cpuinitdata;
static u32 pref_dst_mode __cpuinitdata;

/* Bytes written / copied by a single emitted store or load (4 or 8). */
static int clear_word_size __cpuinitdata;
static int copy_word_size __cpuinitdata;

/* Bytes handled by each of the two halves of an unrolled loop body. */
static int half_clear_loop_size __cpuinitdata;
static int half_copy_loop_size __cpuinitdata;

/* Cache line size in bytes; stays 0 when set_prefetch_parameters() sets none. */
static int cache_line_size __cpuinitdata;
#define cache_line_mask() (cache_line_size - 1)

/*
 * Emit code for "reg1 = reg2 + off".
 *
 * @buf:  pointer to the current emit position, advanced by the uasm helpers
 * @reg1: destination register number
 * @reg2: source register number
 * @off:  constant to add
 *
 * When the DADDIU erratum workaround applies, the constant is always
 * materialized in T9 and added with daddu.  Otherwise a single immediate add
 * is emitted if off is at most 0x7fff, and a lui/addiu/add sequence through
 * T9 if it is larger.
 */
static inline void __cpuinit
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
	if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
		} else
			uasm_i_addiu(buf, T9, ZERO, off);
		uasm_i_daddu(buf, reg1, reg2, T9);
	} else {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
			UASM_i_ADDU(buf, reg1, reg2, T9);
		} else
			UASM_i_ADDIU(buf, reg1, reg2, off);
	}
}

/*
 * Select the word sizes, prefetch biases/hints, cache line size and unrolled
 * loop sizes used by build_clear_page() and build_copy_page().  Called at the
 * start of each of those builders.
 */
static void __cpuinit set_prefetch_parameters(void)
{
	/* Clearing can use 64-bit stores of $zero even without 64-bit GPRs. */
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
		clear_word_size = 8;
	else
		clear_word_size = 4;

	if (cpu_has_64bit_gp_regs)
		copy_word_size = 8;
	else
		copy_word_size = 4;

	/*
	 * The pref's used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner. A page copy often
	 * ends up copying a lot more data than is commonly used, so this seems
	 * to make sense in terms of reducing cache pollution, but I've no real
	 * performance data to back this up.
	 */
	if (cpu_has_prefetch) {
		/*
		 * XXX: Most prefetch bias values in here are based on
		 * guesswork.
		 */
		cache_line_size = cpu_dcache_line_size();
		switch (current_cpu_type()) {
		case CPU_TX49XX:
			/* TX49 supports only Pref_Load */
			/* Only the load bias is set; the modes are not touched. */
			pref_bias_copy_load = 256;
			break;

		case CPU_RM9000:
			/*
			 * As a workaround for erratum G105, which makes the
			 * PrepareForStore hint unusable, we fall back to
			 * StoreRetained on the RM9000. Once it is known which
			 * versions of the RM9000 are affected we'll be able to
			 * conditionalize this.
			 *
			 * NOTE(review): the shared case below selects
			 * Pref_StoreStreamed, not a StoreRetained hint --
			 * confirm which one is intended.
			 */
			/* fall through */

		case CPU_R10000:
		case CPU_R12000:
		case CPU_R14000:
			/*
			 * Those values have been experimentally tuned for an
			 * Origin 200.
			 */
			pref_bias_clear_store = 512;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 256;
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_StoreStreamed;
			break;

		case CPU_SB1:
		case CPU_SB1A:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			/*
			 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
			 * hints are broken.
			 */
			if (current_cpu_type() == CPU_SB1 &&
			    (current_cpu_data.processor_id & 0xff) < 0x02) {
				pref_src_mode = Pref_Load;
				pref_dst_mode = Pref_Store;
			} else {
				pref_src_mode = Pref_LoadStreamed;
				pref_dst_mode = Pref_StoreStreamed;
			}
			break;

		default:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_PrepareForStore;
			break;
		}
	} else {
		/*
		 * No prefetch: record the line size of the cache targeted by
		 * the Create_Dirty_Excl cache ops, if the CPU has them.
		 */
		if (cpu_has_cache_cdex_s)
			cache_line_size = cpu_scache_line_size();
		else if (cpu_has_cache_cdex_p)
			cache_line_size = cpu_dcache_line_size();
	}
	/*
	 * Too much unrolling will overflow the available space in
	 * clear_page_array / copy_page_array.
	 */
	/* Half a cache line, clamped to the range [4 words, 16 words]. */
	half_clear_loop_size = min(16 * clear_word_size,
				   max(cache_line_size >> 1,
				       4 * clear_word_size));
	half_copy_loop_size = min(16 * copy_word_size,
				  max(cache_line_size >> 1,
				      4 * copy_word_size));
}

/*
 * Emit one store of $zero to off(A0): a doubleword store when 64-bit stores
 * of the zero register are available, a word store otherwise.
 */
static void __cpuinit build_clear_store(u32 **buf, int off)
{
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
		uasm_i_sd(buf, ZERO, off, A0);
	} else {
		uasm_i_sw(buf, ZERO, off, A0);
	}
}

/*
 * Emit the per-cache-line preparation for the destination of a clear.
 *
 * Nothing is emitted unless off has none of the cache_line_mask() bits set.
 * With a non-zero prefetch bias a pref at (bias + off)(A0) is emitted.
 * Otherwise, if one unrolled loop iteration covers exactly one cache line, a
 * Create_Dirty_Excl cache op on off(A0) is emitted (secondary cache variant
 * preferred), preceded by the R4600 workaround instructions where enabled.
 */
static inline void __cpuinit build_clear_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_clear_store) {
		uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
			    A0);
	} else if (cache_line_size == (half_clear_loop_size << 1)) {
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			/* R4600 v1.x workaround: four nops before the cache op. */
			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			/*
			 * R4600 v2.x workaround: dummy load into $zero from
			 * 0(AT); build_clear_page() loads AT with lui 0xa000
			 * under the same condition.
			 */
			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}

/*
 * Assemble the clear page routine into clear_page_array.
 *
 * The generated code consists of an unrolled loop that prefetches ahead
 * while storing zeroes, followed (when a prefetch bias is in use) by a second
 * loop without prefetches for the last pref_bias_clear_store bytes, and ends
 * with "jr ra; nop".  In the emitted code A0 is the store pointer and A2 the
 * end address of the current loop.
 */
void __cpuinit build_clear_page(void)
{
	int off;
	u32 *buf = (u32 *)&clear_page_array;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;

	/* The label/reloc tables are shared with build_copy_page(). */
	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - The prefetch bias is a multiple of 2 words.
	 *   - The prefetch bias is less than one page.
	 */
	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_clear_store);

	/* A2 = A0 + (PAGE_SIZE - bias): end address of the prefetching loop. */
	off = PAGE_SIZE - pref_bias_clear_store;
	if (off > 0xffff || !pref_bias_clear_store)
		pg_addiu(&buf, A2, A0, off);
	else
		uasm_i_ori(&buf, A2, A0, off);

	/* Set up AT for the dummy loads emitted by build_clear_pref(). */
	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
		uasm_i_lui(&buf, AT, 0xa000);

	/*
	 * Prime the prefetcher for up to 8 cache lines before entering the
	 * loop; off counts down to 0, which the loop below relies on.
	 */
	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
				* cache_line_size : 0;
	while (off) {
		build_clear_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_clear_pref(&l, buf);
	/* First half of the loop body: positive offsets from A0. */
	do {
		build_clear_pref(&buf, off);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < half_clear_loop_size);
	/* Advance A0 by a full iteration; the second half uses negative offsets. */
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_clear_pref(&buf, off);
		/*
		 * The branch is emitted just before the last store, so that
		 * store follows it (presumably filling the delay slot).
		 */
		if (off == -clear_word_size)
			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < 0);

	/* Tail loop for the last bias bytes, without any prefetches. */
	if (pref_bias_clear_store) {
		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
		uasm_l_clear_nopref(&l, buf);
		off = 0;
		do {
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < half_clear_loop_size);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			if (off == -clear_word_size)
				uasm_il_bne(&buf, &r, A0, A2,
					    label_clear_nopref);
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);

	/* Checked after the fact: fires if the routine outgrew its buffer. */
	BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized clear page handler (%u instructions).\n",
		 (u32)(buf - clear_page_array));

	/* Dump the generated code in a form that can be fed to an assembler. */
	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - clear_page_array); i++)
		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
	pr_debug("\t.set pop\n");
}
371 372 static void __cpuinit build_copy_load(u32 **buf, int reg, int off) 373 { 374 if (cpu_has_64bit_gp_regs) { 375 uasm_i_ld(buf, reg, off, A1); 376 } else { 377 uasm_i_lw(buf, reg, off, A1); 378 } 379 } 380 381 static void __cpuinit build_copy_store(u32 **buf, int reg, int off) 382 { 383 if (cpu_has_64bit_gp_regs) { 384 uasm_i_sd(buf, reg, off, A0); 385 } else { 386 uasm_i_sw(buf, reg, off, A0); 387 } 388 } 389 390 static inline void build_copy_load_pref(u32 **buf, int off) 391 { 392 if (off & cache_line_mask()) 393 return; 394 395 if (pref_bias_copy_load) 396 uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1); 397 } 398 399 static inline void build_copy_store_pref(u32 **buf, int off) 400 { 401 if (off & cache_line_mask()) 402 return; 403 404 if (pref_bias_copy_store) { 405 uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off, 406 A0); 407 } else if (cache_line_size == (half_copy_loop_size << 1)) { 408 if (cpu_has_cache_cdex_s) { 409 uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); 410 } else if (cpu_has_cache_cdex_p) { 411 if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { 412 uasm_i_nop(buf); 413 uasm_i_nop(buf); 414 uasm_i_nop(buf); 415 uasm_i_nop(buf); 416 } 417 418 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) 419 uasm_i_lw(buf, ZERO, ZERO, AT); 420 421 uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); 422 } 423 } 424 } 425 426 void __cpuinit build_copy_page(void) 427 { 428 int off; 429 u32 *buf = (u32 *)©_page_array; 430 struct uasm_label *l = labels; 431 struct uasm_reloc *r = relocs; 432 int i; 433 434 memset(labels, 0, sizeof(labels)); 435 memset(relocs, 0, sizeof(relocs)); 436 437 set_prefetch_parameters(); 438 439 /* 440 * This algorithm makes the following assumptions: 441 * - All prefetch biases are multiples of 8 words. 442 * - The prefetch biases are less than one page. 443 * - The store prefetch bias isn't greater than the load 444 * prefetch bias. 
445 */ 446 BUG_ON(pref_bias_copy_load % (8 * copy_word_size)); 447 BUG_ON(pref_bias_copy_store % (8 * copy_word_size)); 448 BUG_ON(PAGE_SIZE < pref_bias_copy_load); 449 BUG_ON(pref_bias_copy_store > pref_bias_copy_load); 450 451 off = PAGE_SIZE - pref_bias_copy_load; 452 if (off > 0xffff || !pref_bias_copy_load) 453 pg_addiu(&buf, A2, A0, off); 454 else 455 uasm_i_ori(&buf, A2, A0, off); 456 457 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) 458 uasm_i_lui(&buf, AT, 0xa000); 459 460 off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) * 461 cache_line_size : 0; 462 while (off) { 463 build_copy_load_pref(&buf, -off); 464 off -= cache_line_size; 465 } 466 off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) * 467 cache_line_size : 0; 468 while (off) { 469 build_copy_store_pref(&buf, -off); 470 off -= cache_line_size; 471 } 472 uasm_l_copy_pref_both(&l, buf); 473 do { 474 build_copy_load_pref(&buf, off); 475 build_copy_load(&buf, T0, off); 476 build_copy_load_pref(&buf, off + copy_word_size); 477 build_copy_load(&buf, T1, off + copy_word_size); 478 build_copy_load_pref(&buf, off + 2 * copy_word_size); 479 build_copy_load(&buf, T2, off + 2 * copy_word_size); 480 build_copy_load_pref(&buf, off + 3 * copy_word_size); 481 build_copy_load(&buf, T3, off + 3 * copy_word_size); 482 build_copy_store_pref(&buf, off); 483 build_copy_store(&buf, T0, off); 484 build_copy_store_pref(&buf, off + copy_word_size); 485 build_copy_store(&buf, T1, off + copy_word_size); 486 build_copy_store_pref(&buf, off + 2 * copy_word_size); 487 build_copy_store(&buf, T2, off + 2 * copy_word_size); 488 build_copy_store_pref(&buf, off + 3 * copy_word_size); 489 build_copy_store(&buf, T3, off + 3 * copy_word_size); 490 off += 4 * copy_word_size; 491 } while (off < half_copy_loop_size); 492 pg_addiu(&buf, A1, A1, 2 * off); 493 pg_addiu(&buf, A0, A0, 2 * off); 494 off = -off; 495 do { 496 build_copy_load_pref(&buf, off); 497 build_copy_load(&buf, T0, off); 498 
build_copy_load_pref(&buf, off + copy_word_size); 499 build_copy_load(&buf, T1, off + copy_word_size); 500 build_copy_load_pref(&buf, off + 2 * copy_word_size); 501 build_copy_load(&buf, T2, off + 2 * copy_word_size); 502 build_copy_load_pref(&buf, off + 3 * copy_word_size); 503 build_copy_load(&buf, T3, off + 3 * copy_word_size); 504 build_copy_store_pref(&buf, off); 505 build_copy_store(&buf, T0, off); 506 build_copy_store_pref(&buf, off + copy_word_size); 507 build_copy_store(&buf, T1, off + copy_word_size); 508 build_copy_store_pref(&buf, off + 2 * copy_word_size); 509 build_copy_store(&buf, T2, off + 2 * copy_word_size); 510 build_copy_store_pref(&buf, off + 3 * copy_word_size); 511 if (off == -(4 * copy_word_size)) 512 uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both); 513 build_copy_store(&buf, T3, off + 3 * copy_word_size); 514 off += 4 * copy_word_size; 515 } while (off < 0); 516 517 if (pref_bias_copy_load - pref_bias_copy_store) { 518 pg_addiu(&buf, A2, A0, 519 pref_bias_copy_load - pref_bias_copy_store); 520 uasm_l_copy_pref_store(&l, buf); 521 off = 0; 522 do { 523 build_copy_load(&buf, T0, off); 524 build_copy_load(&buf, T1, off + copy_word_size); 525 build_copy_load(&buf, T2, off + 2 * copy_word_size); 526 build_copy_load(&buf, T3, off + 3 * copy_word_size); 527 build_copy_store_pref(&buf, off); 528 build_copy_store(&buf, T0, off); 529 build_copy_store_pref(&buf, off + copy_word_size); 530 build_copy_store(&buf, T1, off + copy_word_size); 531 build_copy_store_pref(&buf, off + 2 * copy_word_size); 532 build_copy_store(&buf, T2, off + 2 * copy_word_size); 533 build_copy_store_pref(&buf, off + 3 * copy_word_size); 534 build_copy_store(&buf, T3, off + 3 * copy_word_size); 535 off += 4 * copy_word_size; 536 } while (off < half_copy_loop_size); 537 pg_addiu(&buf, A1, A1, 2 * off); 538 pg_addiu(&buf, A0, A0, 2 * off); 539 off = -off; 540 do { 541 build_copy_load(&buf, T0, off); 542 build_copy_load(&buf, T1, off + copy_word_size); 543 build_copy_load(&buf, 
T2, off + 2 * copy_word_size); 544 build_copy_load(&buf, T3, off + 3 * copy_word_size); 545 build_copy_store_pref(&buf, off); 546 build_copy_store(&buf, T0, off); 547 build_copy_store_pref(&buf, off + copy_word_size); 548 build_copy_store(&buf, T1, off + copy_word_size); 549 build_copy_store_pref(&buf, off + 2 * copy_word_size); 550 build_copy_store(&buf, T2, off + 2 * copy_word_size); 551 build_copy_store_pref(&buf, off + 3 * copy_word_size); 552 if (off == -(4 * copy_word_size)) 553 uasm_il_bne(&buf, &r, A2, A0, 554 label_copy_pref_store); 555 build_copy_store(&buf, T3, off + 3 * copy_word_size); 556 off += 4 * copy_word_size; 557 } while (off < 0); 558 } 559 560 if (pref_bias_copy_store) { 561 pg_addiu(&buf, A2, A0, pref_bias_copy_store); 562 uasm_l_copy_nopref(&l, buf); 563 off = 0; 564 do { 565 build_copy_load(&buf, T0, off); 566 build_copy_load(&buf, T1, off + copy_word_size); 567 build_copy_load(&buf, T2, off + 2 * copy_word_size); 568 build_copy_load(&buf, T3, off + 3 * copy_word_size); 569 build_copy_store(&buf, T0, off); 570 build_copy_store(&buf, T1, off + copy_word_size); 571 build_copy_store(&buf, T2, off + 2 * copy_word_size); 572 build_copy_store(&buf, T3, off + 3 * copy_word_size); 573 off += 4 * copy_word_size; 574 } while (off < half_copy_loop_size); 575 pg_addiu(&buf, A1, A1, 2 * off); 576 pg_addiu(&buf, A0, A0, 2 * off); 577 off = -off; 578 do { 579 build_copy_load(&buf, T0, off); 580 build_copy_load(&buf, T1, off + copy_word_size); 581 build_copy_load(&buf, T2, off + 2 * copy_word_size); 582 build_copy_load(&buf, T3, off + 3 * copy_word_size); 583 build_copy_store(&buf, T0, off); 584 build_copy_store(&buf, T1, off + copy_word_size); 585 build_copy_store(&buf, T2, off + 2 * copy_word_size); 586 if (off == -(4 * copy_word_size)) 587 uasm_il_bne(&buf, &r, A2, A0, 588 label_copy_nopref); 589 build_copy_store(&buf, T3, off + 3 * copy_word_size); 590 off += 4 * copy_word_size; 591 } while (off < 0); 592 } 593 594 uasm_i_jr(&buf, RA); 595 
uasm_i_nop(&buf); 596 597 BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array)); 598 599 uasm_resolve_relocs(relocs, labels); 600 601 pr_debug("Synthesized copy page handler (%u instructions).\n", 602 (u32)(buf - copy_page_array)); 603 604 pr_debug("\t.set push\n"); 605 pr_debug("\t.set noreorder\n"); 606 for (i = 0; i < (buf - copy_page_array); i++) 607 pr_debug("\t.word 0x%08x\n", copy_page_array[i]); 608 pr_debug("\t.set pop\n"); 609 } 610 611 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS 612 613 /* 614 * Pad descriptors to cacheline, since each is exclusively owned by a 615 * particular CPU. 616 */ 617 struct dmadscr { 618 u64 dscr_a; 619 u64 dscr_b; 620 u64 pad_a; 621 u64 pad_b; 622 } ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS]; 623 624 void sb1_dma_init(void) 625 { 626 int i; 627 628 for (i = 0; i < DM_NUM_CHANNELS; i++) { 629 const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) | 630 V_DM_DSCR_BASE_RINGSZ(1); 631 void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); 632 633 __raw_writeq(base_val, base_reg); 634 __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); 635 __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); 636 } 637 } 638 639 void clear_page(void *page) 640 { 641 u64 to_phys = CPHYSADDR((unsigned long)page); 642 unsigned int cpu = smp_processor_id(); 643 644 /* if the page is not in KSEG0, use old way */ 645 if ((long)KSEGX((unsigned long)page) != (long)CKSEG0) 646 return clear_page_cpu(page); 647 648 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | 649 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; 650 page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); 651 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); 652 653 /* 654 * Don't really want to do it this way, but there's no 655 * reliable way to delay completion detection. 
656 */ 657 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) 658 & M_DM_DSCR_BASE_INTERRUPT)) 659 ; 660 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); 661 } 662 663 void copy_page(void *to, void *from) 664 { 665 u64 from_phys = CPHYSADDR((unsigned long)from); 666 u64 to_phys = CPHYSADDR((unsigned long)to); 667 unsigned int cpu = smp_processor_id(); 668 669 /* if any page is not in KSEG0, use old way */ 670 if ((long)KSEGX((unsigned long)to) != (long)CKSEG0 671 || (long)KSEGX((unsigned long)from) != (long)CKSEG0) 672 return copy_page_cpu(to, from); 673 674 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | 675 M_DM_DSCRA_INTERRUPT; 676 page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); 677 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); 678 679 /* 680 * Don't really want to do it this way, but there's no 681 * reliable way to delay completion detection. 682 */ 683 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) 684 & M_DM_DSCR_BASE_INTERRUPT)) 685 ; 686 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); 687 } 688 689 #endif /* CONFIG_SIBYTE_DMA_PAGEOPS */ 690