/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2008 Thiemo Seufer
 * Copyright (C) 2012 MIPS Technologies, Inc.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/cpu-type.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prefetch.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include <asm/uasm.h>

/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31

/* Handle labels (which must be positive integers). */
enum label_id {
        label_clear_nopref = 1,
        label_clear_pref,
        label_copy_nopref,
        label_copy_pref_both,
        label_copy_pref_store,
};

UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label labels[5];
static struct uasm_reloc relocs[5];

#define cpu_is_r4600_v1_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * MIPS R6 has a limited offset field in the pref instruction.
 * Skip the prefetch if the offset does not fit in 9 bits.
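 * (A 9-bit signed offset covers -0x100 .. 0xff, which is exactly the range
 * the check in the macro below enforces.)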
 */
#define _uasm_i_pref(a, b, c, d)                        \
do {                                                    \
        if (cpu_has_mips_r6) {                          \
                if (c <= 0xff && c >= -0x100)           \
                        uasm_i_pref(a, b, c, d);        \
        } else {                                        \
                uasm_i_pref(a, b, c, d);                \
        }                                               \
} while(0)

static int pref_bias_clear_store;
static int pref_bias_copy_load;
static int pref_bias_copy_store;

static u32 pref_src_mode;
static u32 pref_dst_mode;

static int clear_word_size;
static int copy_word_size;

static int half_clear_loop_size;
static int half_copy_loop_size;

static int cache_line_size;
#define cache_line_mask() (cache_line_size - 1)

static inline void
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
        if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                } else
                        uasm_i_addiu(buf, T9, ZERO, off);
                uasm_i_daddu(buf, reg1, reg2, T9);
        } else {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                        UASM_i_ADDU(buf, reg1, reg2, T9);
                } else
                        UASM_i_ADDIU(buf, reg1, reg2, off);
        }
}

static void set_prefetch_parameters(void)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
                clear_word_size = 8;
        else
                clear_word_size = 4;

        if (cpu_has_64bit_gp_regs)
                copy_word_size = 8;
        else
                copy_word_size = 4;

        /*
         * The prefetches used here use "streaming" hints, which cause the
         * copied data to be kicked out of the cache sooner. A page copy often
         * ends up copying a lot more data than is commonly used, so this seems
         * to make sense in terms of reducing cache pollution, but I've no real
         * performance data to back this up.
         */
        if (cpu_has_prefetch) {
                /*
                 * XXX: Most prefetch bias values in here are based on
                 * guesswork.
                 */
                cache_line_size = cpu_dcache_line_size();
                switch (current_cpu_type()) {
                case CPU_R5500:
                case CPU_TX49XX:
                        /* These processors only support the Pref_Load hint. */
                        pref_bias_copy_load = 256;
                        break;

                case CPU_R10000:
                case CPU_R12000:
                case CPU_R14000:
                        /*
                         * Those values have been experimentally tuned for an
                         * Origin 200.
                         */
                        pref_bias_clear_store = 512;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 256;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_StoreStreamed;
                        break;

                case CPU_SB1:
                case CPU_SB1A:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 128;
                        pref_bias_copy_store = 128;
                        /*
                         * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
                         * hints are broken.
                         */
                        if (current_cpu_type() == CPU_SB1 &&
                            (current_cpu_data.processor_id & 0xff) < 0x02) {
                                pref_src_mode = Pref_Load;
                                pref_dst_mode = Pref_Store;
                        } else {
                                pref_src_mode = Pref_LoadStreamed;
                                pref_dst_mode = Pref_StoreStreamed;
                        }
                        break;

                default:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 128;
                        pref_src_mode = Pref_LoadStreamed;
                        if (cpu_has_mips_r6)
                                /*
                                 * Bit 30 (Pref_PrepareForStore) has been
                                 * removed from MIPS R6. Use bit 5
                                 * (Pref_StoreStreamed).
                                 */
                                pref_dst_mode = Pref_StoreStreamed;
                        else
                                pref_dst_mode = Pref_PrepareForStore;
                        break;
                }
        } else {
                if (cpu_has_cache_cdex_s)
                        cache_line_size = cpu_scache_line_size();
                else if (cpu_has_cache_cdex_p)
                        cache_line_size = cpu_dcache_line_size();
        }
        /*
         * Too much unrolling will overflow the available space in
         * clear_space_array / copy_page_array.
         */
        half_clear_loop_size = min(16 * clear_word_size,
                                   max(cache_line_size >> 1,
                                       4 * clear_word_size));
        half_copy_loop_size = min(16 * copy_word_size,
                                  max(cache_line_size >> 1,
                                      4 * copy_word_size));
}

static void build_clear_store(u32 **buf, int off)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
                uasm_i_sd(buf, ZERO, off, A0);
        } else {
                uasm_i_sw(buf, ZERO, off, A0);
        }
}

static inline void build_clear_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_clear_store) {
                _uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
                             A0);
        } else if (cache_line_size == (half_clear_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}

extern u32 __clear_page_start;
extern u32 __clear_page_end;
extern u32 __copy_page_start;
extern u32 __copy_page_end;

void build_clear_page(void)
{
        int off;
        u32 *buf = &__clear_page_start;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;
        static atomic_t run_once = ATOMIC_INIT(0);

        if (atomic_xchg(&run_once, 1)) {
                return;
        }

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - The prefetch bias is a multiple of 2 words.
         *   - The prefetch bias is less than one page.
         */
        BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_clear_store);

        off = PAGE_SIZE - pref_bias_clear_store;
        if (off > 0xffff || !pref_bias_clear_store)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

        off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
                                * cache_line_size : 0;
        while (off) {
                build_clear_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_clear_pref(&l, buf);
        do {
                build_clear_pref(&buf, off);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < half_clear_loop_size);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_clear_pref(&buf, off);
                if (off == -clear_word_size)
                        uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < 0);

        if (pref_bias_clear_store) {
                pg_addiu(&buf, A2, A0, pref_bias_clear_store);
                uasm_l_clear_nopref(&l, buf);
                off = 0;
                do {
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < half_clear_loop_size);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        if (off == -clear_word_size)
                                uasm_il_bne(&buf, &r, A0, A2,
                                            label_clear_nopref);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > &__clear_page_end);

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized clear page handler (%u instructions).\n",
                 (u32)(buf - &__clear_page_start));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - &__clear_page_start); i++)
                pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
        pr_debug("\t.set pop\n");
}

static void build_copy_load(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_ld(buf, reg, off, A1);
        } else {
                uasm_i_lw(buf, reg, off, A1);
        }
}

static void build_copy_store(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_sd(buf, reg, off, A0);
        } else {
                uasm_i_sw(buf, reg, off, A0);
        }
}

static inline void build_copy_load_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_load)
                _uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

static inline void build_copy_store_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_store) {
                _uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
                             A0);
        } else if (cache_line_size == (half_copy_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}

void build_copy_page(void)
{
        int off;
        u32 *buf = &__copy_page_start;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;
        static atomic_t run_once = ATOMIC_INIT(0);

        if (atomic_xchg(&run_once, 1)) {
                return;
        }

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - All prefetch biases are multiples of 8 words.
         *   - The prefetch biases are less than one page.
         *   - The store prefetch bias isn't greater than the load
         *     prefetch bias.
         */
        BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
        BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_copy_load);
        BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

        off = PAGE_SIZE - pref_bias_copy_load;
        if (off > 0xffff || !pref_bias_copy_load)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

        off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_load_pref(&buf, -off);
                off -= cache_line_size;
        }
        off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_store_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_copy_pref_both(&l, buf);
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < half_copy_loop_size);
        pg_addiu(&buf, A1, A1, 2 * off);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                if (off == -(4 * copy_word_size))
                        uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < 0);

        if (pref_bias_copy_load - pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0,
                         pref_bias_copy_load - pref_bias_copy_store);
                uasm_l_copy_pref_store(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_pref_store);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        if (pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0, pref_bias_copy_store);
                uasm_l_copy_nopref(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_nopref);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > &__copy_page_end);

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized copy page handler (%u instructions).\n",
                 (u32)(buf - &__copy_page_start));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - &__copy_page_start); i++)
                pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
        pr_debug("\t.set pop\n");
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
extern void clear_page_cpu(void *page);
extern void copy_page_cpu(void *to, void *from);

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
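 * (Keeping each descriptor in its own cache line avoids false sharing
 * between CPUs.)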
 */
struct dmadscr {
        u64 dscr_a;
        u64 dscr_b;
        u64 pad_a;
        u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];

void sb1_dma_init(void)
{
        int i;

        for (i = 0; i < DM_NUM_CHANNELS; i++) {
                const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
                                     V_DM_DSCR_BASE_RINGSZ(1);
                void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));

                __raw_writeq(base_val, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
        }
}

void clear_page(void *page)
{
        u64 to_phys = CPHYSADDR((unsigned long)page);
        unsigned int cpu = smp_processor_id();

        /* If the page is not in KSEG0, use the old way. */
        if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
                return clear_page_cpu(page);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
                                 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

void copy_page(void *to, void *from)
{
        u64 from_phys = CPHYSADDR((unsigned long)from);
        u64 to_phys = CPHYSADDR((unsigned long)to);
        unsigned int cpu = smp_processor_id();

        /* If either page is not in KSEG0, use the old way. */
        if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
            || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
                return copy_page_cpu(to, from);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
                                 M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */