/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007 Maciej W. Rozycki
 * Copyright (C) 2008 Thiemo Seufer
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prefetch.h>
#include <asm/system.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include "uasm.h"

/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31

/* Handle labels (which must be positive integers). */
enum label_id {
        label_clear_nopref = 1,
        label_clear_pref,
        label_copy_nopref,
        label_copy_pref_both,
        label_copy_pref_store,
};

UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label __cpuinitdata labels[5];
static struct uasm_reloc __cpuinitdata relocs[5];

#define cpu_is_r4600_v1_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:             0x058 bytes
 * R4600 v1.7:                          0x05c bytes
 * R4600 v2.0:                          0x060 bytes
 * With prefetching, 16 word strides    0x120 bytes
 */

static u32 clear_page_array[0x120 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
#else
void clear_page(void *page) __attribute__((alias("clear_page_array")));
#endif

EXPORT_SYMBOL(clear_page);

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:             0x11c bytes
 * R4600 v1.7:                          0x080 bytes
 * R4600 v2.0:                          0x07c bytes
 * With prefetching, 16 word strides    0x540 bytes
 */
static u32 copy_page_array[0x540 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void
copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
#else
void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
#endif

EXPORT_SYMBOL(copy_page);


static int pref_bias_clear_store __cpuinitdata;
static int pref_bias_copy_load __cpuinitdata;
static int pref_bias_copy_store __cpuinitdata;

static u32 pref_src_mode __cpuinitdata;
static u32 pref_dst_mode __cpuinitdata;

static int clear_word_size __cpuinitdata;
static int copy_word_size __cpuinitdata;

static int half_clear_loop_size __cpuinitdata;
static int half_copy_loop_size __cpuinitdata;

static int cache_line_size __cpuinitdata;
#define cache_line_mask() (cache_line_size - 1)

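/*
 * Emit code for "reg1 = reg2 + off".  Offsets that do not fit a 16-bit
 * signed immediate are first materialized in T9; on CPUs that need the
 * daddiu workaround (r4k_daddiu_bug()) the addition is always done with
 * daddu via T9 so the buggy immediate form is never generated.
 */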
static inline void __cpuinit
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
        if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                } else
                        uasm_i_addiu(buf, T9, ZERO, off);
                uasm_i_daddu(buf, reg1, reg2, T9);
        } else {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                        UASM_i_ADDU(buf, reg1, reg2, T9);
                } else
                        UASM_i_ADDIU(buf, reg1, reg2, off);
        }
}

static void __cpuinit set_prefetch_parameters(void)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
                clear_word_size = 8;
        else
                clear_word_size = 4;

        if (cpu_has_64bit_gp_regs)
                copy_word_size = 8;
        else
                copy_word_size = 4;

        /*
         * The prefetches used here use "streaming" hints, which cause the
         * copied data to be kicked out of the cache sooner.  A page copy
         * often ends up copying a lot more data than is commonly used, so
         * this seems to make sense in terms of reducing cache pollution,
         * but I've no real performance data to back this up.
         */
        if (cpu_has_prefetch) {
                /*
                 * XXX: Most prefetch bias values in here are based on
                 * guesswork.
                 */
                cache_line_size = cpu_dcache_line_size();
                switch (current_cpu_type()) {
                case CPU_R5500:
                case CPU_TX49XX:
                        /* These processors only support the Pref_Load hint. */
                        pref_bias_copy_load = 256;
                        break;

                case CPU_RM9000:
                        /*
                         * As a workaround for erratum G105, which makes the
                         * PrepareForStore hint unusable, we fall back to
                         * StoreRetained on the RM9000.  Once it is known which
                         * versions of the RM9000 are affected, we'll be able
                         * to conditionalize this.
                         */

                case CPU_R10000:
                case CPU_R12000:
                case CPU_R14000:
                        /*
                         * These values have been experimentally tuned for an
                         * Origin 200.
                         */
                        pref_bias_clear_store = 512;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 256;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_StoreStreamed;
                        break;

                case CPU_SB1:
                case CPU_SB1A:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 128;
                        pref_bias_copy_store = 128;
                        /*
                         * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
                         * hints are broken.
                         */
                        if (current_cpu_type() == CPU_SB1 &&
                            (current_cpu_data.processor_id & 0xff) < 0x02) {
                                pref_src_mode = Pref_Load;
                                pref_dst_mode = Pref_Store;
                        } else {
                                pref_src_mode = Pref_LoadStreamed;
                                pref_dst_mode = Pref_StoreStreamed;
                        }
                        break;

                default:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 128;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_PrepareForStore;
                        break;
                }
        } else {
                if (cpu_has_cache_cdex_s)
                        cache_line_size = cpu_scache_line_size();
                else if (cpu_has_cache_cdex_p)
                        cache_line_size = cpu_dcache_line_size();
        }
        /*
         * Too much unrolling will overflow the available space in
         * clear_page_array / copy_page_array.
         */
        half_clear_loop_size = min(16 * clear_word_size,
                                   max(cache_line_size >> 1,
                                       4 * clear_word_size));
        half_copy_loop_size = min(16 * copy_word_size,
                                  max(cache_line_size >> 1,
                                      4 * copy_word_size));
}

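/*
 * build_clear_store() emits a single zeroing store.  build_clear_pref()
 * emits at most one operation per cache line: a prefetch with the
 * configured store hint, or, when no store-prefetch bias is set, a
 * "create dirty exclusive" cache op (if the CPU has one and one loop
 * iteration covers exactly one cache line), including the R4600
 * cache-op workarounds.
 */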
static void __cpuinit build_clear_store(u32 **buf, int off)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
                uasm_i_sd(buf, ZERO, off, A0);
        } else {
                uasm_i_sw(buf, ZERO, off, A0);
        }
}

static inline void __cpuinit build_clear_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_clear_store) {
                uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
                            A0);
        } else if (cache_line_size == (half_clear_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}

void __cpuinit build_clear_page(void)
{
        int off;
        u32 *buf = (u32 *)&clear_page_array;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - The prefetch bias is a multiple of 2 words.
         *   - The prefetch bias is less than one page.
         */
        BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_clear_store);

        off = PAGE_SIZE - pref_bias_clear_store;
        if (off > 0xffff || !pref_bias_clear_store)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, 0xa000);

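        /*
         * Issue up to eight initial prefetches (one per cache line) to
         * fill the prefetch window before the main clearing loop.
         */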
        off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
                                * cache_line_size : 0;
        while (off) {
                build_clear_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_clear_pref(&l, buf);
        do {
                build_clear_pref(&buf, off);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < half_clear_loop_size);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_clear_pref(&buf, off);
                if (off == -clear_word_size)
                        uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < 0);

        if (pref_bias_clear_store) {
                pg_addiu(&buf, A2, A0, pref_bias_clear_store);
                uasm_l_clear_nopref(&l, buf);
                off = 0;
                do {
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < half_clear_loop_size);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        if (off == -clear_word_size)
                                uasm_il_bne(&buf, &r, A0, A2,
                                            label_clear_nopref);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized clear page handler (%u instructions).\n",
                 (u32)(buf - clear_page_array));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - clear_page_array); i++)
                pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
        pr_debug("\t.set pop\n");
}

static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_ld(buf, reg, off, A1);
        } else {
                uasm_i_lw(buf, reg, off, A1);
        }
}

static void __cpuinit build_copy_store(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_sd(buf, reg, off, A0);
        } else {
                uasm_i_sw(buf, reg, off, A0);
        }
}

static inline void build_copy_load_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_load)
                uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

static inline void build_copy_store_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_store) {
                uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
                            A0);
        } else if (cache_line_size == (half_copy_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}

void __cpuinit build_copy_page(void)
{
        int off;
        u32 *buf = (u32 *)&copy_page_array;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - All prefetch biases are multiples of 8 words.
         *   - The prefetch biases are less than one page.
         *   - The store prefetch bias isn't greater than the load
         *     prefetch bias.
         */
        BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
        BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_copy_load);
        BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

        off = PAGE_SIZE - pref_bias_copy_load;
        if (off > 0xffff || !pref_bias_copy_load)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, 0xa000);

        off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size)
                                * cache_line_size : 0;
        while (off) {
                build_copy_load_pref(&buf, -off);
                off -= cache_line_size;
        }
        off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size)
                                * cache_line_size : 0;
        while (off) {
                build_copy_store_pref(&buf, -off);
                off -= cache_line_size;
        }
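        /*
         * Main copy loop: each generated iteration moves
         * 2 * half_copy_loop_size bytes, four words at a time through
         * T0-T3, issuing load and store prefetches, and branches back to
         * copy_pref_both until A0 reaches the end address held in A2.
         */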
        uasm_l_copy_pref_both(&l, buf);
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < half_copy_loop_size);
        pg_addiu(&buf, A1, A1, 2 * off);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                if (off == -(4 * copy_word_size))
                        uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < 0);

        if (pref_bias_copy_load - pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0,
                         pref_bias_copy_load - pref_bias_copy_store);
                uasm_l_copy_pref_store(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_pref_store);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

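        /*
         * Final stretch: copy the last pref_bias_copy_store bytes without
         * any prefetches, since prefetching here would reach past the end
         * of the page.
         */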
        if (pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0, pref_bias_copy_store);
                uasm_l_copy_nopref(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_nopref);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized copy page handler (%u instructions).\n",
                 (u32)(buf - copy_page_array));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - copy_page_array); i++)
                pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
        pr_debug("\t.set pop\n");
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

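/*
 * SiByte parts can offload page clears and copies to the on-chip data
 * mover ("DM") engine.  Only pages in KSEG0 are handled this way; anything
 * else falls back to the generated clear_page_cpu()/copy_page_cpu()
 * routines above.
 */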
/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
        u64 dscr_a;
        u64 dscr_b;
        u64 pad_a;
        u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];

void sb1_dma_init(void)
{
        int i;

        for (i = 0; i < DM_NUM_CHANNELS; i++) {
                const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
                                     V_DM_DSCR_BASE_RINGSZ(1);
                void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));

                __raw_writeq(base_val, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
        }
}

void clear_page(void *page)
{
        u64 to_phys = CPHYSADDR((unsigned long)page);
        unsigned int cpu = smp_processor_id();

        /* if the page is not in KSEG0, use old way */
        if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
                return clear_page_cpu(page);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
                                 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

void copy_page(void *to, void *from)
{
        u64 from_phys = CPHYSADDR((unsigned long)from);
        u64 to_phys = CPHYSADDR((unsigned long)to);
        unsigned int cpu = smp_processor_id();

        /* if any page is not in KSEG0, use old way */
        if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
            || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
                return copy_page_cpu(to, from);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
                                 M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */