1 /* 2 * This file is subject to the terms and conditions of the GNU General Public 3 * License. See the file "COPYING" in the main directory of this archive 4 * for more details. 5 * 6 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) 7 * Copyright (C) 2007 Maciej W. Rozycki 8 * Copyright (C) 2008 Thiemo Seufer 9 */ 10 #include <linux/init.h> 11 #include <linux/kernel.h> 12 #include <linux/sched.h> 13 #include <linux/mm.h> 14 #include <linux/module.h> 15 #include <linux/proc_fs.h> 16 17 #include <asm/bugs.h> 18 #include <asm/cacheops.h> 19 #include <asm/inst.h> 20 #include <asm/io.h> 21 #include <asm/page.h> 22 #include <asm/pgtable.h> 23 #include <asm/prefetch.h> 24 #include <asm/system.h> 25 #include <asm/bootinfo.h> 26 #include <asm/mipsregs.h> 27 #include <asm/mmu_context.h> 28 #include <asm/cpu.h> 29 #include <asm/war.h> 30 31 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS 32 #include <asm/sibyte/sb1250.h> 33 #include <asm/sibyte/sb1250_regs.h> 34 #include <asm/sibyte/sb1250_dma.h> 35 #endif 36 37 #include "uasm.h" 38 39 /* Registers used in the assembled routines. */ 40 #define ZERO 0 41 #define AT 2 42 #define A0 4 43 #define A1 5 44 #define A2 6 45 #define T0 8 46 #define T1 9 47 #define T2 10 48 #define T3 11 49 #define T9 25 50 #define RA 31 51 52 /* Handle labels (which must be positive integers). */ 53 enum label_id { 54 label_clear_nopref = 1, 55 label_clear_pref, 56 label_copy_nopref, 57 label_copy_pref_both, 58 label_copy_pref_store, 59 }; 60 61 UASM_L_LA(_clear_nopref) 62 UASM_L_LA(_clear_pref) 63 UASM_L_LA(_copy_nopref) 64 UASM_L_LA(_copy_pref_both) 65 UASM_L_LA(_copy_pref_store) 66 67 /* We need one branch and therefore one relocation per target label. */ 68 static struct uasm_label __cpuinitdata labels[5]; 69 static struct uasm_reloc __cpuinitdata relocs[5]; 70 71 #define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) 72 #define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) 73 74 /* 75 * Maximum sizes: 76 * 77 * R4000 128 bytes S-cache: 0x058 bytes 78 * R4600 v1.7: 0x05c bytes 79 * R4600 v2.0: 0x060 bytes 80 * With prefetching, 16 word strides 0x120 bytes 81 */ 82 83 static u32 clear_page_array[0x120 / 4]; 84 85 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS 86 void clear_page_cpu(void *page) __attribute__((alias("clear_page_array"))); 87 #else 88 void clear_page(void *page) __attribute__((alias("clear_page_array"))); 89 #endif 90 91 EXPORT_SYMBOL(clear_page); 92 93 /* 94 * Maximum sizes: 95 * 96 * R4000 128 bytes S-cache: 0x11c bytes 97 * R4600 v1.7: 0x080 bytes 98 * R4600 v2.0: 0x07c bytes 99 * With prefetching, 16 word strides 0x540 bytes 100 */ 101 static u32 copy_page_array[0x540 / 4]; 102 103 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS 104 void 105 copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array"))); 106 #else 107 void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); 108 #endif 109 110 EXPORT_SYMBOL(copy_page); 111 112 113 static int pref_bias_clear_store __cpuinitdata; 114 static int pref_bias_copy_load __cpuinitdata; 115 static int pref_bias_copy_store __cpuinitdata; 116 117 static u32 pref_src_mode __cpuinitdata; 118 static u32 pref_dst_mode __cpuinitdata; 119 120 static int clear_word_size __cpuinitdata; 121 static int copy_word_size __cpuinitdata; 122 123 static int half_clear_loop_size __cpuinitdata; 124 static int half_copy_loop_size __cpuinitdata; 125 126 static int cache_line_size __cpuinitdata; 127 #define cache_line_mask() (cache_line_size - 1) 128 129 static inline void __cpuinit 130 pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) 131 { 132 if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { 133 if (off > 0x7fff) { 134 uasm_i_lui(buf, T9, uasm_rel_hi(off)); 135 uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); 136 } else 137 uasm_i_addiu(buf, T9, ZERO, off); 138 uasm_i_daddu(buf, reg1, reg2, T9); 139 } else { 140 if (off > 0x7fff) { 141 uasm_i_lui(buf, T9, uasm_rel_hi(off)); 142 uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); 143 UASM_i_ADDU(buf, reg1, reg2, T9); 144 } else 145 UASM_i_ADDIU(buf, reg1, reg2, off); 146 } 147 } 148 149 static void __cpuinit set_prefetch_parameters(void) 150 { 151 if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) 152 clear_word_size = 8; 153 else 154 clear_word_size = 4; 155 156 if (cpu_has_64bit_gp_regs) 157 copy_word_size = 8; 158 else 159 copy_word_size = 4; 160 161 /* 162 * The pref's used here are using "streaming" hints, which cause the 163 * copied data to be kicked out of the cache sooner. A page copy often 164 * ends up copying a lot more data than is commonly used, so this seems 165 * to make sense in terms of reducing cache pollution, but I've no real 166 * performance data to back this up. 167 */ 168 if (cpu_has_prefetch) { 169 /* 170 * XXX: Most prefetch bias values in here are based on 171 * guesswork. 172 */ 173 cache_line_size = cpu_dcache_line_size(); 174 switch (current_cpu_type()) { 175 case CPU_R5500: 176 case CPU_TX49XX: 177 /* These processors only support the Pref_Load. */ 178 pref_bias_copy_load = 256; 179 break; 180 181 case CPU_RM9000: 182 /* 183 * As a workaround for erratum G105 which make the 184 * PrepareForStore hint unusable we fall back to 185 * StoreRetained on the RM9000. Once it is known which 186 * versions of the RM9000 we'll be able to condition- 187 * alize this. 188 */ 189 190 case CPU_R10000: 191 case CPU_R12000: 192 case CPU_R14000: 193 /* 194 * Those values have been experimentally tuned for an 195 * Origin 200. 196 */ 197 pref_bias_clear_store = 512; 198 pref_bias_copy_load = 256; 199 pref_bias_copy_store = 256; 200 pref_src_mode = Pref_LoadStreamed; 201 pref_dst_mode = Pref_StoreStreamed; 202 break; 203 204 case CPU_SB1: 205 case CPU_SB1A: 206 pref_bias_clear_store = 128; 207 pref_bias_copy_load = 128; 208 pref_bias_copy_store = 128; 209 /* 210 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed 211 * hints are broken. 212 */ 213 if (current_cpu_type() == CPU_SB1 && 214 (current_cpu_data.processor_id & 0xff) < 0x02) { 215 pref_src_mode = Pref_Load; 216 pref_dst_mode = Pref_Store; 217 } else { 218 pref_src_mode = Pref_LoadStreamed; 219 pref_dst_mode = Pref_StoreStreamed; 220 } 221 break; 222 223 default: 224 pref_bias_clear_store = 128; 225 pref_bias_copy_load = 256; 226 pref_bias_copy_store = 128; 227 pref_src_mode = Pref_LoadStreamed; 228 pref_dst_mode = Pref_PrepareForStore; 229 break; 230 } 231 } else { 232 if (cpu_has_cache_cdex_s) 233 cache_line_size = cpu_scache_line_size(); 234 else if (cpu_has_cache_cdex_p) 235 cache_line_size = cpu_dcache_line_size(); 236 } 237 /* 238 * Too much unrolling will overflow the available space in 239 * clear_space_array / copy_page_array. 240 */ 241 half_clear_loop_size = min(16 * clear_word_size, 242 max(cache_line_size >> 1, 243 4 * clear_word_size)); 244 half_copy_loop_size = min(16 * copy_word_size, 245 max(cache_line_size >> 1, 246 4 * copy_word_size)); 247 } 248 249 static void __cpuinit build_clear_store(u32 **buf, int off) 250 { 251 if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) { 252 uasm_i_sd(buf, ZERO, off, A0); 253 } else { 254 uasm_i_sw(buf, ZERO, off, A0); 255 } 256 } 257 258 static inline void __cpuinit build_clear_pref(u32 **buf, int off) 259 { 260 if (off & cache_line_mask()) 261 return; 262 263 if (pref_bias_clear_store) { 264 uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off, 265 A0); 266 } else if (cache_line_size == (half_clear_loop_size << 1)) { 267 if (cpu_has_cache_cdex_s) { 268 uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); 269 } else if (cpu_has_cache_cdex_p) { 270 if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { 271 uasm_i_nop(buf); 272 uasm_i_nop(buf); 273 uasm_i_nop(buf); 274 uasm_i_nop(buf); 275 } 276 277 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) 278 uasm_i_lw(buf, ZERO, ZERO, AT); 279 280 uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); 281 } 282 } 283 } 284 285 void __cpuinit build_clear_page(void) 286 { 287 int off; 288 u32 *buf = (u32 *)&clear_page_array; 289 struct uasm_label *l = labels; 290 struct uasm_reloc *r = relocs; 291 int i; 292 293 memset(labels, 0, sizeof(labels)); 294 memset(relocs, 0, sizeof(relocs)); 295 296 set_prefetch_parameters(); 297 298 /* 299 * This algorithm makes the following assumptions: 300 * - The prefetch bias is a multiple of 2 words. 301 * - The prefetch bias is less than one page. 302 */ 303 BUG_ON(pref_bias_clear_store % (2 * clear_word_size)); 304 BUG_ON(PAGE_SIZE < pref_bias_clear_store); 305 306 off = PAGE_SIZE - pref_bias_clear_store; 307 if (off > 0xffff || !pref_bias_clear_store) 308 pg_addiu(&buf, A2, A0, off); 309 else 310 uasm_i_ori(&buf, A2, A0, off); 311 312 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) 313 uasm_i_lui(&buf, AT, 0xa000); 314 315 off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size) 316 * cache_line_size : 0; 317 while (off) { 318 build_clear_pref(&buf, -off); 319 off -= cache_line_size; 320 } 321 uasm_l_clear_pref(&l, buf); 322 do { 323 build_clear_pref(&buf, off); 324 build_clear_store(&buf, off); 325 off += clear_word_size; 326 } while (off < half_clear_loop_size); 327 pg_addiu(&buf, A0, A0, 2 * off); 328 off = -off; 329 do { 330 build_clear_pref(&buf, off); 331 if (off == -clear_word_size) 332 uasm_il_bne(&buf, &r, A0, A2, label_clear_pref); 333 build_clear_store(&buf, off); 334 off += clear_word_size; 335 } while (off < 0); 336 337 if (pref_bias_clear_store) { 338 pg_addiu(&buf, A2, A0, pref_bias_clear_store); 339 uasm_l_clear_nopref(&l, buf); 340 off = 0; 341 do { 342 build_clear_store(&buf, off); 343 off += clear_word_size; 344 } while (off < half_clear_loop_size); 345 pg_addiu(&buf, A0, A0, 2 * off); 346 off = -off; 347 do { 348 if (off == -clear_word_size) 349 uasm_il_bne(&buf, &r, A0, A2, 350 label_clear_nopref); 351 build_clear_store(&buf, off); 352 off += clear_word_size; 353 } while (off < 0); 354 } 355 356 uasm_i_jr(&buf, RA); 357 uasm_i_nop(&buf); 358 359 BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array)); 360 361 uasm_resolve_relocs(relocs, labels); 362 363 pr_debug("Synthesized clear page handler (%u instructions).\n", 364 (u32)(buf - clear_page_array)); 365 366 pr_debug("\t.set push\n"); 367 pr_debug("\t.set noreorder\n"); 368 for (i = 0; i < (buf - clear_page_array); i++) 369 pr_debug("\t.word 0x%08x\n", clear_page_array[i]); 370 pr_debug("\t.set pop\n"); 371 } 372 373 static void __cpuinit build_copy_load(u32 **buf, int reg, int off) 374 { 375 if (cpu_has_64bit_gp_regs) { 376 uasm_i_ld(buf, reg, off, A1); 377 } else { 378 uasm_i_lw(buf, reg, off, A1); 379 } 380 } 381 382 static void __cpuinit build_copy_store(u32 **buf, int reg, int off) 383 { 384 if (cpu_has_64bit_gp_regs) { 385 uasm_i_sd(buf, reg, off, A0); 386 } else { 387 uasm_i_sw(buf, reg, off, A0); 388 } 389 } 390 391 static inline void build_copy_load_pref(u32 **buf, int off) 392 { 393 if (off & cache_line_mask()) 394 return; 395 396 if (pref_bias_copy_load) 397 uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1); 398 } 399 400 static inline void build_copy_store_pref(u32 **buf, int off) 401 { 402 if (off & cache_line_mask()) 403 return; 404 405 if (pref_bias_copy_store) { 406 uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off, 407 A0); 408 } else if (cache_line_size == (half_copy_loop_size << 1)) { 409 if (cpu_has_cache_cdex_s) { 410 uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); 411 } else if (cpu_has_cache_cdex_p) { 412 if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { 413 uasm_i_nop(buf); 414 uasm_i_nop(buf); 415 uasm_i_nop(buf); 416 uasm_i_nop(buf); 417 } 418 419 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) 420 uasm_i_lw(buf, ZERO, ZERO, AT); 421 422 uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); 423 } 424 } 425 } 426 427 void __cpuinit build_copy_page(void) 428 { 429 int off; 430 u32 *buf = (u32 *)©_page_array; 431 struct uasm_label *l = labels; 432 struct uasm_reloc *r = relocs; 433 int i; 434 435 memset(labels, 0, sizeof(labels)); 436 memset(relocs, 0, sizeof(relocs)); 437 438 set_prefetch_parameters(); 439 440 /* 441 * This algorithm makes the following assumptions: 442 * - All prefetch biases are multiples of 8 words. 443 * - The prefetch biases are less than one page. 444 * - The store prefetch bias isn't greater than the load 445 * prefetch bias. 446 */ 447 BUG_ON(pref_bias_copy_load % (8 * copy_word_size)); 448 BUG_ON(pref_bias_copy_store % (8 * copy_word_size)); 449 BUG_ON(PAGE_SIZE < pref_bias_copy_load); 450 BUG_ON(pref_bias_copy_store > pref_bias_copy_load); 451 452 off = PAGE_SIZE - pref_bias_copy_load; 453 if (off > 0xffff || !pref_bias_copy_load) 454 pg_addiu(&buf, A2, A0, off); 455 else 456 uasm_i_ori(&buf, A2, A0, off); 457 458 if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) 459 uasm_i_lui(&buf, AT, 0xa000); 460 461 off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) * 462 cache_line_size : 0; 463 while (off) { 464 build_copy_load_pref(&buf, -off); 465 off -= cache_line_size; 466 } 467 off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) * 468 cache_line_size : 0; 469 while (off) { 470 build_copy_store_pref(&buf, -off); 471 off -= cache_line_size; 472 } 473 uasm_l_copy_pref_both(&l, buf); 474 do { 475 build_copy_load_pref(&buf, off); 476 build_copy_load(&buf, T0, off); 477 build_copy_load_pref(&buf, off + copy_word_size); 478 build_copy_load(&buf, T1, off + copy_word_size); 479 build_copy_load_pref(&buf, off + 2 * copy_word_size); 480 build_copy_load(&buf, T2, off + 2 * copy_word_size); 481 build_copy_load_pref(&buf, off + 3 * copy_word_size); 482 build_copy_load(&buf, T3, off + 3 * copy_word_size); 483 build_copy_store_pref(&buf, off); 484 build_copy_store(&buf, T0, off); 485 build_copy_store_pref(&buf, off + copy_word_size); 486 build_copy_store(&buf, T1, off + copy_word_size); 487 build_copy_store_pref(&buf, off + 2 * copy_word_size); 488 build_copy_store(&buf, T2, off + 2 * copy_word_size); 489 build_copy_store_pref(&buf, off + 3 * copy_word_size); 490 build_copy_store(&buf, T3, off + 3 * copy_word_size); 491 off += 4 * copy_word_size; 492 } while (off < half_copy_loop_size); 493 pg_addiu(&buf, A1, A1, 2 * off); 494 pg_addiu(&buf, A0, A0, 2 * off); 495 off = -off; 496 do { 497 build_copy_load_pref(&buf, off); 498 build_copy_load(&buf, T0, off); 499 build_copy_load_pref(&buf, off + copy_word_size); 500 build_copy_load(&buf, T1, off + copy_word_size); 501 build_copy_load_pref(&buf, off + 2 * copy_word_size); 502 build_copy_load(&buf, T2, off + 2 * copy_word_size); 503 build_copy_load_pref(&buf, off + 3 * copy_word_size); 504 build_copy_load(&buf, T3, off + 3 * copy_word_size); 505 build_copy_store_pref(&buf, off); 506 build_copy_store(&buf, T0, off); 507 build_copy_store_pref(&buf, off + copy_word_size); 508 build_copy_store(&buf, T1, off + copy_word_size); 509 build_copy_store_pref(&buf, off + 2 * copy_word_size); 510 build_copy_store(&buf, T2, off + 2 * copy_word_size); 511 build_copy_store_pref(&buf, off + 3 * copy_word_size); 512 if (off == -(4 * copy_word_size)) 513 uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both); 514 build_copy_store(&buf, T3, off + 3 * copy_word_size); 515 off += 4 * copy_word_size; 516 } while (off < 0); 517 518 if (pref_bias_copy_load - pref_bias_copy_store) { 519 pg_addiu(&buf, A2, A0, 520 pref_bias_copy_load - pref_bias_copy_store); 521 uasm_l_copy_pref_store(&l, buf); 522 off = 0; 523 do { 524 build_copy_load(&buf, T0, off); 525 build_copy_load(&buf, T1, off + copy_word_size); 526 build_copy_load(&buf, T2, off + 2 * copy_word_size); 527 build_copy_load(&buf, T3, off + 3 * copy_word_size); 528 build_copy_store_pref(&buf, off); 529 build_copy_store(&buf, T0, off); 530 build_copy_store_pref(&buf, off + copy_word_size); 531 build_copy_store(&buf, T1, off + copy_word_size); 532 build_copy_store_pref(&buf, off + 2 * copy_word_size); 533 build_copy_store(&buf, T2, off + 2 * copy_word_size); 534 build_copy_store_pref(&buf, off + 3 * copy_word_size); 535 build_copy_store(&buf, T3, off + 3 * copy_word_size); 536 off += 4 * copy_word_size; 537 } while (off < half_copy_loop_size); 538 pg_addiu(&buf, A1, A1, 2 * off); 539 pg_addiu(&buf, A0, A0, 2 * off); 540 off = -off; 541 do { 542 build_copy_load(&buf, T0, off); 543 build_copy_load(&buf, T1, off + copy_word_size); 544 build_copy_load(&buf, T2, off + 2 * copy_word_size); 545 build_copy_load(&buf, T3, off + 3 * copy_word_size); 546 build_copy_store_pref(&buf, off); 547 build_copy_store(&buf, T0, off); 548 build_copy_store_pref(&buf, off + copy_word_size); 549 build_copy_store(&buf, T1, off + copy_word_size); 550 build_copy_store_pref(&buf, off + 2 * copy_word_size); 551 build_copy_store(&buf, T2, off + 2 * copy_word_size); 552 build_copy_store_pref(&buf, off + 3 * copy_word_size); 553 if (off == -(4 * copy_word_size)) 554 uasm_il_bne(&buf, &r, A2, A0, 555 label_copy_pref_store); 556 build_copy_store(&buf, T3, off + 3 * copy_word_size); 557 off += 4 * copy_word_size; 558 } while (off < 0); 559 } 560 561 if (pref_bias_copy_store) { 562 pg_addiu(&buf, A2, A0, pref_bias_copy_store); 563 uasm_l_copy_nopref(&l, buf); 564 off = 0; 565 do { 566 build_copy_load(&buf, T0, off); 567 build_copy_load(&buf, T1, off + copy_word_size); 568 build_copy_load(&buf, T2, off + 2 * copy_word_size); 569 build_copy_load(&buf, T3, off + 3 * copy_word_size); 570 build_copy_store(&buf, T0, off); 571 build_copy_store(&buf, T1, off + copy_word_size); 572 build_copy_store(&buf, T2, off + 2 * copy_word_size); 573 build_copy_store(&buf, T3, off + 3 * copy_word_size); 574 off += 4 * copy_word_size; 575 } while (off < half_copy_loop_size); 576 pg_addiu(&buf, A1, A1, 2 * off); 577 pg_addiu(&buf, A0, A0, 2 * off); 578 off = -off; 579 do { 580 build_copy_load(&buf, T0, off); 581 build_copy_load(&buf, T1, off + copy_word_size); 582 build_copy_load(&buf, T2, off + 2 * copy_word_size); 583 build_copy_load(&buf, T3, off + 3 * copy_word_size); 584 build_copy_store(&buf, T0, off); 585 build_copy_store(&buf, T1, off + copy_word_size); 586 build_copy_store(&buf, T2, off + 2 * copy_word_size); 587 if (off == -(4 * copy_word_size)) 588 uasm_il_bne(&buf, &r, A2, A0, 589 label_copy_nopref); 590 build_copy_store(&buf, T3, off + 3 * copy_word_size); 591 off += 4 * copy_word_size; 592 } while (off < 0); 593 } 594 595 uasm_i_jr(&buf, RA); 596 uasm_i_nop(&buf); 597 598 BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array)); 599 600 uasm_resolve_relocs(relocs, labels); 601 602 pr_debug("Synthesized copy page handler (%u instructions).\n", 603 (u32)(buf - copy_page_array)); 604 605 pr_debug("\t.set push\n"); 606 pr_debug("\t.set noreorder\n"); 607 for (i = 0; i < (buf - copy_page_array); i++) 608 pr_debug("\t.word 0x%08x\n", copy_page_array[i]); 609 pr_debug("\t.set pop\n"); 610 } 611 612 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS 613 614 /* 615 * Pad descriptors to cacheline, since each is exclusively owned by a 616 * particular CPU. 617 */ 618 struct dmadscr { 619 u64 dscr_a; 620 u64 dscr_b; 621 u64 pad_a; 622 u64 pad_b; 623 } ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS]; 624 625 void sb1_dma_init(void) 626 { 627 int i; 628 629 for (i = 0; i < DM_NUM_CHANNELS; i++) { 630 const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) | 631 V_DM_DSCR_BASE_RINGSZ(1); 632 void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); 633 634 __raw_writeq(base_val, base_reg); 635 __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); 636 __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); 637 } 638 } 639 640 void clear_page(void *page) 641 { 642 u64 to_phys = CPHYSADDR((unsigned long)page); 643 unsigned int cpu = smp_processor_id(); 644 645 /* if the page is not in KSEG0, use old way */ 646 if ((long)KSEGX((unsigned long)page) != (long)CKSEG0) 647 return clear_page_cpu(page); 648 649 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | 650 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; 651 page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); 652 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); 653 654 /* 655 * Don't really want to do it this way, but there's no 656 * reliable way to delay completion detection. 657 */ 658 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) 659 & M_DM_DSCR_BASE_INTERRUPT)) 660 ; 661 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); 662 } 663 664 void copy_page(void *to, void *from) 665 { 666 u64 from_phys = CPHYSADDR((unsigned long)from); 667 u64 to_phys = CPHYSADDR((unsigned long)to); 668 unsigned int cpu = smp_processor_id(); 669 670 /* if any page is not in KSEG0, use old way */ 671 if ((long)KSEGX((unsigned long)to) != (long)CKSEG0 672 || (long)KSEGX((unsigned long)from) != (long)CKSEG0) 673 return copy_page_cpu(to, from); 674 675 page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | 676 M_DM_DSCRA_INTERRUPT; 677 page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); 678 __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); 679 680 /* 681 * Don't really want to do it this way, but there's no 682 * reliable way to delay completion detection. 683 */ 684 while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) 685 & M_DM_DSCR_BASE_INTERRUPT)) 686 ; 687 __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); 688 } 689 690 #endif /* CONFIG_SIBYTE_DMA_PAGEOPS */ 691