/*
 * include/asm-alpha/xor.h
 *
 * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Entry points implemented by the file-scope asm() block below.
 *
 * xor_alpha_N takes a length followed by N buffer pointers.  The assembly
 * uses $16 as the length and $17..$21 as the pointers, in the same order
 * as these prototypes (NOTE(review): this relies on the standard Alpha
 * argument registers -- confirm).
 *
 * Each routine shifts the length right by 6 to obtain an iteration count,
 * which makes the first argument a byte count.  Every iteration loads
 * eight quadwords (offsets 0..56, i.e. 64 bytes) from each buffer, XORs
 * the corresponding quadwords together and stores the result through the
 * first pointer only; the remaining buffers are only read.  All pointers
 * are then advanced by 64.
 *
 * The counter is tested at the bottom of the loop (bgt after the body),
 * so one 64-byte chunk is processed even when the length is below 64.
 * NOTE(review): callers presumably always pass a non-zero multiple of
 * 64 bytes -- confirm.
 */
extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
		        unsigned long *);
extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
		        unsigned long *, unsigned long *);
extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
		        unsigned long *, unsigned long *, unsigned long *);

/*
 * The _prefetch_ variants use the same load/XOR/store pattern, but also
 * issue "ldq $31, ..." loads whose destination is $31: before the loop at
 * offsets 0, 64, 128 and 192 from every buffer pointer, and inside the
 * loop at offset 256 from every buffer pointer.
 * NOTE(review): $31 is presumably the always-zero register, so these loads
 * act purely as cache prefetches.  The in-loop ones reference addresses
 * up to 256 bytes beyond the chunk being processed -- confirm this is
 * harmless at the end of a buffer.
 */
extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
				 unsigned long *);
extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
				 unsigned long *, unsigned long *);
extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
				 unsigned long *, unsigned long *,
				 unsigned long *);
extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
				 unsigned long *, unsigned long *,
				 unsigned long *, unsigned long *);

/*
 * Assembly bodies of the eight routines declared above, emitted into
 * .text.  Instructions are written in groups of four separated by blank
 * lines, and the "# N cycles from $R load" remarks inside the string are
 * the original scheduling notes; the instruction order must not be
 * changed casually.
 *
 * The numeric labels 2:, 3:, 4: and 5: are each defined twice (once in
 * the plain routine and once in the prefetch routine); every "Nb" branch
 * refers backwards to the nearest preceding definition.
 */
asm("								\n\
	.text							\n\
	.align 3						\n\
	.ent xor_alpha_2					\n\
xor_alpha_2:							\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
	.align 4						\n\
2:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,8($17)						\n\
	ldq $3,8($18)						\n\
								\n\
	ldq $4,16($17)						\n\
	ldq $5,16($18)						\n\
	ldq $6,24($17)						\n\
	ldq $7,24($18)						\n\
								\n\
	ldq $19,32($17)						\n\
	ldq $20,32($18)						\n\
	ldq $21,40($17)						\n\
	ldq $22,40($18)						\n\
								\n\
	ldq $23,48($17)						\n\
	ldq $24,48($18)						\n\
	ldq $25,56($17)						\n\
	xor $0,$1,$0		# 7 cycles from $1 load		\n\
								\n\
	ldq $27,56($18)						\n\
	xor $2,$3,$2						\n\
	stq $0,0($17)						\n\
	xor $4,$5,$4						\n\
								\n\
	stq $2,8($17)						\n\
	xor $6,$7,$6						\n\
	stq $4,16($17)						\n\
	xor $19,$20,$19						\n\
								\n\
	stq $6,24($17)						\n\
	xor $21,$22,$21						\n\
	stq $19,32($17)						\n\
	xor $23,$24,$23						\n\
								\n\
	stq $21,40($17)						\n\
	xor $25,$27,$25						\n\
	stq $23,48($17)						\n\
	subq $16,1,$16						\n\
								\n\
	stq $25,56($17)						\n\
	addq $17,64,$17						\n\
	addq $18,64,$18						\n\
	bgt $16,2b						\n\
								\n\
	ret							\n\
	.end xor_alpha_2					\n\
								\n\
	.align 3						\n\
	.ent xor_alpha_3					\n\
xor_alpha_3:							\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
	.align 4						\n\
3:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,0($19)						\n\
	ldq $3,8($17)						\n\
								\n\
	ldq $4,8($18)						\n\
	ldq $6,16($17)						\n\
	ldq $7,16($18)						\n\
	ldq $21,24($17)						\n\
								\n\
	ldq $22,24($18)						\n\
	ldq $24,32($17)						\n\
	ldq $25,32($18)						\n\
	ldq $5,8($19)						\n\
								\n\
	ldq $20,16($19)						\n\
	ldq $23,24($19)						\n\
	ldq $27,32($19)						\n\
	nop							\n\
								\n\
	xor $0,$1,$1		# 8 cycles from $0 load		\n\
	xor $3,$4,$4		# 6 cycles from $4 load		\n\
	xor $6,$7,$7		# 6 cycles from $7 load		\n\
	xor $21,$22,$22		# 5 cycles from $22 load	\n\
								\n\
	xor $1,$2,$2		# 9 cycles from $2 load		\n\
	xor $24,$25,$25		# 5 cycles from $25 load	\n\
	stq $2,0($17)						\n\
	xor $4,$5,$5		# 6 cycles from $5 load		\n\
								\n\
	stq $5,8($17)						\n\
	xor $7,$20,$20		# 7 cycles from $20 load	\n\
	stq $20,16($17)						\n\
	xor $22,$23,$23		# 7 cycles from $23 load	\n\
								\n\
	stq $23,24($17)						\n\
	xor $25,$27,$27		# 7 cycles from $27 load	\n\
	stq $27,32($17)						\n\
	nop							\n\
								\n\
	ldq $0,40($17)						\n\
	ldq $1,40($18)						\n\
	ldq $3,48($17)						\n\
	ldq $4,48($18)						\n\
								\n\
	ldq $6,56($17)						\n\
	ldq $7,56($18)						\n\
	ldq $2,40($19)						\n\
	ldq $5,48($19)						\n\
								\n\
	ldq $20,56($19)						\n\
	xor $0,$1,$1		# 4 cycles from $1 load		\n\
	xor $3,$4,$4		# 5 cycles from $4 load		\n\
	xor $6,$7,$7		# 5 cycles from $7 load		\n\
								\n\
	xor $1,$2,$2		# 4 cycles from $2 load		\n\
	xor $4,$5,$5		# 5 cycles from $5 load		\n\
	stq $2,40($17)						\n\
	xor $7,$20,$20		# 4 cycles from $20 load	\n\
								\n\
	stq $5,48($17)						\n\
	subq $16,1,$16						\n\
	stq $20,56($17)						\n\
	addq $19,64,$19						\n\
								\n\
	addq $18,64,$18						\n\
	addq $17,64,$17						\n\
	bgt $16,3b						\n\
	ret							\n\
	.end xor_alpha_3					\n\
								\n\
	.align 3						\n\
	.ent xor_alpha_4					\n\
xor_alpha_4:							\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
	.align 4						\n\
4:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,0($19)						\n\
	ldq $3,0($20)						\n\
								\n\
	ldq $4,8($17)						\n\
	ldq $5,8($18)						\n\
	ldq $6,8($19)						\n\
	ldq $7,8($20)						\n\
								\n\
	ldq $21,16($17)						\n\
	ldq $22,16($18)						\n\
	ldq $23,16($19)						\n\
	ldq $24,16($20)						\n\
								\n\
	ldq $25,24($17)						\n\
	xor $0,$1,$1		# 6 cycles from $1 load		\n\
	ldq $27,24($18)						\n\
	xor $2,$3,$3		# 6 cycles from $3 load		\n\
								\n\
	ldq $0,24($19)						\n\
	xor $1,$3,$3						\n\
	ldq $1,24($20)						\n\
	xor $4,$5,$5		# 7 cycles from $5 load		\n\
								\n\
	stq $3,0($17)						\n\
	xor $6,$7,$7						\n\
	xor $21,$22,$22		# 7 cycles from $22 load	\n\
	xor $5,$7,$7						\n\
								\n\
	stq $7,8($17)						\n\
	xor $23,$24,$24		# 7 cycles from $24 load	\n\
	ldq $2,32($17)						\n\
	xor $22,$24,$24						\n\
								\n\
	ldq $3,32($18)						\n\
	ldq $4,32($19)						\n\
	ldq $5,32($20)						\n\
	xor $25,$27,$27		# 8 cycles from $27 load	\n\
								\n\
	ldq $6,40($17)						\n\
	ldq $7,40($18)						\n\
	ldq $21,40($19)						\n\
	ldq $22,40($20)						\n\
								\n\
	stq $24,16($17)						\n\
	xor $0,$1,$1		# 9 cycles from $1 load		\n\
	xor $2,$3,$3		# 5 cycles from $3 load		\n\
	xor $27,$1,$1						\n\
								\n\
	stq $1,24($17)						\n\
	xor $4,$5,$5		# 5 cycles from $5 load		\n\
	ldq $23,48($17)						\n\
	ldq $24,48($18)						\n\
								\n\
	ldq $25,48($19)						\n\
	xor $3,$5,$5						\n\
	ldq $27,48($20)						\n\
	ldq $0,56($17)						\n\
								\n\
	ldq $1,56($18)						\n\
	ldq $2,56($19)						\n\
	xor $6,$7,$7		# 8 cycles from $6 load		\n\
	ldq $3,56($20)						\n\
								\n\
	stq $5,32($17)						\n\
	xor $21,$22,$22		# 8 cycles from $22 load	\n\
	xor $7,$22,$22						\n\
	xor $23,$24,$24		# 5 cycles from $24 load	\n\
								\n\
	stq $22,40($17)						\n\
	xor $25,$27,$27		# 5 cycles from $27 load	\n\
	xor $24,$27,$27						\n\
	xor $0,$1,$1		# 5 cycles from $1 load		\n\
								\n\
	stq $27,48($17)						\n\
	xor $2,$3,$3		# 4 cycles from $3 load		\n\
	xor $1,$3,$3						\n\
	subq $16,1,$16						\n\
								\n\
	stq $3,56($17)						\n\
	addq $20,64,$20						\n\
	addq $19,64,$19						\n\
	addq $18,64,$18						\n\
								\n\
	addq $17,64,$17						\n\
	bgt $16,4b						\n\
	ret							\n\
	.end xor_alpha_4					\n\
								\n\
	.align 3						\n\
	.ent xor_alpha_5					\n\
xor_alpha_5:							\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
	.align 4						\n\
5:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,0($19)						\n\
	ldq $3,0($20)						\n\
								\n\
	ldq $4,0($21)						\n\
	ldq $5,8($17)						\n\
	ldq $6,8($18)						\n\
	ldq $7,8($19)						\n\
								\n\
	ldq $22,8($20)						\n\
	ldq $23,8($21)						\n\
	ldq $24,16($17)						\n\
	ldq $25,16($18)						\n\
								\n\
	ldq $27,16($19)						\n\
	xor $0,$1,$1		# 6 cycles from $1 load		\n\
	ldq $28,16($20)						\n\
	xor $2,$3,$3		# 6 cycles from $3 load		\n\
								\n\
	ldq $0,16($21)						\n\
	xor $1,$3,$3						\n\
	ldq $1,24($17)						\n\
	xor $3,$4,$4		# 7 cycles from $4 load		\n\
								\n\
	stq $4,0($17)						\n\
	xor $5,$6,$6		# 7 cycles from $6 load		\n\
	xor $7,$22,$22		# 7 cycles from $22 load	\n\
	xor $6,$23,$23		# 7 cycles from $23 load	\n\
								\n\
	ldq $2,24($18)						\n\
	xor $22,$23,$23						\n\
	ldq $3,24($19)						\n\
	xor $24,$25,$25		# 8 cycles from $25 load	\n\
								\n\
	stq $23,8($17)						\n\
	xor $25,$27,$27		# 8 cycles from $27 load	\n\
	ldq $4,24($20)						\n\
	xor $28,$0,$0		# 7 cycles from $0 load		\n\
								\n\
	ldq $5,24($21)						\n\
	xor $27,$0,$0						\n\
	ldq $6,32($17)						\n\
	ldq $7,32($18)						\n\
								\n\
	stq $0,16($17)						\n\
	xor $1,$2,$2		# 6 cycles from $2 load		\n\
	ldq $22,32($19)						\n\
	xor $3,$4,$4		# 4 cycles from $4 load		\n\
								\n\
	ldq $23,32($20)						\n\
	xor $2,$4,$4						\n\
	ldq $24,32($21)						\n\
	ldq $25,40($17)						\n\
								\n\
	ldq $27,40($18)						\n\
	ldq $28,40($19)						\n\
	ldq $0,40($20)						\n\
	xor $4,$5,$5		# 7 cycles from $5 load		\n\
								\n\
	stq $5,24($17)						\n\
	xor $6,$7,$7		# 7 cycles from $7 load		\n\
	ldq $1,40($21)						\n\
	ldq $2,48($17)						\n\
								\n\
	ldq $3,48($18)						\n\
	xor $7,$22,$22		# 7 cycles from $22 load	\n\
	ldq $4,48($19)						\n\
	xor $23,$24,$24		# 6 cycles from $24 load	\n\
								\n\
	ldq $5,48($20)						\n\
	xor $22,$24,$24						\n\
	ldq $6,48($21)						\n\
	xor $25,$27,$27		# 7 cycles from $27 load	\n\
								\n\
	stq $24,32($17)						\n\
	xor $27,$28,$28		# 8 cycles from $28 load	\n\
	ldq $7,56($17)						\n\
	xor $0,$1,$1		# 6 cycles from $1 load		\n\
								\n\
	ldq $22,56($18)						\n\
	ldq $23,56($19)						\n\
	ldq $24,56($20)						\n\
	ldq $25,56($21)						\n\
								\n\
	xor $28,$1,$1						\n\
	xor $2,$3,$3		# 9 cycles from $3 load		\n\
	xor $3,$4,$4		# 9 cycles from $4 load		\n\
	xor $5,$6,$6		# 8 cycles from $6 load		\n\
								\n\
	stq $1,40($17)						\n\
	xor $4,$6,$6						\n\
	xor $7,$22,$22		# 7 cycles from $22 load	\n\
	xor $23,$24,$24		# 6 cycles from $24 load	\n\
								\n\
	stq $6,48($17)						\n\
	xor $22,$24,$24						\n\
	subq $16,1,$16						\n\
	xor $24,$25,$25		# 8 cycles from $25 load	\n\
								\n\
	stq $25,56($17)						\n\
	addq $21,64,$21						\n\
	addq $20,64,$20						\n\
	addq $19,64,$19						\n\
								\n\
	addq $18,64,$18						\n\
	addq $17,64,$17						\n\
	bgt $16,5b						\n\
	ret							\n\
	.end xor_alpha_5					\n\
								\n\
	.align 3						\n\
	.ent xor_alpha_prefetch_2				\n\
xor_alpha_prefetch_2:						\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
								\n\
	ldq $31, 0($17)						\n\
	ldq $31, 0($18)						\n\
								\n\
	ldq $31, 64($17)					\n\
	ldq $31, 64($18)					\n\
								\n\
	ldq $31, 128($17)					\n\
	ldq $31, 128($18)					\n\
								\n\
	ldq $31, 192($17)					\n\
	ldq $31, 192($18)					\n\
	.align 4						\n\
2:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,8($17)						\n\
	ldq $3,8($18)						\n\
								\n\
	ldq $4,16($17)						\n\
	ldq $5,16($18)						\n\
	ldq $6,24($17)						\n\
	ldq $7,24($18)						\n\
								\n\
	ldq $19,32($17)						\n\
	ldq $20,32($18)						\n\
	ldq $21,40($17)						\n\
	ldq $22,40($18)						\n\
								\n\
	ldq $23,48($17)						\n\
	ldq $24,48($18)						\n\
	ldq $25,56($17)						\n\
	ldq $27,56($18)						\n\
								\n\
	ldq $31,256($17)					\n\
	xor $0,$1,$0		# 8 cycles from $1 load		\n\
	ldq $31,256($18)					\n\
	xor $2,$3,$2						\n\
								\n\
	stq $0,0($17)						\n\
	xor $4,$5,$4						\n\
	stq $2,8($17)						\n\
	xor $6,$7,$6						\n\
								\n\
	stq $4,16($17)						\n\
	xor $19,$20,$19						\n\
	stq $6,24($17)						\n\
	xor $21,$22,$21						\n\
								\n\
	stq $19,32($17)						\n\
	xor $23,$24,$23						\n\
	stq $21,40($17)						\n\
	xor $25,$27,$25						\n\
								\n\
	stq $23,48($17)						\n\
	subq $16,1,$16						\n\
	stq $25,56($17)						\n\
	addq $17,64,$17						\n\
								\n\
	addq $18,64,$18						\n\
	bgt $16,2b						\n\
	ret							\n\
	.end xor_alpha_prefetch_2				\n\
								\n\
	.align 3						\n\
	.ent xor_alpha_prefetch_3				\n\
xor_alpha_prefetch_3:						\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
								\n\
	ldq $31, 0($17)						\n\
	ldq $31, 0($18)						\n\
	ldq $31, 0($19)						\n\
								\n\
	ldq $31, 64($17)					\n\
	ldq $31, 64($18)					\n\
	ldq $31, 64($19)					\n\
								\n\
	ldq $31, 128($17)					\n\
	ldq $31, 128($18)					\n\
	ldq $31, 128($19)					\n\
								\n\
	ldq $31, 192($17)					\n\
	ldq $31, 192($18)					\n\
	ldq $31, 192($19)					\n\
	.align 4						\n\
3:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,0($19)						\n\
	ldq $3,8($17)						\n\
								\n\
	ldq $4,8($18)						\n\
	ldq $6,16($17)						\n\
	ldq $7,16($18)						\n\
	ldq $21,24($17)						\n\
								\n\
	ldq $22,24($18)						\n\
	ldq $24,32($17)						\n\
	ldq $25,32($18)						\n\
	ldq $5,8($19)						\n\
								\n\
	ldq $20,16($19)						\n\
	ldq $23,24($19)						\n\
	ldq $27,32($19)						\n\
	nop							\n\
								\n\
	xor $0,$1,$1		# 8 cycles from $0 load		\n\
	xor $3,$4,$4		# 7 cycles from $4 load		\n\
	xor $6,$7,$7		# 6 cycles from $7 load		\n\
	xor $21,$22,$22		# 5 cycles from $22 load	\n\
								\n\
	xor $1,$2,$2		# 9 cycles from $2 load		\n\
	xor $24,$25,$25		# 5 cycles from $25 load	\n\
	stq $2,0($17)						\n\
	xor $4,$5,$5		# 6 cycles from $5 load		\n\
								\n\
	stq $5,8($17)						\n\
	xor $7,$20,$20		# 7 cycles from $20 load	\n\
	stq $20,16($17)						\n\
	xor $22,$23,$23		# 7 cycles from $23 load	\n\
								\n\
	stq $23,24($17)						\n\
	xor $25,$27,$27		# 7 cycles from $27 load	\n\
	stq $27,32($17)						\n\
	nop							\n\
								\n\
	ldq $0,40($17)						\n\
	ldq $1,40($18)						\n\
	ldq $3,48($17)						\n\
	ldq $4,48($18)						\n\
								\n\
	ldq $6,56($17)						\n\
	ldq $7,56($18)						\n\
	ldq $2,40($19)						\n\
	ldq $5,48($19)						\n\
								\n\
	ldq $20,56($19)						\n\
	ldq $31,256($17)					\n\
	ldq $31,256($18)					\n\
	ldq $31,256($19)					\n\
								\n\
	xor $0,$1,$1		# 6 cycles from $1 load		\n\
	xor $3,$4,$4		# 5 cycles from $4 load		\n\
	xor $6,$7,$7		# 5 cycles from $7 load		\n\
	xor $1,$2,$2		# 4 cycles from $2 load		\n\
								\n\
	xor $4,$5,$5		# 5 cycles from $5 load		\n\
	xor $7,$20,$20		# 4 cycles from $20 load	\n\
	stq $2,40($17)						\n\
	subq $16,1,$16						\n\
								\n\
	stq $5,48($17)						\n\
	addq $19,64,$19						\n\
	stq $20,56($17)						\n\
	addq $18,64,$18						\n\
								\n\
	addq $17,64,$17						\n\
	bgt $16,3b						\n\
	ret							\n\
	.end xor_alpha_prefetch_3				\n\
								\n\
	.align 3						\n\
	.ent xor_alpha_prefetch_4				\n\
xor_alpha_prefetch_4:						\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
								\n\
	ldq $31, 0($17)						\n\
	ldq $31, 0($18)						\n\
	ldq $31, 0($19)						\n\
	ldq $31, 0($20)						\n\
								\n\
	ldq $31, 64($17)					\n\
	ldq $31, 64($18)					\n\
	ldq $31, 64($19)					\n\
	ldq $31, 64($20)					\n\
								\n\
	ldq $31, 128($17)					\n\
	ldq $31, 128($18)					\n\
	ldq $31, 128($19)					\n\
	ldq $31, 128($20)					\n\
								\n\
	ldq $31, 192($17)					\n\
	ldq $31, 192($18)					\n\
	ldq $31, 192($19)					\n\
	ldq $31, 192($20)					\n\
	.align 4						\n\
4:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,0($19)						\n\
	ldq $3,0($20)						\n\
								\n\
	ldq $4,8($17)						\n\
	ldq $5,8($18)						\n\
	ldq $6,8($19)						\n\
	ldq $7,8($20)						\n\
								\n\
	ldq $21,16($17)						\n\
	ldq $22,16($18)						\n\
	ldq $23,16($19)						\n\
	ldq $24,16($20)						\n\
								\n\
	ldq $25,24($17)						\n\
	xor $0,$1,$1		# 6 cycles from $1 load		\n\
	ldq $27,24($18)						\n\
	xor $2,$3,$3		# 6 cycles from $3 load		\n\
								\n\
	ldq $0,24($19)						\n\
	xor $1,$3,$3						\n\
	ldq $1,24($20)						\n\
	xor $4,$5,$5		# 7 cycles from $5 load		\n\
								\n\
	stq $3,0($17)						\n\
	xor $6,$7,$7						\n\
	xor $21,$22,$22		# 7 cycles from $22 load	\n\
	xor $5,$7,$7						\n\
								\n\
	stq $7,8($17)						\n\
	xor $23,$24,$24		# 7 cycles from $24 load	\n\
	ldq $2,32($17)						\n\
	xor $22,$24,$24						\n\
								\n\
	ldq $3,32($18)						\n\
	ldq $4,32($19)						\n\
	ldq $5,32($20)						\n\
	xor $25,$27,$27		# 8 cycles from $27 load	\n\
								\n\
	ldq $6,40($17)						\n\
	ldq $7,40($18)						\n\
	ldq $21,40($19)						\n\
	ldq $22,40($20)						\n\
								\n\
	stq $24,16($17)						\n\
	xor $0,$1,$1		# 9 cycles from $1 load		\n\
	xor $2,$3,$3		# 5 cycles from $3 load		\n\
	xor $27,$1,$1						\n\
								\n\
	stq $1,24($17)						\n\
	xor $4,$5,$5		# 5 cycles from $5 load		\n\
	ldq $23,48($17)						\n\
	xor $3,$5,$5						\n\
								\n\
	ldq $24,48($18)						\n\
	ldq $25,48($19)						\n\
	ldq $27,48($20)						\n\
	ldq $0,56($17)						\n\
								\n\
	ldq $1,56($18)						\n\
	ldq $2,56($19)						\n\
	ldq $3,56($20)						\n\
	xor $6,$7,$7		# 8 cycles from $6 load		\n\
								\n\
	ldq $31,256($17)					\n\
	xor $21,$22,$22		# 8 cycles from $22 load	\n\
	ldq $31,256($18)					\n\
	xor $7,$22,$22						\n\
								\n\
	ldq $31,256($19)					\n\
	xor $23,$24,$24		# 6 cycles from $24 load	\n\
	ldq $31,256($20)					\n\
	xor $25,$27,$27		# 6 cycles from $27 load	\n\
								\n\
	stq $5,32($17)						\n\
	xor $24,$27,$27						\n\
	xor $0,$1,$1		# 7 cycles from $1 load		\n\
	xor $2,$3,$3		# 6 cycles from $3 load		\n\
								\n\
	stq $22,40($17)						\n\
	xor $1,$3,$3						\n\
	stq $27,48($17)						\n\
	subq $16,1,$16						\n\
								\n\
	stq $3,56($17)						\n\
	addq $20,64,$20						\n\
	addq $19,64,$19						\n\
	addq $18,64,$18						\n\
								\n\
	addq $17,64,$17						\n\
	bgt $16,4b						\n\
	ret							\n\
	.end xor_alpha_prefetch_4				\n\
								\n\
	.align 3						\n\
	.ent xor_alpha_prefetch_5				\n\
xor_alpha_prefetch_5:						\n\
	.prologue 0						\n\
	srl $16, 6, $16						\n\
								\n\
	ldq $31, 0($17)						\n\
	ldq $31, 0($18)						\n\
	ldq $31, 0($19)						\n\
	ldq $31, 0($20)						\n\
	ldq $31, 0($21)						\n\
								\n\
	ldq $31, 64($17)					\n\
	ldq $31, 64($18)					\n\
	ldq $31, 64($19)					\n\
	ldq $31, 64($20)					\n\
	ldq $31, 64($21)					\n\
								\n\
	ldq $31, 128($17)					\n\
	ldq $31, 128($18)					\n\
	ldq $31, 128($19)					\n\
	ldq $31, 128($20)					\n\
	ldq $31, 128($21)					\n\
								\n\
	ldq $31, 192($17)					\n\
	ldq $31, 192($18)					\n\
	ldq $31, 192($19)					\n\
	ldq $31, 192($20)					\n\
	ldq $31, 192($21)					\n\
	.align 4						\n\
5:								\n\
	ldq $0,0($17)						\n\
	ldq $1,0($18)						\n\
	ldq $2,0($19)						\n\
	ldq $3,0($20)						\n\
								\n\
	ldq $4,0($21)						\n\
	ldq $5,8($17)						\n\
	ldq $6,8($18)						\n\
	ldq $7,8($19)						\n\
								\n\
	ldq $22,8($20)						\n\
	ldq $23,8($21)						\n\
	ldq $24,16($17)						\n\
	ldq $25,16($18)						\n\
								\n\
	ldq $27,16($19)						\n\
	xor $0,$1,$1		# 6 cycles from $1 load		\n\
	ldq $28,16($20)						\n\
	xor $2,$3,$3		# 6 cycles from $3 load		\n\
								\n\
	ldq $0,16($21)						\n\
	xor $1,$3,$3						\n\
	ldq $1,24($17)						\n\
	xor $3,$4,$4		# 7 cycles from $4 load		\n\
								\n\
	stq $4,0($17)						\n\
	xor $5,$6,$6		# 7 cycles from $6 load		\n\
	xor $7,$22,$22		# 7 cycles from $22 load	\n\
	xor $6,$23,$23		# 7 cycles from $23 load	\n\
								\n\
	ldq $2,24($18)						\n\
	xor $22,$23,$23						\n\
	ldq $3,24($19)						\n\
	xor $24,$25,$25		# 8 cycles from $25 load	\n\
								\n\
	stq $23,8($17)						\n\
	xor $25,$27,$27		# 8 cycles from $27 load	\n\
	ldq $4,24($20)						\n\
	xor $28,$0,$0		# 7 cycles from $0 load		\n\
								\n\
	ldq $5,24($21)						\n\
	xor $27,$0,$0						\n\
	ldq $6,32($17)						\n\
	ldq $7,32($18)						\n\
								\n\
	stq $0,16($17)						\n\
	xor $1,$2,$2		# 6 cycles from $2 load		\n\
	ldq $22,32($19)						\n\
	xor $3,$4,$4		# 4 cycles from $4 load		\n\
								\n\
	ldq $23,32($20)						\n\
	xor $2,$4,$4						\n\
	ldq $24,32($21)						\n\
	ldq $25,40($17)						\n\
								\n\
	ldq $27,40($18)						\n\
	ldq $28,40($19)						\n\
	ldq $0,40($20)						\n\
	xor $4,$5,$5		# 7 cycles from $5 load		\n\
								\n\
	stq $5,24($17)						\n\
	xor $6,$7,$7		# 7 cycles from $7 load		\n\
	ldq $1,40($21)						\n\
	ldq $2,48($17)						\n\
								\n\
	ldq $3,48($18)						\n\
	xor $7,$22,$22		# 7 cycles from $22 load	\n\
	ldq $4,48($19)						\n\
	xor $23,$24,$24		# 6 cycles from $24 load	\n\
								\n\
	ldq $5,48($20)						\n\
	xor $22,$24,$24						\n\
	ldq $6,48($21)						\n\
	xor $25,$27,$27		# 7 cycles from $27 load	\n\
								\n\
	stq $24,32($17)						\n\
	xor $27,$28,$28		# 8 cycles from $28 load	\n\
	ldq $7,56($17)						\n\
	xor $0,$1,$1		# 6 cycles from $1 load		\n\
								\n\
	ldq $22,56($18)						\n\
	ldq $23,56($19)						\n\
	ldq $24,56($20)						\n\
	ldq $25,56($21)						\n\
								\n\
	ldq $31,256($17)					\n\
	xor $28,$1,$1						\n\
	ldq $31,256($18)					\n\
	xor $2,$3,$3		# 9 cycles from $3 load		\n\
								\n\
	ldq $31,256($19)					\n\
	xor $3,$4,$4		# 9 cycles from $4 load		\n\
	ldq $31,256($20)					\n\
	xor $5,$6,$6		# 8 cycles from $6 load		\n\
								\n\
	stq $1,40($17)						\n\
	xor $4,$6,$6						\n\
	xor $7,$22,$22		# 7 cycles from $22 load	\n\
	xor $23,$24,$24		# 6 cycles from $24 load	\n\
								\n\
	stq $6,48($17)						\n\
	xor $22,$24,$24						\n\
	ldq $31,256($21)					\n\
	xor $24,$25,$25		# 8 cycles from $25 load	\n\
								\n\
	stq $25,56($17)						\n\
	subq $16,1,$16						\n\
	addq $21,64,$21						\n\
	addq $20,64,$20						\n\
								\n\
	addq $19,64,$19						\n\
	addq $18,64,$18						\n\
	addq $17,64,$17						\n\
	bgt $16,5b						\n\
								\n\
	ret							\n\
	.end xor_alpha_prefetch_5				\n\
");

/*
 * Template named "alpha": fills the do_2..do_5 slots with the plain
 * (non-prefetching) routines.  struct xor_block_template is not declared
 * in this file.
 */
static struct xor_block_template xor_block_alpha = {
	.name	= "alpha",
	.do_2	= xor_alpha_2,
	.do_3	= xor_alpha_3,
	.do_4	= xor_alpha_4,
	.do_5	= xor_alpha_5,
};

/*
 * Template named "alpha prefetch": same slots, filled with the
 * prefetching routines.
 */
static struct xor_block_template xor_block_alpha_prefetch = {
	.name	= "alpha prefetch",
	.do_2	= xor_alpha_prefetch_2,
	.do_3	= xor_alpha_prefetch_3,
	.do_4	= xor_alpha_prefetch_4,
	.do_5	= xor_alpha_prefetch_5,
};

/* For grins, also test the generic routines.  */
#include <asm-generic/xor.h>

/*
 * Drop any earlier definition of XOR_TRY_TEMPLATES and replace it with
 * one that passes four templates to xor_speed(): xor_block_8regs,
 * xor_block_32regs and the two Alpha templates defined above.
 * xor_speed(), xor_block_8regs and xor_block_32regs are not defined in
 * this file; the latter two presumably come from the generic header
 * included just above -- confirm.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_alpha);		\
		xor_speed(&xor_block_alpha_prefetch);	\
	} while (0)

/* Force the use of alpha_prefetch if EV6, as it is significantly
   faster in the cold cache case.  On any other implver() value the
   caller-supplied FASTEST is returned unchanged.  implver() and
   IMPLVER_EV6 are not defined in this file.  */
#define XOR_SELECT_TEMPLATE(FASTEST) \
	(implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST)