Lines Matching +full:base +full:- +full:64

1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* NG2memcpy.S: Niagara-2 optimized memcpy.
140 #define FREG_LOAD_1(base, x0) \
141 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
142 #define FREG_LOAD_2(base, x0, x1) \
143 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
144 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
145 #define FREG_LOAD_3(base, x0, x1, x2) \
146 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
147 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
148 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
149 #define FREG_LOAD_4(base, x0, x1, x2, x3) \
150 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
151 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
152 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
153 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
154 #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
155 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
156 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
157 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
158 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
159 EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
160 #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
161 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
162 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
163 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
164 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
165 EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
166 EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
167 #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
168 EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
169 EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
170 EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
171 EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
172 EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
173 EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
174 EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
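
The FREG_LOAD_1 through FREG_LOAD_7 macros above unroll one to seven consecutive 8-byte ldd loads from base, each wrapped in EX_LD_FP so a faulting load is handled by the NG2_retl_o2_plus_g1 fixup. A minimal C sketch of the same unrolled-access idea, using a hypothetical freg_load_n helper (it only mirrors the access pattern, not the FP registers or the fault handling):

#include <stdint.h>
#include <string.h>

/* Load n consecutive 8-byte words starting at base, n = 1..7 as in the
 * FREG_LOAD_N macros.  The real code uses SPARC ldd into %f registers
 * with a per-load exception table entry; this is only an analogue. */
static inline void freg_load_n(const uint8_t *base, uint64_t *regs, int n)
{
	for (int i = 0; i < n; i++)
		memcpy(&regs[i], base + 8 * i, 8);	/* ldd base + 0x00, 0x08, ... */
}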
223 add %g1, 64, %g1
245 .align 64
265 * the destination to a 64-byte boundary which can chew up
266 * to (64 - 1) bytes from the length before we perform the
269 * However, the cut-off point, performance wise, is around
270 * 4 64-byte blocks.
272 cmp %o2, (4 * 64)
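
The comment fragments above (lines 265-270) and the cmp %o2, (4 * 64) that follows explain why the block-copy path is gated on length: aligning the destination can consume up to 63 bytes, and below roughly four 64-byte blocks the setup cost is not worth it. A hedged C sketch of that gate, with illustrative helper and constant names; the exact >= versus > boundary depends on the branch after the cmp, which is not among the matched lines:

#include <stddef.h>

#define NG2_BLOCK_BYTES	64	/* illustrative name for the 64-byte block size */

/* Take the 64-byte block-copy path only for lengths around four blocks
 * or more, mirroring cmp %o2, (4 * 64). */
static int use_block_copy(size_t len)
{
	return len >= 4 * NG2_BLOCK_BYTES;
}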
289 /* Align destination on 64-byte boundary. */
290 andcc %o0, (64 - 1), %o4
292 sub %o4, 64, %o4
311 add %o1, (64 - 1), %o4
312 andn %o4, (64 - 1), %o4
313 andn %o2, (64 - 1), %g1
316 and %o1, (64 - 1), %g2
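
The matched setup lines revolve around (64 - 1) masks: andcc %o0, 63 extracts the destination's offset inside a 64-byte line, add %o1, 63 followed by andn %o4, 63 rounds the source up to the next 64-byte boundary, andn %o2, 63 rounds the length down to whole blocks, and and %o1, 63 keeps the source's offset within a block. A C sketch of those computations, with illustrative names (dst ~ %o0, src ~ %o1, len ~ %o2):

#include <stddef.h>
#include <stdint.h>

struct ng2_align_sketch {		/* hypothetical struct, for illustration only */
	uintptr_t dst_off;		/* andcc %o0, (64 - 1), %o4 */
	uintptr_t src_rounded_up;	/* add %o1, (64 - 1), %o4; andn %o4, (64 - 1), %o4 */
	size_t    block_bytes;		/* andn %o2, (64 - 1), %g1 */
	uintptr_t src_off;		/* and %o1, (64 - 1), %g2 */
};

static struct ng2_align_sketch ng2_alignment(uintptr_t dst, uintptr_t src, size_t len)
{
	struct ng2_align_sketch a;

	a.dst_off        = dst & (64 - 1);
	a.src_rounded_up = (src + (64 - 1)) & ~(uintptr_t)(64 - 1);
	a.block_bytes    = len & ~(size_t)(64 - 1);
	a.src_off        = src & (64 - 1);
	return a;
}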
349 110: sub %o4, 64, %g2
356 subcc %g1, 64, %g1
357 add %o4, 64, %o4
359 LOAD(prefetch, %o4 + 64, #one_read)
370 subcc %g1, 64, %g1
371 add %o4, 64, %o4
373 LOAD(prefetch, %o4 + 64, #one_read)
384 subcc %g1, 64, %g1
385 add %o4, 64, %o4
387 LOAD(prefetch, %o4 + 64, #one_read)
398 subcc %g1, 64, %g1
399 add %o4, 64, %o4
401 LOAD(prefetch, %o4 + 64, #one_read)
412 subcc %g1, 64, %g1
413 add %o4, 64, %o4
415 LOAD(prefetch, %o4 + 64, #one_read)
426 subcc %g1, 64, %g1
427 add %o4, 64, %o4
429 LOAD(prefetch, %o4 + 64, #one_read)
440 subcc %g1, 64, %g1
441 add %o4, 64, %o4
443 LOAD(prefetch, %o4 + 64, #one_read)
454 subcc %g1, 64, %g1
455 add %o4, 64, %o4
457 LOAD(prefetch, %o4 + 64, #one_read)
463 subcc %g1, 64, %g1
466 add %o4, 64, %o4
468 LOAD(prefetch, %o4 + 64, #one_read)
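
Every matched group inside the block-copy loop repeats the same step: subcc %g1, 64, %g1 retires one 64-byte block from the remaining count, add %o4, 64, %o4 advances the block-aligned source pointer, and LOAD(prefetch, %o4 + 64, #one_read) prefetches the line one block ahead. A rough C analogue of that loop shape, assuming block_bytes is already a multiple of 64; the memcpy stands in for the FREG_LOAD/store groups, and __builtin_prefetch (a GCC/Clang builtin) only approximates the #one_read ASI hint:

#include <stddef.h>
#include <string.h>

static void copy_blocks_sketch(char *dst, const char *src, size_t block_bytes)
{
	while (block_bytes != 0) {			/* subcc %g1, 64, %g1 */
		__builtin_prefetch(src + 64, 0);	/* LOAD(prefetch, %o4 + 64, #one_read) */
		memcpy(dst, src, 64);			/* stand-in for the FP load/store groups */
		src += 64;				/* add %o4, 64, %o4 */
		dst += 64;
		block_bytes -= 64;
	}
}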
484 .align 64
485 75: /* 16 < len <= 64 */
545 8: mov 64, GLOBAL_SPARE
568 .align 64
594 .size FUNC_NAME, .-FUNC_NAME