Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
038b0a6d | 2 | |
8d379dad | 3 | #include <linux/linkage.h> |
cd4d09ec | 4 | #include <asm/cpufeatures.h> |
59e97e4d | 5 | #include <asm/alternative-asm.h> |
8d379dad | 6 | |
090a3f61 BP |
7 | /* |
8 | * Some CPUs run faster using the string copy instructions (sane microcode). | |
9 | * It is also a lot simpler. Use this when possible. But, don't use streaming | |
10 | * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the | |
11 | * prefetch distance based on SMP/UP. | |
12 | */ | |
/*
 * void copy_page(void *to, void *from) -- to in %rdi, from in %rsi.
 * Copies one 4096-byte page.
 *
 * Fast path: a single "rep movsq" of 4096/8 = 512 qwords.  The
 * ALTERNATIVE is patched at boot: on CPUs flagged X86_FEATURE_REP_GOOD
 * the "jmp copy_page_regs" is replaced by nothing and the string copy
 * runs; on all other CPUs control jumps to the unrolled
 * copy_page_regs fallback below.
 */
8d379dad | 13 | ALIGN
090a3f61 | 14 | ENTRY(copy_page)
090a3f61 | 15 | ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
269833bd ML |
16 | movl $4096/8, %ecx
17 | rep movsq | 
8d379dad | 18 | ret
090a3f61 | 19 | ENDPROC(copy_page)
1da177e4 | 20 | |
/*
 * copy_page_regs -- page-copy fallback for CPUs without fast string
 * operations (no X86_FEATURE_REP_GOOD).  Same contract as copy_page:
 * to in %rdi, from in %rsi, copies 4096 bytes.
 *
 * The page is copied as 64 chunks of 64 bytes (8 qword loads followed
 * by 8 qword stores, using 8 registers per chunk).  The first
 * (4096/64)-5 = 59 iterations prefetch 5 cache lines (5*64 bytes)
 * ahead of the read pointer; the final 5 iterations run in a separate
 * loop without the prefetch so it never touches past the end of the
 * source page.
 */
090a3f61 | 21 | ENTRY(copy_page_regs)
/*
 * %rbx and %r12 are callee-saved in the SysV AMD64 ABI; spill them to
 * the stack so the 8-register chunk below may use them freely.
 */
269833bd | 22 | subq $2*8, %rsp
269833bd | 23 | movq %rbx, (%rsp)
269833bd | 24 | movq %r12, 1*8(%rsp)
7bcd3f34 | 25 | 
/* Main loop: 59 prefetching iterations. */
269833bd | 26 | movl $(4096/64)-5, %ecx
7bcd3f34 AK |
27 | .p2align 4
28 | .Loop64: | 
269833bd ML |
29 | dec %rcx
/* Load one 64-byte chunk of the source into 8 registers... */
30 | movq 0x8*0(%rsi), %rax | 
31 | movq 0x8*1(%rsi), %rbx | 
32 | movq 0x8*2(%rsi), %rdx | 
33 | movq 0x8*3(%rsi), %r8 | 
34 | movq 0x8*4(%rsi), %r9 | 
35 | movq 0x8*5(%rsi), %r10 | 
36 | movq 0x8*6(%rsi), %r11 | 
37 | movq 0x8*7(%rsi), %r12 | 
7bcd3f34 AK |
38 | 
/* ...pull the line 5 chunks ahead into cache for a later iteration... */
39 | prefetcht0 5*64(%rsi) | 
40 | ||
/* ...and store the 8 registers to the destination. */
269833bd ML |
41 | movq %rax, 0x8*0(%rdi)
42 | movq %rbx, 0x8*1(%rdi) | 
43 | movq %rdx, 0x8*2(%rdi) | 
44 | movq %r8, 0x8*3(%rdi) | 
45 | movq %r9, 0x8*4(%rdi) | 
46 | movq %r10, 0x8*5(%rdi) | 
47 | movq %r11, 0x8*6(%rdi) | 
48 | movq %r12, 0x8*7(%rdi) | 
7bcd3f34 | 49 | 
/* Advance both pointers by one 64-byte chunk (leaq preserves flags,
 * so the ZF from the "dec %rcx" above still drives the jnz). */
269833bd ML |
50 | leaq 64 (%rsi), %rsi
51 | leaq 64 (%rdi), %rdi | 
7bcd3f34 | 52 | 
269833bd | 53 | jnz .Loop64
7bcd3f34 | 54 | 
/* Tail loop: last 5 chunks, same copy pattern but no prefetch. */
269833bd | 55 | movl $5, %ecx
7bcd3f34 AK |
56 | .p2align 4
57 | .Loop2: | 
269833bd ML |
58 | decl %ecx
59 | ||
60 | movq 0x8*0(%rsi), %rax | 
61 | movq 0x8*1(%rsi), %rbx | 
62 | movq 0x8*2(%rsi), %rdx | 
63 | movq 0x8*3(%rsi), %r8 | 
64 | movq 0x8*4(%rsi), %r9 | 
65 | movq 0x8*5(%rsi), %r10 | 
66 | movq 0x8*6(%rsi), %r11 | 
67 | movq 0x8*7(%rsi), %r12 | 
68 | ||
69 | movq %rax, 0x8*0(%rdi) | 
70 | movq %rbx, 0x8*1(%rdi) | 
71 | movq %rdx, 0x8*2(%rdi) | 
72 | movq %r8, 0x8*3(%rdi) | 
73 | movq %r9, 0x8*4(%rdi) | 
74 | movq %r10, 0x8*5(%rdi) | 
75 | movq %r11, 0x8*6(%rdi) | 
76 | movq %r12, 0x8*7(%rdi) | 
77 | ||
78 | leaq 64(%rdi), %rdi | 
79 | leaq 64(%rsi), %rsi | 
7bcd3f34 AK |
80 | jnz .Loop2
81 | ||
/* Restore the callee-saved registers and release the spill area. */
269833bd | 82 | movq (%rsp), %rbx
269833bd | 83 | movq 1*8(%rsp), %r12
269833bd | 84 | addq $2*8, %rsp
7bcd3f34 | 85 | ret
090a3f61 | 86 | ENDPROC(copy_page_regs)