# PIII (Katmai) SSE and MMX-extension SIMD instructions — assembler encoding coverage.
| 2 | |
.text
foo:
# Packed / scalar single-precision add (register and memory source forms).
addps (%ecx),%xmm0
addps %xmm2,%xmm1
addss (%ebx),%xmm2
addss %xmm4,%xmm3
# Bitwise AND-NOT / AND on packed singles.
andnps 0x0(%ebp),%xmm4
andnps %xmm6,%xmm5
andps (%edi),%xmm6
andps %xmm0,%xmm7
# Generic compares with explicit immediate predicate (imm8 0-7).
cmpps $0x2,%xmm1,%xmm0
cmpps $0x3,(%edx),%xmm1
cmpss $0x4,%xmm2,%xmm2
cmpss $0x5,(%esp,1),%xmm3
cmpps $0x6,%xmm5,%xmm4
cmpps $0x7,(%esi),%xmm5
cmpss $0x0,%xmm7,%xmm6
cmpss $0x1,(%eax),%xmm7
# Compare pseudo-ops — each mnemonic encodes one of the 8 predicates above.
cmpeqps %xmm1,%xmm0
cmpeqps (%edx),%xmm1
cmpeqss %xmm2,%xmm2
cmpeqss (%esp,1),%xmm3
cmpltps %xmm5,%xmm4
cmpltps (%esi),%xmm5
cmpltss %xmm7,%xmm6
cmpltss (%eax),%xmm7
cmpleps (%ecx),%xmm0
cmpleps %xmm2,%xmm1
cmpless (%ebx),%xmm2
cmpless %xmm4,%xmm3
cmpunordps 0x0(%ebp),%xmm4
cmpunordps %xmm6,%xmm5
cmpunordss (%edi),%xmm6
cmpunordss %xmm0,%xmm7
cmpneqps %xmm1,%xmm0
cmpneqps (%edx),%xmm1
cmpneqss %xmm2,%xmm2
cmpneqss (%esp,1),%xmm3
cmpnltps %xmm5,%xmm4
cmpnltps (%esi),%xmm5
cmpnltss %xmm7,%xmm6
cmpnltss (%eax),%xmm7
cmpnleps (%ecx),%xmm0
cmpnleps %xmm2,%xmm1
cmpnless (%ebx),%xmm2
cmpnless %xmm4,%xmm3
cmpordps 0x0(%ebp),%xmm4
cmpordps %xmm6,%xmm5
cmpordss (%edi),%xmm6
cmpordss %xmm0,%xmm7
# Ordered compare setting EFLAGS.
comiss %xmm1,%xmm0
comiss (%edx),%xmm1
# Conversions between MMX/integer registers and single-precision values
# (cvtt* forms truncate instead of using the current rounding mode).
cvtpi2ps %mm3,%xmm2
cvtpi2ps (%esp,1),%xmm3
cvtsi2ss %ebp,%xmm4
cvtsi2ss (%esi),%xmm5
cvtps2pi %xmm7,%mm6
cvtps2pi (%eax),%mm7
cvtss2si (%ecx),%eax
cvtss2si %xmm2,%ecx
cvttps2pi (%ebx),%mm2
cvttps2pi %xmm4,%mm3
cvttss2si 0x0(%ebp),%esp
cvttss2si %xmm6,%ebp
# Packed / scalar divide.
divps %xmm1,%xmm0
divps (%edx),%xmm1
divss %xmm3,%xmm2
divss (%esp,1),%xmm3
# MXCSR load/store and the store fence.
ldmxcsr 0x0(%ebp)
stmxcsr (%esi)
sfence
# Packed / scalar max and min.
maxps %xmm1,%xmm0
maxps (%edx),%xmm1
maxss %xmm3,%xmm2
maxss (%esp,1),%xmm3
minps %xmm5,%xmm4
minps (%esi),%xmm5
minss %xmm7,%xmm6
minss (%eax),%xmm7
# Data movement: aligned/unaligned packed, high/low halves, mask extract,
# and scalar moves (register and memory directions).
movaps %xmm1,%xmm0
movaps %xmm2,(%ecx)
movaps (%edx),%xmm2
movlhps %xmm4,%xmm3
movhps %xmm5,(%esp,1)
movhps (%esi),%xmm5
movhlps %xmm7,%xmm6
movlps %xmm0,(%edi)
movlps (%eax),%xmm0
movmskps %xmm2,%ecx
movups %xmm3,%xmm2
movups %xmm4,(%edx)
movups 0x0(%ebp),%xmm4
movss %xmm6,%xmm5
movss %xmm7,(%esi)
movss (%eax),%xmm7
# Packed / scalar multiply.
mulps %xmm1,%xmm0
mulps (%edx),%xmm1
mulss %xmm2,%xmm2
mulss (%esp,1),%xmm3
# Bitwise OR on packed singles.
orps %xmm5,%xmm4
orps (%esi),%xmm5
# Approximate reciprocal and reciprocal square root.
rcpps %xmm7,%xmm6
rcpps (%eax),%xmm7
rcpss (%ecx),%xmm0
rcpss %xmm2,%xmm1
rsqrtps (%ebx),%xmm2
rsqrtps %xmm4,%xmm3
rsqrtss 0x0(%ebp),%xmm4
rsqrtss %xmm6,%xmm5
# Shuffle with immediate selector.
shufps $0x2,(%edi),%xmm6
shufps $0x3,%xmm0,%xmm7
# Packed / scalar square root.
sqrtps %xmm1,%xmm0
sqrtps (%edx),%xmm1
sqrtss %xmm2,%xmm2
sqrtss (%esp,1),%xmm3
# Packed / scalar subtract.
subps %xmm5,%xmm4
subps (%esi),%xmm5
subss %xmm7,%xmm6
subss (%eax),%xmm7
# Unordered compare setting EFLAGS.
ucomiss (%ecx),%xmm0
ucomiss %xmm2,%xmm1
# Interleave high/low packed singles.
unpckhps (%ebx),%xmm2
unpckhps %xmm4,%xmm3
unpcklps 0x0(%ebp),%xmm4
unpcklps %xmm6,%xmm5
# Bitwise XOR on packed singles.
xorps (%edi),%xmm6
xorps %xmm0,%xmm7
# PIII MMX extensions: byte/word average, insert/extract word,
# max/min, move mask, high unsigned multiply, sum of absolute differences,
# and word shuffle.
pavgb %mm1,%mm0
pavgb (%edx),%mm1
pavgw %mm3,%mm2
pavgw (%esp,1),%mm3
pextrw $0x0,%mm1,%eax
pinsrw $0x1,(%ecx),%mm1
pinsrw $0x2,%edx,%mm2
pmaxsw %mm1,%mm0
pmaxsw (%edx),%mm1
pmaxub %mm2,%mm2
pmaxub (%esp,1),%mm3
pminsw %mm5,%mm4
pminsw (%esi),%mm5
pminub %mm7,%mm6
pminub (%eax),%mm7
pmovmskb %mm5,%eax
pmulhuw %mm5,%mm4
pmulhuw (%esi),%mm5
psadbw %mm7,%mm6
psadbw (%eax),%mm7
pshufw $0x1,%mm2,%mm3
pshufw $0x4,0x0(%ebp),%mm6
# Non-temporal stores, masked byte store, and prefetch hints
# (covering plain, indexed, and all four locality-hint forms).
maskmovq %mm7,%mm0
movntps %xmm6,(%ebx)
movntq %mm2,(%eax)
prefetchnta (%esi)
prefetcht0 (%eax,%ebx,4)
prefetcht1 (%edx)
prefetcht2 (%ecx)

# A bad sfence modrm byte, hand-encoded (0x65 = GS segment prefix,
# 0F AE = fence/MXCSR opcode group, 0xFF = ModRM) to exercise the
# disassembler's handling of an unusual encoding.
.byte 0x65,0x0F,0xAE,0xff
# Pad out to good alignment
.p2align 4,0