From af5c13b01ecc416d26321a2d60943d787ba24c7f Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sun, 16 Feb 2020 08:36:51 -0800 Subject: [PATCH] x86: Don't disable SSE4a when disabling SSE4 commit 7deea9aad8 changed nosse4 to include CpuSSE4a. But AMD SSE4a is a superset of SSE3 and Intel SSE4 is a superset of SSSE3. Disabling Intel SSE4 shouldn't disable AMD SSE4a. This patch restores nosse4. It also adds .sse4a and nosse4a. gas/ * config/tc-i386.c (cpu_arch): Add .sse4a and nosse4a. Restore nosse4. * doc/c-i386.texi: Document sse4a and nosse4a. opcodes/ * i386-gen.c (cpu_flag_init): Add CPU_ANY_SSE4A_FLAGS. Remove CPU_ANY_SSE4_FLAGS. --- gas/ChangeLog | 6 ++++++ gas/config/tc-i386.c | 5 ++++- gas/doc/c-i386.texi | 4 +++- opcodes/ChangeLog | 5 +++++ opcodes/i386-gen.c | 4 ++-- opcodes/i386-init.h | 4 ++-- 6 files changed, 22 insertions(+), 6 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index e30c0ddc00..a148526c5c 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,9 @@ +2020-02-16 H.J. Lu + + * config/tc-i386.c (cpu_arch): Add .sse4a and nosse4a. Restore + nosse4. + * doc/c-i386.texi: Document sse4a and nosse4a. + 2020-02-14 H.J. 
Lu * doc/c-i386.texi: Remove the old movsx and movzx documentation diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 9e59ecaedf..6cc7696fb5 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -983,6 +983,8 @@ static const arch_entry cpu_arch[] = CPU_SSE2_FLAGS, 0 }, { STRING_COMMA_LEN (".sse3"), PROCESSOR_UNKNOWN, CPU_SSE3_FLAGS, 0 }, + { STRING_COMMA_LEN (".sse4a"), PROCESSOR_UNKNOWN, + CPU_SSE4A_FLAGS, 0 }, { STRING_COMMA_LEN (".ssse3"), PROCESSOR_UNKNOWN, CPU_SSSE3_FLAGS, 0 }, { STRING_COMMA_LEN (".sse4.1"), PROCESSOR_UNKNOWN, @@ -1177,10 +1179,11 @@ static const noarch_entry cpu_noarch[] = { STRING_COMMA_LEN ("nosse"), CPU_ANY_SSE_FLAGS }, { STRING_COMMA_LEN ("nosse2"), CPU_ANY_SSE2_FLAGS }, { STRING_COMMA_LEN ("nosse3"), CPU_ANY_SSE3_FLAGS }, + { STRING_COMMA_LEN ("nosse4a"), CPU_ANY_SSE4A_FLAGS }, { STRING_COMMA_LEN ("nossse3"), CPU_ANY_SSSE3_FLAGS }, { STRING_COMMA_LEN ("nosse4.1"), CPU_ANY_SSE4_1_FLAGS }, { STRING_COMMA_LEN ("nosse4.2"), CPU_ANY_SSE4_2_FLAGS }, - { STRING_COMMA_LEN ("nosse4"), CPU_ANY_SSE4_FLAGS }, + { STRING_COMMA_LEN ("nosse4"), CPU_ANY_SSE4_1_FLAGS }, { STRING_COMMA_LEN ("noavx"), CPU_ANY_AVX_FLAGS }, { STRING_COMMA_LEN ("noavx2"), CPU_ANY_AVX2_FLAGS }, { STRING_COMMA_LEN ("noavx512f"), CPU_ANY_AVX512F_FLAGS }, diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi index 235a3951db..91586cd999 100644 --- a/gas/doc/c-i386.texi +++ b/gas/doc/c-i386.texi @@ -151,6 +151,7 @@ accept various extension mnemonics. For example, @code{sse}, @code{sse2}, @code{sse3}, +@code{sse4a}, @code{ssse3}, @code{sse4.1}, @code{sse4.2}, @@ -158,6 +159,7 @@ accept various extension mnemonics. For example, @code{nosse}, @code{nosse2}, @code{nosse3}, +@code{nosse4a}, @code{nossse3}, @code{nosse4.1}, @code{nosse4.2}, @@ -1428,7 +1430,7 @@ supported on the CPU specified. 
The choices for @var{cpu_type} are: @item @samp{bdver4} @tab @samp{znver1} @tab @samp{znver2} @tab @samp{btver1} @item @samp{btver2} @tab @samp{generic32} @tab @samp{generic64} @item @samp{.cmov} @tab @samp{.fxsr} @tab @samp{.mmx} -@item @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3} +@item @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3} @tab @samp{.sse4a} @item @samp{.ssse3} @tab @samp{.sse4.1} @tab @samp{.sse4.2} @tab @samp{.sse4} @item @samp{.avx} @tab @samp{.vmx} @tab @samp{.smx} @tab @samp{.ept} @item @samp{.clflush} @tab @samp{.movbe} @tab @samp{.xsave} @tab @samp{.xsaveopt} diff --git a/opcodes/ChangeLog b/opcodes/ChangeLog index 103c508be0..6eeddc7f0f 100644 --- a/opcodes/ChangeLog +++ b/opcodes/ChangeLog @@ -1,3 +1,8 @@ +2020-02-16 H.J. Lu + + * i386-gen.c (cpu_flag_init): Add CPU_ANY_SSE4A_FLAGS. Remove + CPU_ANY_SSE4_FLAGS. + 2020-02-14 H.J. Lu * i386-opc.tbl (movsx): Remove Intel syntax comments. diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c index 79f4cc9d25..45106bcf6d 100644 --- a/opcodes/i386-gen.c +++ b/opcodes/i386-gen.c @@ -326,6 +326,8 @@ static initializer cpu_flag_init[] = { "CPU_ANY_SSE2_FLAGS", "CPU_ANY_SSE3_FLAGS|CpuSSE2" }, { "CPU_ANY_SSE3_FLAGS", + { "CPU_ANY_SSE4A_FLAGS", + "CPU_ANY_SSE3_FLAGS|CpuSSE4a" }, "CPU_ANY_SSSE3_FLAGS|CpuSSE3|CpuSSE4a" }, { "CPU_ANY_SSSE3_FLAGS", "CPU_ANY_SSE4_1_FLAGS|CpuSSSE3" }, @@ -333,8 +335,6 @@ static initializer cpu_flag_init[] = "CPU_ANY_SSE4_2_FLAGS|CpuSSE4_1" }, { "CPU_ANY_SSE4_2_FLAGS", "CpuSSE4_2" }, - { "CPU_ANY_SSE4_FLAGS", - "CPU_ANY_SSE4_1_FLAGS|CpuSSE4a" }, { "CPU_ANY_AVX_FLAGS", "CPU_ANY_AVX2_FLAGS|CpuF16C|CpuFMA|CpuFMA4|CpuXOP|CpuAVX" }, { "CPU_ANY_AVX2_FLAGS", diff --git a/opcodes/i386-init.h b/opcodes/i386-init.h index 8ecf117196..d4674fc02a 100644 --- a/opcodes/i386-init.h +++ b/opcodes/i386-init.h @@ -1170,9 +1170,9 @@ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } -#define CPU_ANY_SSE4_FLAGS \ +#define CPU_ANY_SSE4A_FLAGS \ { { 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ -- 2.34.1