cpu,gas,opcodes: remove no longer needed workaround from the BPF port

[deliverable/binutils-gdb.git] / gas / doc / c-i386.texi
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi

index a6e9d0400b614d9bbce120c7b81d12d878503236..551512f2a99885757ece662e12294f79a02e2021 100644 (file)
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -1,4 +1,4 @@
-@c Copyright (C) 1991-2018 Free Software Foundation, Inc.
+@c Copyright (C) 1991-2020 Free Software Foundation, Inc.
  @c This is part of the GAS manual.
  @c For copying conditions, see the file as.texinfo.
  @c man end
  @c This is part of the GAS manual.
  @c For copying conditions, see the file as.texinfo.
  @c man end
@@ -37,6 +37,7 @@ extending the Intel architecture to 64-bits.
  * i386-TBM::                    AMD's Trailing Bit Manipulation Instructions
  * i386-16bit::                  Writing 16-bit Code
  * i386-Arch::                   Specifying an x86 CPU architecture
  * i386-TBM::                    AMD's Trailing Bit Manipulation Instructions
  * i386-16bit::                  Writing 16-bit Code
  * i386-Arch::                   Specifying an x86 CPU architecture
+* i386-ISA::                    AMD64 ISA vs. Intel64 ISA
  * i386-Bugs::                   AT&T Syntax bugs
  * i386-Notes::                  Notes
  @end menu
  * i386-Bugs::                   AT&T Syntax bugs
  * i386-Notes::                  Notes
  @end menu
@@ -123,6 +124,7 @@ processor names are recognized:
  @code{bdver3},
  @code{bdver4},
  @code{znver1},
  @code{bdver3},
  @code{bdver4},
  @code{znver1},
+@code{znver2},
  @code{btver1},
  @code{btver2},
  @code{generic32} and
  @code{btver1},
  @code{btver2},
  @code{generic32} and
@@ -140,11 +142,16 @@ accept various extension mnemonics.  For example,
  @code{no287},
  @code{no387},
  @code{no687},
  @code{no287},
  @code{no387},
  @code{no687},
+@code{cmov},
+@code{nocmov},
+@code{fxsr},
+@code{nofxsr},
  @code{mmx},
  @code{nommx},
  @code{sse},
  @code{sse2},
  @code{sse3},
  @code{mmx},
  @code{nommx},
  @code{sse},
  @code{sse2},
  @code{sse3},
+@code{sse4a},
  @code{ssse3},
  @code{sse4.1},
  @code{sse4.2},
  @code{ssse3},
  @code{sse4.1},
  @code{sse4.2},
@@ -152,6 +159,7 @@ accept various extension mnemonics.  For example,
  @code{nosse},
  @code{nosse2},
  @code{nosse3},
  @code{nosse},
  @code{nosse2},
  @code{nosse3},
+@code{nosse4a},
  @code{nossse3},
  @code{nosse4.1},
  @code{nosse4.2},
  @code{nossse3},
  @code{nosse4.1},
  @code{nosse4.2},
@@ -176,6 +184,11 @@ accept various extension mnemonics.  For example,
  @code{clflushopt},
  @code{se1},
  @code{clwb},
  @code{clflushopt},
  @code{se1},
  @code{clwb},
+@code{movdiri},
+@code{movdir64b},
+@code{enqcmd},
+@code{serialize},
+@code{tsxldtrk},
  @code{avx512f},
  @code{avx512cd},
  @code{avx512er},
  @code{avx512f},
  @code{avx512cd},
  @code{avx512er},
@@ -191,6 +204,7 @@ accept various extension mnemonics.  For example,
  @code{avx512_vbmi2},
  @code{avx512_vnni},
  @code{avx512_bitalg},
  @code{avx512_vbmi2},
  @code{avx512_vnni},
  @code{avx512_bitalg},
+@code{avx512_bf16},
  @code{noavx512f},
  @code{noavx512cd},
  @code{noavx512er},
  @code{noavx512f},
  @code{noavx512cd},
  @code{noavx512er},
@@ -206,6 +220,11 @@ accept various extension mnemonics.  For example,
  @code{noavx512_vbmi2},
  @code{noavx512_vnni},
  @code{noavx512_bitalg},
  @code{noavx512_vbmi2},
  @code{noavx512_vnni},
  @code{noavx512_bitalg},
+@code{noavx512_vp2intersect},
+@code{noavx512_bf16},
+@code{noenqcmd},
+@code{noserialize},
+@code{notsxldtrk},
  @code{vmx},
  @code{vmfunc},
  @code{smx},
  @code{vmx},
  @code{vmfunc},
  @code{smx},
@@ -223,6 +242,7 @@ accept various extension mnemonics.  For example,
  @code{movbe},
  @code{ept},
  @code{lzcnt},
  @code{movbe},
  @code{ept},
  @code{lzcnt},
+@code{popcnt},
  @code{hle},
  @code{rtm},
  @code{invpcid},
  @code{hle},
  @code{rtm},
  @code{invpcid},
@@ -232,6 +252,10 @@ accept various extension mnemonics.  For example,
  @code{wbnoinvd},
  @code{pconfig},
  @code{waitpkg},
  @code{wbnoinvd},
  @code{pconfig},
  @code{waitpkg},
+@code{cldemote},
+@code{rdpru},
+@code{mcommit},
+@code{sev_es},
  @code{lwp},
  @code{fma4},
  @code{xop},
  @code{lwp},
  @code{fma4},
  @code{xop},
@@ -242,8 +266,7 @@ accept various extension mnemonics.  For example,
  @code{3dnowa},
  @code{sse4a},
  @code{sse5},
  @code{3dnowa},
  @code{sse4a},
  @code{sse5},
-@code{svme},
-@code{abm} and
+@code{svme} and
  @code{padlock}.
  Note that rather than extending a basic instruction set, the extension
  mnemonics starting with @code{no} revoke the respective functionality.
  @code{padlock}.
  Note that rather than extending a basic instruction set, the extension
  mnemonics starting with @code{no} revoke the respective functionality.
@@ -290,6 +313,22 @@ AVX instructions with 128bit vector length, which is the default.
  @option{-mavxscalar=@var{256}} will encode scalar AVX instructions
  with 256bit vector length.
  
  @option{-mavxscalar=@var{256}} will encode scalar AVX instructions
  with 256bit vector length.
  
+WARNING: Don't use this for production code - due to CPU errata the
+resulting code may not work on certain models.
+
+@cindex @samp{-mvexwig=} option, i386
+@cindex @samp{-mvexwig=} option, x86-64
+@item -mvexwig=@var{0}
+@itemx -mvexwig=@var{1}
+These options control how the assembler should encode VEX.W-ignored (WIG)
+VEX instructions.  @option{-mvexwig=@var{0}} will encode WIG VEX
+instructions with vex.w = 0, which is the default.
+@option{-mvexwig=@var{1}} will encode WIG VEX instructions with
+vex.w = 1.
+
+WARNING: Don't use this for production code - due to CPU errata the
+resulting code may not work on certain models.
+
  @cindex @samp{-mevexlig=} option, i386
  @cindex @samp{-mevexlig=} option, x86-64
  @item -mevexlig=@var{128}
  @cindex @samp{-mevexlig=} option, i386
  @cindex @samp{-mevexlig=} option, x86-64
  @item -mevexlig=@var{128}
@@ -351,9 +390,10 @@ with default visibility can be preempted.  The resulting code is
  slightly bigger.  This option only affects the handling of branch
  instructions.
  
  slightly bigger.  This option only affects the handling of branch
  instructions.
  
+@cindex @samp{-mbig-obj} option, i386
  @cindex @samp{-mbig-obj} option, x86-64
  @item -mbig-obj
  @cindex @samp{-mbig-obj} option, x86-64
  @item -mbig-obj
-On x86-64 PE/COFF target this option forces the use of big object file
+On PE/COFF target this option forces the use of big object file
  format, which allows more than 32768 sections.
  
  @cindex @samp{-momit-lock-prefix=} option, i386
  format, which allows more than 32768 sections.
  
  @cindex @samp{-momit-lock-prefix=} option, i386
@@ -392,6 +432,99 @@ R_X86_64_REX_GOTPCRELX, in 64-bit mode.
  relocations.  The default can be controlled by a configure option
  @option{--enable-x86-relax-relocations}.
  
  relocations.  The default can be controlled by a configure option
  @option{--enable-x86-relax-relocations}.
  
+@cindex @samp{-malign-branch-boundary=} option, i386
+@cindex @samp{-malign-branch-boundary=} option, x86-64
+@item -malign-branch-boundary=@var{NUM}
+This option controls how the assembler should align branches with segment
+prefixes or NOP.  @var{NUM} must be a power of 2.  It should be 0 or
+no less than 16.  Branches will be aligned within @var{NUM} byte
+boundary.  @option{-malign-branch-boundary=0}, which is the default,
+doesn't align branches.
+
+@cindex @samp{-malign-branch=} option, i386
+@cindex @samp{-malign-branch=} option, x86-64
+@item -malign-branch=@var{TYPE}[+@var{TYPE}...]
+This option specifies types of branches to align.  @var{TYPE} is a
+combination of @samp{jcc}, which aligns conditional jumps,
+@samp{fused}, which aligns fused conditional jumps, @samp{jmp},
+which aligns unconditional jumps, @samp{call}, which aligns calls,
+@samp{ret}, which aligns rets, @samp{indirect}, which aligns indirect
+jumps and calls.  The default is @option{-malign-branch=jcc+fused+jmp}.
+
+@cindex @samp{-malign-branch-prefix-size=} option, i386
+@cindex @samp{-malign-branch-prefix-size=} option, x86-64
+@item -malign-branch-prefix-size=@var{NUM}
+This option specifies the maximum number of prefixes on an instruction
+to align branches.  @var{NUM} should be between 0 and 5.  The default
+@var{NUM} is 5.
+
+@cindex @samp{-mbranches-within-32B-boundaries} option, i386
+@cindex @samp{-mbranches-within-32B-boundaries} option, x86-64
+@item -mbranches-within-32B-boundaries
+This option aligns conditional jumps, fused conditional jumps and
+unconditional jumps within 32 byte boundary with up to 5 segment prefixes
+on an instruction.  It is equivalent to
+@option{-malign-branch-boundary=32}
+@option{-malign-branch=jcc+fused+jmp}
+@option{-malign-branch-prefix-size=5}.
+The default doesn't align branches.
+
+@cindex @samp{-mlfence-after-load=} option, i386
+@cindex @samp{-mlfence-after-load=} option, x86-64
+@item -mlfence-after-load=@var{no}
+@itemx -mlfence-after-load=@var{yes}
+These options control whether the assembler should generate lfence
+after load instructions.  @option{-mlfence-after-load=@var{yes}} will
+generate lfence.  @option{-mlfence-after-load=@var{no}} will not generate
+lfence, which is the default.
+
+@cindex @samp{-mlfence-before-indirect-branch=} option, i386
+@cindex @samp{-mlfence-before-indirect-branch=} option, x86-64
+@item -mlfence-before-indirect-branch=@var{none}
+@itemx -mlfence-before-indirect-branch=@var{all}
+@itemx -mlfence-before-indirect-branch=@var{register}
+@itemx -mlfence-before-indirect-branch=@var{memory}
+These options control whether the assembler should generate lfence
+before indirect near branch instructions.
+@option{-mlfence-before-indirect-branch=@var{all}} will generate lfence
+before indirect near branch via register and issue a warning before
+indirect near branch via memory.
+It also implicitly sets @option{-mlfence-before-ret=@var{shl}} when
+there's no explicit @option{-mlfence-before-ret=}.
+@option{-mlfence-before-indirect-branch=@var{register}} will generate
+lfence before indirect near branch via register.
+@option{-mlfence-before-indirect-branch=@var{memory}} will issue a
+warning before indirect near branch via memory.
+@option{-mlfence-before-indirect-branch=@var{none}} will not generate
+lfence nor issue warning, which is the default.  Note that lfence won't
+be generated before indirect near branch via register with
+@option{-mlfence-after-load=@var{yes}} since lfence will be generated
+after loading branch target register.
+
+@cindex @samp{-mlfence-before-ret=} option, i386
+@cindex @samp{-mlfence-before-ret=} option, x86-64
+@item -mlfence-before-ret=@var{none}
+@itemx -mlfence-before-ret=@var{shl}
+@itemx -mlfence-before-ret=@var{or}
+@itemx -mlfence-before-ret=@var{yes}
+@itemx -mlfence-before-ret=@var{not}
+These options control whether the assembler should generate lfence
+before ret.  @option{-mlfence-before-ret=@var{or}} will generate
+an @samp{or} instruction with lfence.
+@option{-mlfence-before-ret=@var{shl/yes}} will generate a @samp{shl} instruction
+with lfence.  @option{-mlfence-before-ret=@var{not}} will generate a @samp{not}
+instruction with lfence.  @option{-mlfence-before-ret=@var{none}} will not
+generate lfence, which is the default.
+
+@cindex @samp{-mx86-used-note=} option, i386
+@cindex @samp{-mx86-used-note=} option, x86-64
+@item -mx86-used-note=@var{no}
+@itemx -mx86-used-note=@var{yes}
+These options control whether the assembler should generate
+GNU_PROPERTY_X86_ISA_1_USED and GNU_PROPERTY_X86_FEATURE_2_USED
+GNU property notes.  The default can be controlled by the
+@option{--enable-x86-used-note} configure option.
+
  @cindex @samp{-mevexrcig=} option, i386
  @cindex @samp{-mevexrcig=} option, x86-64
  @item -mevexrcig=@var{rne}
  @cindex @samp{-mevexrcig=} option, i386
  @cindex @samp{-mevexrcig=} option, x86-64
  @item -mevexrcig=@var{rne}
@@ -410,7 +543,8 @@ with 01, 10 and 11 RC bits, respectively.
  @item -mamd64
  @itemx -mintel64
  This option specifies that the assembler should accept only AMD64 or
  @item -mamd64
  @itemx -mintel64
  This option specifies that the assembler should accept only AMD64 or
-Intel64 ISA in 64-bit mode.  The default is to accept both.
+Intel64 ISA in 64-bit mode.  The default is to accept common, Intel64
+only and AMD64 ISAs.
  
  @cindex @samp{-O0} option, i386
  @cindex @samp{-O0} option, x86-64
  
  @cindex @samp{-O0} option, i386
  @cindex @samp{-O0} option, x86-64
@@ -426,10 +560,22 @@ Intel64 ISA in 64-bit mode.  The default is to accept both.
  Optimize instruction encoding with smaller instruction size.  @samp{-O}
  and @samp{-O1} encode 64-bit register load instructions with 64-bit
  immediate as 32-bit register load instructions with 31-bit or 32-bits
  Optimize instruction encoding with smaller instruction size.  @samp{-O}
  and @samp{-O1} encode 64-bit register load instructions with 64-bit
  immediate as 32-bit register load instructions with 31-bit or 32-bits
-immediates and encode 64-bit register clearing instructions with 32-bit
-register clearing instructions.  @samp{-O2} includes @samp{-O1}
-optimization plus encodes 256-bit and 512-bit vector register clearing
-instructions with 128-bit vector register clearing instructions.
+immediates, encode 64-bit register clearing instructions with 32-bit
+register clearing instructions, encode 256-bit/512-bit VEX/EVEX vector
+register clearing instructions with 128-bit VEX vector register
+clearing instructions, encode 128-bit/256-bit EVEX vector
+register load/store instructions with VEX vector register load/store
+instructions, and encode 128-bit/256-bit EVEX packed integer logical
+instructions with 128-bit/256-bit VEX packed integer logical instructions.
+
+@samp{-O2} includes @samp{-O1} optimization plus encodes
+256-bit/512-bit EVEX vector register clearing instructions with 128-bit
+EVEX vector register clearing instructions.  In 64-bit mode VEX encoded
+instructions with commutative source operands will also have their
+source operands swapped if this allows using the 2-byte VEX prefix form
+instead of the 3-byte one.  Certain forms of AND as well as OR with the
+same (register) operand specified twice will also be changed to TEST.
+
  @samp{-Os} includes @samp{-O2} optimization plus encodes 16-bit, 32-bit
  and 64-bit register tests with immediate as 8-bit register test with
  immediate.  @samp{-O0} turns off this optimization.
  @samp{-Os} includes @samp{-O2} optimization plus encodes 16-bit, 32-bit
  and 64-bit register tests with immediate as 8-bit register test with
  immediate.  @samp{-O0} turns off this optimization.
@@ -466,6 +612,12 @@ The directive is intended to be used for data which requires a large
  amount of space, and it is only available for ELF based x86_64
  targets.
  
  amount of space, and it is only available for ELF based x86_64
  targets.
  
+@cindex @code{value} directive
+@item .value @var{expression} [, @var{expression}]
+This directive behaves in the same way as the @code{.short} directive,
+taking a series of comma separated expressions and storing them as
+two-byte wide values into the current section.
+
  @c FIXME: Document other x86 specific directives ?  Eg: .code16gcc,
  
  @end table
  @c FIXME: Document other x86 specific directives ?  Eg: .code16gcc,
  
  @end table
@@ -548,11 +700,16 @@ instruction, do @emph{not} have reversed order.  @ref{i386-Bugs}.
  In AT&T syntax the size of memory operands is determined from the last
  character of the instruction mnemonic.  Mnemonic suffixes of @samp{b},
  @samp{w}, @samp{l} and @samp{q} specify byte (8-bit), word (16-bit), long
  In AT&T syntax the size of memory operands is determined from the last
  character of the instruction mnemonic.  Mnemonic suffixes of @samp{b},
  @samp{w}, @samp{l} and @samp{q} specify byte (8-bit), word (16-bit), long
-(32-bit) and quadruple word (64-bit) memory references.  Intel syntax accomplishes
-this by prefixing memory operands (@emph{not} the instruction mnemonics) with
-@samp{byte ptr}, @samp{word ptr}, @samp{dword ptr} and @samp{qword ptr}.  Thus,
-Intel @samp{mov al, byte ptr @var{foo}} is @samp{movb @var{foo}, %al} in AT&T
-syntax.
+(32-bit) and quadruple word (64-bit) memory references.  Mnemonic suffixes
+of @samp{x}, @samp{y} and @samp{z} specify xmm (128-bit vector), ymm
+(256-bit vector) and zmm (512-bit vector) memory references, only when there's
+no other way to disambiguate an instruction.  Intel syntax accomplishes this by
+prefixing memory operands (@emph{not} the instruction mnemonics) with
+@samp{byte ptr}, @samp{word ptr}, @samp{dword ptr}, @samp{qword ptr},
+@samp{xmmword ptr}, @samp{ymmword ptr} and @samp{zmmword ptr}.  Thus, Intel
+syntax @samp{mov al, byte ptr @var{foo}} is @samp{movb @var{foo}, %al} in AT&T
+syntax.  In Intel syntax, @samp{fword ptr}, @samp{tbyte ptr} and
+@samp{oword ptr} specify 48-bit, 80-bit and 128-bit memory references.
  
  In 64-bit code, @samp{movabs} can be used to encode the @samp{mov}
  instruction with the 64-bit displacement or immediate operand.
  
  In 64-bit code, @samp{movabs} can be used to encode the @samp{mov}
  instruction with the 64-bit displacement or immediate operand.
@@ -592,7 +749,7 @@ line is treated as a comment, but in this case the line can also be a
  logical line number directive (@pxref{Comments}) or a preprocessor
  control command (@pxref{Preprocessing}).
  
  logical line number directive (@pxref{Comments}) or a preprocessor
  control command (@pxref{Preprocessing}).
  
-If the @option{--divide} command line option has not been specified
+If the @option{--divide} command-line option has not been specified
  then the @samp{/} character appearing anywhere on a line also
  introduces a line comment.
  
  then the @samp{/} character appearing anywhere on a line also
  introduces a line comment.
  
@@ -623,21 +780,30 @@ assembler which assumes that a missing mnemonic suffix implies long
  operand size.  (This incompatibility does not affect compiler output
  since compilers always explicitly specify the mnemonic suffix.)
  
  operand size.  (This incompatibility does not affect compiler output
  since compilers always explicitly specify the mnemonic suffix.)
  
-Almost all instructions have the same names in AT&T and Intel format.
-There are a few exceptions.  The sign extend and zero extend
-instructions need two sizes to specify them.  They need a size to
-sign/zero extend @emph{from} and a size to zero extend @emph{to}.  This
-is accomplished by using two instruction mnemonic suffixes in AT&T
-syntax.  Base names for sign extend and zero extend are
-@samp{movs@dots{}} and @samp{movz@dots{}} in AT&T syntax (@samp{movsx}
-and @samp{movzx} in Intel syntax).  The instruction mnemonic suffixes
-are tacked on to this base name, the @emph{from} suffix before the
-@emph{to} suffix.  Thus, @samp{movsbl %al, %edx} is AT&T syntax for
-``move sign extend @emph{from} %al @emph{to} %edx.''  Possible suffixes,
-thus, are @samp{bl} (from byte to long), @samp{bw} (from byte to word),
-@samp{wl} (from word to long), @samp{bq} (from byte to quadruple word),
-@samp{wq} (from word to quadruple word), and @samp{lq} (from long to
-quadruple word).
+When there is no sizing suffix and no (suitable) register operands to
+deduce the size of memory operands, with a few exceptions and where long
+operand size is possible in the first place, operand size will default
+to long in 32- and 64-bit modes.  Similarly it will default to short in
+16-bit mode.  Noteworthy exceptions are
+
+@itemize @bullet
+@item
+Instructions with an implicit on-stack operand as well as branches,
+which default to quad in 64-bit mode.
+
+@item
+Sign- and zero-extending moves, which default to byte size source
+operands.
+
+@item
+Floating point insns with integer operands, which default to short (for
+perhaps historical reasons).
+
+@item
+CRC32 with a 64-bit destination, which defaults to a quad source
+operand.
+
+@end itemize
  
  @cindex encoding options, i386
  @cindex encoding options, x86-64
  
  @cindex encoding options, i386
  @cindex encoding options, x86-64
@@ -658,10 +824,10 @@ Different encoding options can be specified via pseudo prefixes:
  @samp{@{store@}} -- prefer store-form instruction.
  
  @item
  @samp{@{store@}} -- prefer store-form instruction.
  
  @item
-@samp{@{vex2@}} -- prefer 2-byte VEX prefix for VEX instruction.
+@samp{@{vex@}} --  encode with VEX prefix.
  
  @item
  
  @item
-@samp{@{vex3@}} -- prefer 3-byte VEX prefix for VEX instruction.
+@samp{@{vex3@}} -- encode with 3-byte VEX prefix.
  
  @item
  @samp{@{evex@}} --  encode with EVEX prefix.
  
  @item
  @samp{@{evex@}} --  encode with EVEX prefix.
@@ -708,6 +874,59 @@ are called @samp{cbtw}, @samp{cwtl}, @samp{cwtd}, @samp{cltd}, @samp{cltq}, and
  @samp{cqto} in AT&T naming.  @code{@value{AS}} accepts either naming for these
  instructions.
  
  @samp{cqto} in AT&T naming.  @code{@value{AS}} accepts either naming for these
  instructions.
  
+@cindex extension instructions, i386
+@cindex i386 extension instructions
+@cindex extension instructions, x86-64
+@cindex x86-64 extension instructions
+The Intel-syntax extension instructions
+
+@itemize @bullet
+@item
+@samp{movsx} --- sign-extend @samp{reg8/mem8} to @samp{reg16}.
+
+@item
+@samp{movsx} --- sign-extend @samp{reg8/mem8} to @samp{reg32}.
+
+@item
+@samp{movsx} --- sign-extend @samp{reg8/mem8} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movsx} --- sign-extend @samp{reg16/mem16} to @samp{reg32}.
+
+@item
+@samp{movsx} --- sign-extend @samp{reg16/mem16} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movsxd} --- sign-extend @samp{reg32/mem32} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movzx} --- zero-extend @samp{reg8/mem8} to @samp{reg16}.
+
+@item
+@samp{movzx} --- zero-extend @samp{reg8/mem8} to @samp{reg32}.
+
+@item
+@samp{movzx} --- zero-extend @samp{reg8/mem8} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movzx} --- zero-extend @samp{reg16/mem16} to @samp{reg32}.
+
+@item
+@samp{movzx} --- zero-extend @samp{reg16/mem16} to @samp{reg64}
+(x86-64 only).
+@end itemize
+
+@noindent
+are called @samp{movsbw/movsxb/movsx}, @samp{movsbl/movsxb/movsx},
+@samp{movsbq/movsxb/movsx}, @samp{movswl/movsxw}, @samp{movswq/movsxw},
+@samp{movslq/movsxl}, @samp{movzbw/movzxb/movzx},
+@samp{movzbl/movzxb/movzx}, @samp{movzbq/movzxb/movzx},
+@samp{movzwl/movzxw} and @samp{movzwq/movzxw} in AT&T syntax.
+
  @cindex jump instructions, i386
  @cindex call instructions, i386
  @cindex jump instructions, x86-64
  @cindex jump instructions, i386
  @cindex call instructions, i386
  @cindex jump instructions, x86-64
@@ -731,6 +950,12 @@ Several x87 instructions, @samp{fadd}, @samp{fdiv}, @samp{fdivp},
  assembler with different mnemonics from those in Intel IA32 specification.
  @code{@value{GCC}} generates those instructions with AT&T mnemonic.
  
  assembler with different mnemonics from those in Intel IA32 specification.
  @code{@value{GCC}} generates those instructions with AT&T mnemonic.
  
+@itemize @bullet
+@item @samp{movslq} with AT&T mnemonic only accepts 64-bit destination
+register.  @samp{movsxd} should be used to encode 16-bit or 32-bit
+destination register with both AT&T and Intel mnemonics.
+@end itemize
+
  @node i386-Regs
  @section Register Naming
  
  @node i386-Regs
  @section Register Naming
  
@@ -1252,18 +1477,20 @@ supported on the CPU specified.  The choices for @var{cpu_type} are:
  @item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
  @item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
  @item @samp{prescott} @tab @samp{nocona} @tab @samp{core} @tab @samp{core2}
  @item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
  @item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
  @item @samp{prescott} @tab @samp{nocona} @tab @samp{core} @tab @samp{core2}
-@item @samp{corei7} @tab @samp{l1om} @tab @samp{k1om} @samp{iamcu}
+@item @samp{corei7} @tab @samp{l1om} @tab @samp{k1om} @tab @samp{iamcu}
  @item @samp{k6} @tab @samp{k6_2} @tab @samp{athlon} @tab @samp{k8}
  @item @samp{amdfam10} @tab @samp{bdver1} @tab @samp{bdver2} @tab @samp{bdver3}
  @item @samp{k6} @tab @samp{k6_2} @tab @samp{athlon} @tab @samp{k8}
  @item @samp{amdfam10} @tab @samp{bdver1} @tab @samp{bdver2} @tab @samp{bdver3}
-@item @samp{bdver4} @tab @samp{znver1} @tab @samp{btver1} @tab @samp{btver2}
-@item @samp{generic32} @tab @samp{generic64}
-@item @samp{.mmx} @tab @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3}
+@item @samp{bdver4} @tab @samp{znver1} @tab @samp{znver2} @tab @samp{btver1}
+@item @samp{btver2} @tab @samp{generic32} @tab @samp{generic64}
+@item @samp{.cmov} @tab @samp{.fxsr} @tab @samp{.mmx}
+@item @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3} @tab @samp{.sse4a}
  @item @samp{.ssse3} @tab @samp{.sse4.1} @tab @samp{.sse4.2} @tab @samp{.sse4}
  @item @samp{.avx} @tab @samp{.vmx} @tab @samp{.smx} @tab @samp{.ept}
  @item @samp{.clflush} @tab @samp{.movbe} @tab @samp{.xsave} @tab @samp{.xsaveopt}
  @item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
  @item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
  @item @samp{.ssse3} @tab @samp{.sse4.1} @tab @samp{.sse4.2} @tab @samp{.sse4}
  @item @samp{.avx} @tab @samp{.vmx} @tab @samp{.smx} @tab @samp{.ept}
  @item @samp{.clflush} @tab @samp{.movbe} @tab @samp{.xsave} @tab @samp{.xsaveopt}
  @item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
  @item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
-@item @samp{.lzcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc} @tab @samp{.hle}
+@item @samp{.lzcnt} @tab @samp{.popcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc}
+@item @samp{.hle}
  @item @samp{.rtm} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
  @item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
  @item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}
  @item @samp{.rtm} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
  @item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
  @item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}
@@ -1271,14 +1498,16 @@ supported on the CPU specified.  The choices for @var{cpu_type} are:
  @item @samp{.avx512vl} @tab @samp{.avx512bw} @tab @samp{.avx512dq} @tab @samp{.avx512ifma}
  @item @samp{.avx512vbmi} @tab @samp{.avx512_4fmaps} @tab @samp{.avx512_4vnniw}
  @item @samp{.avx512_vpopcntdq} @tab @samp{.avx512_vbmi2} @tab @samp{.avx512_vnni}
  @item @samp{.avx512vl} @tab @samp{.avx512bw} @tab @samp{.avx512dq} @tab @samp{.avx512ifma}
  @item @samp{.avx512vbmi} @tab @samp{.avx512_4fmaps} @tab @samp{.avx512_4vnniw}
  @item @samp{.avx512_vpopcntdq} @tab @samp{.avx512_vbmi2} @tab @samp{.avx512_vnni}
-@item @samp{.avx512_bitalg}
+@item @samp{.avx512_bitalg} @tab @samp{.avx512_bf16} @tab @samp{.avx512_vp2intersect}
  @item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @item @samp{.ibt}
  @item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @item @samp{.ibt}
-@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg}
+@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
  @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
  @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
+@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
  @item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
  @item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
-@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme} @tab @samp{.abm}
+@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
  @item @samp{.lwp} @tab @samp{.fma4} @tab @samp{.xop} @tab @samp{.cx16}
  @item @samp{.lwp} @tab @samp{.fma4} @tab @samp{.xop} @tab @samp{.cx16}
-@item @samp{.padlock} @tab @samp{.clzero} @tab @samp{.mwaitx}
+@item @samp{.padlock} @tab @samp{.clzero} @tab @samp{.mwaitx} @tab @samp{.rdpru}
+@item @samp{.mcommit} @tab @samp{.sev_es}
  @end multitable
  
  Apart from the warning, there are only two other effects on
  @end multitable
  
  Apart from the warning, there are only two other effects on
@@ -1310,6 +1539,29 @@ For example
   .arch i8086,nojumps
  @end smallexample
  
   .arch i8086,nojumps
  @end smallexample
  
+@node i386-ISA
+@section AMD64 ISA vs. Intel64 ISA
+
+There are some discrepancies between AMD64 and Intel64 ISAs.
+
+@itemize @bullet
+@item For @samp{movsxd} with 16-bit destination register, AMD64
+supports 32-bit source operand and Intel64 supports 16-bit source
+operand.
+
+@item For far branches (with explicit memory operand), both ISAs support
+32- and 16-bit operand size.  Intel64 additionally supports 64-bit
+operand size, encoded as @samp{ljmpq} and @samp{lcallq} in AT&T syntax
+and with an explicit @samp{tbyte ptr} operand size specifier in Intel
+syntax.
+
+@item @samp{lfs}, @samp{lgs}, and @samp{lss} similarly allow for 16-
+and 32-bit operand size (32- and 48-bit memory operand) in both ISAs,
+while Intel64 additionally supports 64-bit operand size (80-bit memory
+operands).
+
+@end itemize
+
  @node i386-Bugs
  @section AT&T Syntax bugs
  
  @node i386-Bugs
  @section AT&T Syntax bugs