From dda20ea73d958584e6b162b34dd421582c52ddbb Mon Sep 17 00:00:00 2001 From: David Truby Date: Wed, 30 Oct 2024 15:05:18 +0000 Subject: [PATCH 01/69] [flang] Add fir-lsp-server (#114059) This patch adds a fir-lsp-server tool for editor support for editing fir files, using the existing MLIR lsp server support. See https://mlir.llvm.org/docs/Tools/MLIRLSP/ for more information. --- flang/tools/CMakeLists.txt | 1 + flang/tools/fir-lsp-server/CMakeLists.txt | 17 +++++++++++++++++ flang/tools/fir-lsp-server/fir-lsp-server.cpp | 9 +++++++++ 3 files changed, 27 insertions(+) create mode 100644 flang/tools/fir-lsp-server/CMakeLists.txt create mode 100644 flang/tools/fir-lsp-server/fir-lsp-server.cpp diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 337545ae0d4d7..1d2d2c608faf9 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -12,3 +12,4 @@ add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) add_subdirectory(fir-opt) +add_subdirectory(fir-lsp-server) diff --git a/flang/tools/fir-lsp-server/CMakeLists.txt b/flang/tools/fir-lsp-server/CMakeLists.txt new file mode 100644 index 0000000000000..ff0ced6693b97 --- /dev/null +++ b/flang/tools/fir-lsp-server/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + AsmParser + ) + +add_flang_tool(fir-lsp-server fir-lsp-server.cpp) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS) +target_link_libraries(fir-lsp-server PRIVATE + CUFDialect + FIRDialect + HLFIRDialect + MLIRLspServerLib + ${dialect_libs} + ${extension_libs}) diff --git a/flang/tools/fir-lsp-server/fir-lsp-server.cpp b/flang/tools/fir-lsp-server/fir-lsp-server.cpp new file mode 100644 index 0000000000000..8b724e292b5ab --- /dev/null +++ b/flang/tools/fir-lsp-server/fir-lsp-server.cpp @@ -0,0 +1,9 @@ +#include "mlir/Tools/mlir-lsp-server/MlirLspServerMain.h" +#include 
"flang/Optimizer/Support/InitFIR.h" + +int main(int argc, char **argv) { + mlir::DialectRegistry registry; + fir::support::registerNonCodegenDialects(registry); + fir::support::addFIRExtensions(registry); + return mlir::failed(mlir::MlirLspServerMain(argc, argv, registry)); +} From 4015e18d6713cdceb0640e77b2d5aa3b256d5ddb Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 30 Oct 2024 15:14:14 +0000 Subject: [PATCH 02/69] [AArch64] Add assembly/disassembly for BFMOP4{A,S} (non-widening) instructions (#113342) The new instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 5 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 37 +++ .../bfmop4as-non-widening-diagnostics.s | 220 ++++++++++++++++++ .../MC/AArch64/SME2p2/bfmop4as-non-widening.s | 178 ++++++++++++++ 4 files changed, 440 insertions(+) create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index b716529428894..e78cd7146df2a 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1047,3 +1047,8 @@ let Predicates = [HasSME2p2, HasSMEF8F32] in { defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">; } } + +let Predicates = [HasSME2p2, HasSMEB16B16] in { + defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a">; + defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s">; +} diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index e7c90b0ed14e0..b31bea712a76d 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5417,3 +5417,40 @@ multiclass sme2_fmop4a_fp8_fp32_4way { // Multiple vectors def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 
1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>; } + +class sme2_bf16_fp16_quarter_tile_outer_product + : I<(outs TileOp16:$ZAda), + (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bit ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000001001; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-1} = 0b100; + let Inst{0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_bfmop4as_non_widening { + // Single vectors + def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening-diagnostics.s new file mode 100644 index 0000000000000..231d4cd9967a4 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening-diagnostics.s @@ -0,0 +1,220 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-b16b16 < %s 2>&1 | FileCheck %s + +// BFMOP4A + +// Single vectors + +bfmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4a za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector 
list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4a za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4a za0.h, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 
consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, 
z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4s za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4s za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4s 
za0.h, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening.s new file mode 100644 index 0000000000000..b98bb99def056 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening.s @@ -0,0 +1,178 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// BFMOP4A + +// Single vectors + +bfmop4a za0.h, z0.h, z16.h // 10000001-00100000-00000000-00001000 +// CHECK-INST: bfmop4a za0.h, z0.h, z16.h +// CHECK-ENCODING: [0x08,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200008 + +bfmop4a za1.h, z12.h, z24.h // 10000001-00101000-00000001-10001001 +// CHECK-INST: bfmop4a za1.h, z12.h, z24.h +// CHECK-ENCODING: [0x89,0x01,0x28,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280189 + +bfmop4a za1.h, z14.h, z30.h // 10000001-00101110-00000001-11001001 +// CHECK-INST: bfmop4a za1.h, z14.h, z30.h +// CHECK-ENCODING: [0xc9,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e01c9 + +// Single and multiple vectors + +bfmop4a za0.h, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00001000 +// CHECK-INST: bfmop4a za0.h, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x08,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300008 + +bfmop4a za1.h, z12.h, {z24.h-z25.h} // 10000001-00111000-00000001-10001001 +// CHECK-INST: bfmop4a za1.h, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x89,0x01,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380189 + +bfmop4a za1.h, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11001001 +// CHECK-INST: bfmop4a za1.h, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e01c9 + +// Multiple and single vectors + +bfmop4a za0.h, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00001000 +// CHECK-INST: bfmop4a 
za0.h, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x08,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200208 + +bfmop4a za1.h, {z12.h-z13.h}, z24.h // 10000001-00101000-00000011-10001001 +// CHECK-INST: bfmop4a za1.h, { z12.h, z13.h }, z24.h +// CHECK-ENCODING: [0x89,0x03,0x28,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280389 + +bfmop4a za1.h, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11001001 +// CHECK-INST: bfmop4a za1.h, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xc9,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e03c9 + +// Multiple vectors + +bfmop4a za0.h, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00110000-00000010-00001000 +// CHECK-INST: bfmop4a za0.h, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x08,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300208 + +bfmop4a za1.h, {z12.h-z13.h}, {z24.h-z25.h} // 10000001-00111000-00000011-10001001 +// CHECK-INST: bfmop4a za1.h, { z12.h, z13.h }, { z24.h, z25.h } +// CHECK-ENCODING: [0x89,0x03,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380389 + +bfmop4a za1.h, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00111110-00000011-11001001 +// CHECK-INST: bfmop4a za1.h, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e03c9 + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.h, z0.h, z16.h // 10000001-00100000-00000000-00011000 +// CHECK-INST: bfmop4s za0.h, z0.h, z16.h +// CHECK-ENCODING: [0x18,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200018 + +bfmop4s za1.h, z12.h, z24.h // 10000001-00101000-00000001-10011001 +// CHECK-INST: bfmop4s za1.h, z12.h, z24.h +// CHECK-ENCODING: [0x99,0x01,0x28,0x81] +// CHECK-ERROR: instruction 
requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280199 + +bfmop4s za1.h, z14.h, z30.h // 10000001-00101110-00000001-11011001 +// CHECK-INST: bfmop4s za1.h, z14.h, z30.h +// CHECK-ENCODING: [0xd9,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e01d9 + +// Single and multiple vectors + +bfmop4s za0.h, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00011000 +// CHECK-INST: bfmop4s za0.h, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x18,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300018 + +bfmop4s za1.h, z12.h, {z24.h-z25.h} // 10000001-00111000-00000001-10011001 +// CHECK-INST: bfmop4s za1.h, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x99,0x01,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380199 + +bfmop4s za1.h, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11011001 +// CHECK-INST: bfmop4s za1.h, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e01d9 + +// Multiple and single vectors + +bfmop4s za0.h, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00011000 +// CHECK-INST: bfmop4s za0.h, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x18,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200218 + +bfmop4s za1.h, {z12.h-z13.h}, z24.h // 10000001-00101000-00000011-10011001 +// CHECK-INST: bfmop4s za1.h, { z12.h, z13.h }, z24.h +// CHECK-ENCODING: [0x99,0x03,0x28,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280399 + +bfmop4s za1.h, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11011001 +// CHECK-INST: bfmop4s za1.h, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xd9,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e03d9 + +// Multiple vectors + +bfmop4s za0.h, {z0.h-z1.h}, {z16.h-z17.h} // 
10000001-00110000-00000010-00011000 +// CHECK-INST: bfmop4s za0.h, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x18,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300218 + +bfmop4s za1.h, {z12.h-z13.h}, {z24.h-z25.h} // 10000001-00111000-00000011-10011001 +// CHECK-INST: bfmop4s za1.h, { z12.h, z13.h }, { z24.h, z25.h } +// CHECK-ENCODING: [0x99,0x03,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380399 + +bfmop4s za1.h, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00111110-00000011-11011001 +// CHECK-INST: bfmop4s za1.h, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e03d9 From f405c683ba929fcd0bcaa435ca2fbe4bb221d04b Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 30 Oct 2024 11:19:23 -0400 Subject: [PATCH 03/69] [OPT] Search whole BB for convergence token. (#112728) The spec for llvm.experimental.convergence.entry says that it must be in the entry block for a function, and must precede any other convergent operation. It does not have to be the first instruction in the entry block. Inlining assumes that the call to llvm.experimental.convergence.entry will be the first instruction after any phi instructions. This commit modifies inlining to search the entire block for the call. 
--- llvm/lib/Transforms/Utils/InlineFunction.cpp | 38 ++++++++++--------- .../Transforms/Inline/convergence-inline.ll | 24 ++++++++++++ 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 4ad426285ce2f..a27cb4dd219c3 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -181,9 +181,21 @@ namespace { } } }; - } // end anonymous namespace +static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) { + auto *I = BB.getFirstNonPHI(); + while (I) { + if (auto *IntrinsicCall = dyn_cast<ConvergenceControlInst>(I)) { + if (IntrinsicCall->isEntry()) { + return IntrinsicCall; + } + } + I = I->getNextNode(); + } + return nullptr; +} + /// Get or create a target for the branch from ResumeInsts. BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; @@ -2496,15 +2508,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // fully implements convergence control tokens, there is no mixing of // controlled and uncontrolled convergent operations in the whole program. 
if (CB.isConvergent()) { - auto *I = CalledFunc->getEntryBlock().getFirstNonPHI(); - if (auto *IntrinsicCall = dyn_cast(I)) { - if (IntrinsicCall->getIntrinsicID() == - Intrinsic::experimental_convergence_entry) { - if (!ConvergenceControlToken) { - return InlineResult::failure( - "convergent call needs convergencectrl operand"); - } - } + if (!ConvergenceControlToken && + getConvergenceEntry(CalledFunc->getEntryBlock())) { + return InlineResult::failure( + "convergent call needs convergencectrl operand"); } } @@ -2795,13 +2802,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } if (ConvergenceControlToken) { - auto *I = FirstNewBlock->getFirstNonPHI(); - if (auto *IntrinsicCall = dyn_cast(I)) { - if (IntrinsicCall->getIntrinsicID() == - Intrinsic::experimental_convergence_entry) { - IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); - IntrinsicCall->eraseFromParent(); - } + IntrinsicInst *IntrinsicCall = getConvergenceEntry(*FirstNewBlock); + if (IntrinsicCall) { + IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); + IntrinsicCall->eraseFromParent(); } } diff --git a/llvm/test/Transforms/Inline/convergence-inline.ll b/llvm/test/Transforms/Inline/convergence-inline.ll index 8c67e6a59b7db..4996a2376be63 100644 --- a/llvm/test/Transforms/Inline/convergence-inline.ll +++ b/llvm/test/Transforms/Inline/convergence-inline.ll @@ -185,6 +185,30 @@ define void @test_two_calls() convergent { ret void } +define i32 @token_not_first(i32 %x) convergent alwaysinline { +; CHECK-LABEL: @token_not_first( +; CHECK-NEXT: {{%.*}} = alloca ptr, align 8 +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: [[Y:%.*]] = call i32 @g(i32 [[X:%.*]]) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: ret i32 [[Y]] +; + %p = alloca ptr, align 8 + %token = call token @llvm.experimental.convergence.entry() + %y = call i32 @g(i32 %x) [ "convergencectrl"(token %token) ] + ret i32 %y +} + +define void 
@test_token_not_first() convergent { +; CHECK-LABEL: @test_token_not_first( +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: {{%.*}} = call i32 @g(i32 23) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: ret void +; + %token = call token @llvm.experimental.convergence.entry() + %x = call i32 @token_not_first(i32 23) [ "convergencectrl"(token %token) ] + ret void +} + declare void @f(i32) convergent declare i32 @g(i32) convergent From e989e31a47375a7d556269eead538dc65edcef2b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 30 Oct 2024 15:21:18 +0000 Subject: [PATCH 04/69] [RISCV] Mark f16/bf16 lrint and llrint cost as invalid (#113924) We currently can't lower scalable vector lrint and llrint nodes for bf16 and f16, even with zvfh, and will crash. Mark the cost as invalid for now to prevent the vectorizers from emitting them. Note that we can actually lower fixed-length vectors fine by scalarizing them, but we were still undercosting these too so I've also included them. I presume there's an opportunity to improve the codegen later on. --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 9 ++- llvm/test/Analysis/CostModel/RISCV/fround.ll | 72 +++++++++---------- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 395baa5f1aab9..988cb194cd603 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -948,12 +948,17 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { auto *RetTy = ICA.getReturnType(); switch (ICA.getID()) { + case Intrinsic::lrint: + case Intrinsic::llrint: + // We can't currently lower half or bfloat vector lrint/llrint. 
+ if (auto *VecTy = dyn_cast(ICA.getArgTypes()[0]); + VecTy && VecTy->getElementType()->is16bitFPTy()) + return InstructionCost::getInvalid(); + [[fallthrough]]; case Intrinsic::ceil: case Intrinsic::floor: case Intrinsic::trunc: case Intrinsic::rint: - case Intrinsic::lrint: - case Intrinsic::llrint: case Intrinsic::round: case Intrinsic::roundeven: { // These all use the same code. diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll index c6826760a45be..b096498355081 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -425,15 +425,15 @@ define void @rint_fp16() { define void @lrint() { ; CHECK-LABEL: 'lrint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.bf16(bfloat undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call 
@llvm.lrint.nxv16i64.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16bf16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef) @@ -490,15 +490,15 @@ define void @lrint() { define void @lrint_fp16() { ; CHECK-LABEL: 'lrint_fp16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f16(half undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i64 @llvm.lrint.f16(half undef) @@ -517,15 +517,15 @@ define void @lrint_fp16() { define void 
@llrint() { ; CHECK-LABEL: 'llrint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.bf16(bfloat undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call 
@llvm.llrint.nxv2i64.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16bf16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef) @@ -582,15 +582,15 @@ define void @llrint() { define void @llrint_fp16() { ; CHECK-LABEL: 'llrint_fp16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f16(half undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8f16( 
undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i64 @llvm.llrint.f16(half undef) From 7d1e283bd3b4440aea9ac375ca51e2ee6b0e86f5 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 30 Oct 2024 09:24:10 -0600 Subject: [PATCH 05/69] [Clang][Sema] Ignore previous partial specializations of member templates explicitly specialized for an implicitly instantiated class template specialization (#113464) Consider the following: ``` template struct A { template struct B { static constexpr int x = 0; // #1 }; template struct B { static constexpr int x = 1; // #2 }; }; template<> template struct A::B { static constexpr int x = 2; // #3 }; static_assert(A::B::y == 0); // uses #1 static_assert(A::B::y == 1); // uses #2 static_assert(A::B::y == 2); // uses #3 static_assert(A::B::y == 2); // uses #3 ``` According to 
[temp.spec.partial.member] p2: > If the primary member template is explicitly specialized for a given (implicit) specialization of the enclosing class template, the partial specializations of the member template are ignored for this specialization of the enclosing class template. If a partial specialization of the member template is explicitly specialized for a given (implicit) specialization of the enclosing class template, the primary member template and its other partial specializations are still considered for this specialization of the enclosing class template. The example above fails to compile because we currently don't implement [temp.spec.partial.member] p2. This patch implements the wording, fixing #51051. --- clang/docs/ReleaseNotes.rst | 2 + clang/lib/Sema/SemaTemplate.cpp | 16 +++- clang/lib/Sema/SemaTemplateInstantiate.cpp | 38 +++++++-- .../temp.spec.partial.member/p2.cpp | 85 +++++++++++++++++++ 4 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 clang/test/CXX/temp/temp.decls/temp.spec.partial/temp.spec.partial.member/p2.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6085352dfafe6..1a179e63f902f 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -574,6 +574,8 @@ Bug Fixes to C++ Support (#GH95854). - Fixed an assertion failure when evaluating an invalid expression in an array initializer. (#GH112140) - Fixed an assertion failure in range calculations for conditional throw expressions. (#GH111854) +- Clang now correctly ignores previous partial specializations of member templates explicitly specialized for + an implicitly instantiated class template specialization. 
(#GH51051) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index fcf05798d9c70..4503e60cff8c2 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -4381,8 +4381,20 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc, SmallVector PartialSpecs; Template->getPartialSpecializations(PartialSpecs); - for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) { - VarTemplatePartialSpecializationDecl *Partial = PartialSpecs[I]; + for (VarTemplatePartialSpecializationDecl *Partial : PartialSpecs) { + // C++ [temp.spec.partial.member]p2: + // If the primary member template is explicitly specialized for a given + // (implicit) specialization of the enclosing class template, the partial + // specializations of the member template are ignored for this + // specialization of the enclosing class template. If a partial + // specialization of the member template is explicitly specialized for a + // given (implicit) specialization of the enclosing class template, the + // primary member template and its other partial specializations are still + // considered for this specialization of the enclosing class template. 
+ if (Template->getMostRecentDecl()->isMemberSpecialization() && + !Partial->getMostRecentDecl()->isMemberSpecialization()) + continue; + TemplateDeductionInfo Info(FailedCandidates.getLocation()); if (TemplateDeductionResult Result = diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index dea97bfce532c..b63063813f1b5 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -3978,11 +3978,24 @@ bool Sema::usesPartialOrExplicitSpecialization( return true; SmallVector PartialSpecs; - ClassTemplateSpec->getSpecializedTemplate() - ->getPartialSpecializations(PartialSpecs); - for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) { + ClassTemplateDecl *CTD = ClassTemplateSpec->getSpecializedTemplate(); + CTD->getPartialSpecializations(PartialSpecs); + for (ClassTemplatePartialSpecializationDecl *CTPSD : PartialSpecs) { + // C++ [temp.spec.partial.member]p2: + // If the primary member template is explicitly specialized for a given + // (implicit) specialization of the enclosing class template, the partial + // specializations of the member template are ignored for this + // specialization of the enclosing class template. If a partial + // specialization of the member template is explicitly specialized for a + // given (implicit) specialization of the enclosing class template, the + // primary member template and its other partial specializations are still + // considered for this specialization of the enclosing class template. 
+ if (CTD->getMostRecentDecl()->isMemberSpecialization() && + !CTPSD->getMostRecentDecl()->isMemberSpecialization()) + continue; + TemplateDeductionInfo Info(Loc); - if (DeduceTemplateArguments(PartialSpecs[I], + if (DeduceTemplateArguments(CTPSD, ClassTemplateSpec->getTemplateArgs().asArray(), Info) == TemplateDeductionResult::Success) return true; @@ -4025,8 +4038,21 @@ getPatternForClassTemplateSpecialization( SmallVector PartialSpecs; Template->getPartialSpecializations(PartialSpecs); TemplateSpecCandidateSet FailedCandidates(PointOfInstantiation); - for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) { - ClassTemplatePartialSpecializationDecl *Partial = PartialSpecs[I]; + for (ClassTemplatePartialSpecializationDecl *Partial : PartialSpecs) { + // C++ [temp.spec.partial.member]p2: + // If the primary member template is explicitly specialized for a given + // (implicit) specialization of the enclosing class template, the + // partial specializations of the member template are ignored for this + // specialization of the enclosing class template. If a partial + // specialization of the member template is explicitly specialized for a + // given (implicit) specialization of the enclosing class template, the + // primary member template and its other partial specializations are + // still considered for this specialization of the enclosing class + // template. 
+ if (Template->getMostRecentDecl()->isMemberSpecialization() && + !Partial->getMostRecentDecl()->isMemberSpecialization()) + continue; + TemplateDeductionInfo Info(FailedCandidates.getLocation()); if (TemplateDeductionResult Result = S.DeduceTemplateArguments( Partial, ClassTemplateSpec->getTemplateArgs().asArray(), Info); diff --git a/clang/test/CXX/temp/temp.decls/temp.spec.partial/temp.spec.partial.member/p2.cpp b/clang/test/CXX/temp/temp.decls/temp.spec.partial/temp.spec.partial.member/p2.cpp new file mode 100644 index 0000000000000..7969b7efe597f --- /dev/null +++ b/clang/test/CXX/temp/temp.decls/temp.spec.partial/temp.spec.partial.member/p2.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +template +struct A { + template + struct B { + static constexpr int y = 0; + }; + + template + struct B { + static constexpr int y = 1; + }; + + template + static constexpr int x = 0; + + template + static constexpr int x = 1; +}; + +template +template +struct A::B { + static constexpr int y = 2; +}; + +template +template +constexpr int A::x = 2; + +static_assert(A::B::y == 0); +static_assert(A::B::y == 1); +static_assert(A::B::y == 2); +static_assert(A::x == 0); +static_assert(A::x == 1); +static_assert(A::x == 2); + +template<> +template +struct A::B { + static constexpr int y = 3; +}; + +template<> +template +struct A::B { + static constexpr int y = 4; +}; + +template<> +template +struct A::B { + static constexpr int y = 5; +}; + +template<> +template +constexpr int A::x = 3; + +template<> +template +constexpr int A::x = 4; + +template<> +template +constexpr int A::x = 5; + +static_assert(A::B::y == 3); +static_assert(A::B::y == 3); +static_assert(A::B::y == 3); +static_assert(A::B::y == 4); +static_assert(A::x == 3); +static_assert(A::x == 3); +static_assert(A::x == 3); +static_assert(A::x == 4); +static_assert(A::B::y == 0); +static_assert(A::B::y == 1); +static_assert(A::B::y == 2); +static_assert(A::B::y == 
5); +static_assert(A::x == 0); +static_assert(A::x == 1); +static_assert(A::x == 2); +static_assert(A::x == 5); From 475e736bb5eeea8ec70aca51d1a3d98179c69530 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Wed, 30 Oct 2024 08:28:22 -0700 Subject: [PATCH 06/69] [MemProf] Include <ctime> to avoid MSVC failure (#114246) My change in bb3915149a7c9b1660db9caebfc96343352e8454 added a call to std::time which worked generally as there must be some transitive include of <ctime>. However, I saw one MSVC bot failure: InstrProfWriter.cpp(202): error C2039: 'time': is not a member of 'std' from https://lab.llvm.org/buildbot/#/builders/63/builds/2325. Presumably explicitly including <ctime> should fix this. --- llvm/lib/ProfileData/InstrProfWriter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index f09241681b92a..0ab9f942a0858 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include From 72b115301d1c0d56f40f5030bb8d16f422ac211b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Wed, 30 Oct 2024 16:34:01 +0100 Subject: [PATCH 07/69] [GlobalISel] Import samesign flag (#113090) Credits: https://github.com/llvm/llvm-project/pull/111419 --- .../CodeGen/GlobalISel/GenericMachineInstrs.h | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 3 +- llvm/include/llvm/CodeGen/MachineInstr.h | 1 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 9 +-- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 5 +- llvm/lib/CodeGen/MIRParser/MILexer.cpp | 1 + llvm/lib/CodeGen/MIRParser/MILexer.h | 1 + llvm/lib/CodeGen/MIRParser/MIParser.cpp | 5 +- llvm/lib/CodeGen/MIRPrinter.cpp | 2 + llvm/lib/CodeGen/MachineInstr.cpp | 7 ++ .../GlobalISel/irtranslator-samesign.ll | 69 +++++++++++++++++++ llvm/test/CodeGen/MIR/icmp-flags.mir | 50
++++++++++++++ 12 files changed, 144 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll create mode 100644 llvm/test/CodeGen/MIR/icmp-flags.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index b6309a9ea0ec7..cd7ebcf54c9e1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -28,7 +28,7 @@ namespace llvm { class GenericMachineInstr : public MachineInstr { constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap | IsExact | Disjoint | NonNeg | - FmNoNans | FmNoInfs; + FmNoNans | FmNoInfs | SameSign; public: GenericMachineInstr() = delete; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index c41e74ec7ebdc..a38dd34a17097 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1266,7 +1266,8 @@ class MachineIRBuilder { /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + std::optional Flags = std::nullopt); /// Build and insert a \p Res = G_FCMP \p Pred\p Op0, \p Op1 /// diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 3605173247463..ead6bbe1d5f64 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -119,6 +119,7 @@ class MachineInstr Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs. NoUSWrap = 1 << 20, // Instruction supports geps // no unsigned signed wrap. + SameSign = 1 << 21 // Both operands have the same sign. 
}; private: diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 5381dce58f9e6..a87754389cc8e 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -340,20 +340,17 @@ bool IRTranslator::translateCompare(const User &U, Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); CmpInst::Predicate Pred = CI->getPredicate(); + uint32_t Flags = MachineInstr::copyFlagsFromInstruction(*CI); if (CmpInst::isIntPredicate(Pred)) - MIRBuilder.buildICmp(Pred, Res, Op0, Op1); + MIRBuilder.buildICmp(Pred, Res, Op0, Op1, Flags); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); - else { - uint32_t Flags = 0; - if (CI) - Flags = MachineInstr::copyFlagsFromInstruction(*CI); + else MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags); - } return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 59f2fc633f5de..15b9164247846 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -898,8 +898,9 @@ MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op, MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { - return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}); + const SrcOp &Op1, + std::optional Flags) { + return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 5a3806ce57335..1c450b05f49e9 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp 
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -216,6 +216,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("exact", MIToken::kw_exact) .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) + .Case("samesign", MIToken::kw_samesign) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 3931da3eaae1d..d7cd06759cfbb 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -77,6 +77,7 @@ struct MIToken { kw_unpredictable, kw_nneg, kw_disjoint, + kw_samesign, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 45847b5830da6..059814c70f828 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1476,7 +1476,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_noconvergent) || Token.is(MIToken::kw_unpredictable) || Token.is(MIToken::kw_nneg) || - Token.is(MIToken::kw_disjoint)) { + Token.is(MIToken::kw_disjoint) || + Token.is(MIToken::kw_samesign)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1513,6 +1514,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NonNeg; if (Token.is(MIToken::kw_disjoint)) Flags |= MachineInstr::Disjoint; + if (Token.is(MIToken::kw_samesign)) + Flags |= MachineInstr::SameSign; lex(); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index a015cd3c2a55f..658bbe0e577e5 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -837,6 +837,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "disjoint "; if 
(MI.getFlag(MachineInstr::NoUSWrap)) OS << "nusw "; + if (MI.getFlag(MachineInstr::SameSign)) + OS << "samesign "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index c1bd0bb5b7162..941861da5c569 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -596,6 +596,11 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::Disjoint; } + // Copy the samesign flag. + if (const ICmpInst *ICmp = dyn_cast(&I)) + if (ICmp->hasSameSign()) + MIFlags |= MachineInstr::MIFlag::SameSign; + // Copy the exact flag. if (const PossiblyExactOperator *PE = dyn_cast(&I)) if (PE->isExact()) @@ -1770,6 +1775,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nneg "; if (getFlag(MachineInstr::Disjoint)) OS << "disjoint "; + if (getFlag(MachineInstr::SameSign)) + OS << "samesign "; // Print the opcode name. if (TII) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll new file mode 100644 index 0000000000000..0173f92c98220 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator < %s | FileCheck %s + + +define <2 x i1> @call_icmp_samesign_vector(<2 x i32> %a, <2 x i32> %b) { + ; CHECK-LABEL: name: call_icmp_samesign_vector + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: %2:_(<2 x s1>) = samesign G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT %2(<2 x s1>) + ; CHECK-NEXT: 
$d0 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 +entry: + %result = icmp samesign ult <2 x i32> %a, %b + ret <2 x i1> %result +} + +define <2 x i1> @call_icmp_vector(<2 x i32> %a, <2 x i32> %b) { + ; CHECK-LABEL: name: call_icmp_vector + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s1>) = G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[ICMP]](<2 x s1>) + ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 +entry: + %result = icmp ult <2 x i32> %a, %b + ret <2 x i1> %result +} + +define i1 @call_icmp(i32 %a) { + ; CHECK-LABEL: name: call_icmp + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %result = icmp ult i32 %a, 3 + ret i1 %result +} + +define i1 @call_icmp_samesign(i32 %a) { + ; CHECK-LABEL: name: call_icmp_samesign + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %2:_(s1) = samesign G_ICMP intpred(ult), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %2(s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %result = icmp samesign ult i32 %a, 3 
+ ret i1 %result +} diff --git a/llvm/test/CodeGen/MIR/icmp-flags.mir b/llvm/test/CodeGen/MIR/icmp-flags.mir new file mode 100644 index 0000000000000..3c03a7aaa9bcd --- /dev/null +++ b/llvm/test/CodeGen/MIR/icmp-flags.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=none -verify-machineinstrs %s -o - | FileCheck %s + + +--- +name: icmp_samesign +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: icmp_samesign + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %cmp:_(s1) = samesign G_ICMP intpred(eq), %y(s32), %y + ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $w0 = COPY %zext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s1) = samesign G_ICMP intpred(eq), %y:_(s32), %y:_ + %zext:_(s32) = G_ZEXT %cmp:_(s1) + $w0 = COPY %zext + RET_ReallyLR implicit $w0 + + +... 
+--- +name: icmp_differentsign +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: icmp_differentsign + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %y(s32), %y + ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $w0 = COPY %zext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s1) = G_ICMP intpred(eq), %y:_(s32), %y:_ + %zext:_(s32) = G_ZEXT %cmp:_(s1) + $w0 = COPY %zext + RET_ReallyLR implicit $w0 +--- From 4c03d373f043700e3c8feeea8855125c718de31b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Wed, 30 Oct 2024 16:40:36 +0100 Subject: [PATCH 08/69] [SPIR-V] Fix broken test due to G_BITCAST (#114242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit G_BITCAST emission in the SPIR-V backend is not accepted by the verifier. Disabling verifier for impacted tests until https://github.com/llvm/llvm-project/pull/114216 is merged.
Signed-off-by: Nathan Gauër --- .../hlsl-intrinsics/group_memory_barrier_with_group_sync.ll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll index 6955411a0e4e9..e314361fe4181 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpMemoryModel Logical GLSL450 From 45f420e34476d2963e13b2f916be1e5a73ec95ae Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 30 Oct 2024 08:41:30 -0700 Subject: [PATCH 09/69] [lldb] Use Py_InitializeFromConfig with Python >= 3.8 (NFC) (#114112) This fixes the deprecation warning for Py_SetPythonHome, which was deprecated in Python 3.11. With this patch, when building against Python 3.8 or later, we now use Py_InitializeFromConfig instead. 
Fixes #113475 --- .../Python/ScriptInterpreterPython.cpp | 68 +++++++++++-------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 7cc38da6a6a94..6158083a98280 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -92,7 +92,38 @@ namespace { struct InitializePythonRAII { public: InitializePythonRAII() { - InitializePythonHome(); +#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) || (PY_MAJOR_VERSION > 3) + PyConfig config; + PyConfig_InitPythonConfig(&config); +#endif + +#if LLDB_EMBED_PYTHON_HOME + typedef wchar_t *str_type; + static str_type g_python_home = []() -> str_type { + const char *lldb_python_home = LLDB_PYTHON_HOME; + const char *absolute_python_home = nullptr; + llvm::SmallString<64> path; + if (llvm::sys::path::is_absolute(lldb_python_home)) { + absolute_python_home = lldb_python_home; + } else { + FileSpec spec = HostInfo::GetShlibDir(); + if (!spec) + return nullptr; + spec.GetPath(path); + llvm::sys::path::append(path, lldb_python_home); + absolute_python_home = path.c_str(); + } + size_t size = 0; + return Py_DecodeLocale(absolute_python_home, &size); + }(); + if (g_python_home != nullptr) { +#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) || (PY_MAJOR_VERSION > 3) + PyConfig_SetBytesString(&config, &config.home, g_python_home); +#else + Py_SetPythonHome(g_python_home); +#endif + } +#endif // The table of built-in modules can only be extended before Python is // initialized. 
@@ -117,15 +148,22 @@ struct InitializePythonRAII { PyImport_AppendInittab("_lldb", LLDBSwigPyInit); } +#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) || (PY_MAJOR_VERSION > 3) + config.install_signal_handlers = 0; + Py_InitializeFromConfig(&config); + PyConfig_Clear(&config); + InitializeThreadsPrivate(); +#else // Python < 3.2 and Python >= 3.2 reversed the ordering requirements for // calling `Py_Initialize` and `PyEval_InitThreads`. < 3.2 requires that you // call `PyEval_InitThreads` first, and >= 3.2 requires that you call it last. -#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 2) || (PY_MAJOR_VERSION > 3) +#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 2) Py_InitializeEx(0); InitializeThreadsPrivate(); #else InitializeThreadsPrivate(); Py_InitializeEx(0); +#endif #endif } @@ -142,32 +180,6 @@ struct InitializePythonRAII { } private: - void InitializePythonHome() { -#if LLDB_EMBED_PYTHON_HOME - typedef wchar_t *str_type; - static str_type g_python_home = []() -> str_type { - const char *lldb_python_home = LLDB_PYTHON_HOME; - const char *absolute_python_home = nullptr; - llvm::SmallString<64> path; - if (llvm::sys::path::is_absolute(lldb_python_home)) { - absolute_python_home = lldb_python_home; - } else { - FileSpec spec = HostInfo::GetShlibDir(); - if (!spec) - return nullptr; - spec.GetPath(path); - llvm::sys::path::append(path, lldb_python_home); - absolute_python_home = path.c_str(); - } - size_t size = 0; - return Py_DecodeLocale(absolute_python_home, &size); - }(); - if (g_python_home != nullptr) { - Py_SetPythonHome(g_python_home); - } -#endif - } - void InitializeThreadsPrivate() { // Since Python 3.7 `Py_Initialize` calls `PyEval_InitThreads` inside itself, // so there is no way to determine whether the embedded interpreter From eac2c182c6f852fc187af9952250a43d6fb17b28 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Wed, 30 Oct 2024 08:59:08 -0700 Subject: [PATCH 10/69] Remove a flaky and unnecessary check (#114251) The order in which 
the libraries appear is not always stable and even if it were, this test is not the right place to check for this. --- .../libcxx/initializerlist/TestInitializerList.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py index 0919eb3c5dd81..93d5392830b50 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py @@ -40,5 +40,3 @@ def test(self): "frame variable ils", substrs=['[4] = "surprise it is a long string!! yay!!"'], ) - - self.expect("image list", substrs=self.getLibcPlusPlusLibs()) From 4b028773b2c977eb8494a39e4b3fb2f114d1e2b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Wed, 30 Oct 2024 17:03:17 +0100 Subject: [PATCH 11/69] Revert "[GlobalISel] Import samesign flag" (#114256) Reverts llvm/llvm-project#113090 --- .../CodeGen/GlobalISel/GenericMachineInstrs.h | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 3 +- llvm/include/llvm/CodeGen/MachineInstr.h | 1 - llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 9 ++- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 5 +- llvm/lib/CodeGen/MIRParser/MILexer.cpp | 1 - llvm/lib/CodeGen/MIRParser/MILexer.h | 1 - llvm/lib/CodeGen/MIRParser/MIParser.cpp | 5 +- llvm/lib/CodeGen/MIRPrinter.cpp | 2 - llvm/lib/CodeGen/MachineInstr.cpp | 7 -- .../GlobalISel/irtranslator-samesign.ll | 69 ------------------- llvm/test/CodeGen/MIR/icmp-flags.mir | 50 -------------- 12 files changed, 11 insertions(+), 144 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll delete mode 100644 llvm/test/CodeGen/MIR/icmp-flags.mir diff --git 
a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index cd7ebcf54c9e1..b6309a9ea0ec7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -28,7 +28,7 @@ namespace llvm { class GenericMachineInstr : public MachineInstr { constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap | IsExact | Disjoint | NonNeg | - FmNoNans | FmNoInfs | SameSign; + FmNoNans | FmNoInfs; public: GenericMachineInstr() = delete; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index a38dd34a17097..c41e74ec7ebdc 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1266,8 +1266,7 @@ class MachineIRBuilder { /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, - const SrcOp &Op0, const SrcOp &Op1, - std::optional Flags = std::nullopt); + const SrcOp &Op0, const SrcOp &Op1); /// Build and insert a \p Res = G_FCMP \p Pred\p Op0, \p Op1 /// diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index ead6bbe1d5f64..3605173247463 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -119,7 +119,6 @@ class MachineInstr Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs. NoUSWrap = 1 << 20, // Instruction supports geps // no unsigned signed wrap. - SameSign = 1 << 21 // Both operands have the same sign. 
}; private: diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a87754389cc8e..5381dce58f9e6 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -340,17 +340,20 @@ bool IRTranslator::translateCompare(const User &U, Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); CmpInst::Predicate Pred = CI->getPredicate(); - uint32_t Flags = MachineInstr::copyFlagsFromInstruction(*CI); if (CmpInst::isIntPredicate(Pred)) - MIRBuilder.buildICmp(Pred, Res, Op0, Op1, Flags); + MIRBuilder.buildICmp(Pred, Res, Op0, Op1); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); - else + else { + uint32_t Flags = 0; + if (CI) + Flags = MachineInstr::copyFlagsFromInstruction(*CI); MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags); + } return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 15b9164247846..59f2fc633f5de 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -898,9 +898,8 @@ MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op, MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1, - std::optional Flags) { - return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}, Flags); + const SrcOp &Op1) { + return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}); } MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 1c450b05f49e9..5a3806ce57335 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp 
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -216,7 +216,6 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("exact", MIToken::kw_exact) .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) - .Case("samesign", MIToken::kw_samesign) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index d7cd06759cfbb..3931da3eaae1d 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -77,7 +77,6 @@ struct MIToken { kw_unpredictable, kw_nneg, kw_disjoint, - kw_samesign, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 059814c70f828..45847b5830da6 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1476,8 +1476,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_noconvergent) || Token.is(MIToken::kw_unpredictable) || Token.is(MIToken::kw_nneg) || - Token.is(MIToken::kw_disjoint) || - Token.is(MIToken::kw_samesign)) { + Token.is(MIToken::kw_disjoint)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1514,8 +1513,6 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NonNeg; if (Token.is(MIToken::kw_disjoint)) Flags |= MachineInstr::Disjoint; - if (Token.is(MIToken::kw_samesign)) - Flags |= MachineInstr::SameSign; lex(); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 658bbe0e577e5..a015cd3c2a55f 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -837,8 +837,6 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "disjoint "; if 
(MI.getFlag(MachineInstr::NoUSWrap)) OS << "nusw "; - if (MI.getFlag(MachineInstr::SameSign)) - OS << "samesign "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 941861da5c569..c1bd0bb5b7162 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -596,11 +596,6 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::Disjoint; } - // Copy the samesign flag. - if (const ICmpInst *ICmp = dyn_cast(&I)) - if (ICmp->hasSameSign()) - MIFlags |= MachineInstr::MIFlag::SameSign; - // Copy the exact flag. if (const PossiblyExactOperator *PE = dyn_cast(&I)) if (PE->isExact()) @@ -1775,8 +1770,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nneg "; if (getFlag(MachineInstr::Disjoint)) OS << "disjoint "; - if (getFlag(MachineInstr::SameSign)) - OS << "samesign "; // Print the opcode name. if (TII) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll deleted file mode 100644 index 0173f92c98220..0000000000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-samesign.ll +++ /dev/null @@ -1,69 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator < %s | FileCheck %s - - -define <2 x i1> @call_icmp_samesign_vector(<2 x i32> %a, <2 x i32> %b) { - ; CHECK-LABEL: name: call_icmp_samesign_vector - ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $d0, $d1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 - ; CHECK-NEXT: %2:_(<2 x s1>) = samesign G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT %2(<2 x s1>) - ; 
CHECK-NEXT: $d0 = COPY [[ANYEXT]](<2 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $d0 -entry: - %result = icmp samesign ult <2 x i32> %a, %b - ret <2 x i1> %result -} - -define <2 x i1> @call_icmp_vector(<2 x i32> %a, <2 x i32> %b) { - ; CHECK-LABEL: name: call_icmp_vector - ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $d0, $d1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s1>) = G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[ICMP]](<2 x s1>) - ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<2 x s32>) - ; CHECK-NEXT: RET_ReallyLR implicit $d0 -entry: - %result = icmp ult <2 x i32> %a, %b - ret <2 x i1> %result -} - -define i1 @call_icmp(i32 %a) { - ; CHECK-LABEL: name: call_icmp - ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: RET_ReallyLR implicit $w0 -entry: - %result = icmp ult i32 %a, 3 - ret i1 %result -} - -define i1 @call_icmp_samesign(i32 %a) { - ; CHECK-LABEL: name: call_icmp_samesign - ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: %2:_(s1) = samesign G_ICMP intpred(ult), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %2(s1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: RET_ReallyLR implicit $w0 -entry: - %result = icmp samesign 
ult i32 %a, 3 - ret i1 %result -} diff --git a/llvm/test/CodeGen/MIR/icmp-flags.mir b/llvm/test/CodeGen/MIR/icmp-flags.mir deleted file mode 100644 index 3c03a7aaa9bcd..0000000000000 --- a/llvm/test/CodeGen/MIR/icmp-flags.mir +++ /dev/null @@ -1,50 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple aarch64 -run-pass=none -verify-machineinstrs %s -o - | FileCheck %s - - ---- -name: icmp_samesign -body: | - bb.0: - liveins: $w0, $w1 - - ; CHECK-LABEL: name: icmp_samesign - ; CHECK: liveins: $w0, $w1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %x:_(s32) = COPY $w0 - ; CHECK-NEXT: %y:_(s32) = COPY $w1 - ; CHECK-NEXT: %cmp:_(s1) = samesign G_ICMP intpred(eq), %y(s32), %y - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) - ; CHECK-NEXT: $w0 = COPY %zext(s32) - ; CHECK-NEXT: RET_ReallyLR implicit $w0 - %x:_(s32) = COPY $w0 - %y:_(s32) = COPY $w1 - %cmp:_(s1) = samesign G_ICMP intpred(eq), %y:_(s32), %y:_ - %zext:_(s32) = G_ZEXT %cmp:_(s1) - $w0 = COPY %zext - RET_ReallyLR implicit $w0 - - -... ---- -name: icmp_differentsign -body: | - bb.0: - liveins: $w0, $w1 - - ; CHECK-LABEL: name: icmp_differentsign - ; CHECK: liveins: $w0, $w1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %x:_(s32) = COPY $w0 - ; CHECK-NEXT: %y:_(s32) = COPY $w1 - ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %y(s32), %y - ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) - ; CHECK-NEXT: $w0 = COPY %zext(s32) - ; CHECK-NEXT: RET_ReallyLR implicit $w0 - %x:_(s32) = COPY $w0 - %y:_(s32) = COPY $w1 - %cmp:_(s1) = G_ICMP intpred(eq), %y:_(s32), %y:_ - %zext:_(s32) = G_ZEXT %cmp:_(s1) - $w0 = COPY %zext - RET_ReallyLR implicit $w0 ---- From 8ee5e19c879ee2d467aa0f1eb8f1d8ed34321496 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 30 Oct 2024 16:12:37 +0000 Subject: [PATCH 12/69] [AMDGPU] Fix @llvm.amdgcn.cs.chain with SGPR args not provably uniform (#114232) The correct behaviour is to insert a readfirstlane. 
SelectionDAG was already doing this in some cases, but not in the general case for chain calls. GlobalISel was already doing this for return values but not for arguments. --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 7 - llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 +- .../irtranslator-amdgcn-cs-chain.ll | 36 ++- .../GlobalISel/irtranslator-call-non-fixed.ll | 9 +- .../AMDGPU/GlobalISel/irtranslator-call.ll | 96 ++++--- .../test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll | 73 ++++++ .../isel-amdgcn-cs-chain-intrinsic-w32.ll | 236 ++++++++++++------ .../isel-amdgcn-cs-chain-intrinsic-w64.ll | 236 ++++++++++++------ 8 files changed, 497 insertions(+), 208 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 351e9f25e29cf..ab62e530a18d0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -230,13 +230,6 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler { return AddrReg.getReg(0); } - void assignValueToReg(Register ValVReg, Register PhysReg, - const CCValAssign &VA) override { - MIB.addUse(PhysReg, RegState::Implicit); - Register ExtReg = extendRegisterMin32(*this, ValVReg, VA); - MIRBuilder.buildCopy(PhysReg, ExtReg); - } - void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO, const CCValAssign &VA) override { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 52ca38aca5c77..059b415b75ff1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3855,10 +3855,14 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned ArgIdx = 0; for (auto [Reg, Val] : RegsToPass) { - if (ArgIdx++ >= NumSpecialInputs && !Val->isDivergent() && - TRI->isSGPRPhysReg(Reg)) { - // Speculatively insert a readfirstlane in case this is a uniform value in - // a VGPR. 
+ if (ArgIdx++ >= NumSpecialInputs && + (IsChainCallConv || !Val->isDivergent()) && TRI->isSGPRPhysReg(Reg)) { + // For chain calls, the inreg arguments are required to be + // uniform. Speculatively insert a readfirstlane in case we cannot prove + // they are uniform. + // + // For other calls, if an inreg argument is known to be uniform, + // speculatively insert a readfirstlane in case it is in a VGPR. // // FIXME: We need to execute this in a waterfall loop if it is a divergent // value, so let that continue to produce invalid code. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll index 3438cbdd476d8..4b0ff1b2eb470 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll @@ -24,9 +24,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) ;
GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -50,9 +53,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -82,9 +88,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX11-NEXT: 
[[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -108,9 +117,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 5effd24a75208..adad38de380d7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -50,7 +50,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -99,8 +100,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, 
implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index c3694158e7b97..96c3575e3190c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -942,7 +942,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -3984,8 +3985,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) + ; CHECK-NEXT: $sgpr5 = COPY 
[[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -5309,7 +5312,8 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5354,7 +5358,8 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5402,8 +5407,10 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY 
[[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5451,8 +5458,10 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5499,7 +5508,8 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5546,7 +5556,8 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5591,7 +5602,8 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5639,8 +5651,10 @@ define void @test_call_external_void_func_f64_inreg(double 
inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5685,7 +5699,9 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5738,8 +5754,12 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) ; CHECK-NEXT: 
[[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $sgpr1 = COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5787,8 +5807,12 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5836,8 +5860,10 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p0) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5885,8 +5911,10 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5931,7 +5959,9 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](p3) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY9]](p3) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5983,10 +6013,14 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p1>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; CHECK-NEXT: $sgpr2 = COPY 
[[INTRINSIC_CONVERGENT2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) + ; CHECK-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) @@ -6034,8 +6068,10 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p5>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll index 06f66e05d6747..8ca3e8255b634 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll @@ -501,6 +501,79 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { unreachable } +; Chain call with SGPR arguments that we cannot prove are uniform. 
+define amdgpu_cs void @cs_to_chain_nonuniform(<3 x i32> %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: cs_to_chain_nonuniform: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5 +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: cs_to_chain_nonuniform: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5 +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s0 +; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: cs_to_chain_nonuniform: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4 +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5 +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: 
s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: cs_to_chain_nonuniform: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5 +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s0 +; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) 
@llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { ; GISEL-GFX11-LABEL: chain_to_chain: ; GISEL-GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll index 469d0453b9dfb..75616d276754c 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll @@ -20,9 +20,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -30,8 +36,8 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = 
S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -44,20 +50,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; 
GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -136,9 +148,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: 
$sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -146,8 +164,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -160,20 +178,26 @@ define amdgpu_cs void 
@cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; 
GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -252,9 +276,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -262,8 +292,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg 
%sgpr, { i3 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -276,20 +306,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; 
GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -368,9 +404,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -378,8 +420,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit 
$vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -392,20 +434,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; 
GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -487,9 +535,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY 
[[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -510,15 +564,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit 
$sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect @@ -613,9 +673,15 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY6]] @@ -623,8 +689,8 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit 
$vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: non_imm_exec ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -638,20 +704,26 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY7]] - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; 
GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY9]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY12]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: non_imm_exec ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -734,9 +806,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY4]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 
= COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY7]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY8]] @@ -758,15 +836,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY8]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY9]] - ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: 
$sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY13]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, [[COPY2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll index 51c28a02b7f82..6deac9f55f320 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll @@ -20,9 +20,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -30,8 +36,8 @@ define amdgpu_cs_chain void 
@chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -44,20 +50,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; 
GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -136,9 +148,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; 
GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -146,8 +164,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: 
name: cs_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -160,20 +178,26 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], 
%subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -252,9 +276,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY 
[[COPY5]] @@ -262,8 +292,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -276,20 +306,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = 
COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -368,9 +404,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -378,8 +420,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: 
SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -392,20 +434,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) 
@callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -487,9 +535,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -510,15 +564,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, 
implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect @@ -615,9 +675,15 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -625,8 +691,8 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; 
GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY9]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY12]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: non_imm_exec ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -642,20 +708,26 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; 
GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY13]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: non_imm_exec ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -744,9 +816,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY4]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: 
[[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY7]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY8]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY9]] @@ -770,15 +848,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY8]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY9]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY10]] - ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY 
$sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] + ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY14]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, [[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec From 1ddea4fc13eb12ddb4e71f7675a496de6d517ec4 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Wed, 30 Oct 2024 15:50:09 +0000 Subject: [PATCH 13/69] [AArch64] NFC: Refactoring of the SubRegIndexes in AArch64RegisterInfo.td This is just moving some of the definitions around to all have them in the same place. This is preparation for a follow-up patch that redefines the SubRegIndexes to require less bits, and to define the top bits of registers. 
--- .../lib/Target/AArch64/AArch64RegisterInfo.td | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 8516ab2c7dd71..4117d74d10c1e 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -19,18 +19,24 @@ class AArch64Reg enc, string n, list subregs = [], } let Namespace = "AArch64" in { + // SubRegIndexes for GPR registers def sub_32 : SubRegIndex<32>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; + // SubRegIndexes for FPR/Vector registers def bsub : SubRegIndex<8>; def hsub : SubRegIndex<16>; def ssub : SubRegIndex<32>; def dsub : SubRegIndex<64>; - def sube32 : SubRegIndex<32>; - def subo32 : SubRegIndex<32>; - def sube64 : SubRegIndex<64>; - def subo64 : SubRegIndex<64>; - // SVE - def zsub : SubRegIndex<128>; + def zsub : SubRegIndex<128>; + // Note: Code depends on these having consecutive numbers + def zsub0 : SubRegIndex<128, -1>; + def zsub1 : SubRegIndex<128, -1>; + def zsub2 : SubRegIndex<128, -1>; + def zsub3 : SubRegIndex<128, -1>; // Note: Code depends on these having consecutive numbers def dsub0 : SubRegIndex<64>; def dsub1 : SubRegIndex<64>; @@ -41,7 +47,8 @@ let Namespace = "AArch64" in { def qsub1 : SubRegIndex<128>; def qsub2 : SubRegIndex<128>; def qsub3 : SubRegIndex<128>; - // Note: Code depends on these having consecutive numbers + + // SubRegIndexes for SME Matrix tiles def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits @@ -52,7 +59,11 @@ let Namespace = "AArch64" in { def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits - def psub : 
SubRegIndex<16>; + // SubRegIndexes for SVE Predicates + def psub : SubRegIndex<16>; + // Note: Code depends on these having consecutive numbers + def psub0 : SubRegIndex<16, -1>; + def psub1 : SubRegIndex<16, -1>; } let Namespace = "AArch64" in { @@ -1026,11 +1037,6 @@ def PNR16_p8to15 : PNRP8to15RegOp<"h", PNRAsmOp16_p8to15, 16, PNR_p8to15>; def PNR32_p8to15 : PNRP8to15RegOp<"s", PNRAsmOp32_p8to15, 32, PNR_p8to15>; def PNR64_p8to15 : PNRP8to15RegOp<"d", PNRAsmOp64_p8to15, 64, PNR_p8to15>; -let Namespace = "AArch64" in { - def psub0 : SubRegIndex<16, -1>; - def psub1 : SubRegIndex<16, -1>; -} - class PPRorPNRClass : RegisterClass< "AArch64", [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16, @@ -1123,8 +1129,7 @@ let EncoderMethod = "EncodeRegMul_MinMax<2, 0, 14>", } // end let EncoderMethod/DecoderMethod -//****************************************************************************** - +//===----------------------------------------------------------------------===// // SVE vector register classes class ZPRClass : RegisterClass<"AArch64", [nxv16i8, nxv8i16, nxv4i32, nxv2i64, @@ -1245,13 +1250,6 @@ def FPR32asZPR : FPRasZPROperand<32>; def FPR64asZPR : FPRasZPROperand<64>; def FPR128asZPR : FPRasZPROperand<128>; -let Namespace = "AArch64" in { - def zsub0 : SubRegIndex<128, -1>; - def zsub1 : SubRegIndex<128, -1>; - def zsub2 : SubRegIndex<128, -1>; - def zsub3 : SubRegIndex<128, -1>; -} - // Pairs, triples, and quads of SVE vector registers. def ZSeqPairs : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>; def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>; From 6bf4476ffb6bab661d59dee361ab845b2f68d9b1 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 30 Oct 2024 16:18:29 +0000 Subject: [PATCH 14/69] [AMDGPU] Fix @llvm.amdgcn.cs.chain with callee not provably uniform (#114200) The correct behavior is to insert a readfirstlane. 
This worked except for an inappropriate assertion in SITargetLowering::LowerCall. --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 - .../isel-amdgcn-cs-chain-intrinsic-w32.ll | 84 ++++++++++++++++++- .../isel-amdgcn-cs-chain-intrinsic-w64.ll | 83 +++++++++++++++++- 3 files changed, 165 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 059b415b75ff1..bddb6e822b81b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3897,9 +3897,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64)); } else { if (IsTailCall) { - assert(!Callee->isDivergent() && - "cannot tail call a divergent call target"); - // isEligibleForTailCallOptimization considered whether the call target is // divergent, but we may still end up with a uniform value in a VGPR. // Insert a readfirstlane just in case. diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll index 75616d276754c..c202476d85baf 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll @@ -6,7 +6,6 @@ declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) -declare void @llvm.amdgcn.cs.chain(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: chain_to_chain @@ -660,6 +659,89 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, unreachable } +; Indirect with callee that we cannot prove is uniform. 
+define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 %vgpr) { + ; GISEL-GFX11-LABEL: name: nonuniform_callee + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; GISEL-GFX10-LABEL: name: nonuniform_callee + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: nonuniform_callee + ; 
DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; DAGISEL-GFX10-LABEL: name: nonuniform_callee + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: 
[[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $vgpr8 + call void(ptr, i32, i32, i32, i32, ...) 
@llvm.amdgcn.cs.chain(ptr %callee, i32 -1, i32 inreg %sgpr, i32 %vgpr, i32 0) + unreachable +} + define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: non_imm_exec ; GISEL-GFX11: bb.1 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll index 6deac9f55f320..a456f549174c9 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll @@ -6,7 +6,6 @@ declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) -declare void @llvm.amdgcn.cs.chain(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: chain_to_chain @@ -660,6 +659,88 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, unreachable } +; Indirect with callee that we cannot prove is uniform. 
+define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 %vgpr) { + ; GISEL-GFX11-LABEL: name: nonuniform_callee + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; GISEL-GFX10-LABEL: name: nonuniform_callee + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: nonuniform_callee + ; 
DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; DAGISEL-GFX10-LABEL: name: nonuniform_callee + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: 
[[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $vgpr8 + call void(ptr, i64, i32, i32, i32, ...) 
@llvm.amdgcn.cs.chain(ptr %callee, i64 -1, i32 inreg %sgpr, i32 %vgpr, i32 0) + unreachable +} define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: non_imm_exec ; GISEL-GFX11: bb.1 (%ir-block.0): From a575e6e5ca1eb7b2ae4b906f9bf3be2ba20a80a0 Mon Sep 17 00:00:00 2001 From: jimingham Date: Wed, 30 Oct 2024 09:25:47 -0700 Subject: [PATCH 15/69] Fix a couple of tests that were incorrectly using configuration.dwarf_version (#114161) The tests were using the variable directly to get the dwarf version used for the test. That's only the overridden value, and won't be set if we're using the compiler default. I also put a comment by the variable to make sure people don't make the same mistake in the future. --- lldb/packages/Python/lldbsuite/test/configuration.py | 4 ++++ lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py | 7 +++++-- lldb/test/API/python_api/type/TestTypeList.py | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py index 1bacd74a968c3..bcc179346836d 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -46,6 +46,10 @@ make_path = None # The overriden dwarf verison. +# Don't use this to test the current compiler's +# DWARF version, as this won't be set if the +# version isn't overridden. +# Use lldbplatformutils.getDwarfVersion() instead. dwarf_version = 0 # Any overridden settings. 
diff --git a/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py b/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py index b5e8115160d20..41141164769ec 100644 --- a/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py +++ b/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py @@ -8,7 +8,7 @@ from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - +from lldbsuite.test import lldbplatformutil class NamespaceLookupTestCase(TestBase): def setUp(self): @@ -167,7 +167,10 @@ def test_scope_lookup_with_run_command(self): self.runToBkpt("continue") # FIXME: In DWARF 5 with dsyms, the ordering of functions is slightly # different, which also hits the same issues mentioned previously. - if configuration.dwarf_version <= 4 or self.getDebugInfo() == "dwarf": + if ( + int(lldbplatformutil.getDwarfVersion()) <= 4 + or self.getDebugInfo() == "dwarf" + ): self.expect_expr("func()", result_type="int", result_value="2") # Continue to BP_ns_scope at ns scope diff --git a/lldb/test/API/python_api/type/TestTypeList.py b/lldb/test/API/python_api/type/TestTypeList.py index bc4d00c17c555..09879276b44aa 100644 --- a/lldb/test/API/python_api/type/TestTypeList.py +++ b/lldb/test/API/python_api/type/TestTypeList.py @@ -6,7 +6,7 @@ from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - +from lldbsuite.test import lldbplatformutil class TypeAndTypeListTestCase(TestBase): def setUp(self): @@ -248,7 +248,7 @@ def test(self): self.assertEqual(myint_arr_element_type, myint_type) # Test enum methods. Requires DW_AT_enum_class which was added in Dwarf 4. 
- if configuration.dwarf_version >= 4: + if int(lldbplatformutil.getDwarfVersion()) >= 4: enum_type = target.FindFirstType("EnumType") self.assertTrue(enum_type) self.DebugSBType(enum_type) From 9cd30b1ef311edb0aa0527bead52e2fc490160ef Mon Sep 17 00:00:00 2001 From: jimingham Date: Wed, 30 Oct 2024 09:26:37 -0700 Subject: [PATCH 16/69] Fix the sort function for languages to have "strict weak ordering". (#114160) If you build libstdc++ with "debug" strictness, the test TestTypeLookup.py will assert. That's because we're calling llvm::sort (which redirects to std::sort) with a function that doesn't obey strict weak ordering. The error was that when the two languages were equal, we're sometimes returning `true` but strict weak ordering requires that always be false. This patch just makes the function behave properly. --- lldb/source/Commands/CommandObjectType.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp index f9786529bcdb1..e4c6e374446e8 100644 --- a/lldb/source/Commands/CommandObjectType.cpp +++ b/lldb/source/Commands/CommandObjectType.cpp @@ -2649,6 +2649,8 @@ class CommandObjectTypeLookup : public CommandObjectRaw { return false; LanguageType lt1 = lang1->GetLanguageType(); LanguageType lt2 = lang2->GetLanguageType(); + if (lt1 == lt2) + return false; if (lt1 == guessed_language) return true; // make the selected frame's language come first if (lt2 == guessed_language) From 7dbbd2b251412b7b0809aabe672f3f57f0805dbb Mon Sep 17 00:00:00 2001 From: jimingham Date: Wed, 30 Oct 2024 09:28:38 -0700 Subject: [PATCH 17/69] Fix call site breakpoint patch (#114158) This fixes the two test suite failures that I missed in the PR: https://github.com/llvm/llvm-project/pull/112939 One was a poorly written test case - it assumed that on connect to a gdb-remote with a running process, lldb MUST have fetched all the frame 0 registers. 
In fact, there's no need for it to do so (as the CallSite patch showed...) and if we don't need to we shouldn't. So I fixed the test to only expect a `g` packet AFTER calling read_registers. The other was a place where some code had used 0 when it meant LLDB_INVALID_LINE_NUMBER, which I had fixed but missed one place where it was still compared to 0. --- .../lldb/Breakpoint/BreakpointLocation.h | 36 ++++ lldb/include/lldb/Breakpoint/BreakpointSite.h | 5 + lldb/include/lldb/Core/Declaration.h | 6 +- lldb/include/lldb/Target/StopInfo.h | 12 ++ .../lldb/Target/ThreadPlanStepInRange.h | 4 +- lldb/source/Breakpoint/BreakpointLocation.cpp | 63 ++++++- lldb/source/Breakpoint/BreakpointResolver.cpp | 15 ++ lldb/source/Breakpoint/BreakpointSite.cpp | 17 ++ lldb/source/Core/Declaration.cpp | 5 +- lldb/source/Symbol/Block.cpp | 2 +- lldb/source/Symbol/CompileUnit.cpp | 113 +++++++++++- lldb/source/Target/StackFrameList.cpp | 171 ++++++------------ lldb/source/Target/StopInfo.cpp | 55 ++++++ lldb/source/Target/Thread.cpp | 8 + lldb/source/Target/ThreadPlanStepInRange.cpp | 24 ++- .../source/Target/ThreadPlanStepOverRange.cpp | 2 +- .../gdb_remote_client/TestGDBRemoteClient.py | 35 +++- .../inline-stepping/TestInlineStepping.py | 63 +++++++ .../inline-stepping/calling.cpp | 25 +++ 19 files changed, 525 insertions(+), 136 deletions(-) diff --git a/lldb/include/lldb/Breakpoint/BreakpointLocation.h b/lldb/include/lldb/Breakpoint/BreakpointLocation.h index cca00335bc3c6..3592291bb2d06 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointLocation.h +++ b/lldb/include/lldb/Breakpoint/BreakpointLocation.h @@ -11,10 +11,12 @@ #include #include +#include #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Breakpoint/StoppointHitCounter.h" #include "lldb/Core/Address.h" +#include "lldb/Symbol/LineEntry.h" #include "lldb/Utility/UserID.h" #include "lldb/lldb-private.h" @@ -282,6 +284,25 @@ class BreakpointLocation /// Returns the breakpoint location ID. 
lldb::break_id_t GetID() const { return m_loc_id; } + /// Set the line entry that should be shown to users for this location. + /// It is up to the caller to verify that this is a valid entry to show. + /// The current use of this is to distinguish among line entries from a + /// virtual inlined call stack that all share the same address. + /// The line entry must have the same start address as the address for this + /// location. + bool SetPreferredLineEntry(const LineEntry &line_entry) { + if (m_address == line_entry.range.GetBaseAddress()) { + m_preferred_line_entry = line_entry; + return true; + } + assert(0 && "Tried to set a preferred line entry with a different address"); + return false; + } + + const std::optional GetPreferredLineEntry() { + return m_preferred_line_entry; + } + protected: friend class BreakpointSite; friend class BreakpointLocationList; @@ -306,6 +327,16 @@ class BreakpointLocation /// If it returns false we should continue, otherwise stop. bool IgnoreCountShouldStop(); + /// If this location knows that the virtual stack frame it represents is + /// not frame 0, return the suggested stack frame instead. This will happen + /// when the location's address contains a "virtual inlined call stack" and + /// the breakpoint was set on a file & line that are not at the bottom of that + /// stack. For now we key off the "preferred line entry" - looking for that + /// in the blocks that start with the stop PC. + /// This version of the API doesn't take an "inlined" parameter because it + /// only changes frames in the inline stack. + std::optional GetSuggestedStackFrameIndex(); + private: void SwapLocation(lldb::BreakpointLocationSP swap_from); @@ -369,6 +400,11 @@ class BreakpointLocation lldb::break_id_t m_loc_id; ///< Breakpoint location ID. StoppointHitCounter m_hit_counter; ///< Number of times this breakpoint /// location has been hit. 
+ /// If this exists, use it to print the stop description rather than the + /// LineEntry m_address resolves to directly. Use this for instance when the + /// location was given somewhere in the virtual inlined call stack since the + /// Address always resolves to the lowest entry in the stack. + std::optional m_preferred_line_entry; void SetShouldResolveIndirectFunctions(bool do_resolve) { m_should_resolve_indirect_functions = do_resolve; diff --git a/lldb/include/lldb/Breakpoint/BreakpointSite.h b/lldb/include/lldb/Breakpoint/BreakpointSite.h index 17b76d51c1ae5..7b3f7be23639f 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointSite.h +++ b/lldb/include/lldb/Breakpoint/BreakpointSite.h @@ -170,6 +170,11 @@ class BreakpointSite : public std::enable_shared_from_this, /// \see lldb::DescriptionLevel void GetDescription(Stream *s, lldb::DescriptionLevel level); + // This runs through all the breakpoint locations owning this site and returns + // the greatest of their suggested stack frame indexes. This only handles + // inlined stack changes. + std::optional GetSuggestedStackFrameIndex(); + /// Tell whether a breakpoint has a location at this site. /// /// \param[in] bp_id diff --git a/lldb/include/lldb/Core/Declaration.h b/lldb/include/lldb/Core/Declaration.h index 4a0e9047b5469..c864b88c6b32a 100644 --- a/lldb/include/lldb/Core/Declaration.h +++ b/lldb/include/lldb/Core/Declaration.h @@ -84,10 +84,14 @@ class Declaration { /// \param[in] declaration /// The const Declaration object to compare with. /// + /// \param[in] full + /// Same meaning as Full in FileSpec::Equal. True means an empty + /// directory is not equal to a specified one, false means it is equal. + /// /// \return /// Returns \b true if \b declaration is at the same file and /// line, \b false otherwise. - bool FileAndLineEqual(const Declaration &declaration) const; + bool FileAndLineEqual(const Declaration &declaration, bool full) const; /// Dump a description of this object to a Stream. 
/// diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index fae90364deaf0..45beac129e86f 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -77,6 +77,18 @@ class StopInfo : public std::enable_shared_from_this { m_description.clear(); } + /// This gives the StopInfo a chance to suggest a stack frame to select. + /// Passing true for inlined_stack will request changes to the inlined + /// call stack. Passing false will request changes to the real stack + /// frame. The inlined stack gets adjusted before we call into the thread + /// plans so they can reason based on the correct values. The real stack + /// adjustment is handled after the frame recognizers get a chance to adjust + /// the frame. + virtual std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) { + return {}; + } + virtual bool IsValidForOperatingSystemThread(Thread &thread) { return true; } /// A Continue operation can result in a false stop event diff --git a/lldb/include/lldb/Target/ThreadPlanStepInRange.h b/lldb/include/lldb/Target/ThreadPlanStepInRange.h index f9ef87942a7c0..9da8370ef1c92 100644 --- a/lldb/include/lldb/Target/ThreadPlanStepInRange.h +++ b/lldb/include/lldb/Target/ThreadPlanStepInRange.h @@ -80,8 +80,8 @@ class ThreadPlanStepInRange : public ThreadPlanStepRange, bool m_step_past_prologue; // FIXME: For now hard-coded to true, we could put // a switch in for this if there's // demand for that. - bool m_virtual_step; // true if we've just done a "virtual step", i.e. just - // moved the inline stack depth. + LazyBool m_virtual_step; // true if we've just done a "virtual step", i.e. + // just moved the inline stack depth. 
ConstString m_step_into_target; ThreadPlanStepInRange(const ThreadPlanStepInRange &) = delete; const ThreadPlanStepInRange & diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index ad9057c8141e9..c7ea50407ae1c 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -508,8 +508,20 @@ void BreakpointLocation::GetDescription(Stream *s, s->PutCString("re-exported target = "); else s->PutCString("where = "); + + // If there's a preferred line entry for printing, use that. + bool show_function_info = true; + if (auto preferred = GetPreferredLineEntry()) { + sc.line_entry = *preferred; + // FIXME: We're going to get the function name wrong when the preferred + // line entry is not the lowest one. For now, just leave the function + // out in this case, but we really should also figure out how to easily + // fake the function name here. + show_function_info = false; + } sc.DumpStopContext(s, m_owner.GetTarget().GetProcessSP().get(), m_address, - false, true, false, true, true, true); + false, true, false, show_function_info, + show_function_info, show_function_info); } else { if (sc.module_sp) { s->EOL(); @@ -537,7 +549,10 @@ void BreakpointLocation::GetDescription(Stream *s, if (sc.line_entry.line > 0) { s->EOL(); s->Indent("location = "); - sc.line_entry.DumpStopContext(s, true); + if (auto preferred = GetPreferredLineEntry()) + preferred->DumpStopContext(s, true); + else + sc.line_entry.DumpStopContext(s, true); } } else { @@ -656,6 +671,50 @@ void BreakpointLocation::SendBreakpointLocationChangedEvent( } } +std::optional BreakpointLocation::GetSuggestedStackFrameIndex() { + auto preferred_opt = GetPreferredLineEntry(); + if (!preferred_opt) + return {}; + LineEntry preferred = *preferred_opt; + SymbolContext sc; + if (!m_address.CalculateSymbolContext(&sc)) + return {}; + // Don't return anything special if frame 0 is the preferred line entry. 
+ // We not really telling the stack frame list to do anything special in that + // case. + if (!LineEntry::Compare(sc.line_entry, preferred)) + return {}; + + if (!sc.block) + return {}; + + // Blocks have their line info in Declaration form, so make one here: + Declaration preferred_decl(preferred.GetFile(), preferred.line, + preferred.column); + + uint32_t depth = 0; + Block *inlined_block = sc.block->GetContainingInlinedBlock(); + while (inlined_block) { + // If we've moved to a block that this isn't the start of, that's not + // our inlining info or call site, so we can stop here. + Address start_address; + if (!inlined_block->GetStartAddress(start_address) || + start_address != m_address) + return {}; + + const InlineFunctionInfo *info = inlined_block->GetInlinedFunctionInfo(); + if (info) { + if (preferred_decl == info->GetDeclaration()) + return depth; + if (preferred_decl == info->GetCallSite()) + return depth + 1; + } + inlined_block = inlined_block->GetInlinedParent(); + depth++; + } + return {}; +} + void BreakpointLocation::SwapLocation(BreakpointLocationSP swap_from) { m_address = swap_from->m_address; m_should_resolve_indirect_functions = diff --git a/lldb/source/Breakpoint/BreakpointResolver.cpp b/lldb/source/Breakpoint/BreakpointResolver.cpp index 8307689c7640c..9643602d78c75 100644 --- a/lldb/source/Breakpoint/BreakpointResolver.cpp +++ b/lldb/source/Breakpoint/BreakpointResolver.cpp @@ -340,6 +340,21 @@ void BreakpointResolver::AddLocation(SearchFilter &filter, } BreakpointLocationSP bp_loc_sp(AddLocation(line_start)); + // If the address that we resolved the location to returns a different + // LineEntry from the one in the incoming SC, we're probably dealing with an + // inlined call site, so set that as the preferred LineEntry: + LineEntry resolved_entry; + if (!skipped_prologue && bp_loc_sp && + line_start.CalculateSymbolContextLineEntry(resolved_entry) && + LineEntry::Compare(resolved_entry, sc.line_entry)) { + // FIXME: The function name 
will also be wrong here. Do we need to record + // that as well, or can we figure that out again when we report this + // breakpoint location. + if (!bp_loc_sp->SetPreferredLineEntry(sc.line_entry)) { + LLDB_LOG(log, "Tried to add a preferred line entry that didn't have the " + "same address as this location's address."); + } + } if (log && bp_loc_sp && !GetBreakpoint()->IsInternal()) { StreamString s; bp_loc_sp->GetDescription(&s, lldb::eDescriptionLevelVerbose); diff --git a/lldb/source/Breakpoint/BreakpointSite.cpp b/lldb/source/Breakpoint/BreakpointSite.cpp index 3ca93f908e30b..9700a57d3346e 100644 --- a/lldb/source/Breakpoint/BreakpointSite.cpp +++ b/lldb/source/Breakpoint/BreakpointSite.cpp @@ -87,6 +87,23 @@ void BreakpointSite::GetDescription(Stream *s, lldb::DescriptionLevel level) { m_constituents.GetDescription(s, level); } +std::optional BreakpointSite::GetSuggestedStackFrameIndex() { + + std::optional result; + std::lock_guard guard(m_constituents_mutex); + for (BreakpointLocationSP loc_sp : m_constituents.BreakpointLocations()) { + std::optional loc_frame_index = + loc_sp->GetSuggestedStackFrameIndex(); + if (loc_frame_index) { + if (result) + result = std::max(*loc_frame_index, *result); + else + result = loc_frame_index; + } + } + return result; +} + bool BreakpointSite::IsInternal() const { return m_constituents.IsInternal(); } uint8_t *BreakpointSite::GetTrapOpcodeBytes() { return &m_trap_opcode[0]; } diff --git a/lldb/source/Core/Declaration.cpp b/lldb/source/Core/Declaration.cpp index 579a3999d14ea..a485c4b9ba48a 100644 --- a/lldb/source/Core/Declaration.cpp +++ b/lldb/source/Core/Declaration.cpp @@ -70,8 +70,9 @@ int Declaration::Compare(const Declaration &a, const Declaration &b) { return 0; } -bool Declaration::FileAndLineEqual(const Declaration &declaration) const { - int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, true); +bool Declaration::FileAndLineEqual(const Declaration &declaration, + bool full) const { + int 
file_compare = FileSpec::Compare(this->m_file, declaration.m_file, full); return file_compare == 0 && this->m_line == declaration.m_line; } diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index f7d9c0d2d3306..5c7772a6db780 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -230,7 +230,7 @@ Block *Block::GetContainingInlinedBlockWithCallSite( const auto *function_info = inlined_block->GetInlinedFunctionInfo(); if (function_info && - function_info->GetCallSite().FileAndLineEqual(find_call_site)) + function_info->GetCallSite().FileAndLineEqual(find_call_site, true)) return inlined_block; inlined_block = inlined_block->GetInlinedParent(); } diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index db8f8ce6bcbc9..73389b2e8479b 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -251,7 +251,10 @@ void CompileUnit::ResolveSymbolContext( SymbolContextItem resolve_scope, SymbolContextList &sc_list, RealpathPrefixes *realpath_prefixes) { const FileSpec file_spec = src_location_spec.GetFileSpec(); - const uint32_t line = src_location_spec.GetLine().value_or(0); + const uint32_t line = + src_location_spec.GetLine().value_or(LLDB_INVALID_LINE_NUMBER); + const uint32_t column_num = + src_location_spec.GetColumn().value_or(LLDB_INVALID_COLUMN_NUMBER); const bool check_inlines = src_location_spec.GetCheckInlines(); // First find all of the file indexes that match our "file_spec". 
If @@ -268,7 +271,7 @@ void CompileUnit::ResolveSymbolContext( SymbolContext sc(GetModule()); sc.comp_unit = this; - if (line == 0) { + if (line == LLDB_INVALID_LINE_NUMBER) { if (file_spec_matches_cu_file_spec && !check_inlines) { // only append the context if we aren't looking for inline call sites by // file and line and if the file spec matches that of the compile unit @@ -312,6 +315,112 @@ void CompileUnit::ResolveSymbolContext( 0, file_indexes, src_location_spec, &line_entry); } + // If we didn't manage to find a breakpoint that matched the line number + // requested, that might be because it is only an inline call site, and + // doesn't have a line entry in the line table. Scan for that here. + // + // We are making the assumption that if there was an inlined function it will + // contribute at least 1 non-call-site entry to the line table. That's handy + // because we don't move line breakpoints over function boundaries, so if we + // found a hit, and there were also a call site entry, it would have to be in + // the function containing the PC of the line table match. That way we can + // limit the call site search to that function. + // We will miss functions that ONLY exist as a call site entry. + + if (line_entry.IsValid() && + (line_entry.line != line || line_entry.column != column_num) && + resolve_scope & eSymbolContextLineEntry && check_inlines) { + // We don't move lines over function boundaries, so the address in the + // line entry will be the in function that contained the line that might + // be a CallSite, and we can just iterate over that function to find any + // inline records, and dig up their call sites. + Address start_addr = line_entry.range.GetBaseAddress(); + Function *function = start_addr.CalculateSymbolContextFunction(); + + Declaration sought_decl(file_spec, line, column_num); + // We use this recursive function to descend the block structure looking + // for a block that has this Declaration as in it's CallSite info. 
+ // This function recursively scans the sibling blocks of the incoming + // block parameter. + std::function examine_block = + [&sought_decl, &sc_list, &src_location_spec, resolve_scope, + &examine_block](Block &block) -> void { + // Iterate over the sibling child blocks of the incoming block. + Block *sibling_block = block.GetFirstChild(); + while (sibling_block) { + // We only have to descend through the regular blocks, looking for + // immediate inlines, since those are the only ones that will have this + // callsite. + const InlineFunctionInfo *inline_info = + sibling_block->GetInlinedFunctionInfo(); + if (inline_info) { + // If this is the call-site we are looking for, record that: + // We need to be careful because the call site from the debug info + // will generally have a column, but the user might not have specified + // it. + Declaration found_decl = inline_info->GetCallSite(); + uint32_t sought_column = sought_decl.GetColumn(); + if (found_decl.FileAndLineEqual(sought_decl, false) && + (sought_column == LLDB_INVALID_COLUMN_NUMBER || + sought_column == found_decl.GetColumn())) { + // If we found a call site, it belongs not in this inlined block, + // but in the parent block that inlined it. + Address parent_start_addr; + if (sibling_block->GetParent()->GetStartAddress( + parent_start_addr)) { + SymbolContext sc; + parent_start_addr.CalculateSymbolContext(&sc, resolve_scope); + // Now swap out the line entry for the one we found. + LineEntry call_site_line = sc.line_entry; + call_site_line.line = found_decl.GetLine(); + call_site_line.column = found_decl.GetColumn(); + bool matches_spec = true; + // If the user asked for an exact match, we need to make sure the + // call site we found actually matches the location. 
+ if (src_location_spec.GetExactMatch()) { + matches_spec = false; + if ((src_location_spec.GetFileSpec() == + sc.line_entry.GetFile()) && + (src_location_spec.GetLine() && + *src_location_spec.GetLine() == call_site_line.line) && + (src_location_spec.GetColumn() && + *src_location_spec.GetColumn() == call_site_line.column)) + matches_spec = true; + } + if (matches_spec && + sibling_block->GetRangeAtIndex(0, call_site_line.range)) { + SymbolContext call_site_sc(sc.target_sp, sc.module_sp, + sc.comp_unit, sc.function, sc.block, + &call_site_line, sc.symbol); + sc_list.Append(call_site_sc); + } + } + } + } + + // Descend into the child blocks: + examine_block(*sibling_block); + // Now go to the next sibling: + sibling_block = sibling_block->GetSibling(); + } + }; + + if (function) { + // We don't need to examine the function block, it can't be inlined. + Block &func_block = function->GetBlock(true); + examine_block(func_block); + } + // If we found entries here, we are done. We only get here because we + // didn't find an exact line entry for this line & column, but if we found + // an exact match from the call site info that's strictly better than + // continuing to look for matches further on in the file. + // FIXME: Should I also do this for "call site line exists between the + // given line number and the later line we found in the line table"? That's + // a closer approximation to our general sliding algorithm. + if (sc_list.GetSize()) + return; + } + // If "exact == true", then "found_line" will be the same as "line". 
If // "exact == false", the "found_line" will be the closest line entry // with a line number greater than "line" and we will use this for our diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 3849ec5ed178d..94a381edd5e20 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -85,121 +85,32 @@ void StackFrameList::ResetCurrentInlinedDepth() { return; std::lock_guard guard(m_mutex); - - GetFramesUpTo(0, DoNotAllowInterruption); - if (m_frames.empty()) - return; - if (!m_frames[0]->IsInlined()) { - m_current_inlined_depth = UINT32_MAX; - m_current_inlined_pc = LLDB_INVALID_ADDRESS; - Log *log = GetLog(LLDBLog::Step); - if (log && log->GetVerbose()) - LLDB_LOGF( - log, - "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); - return; - } - // We only need to do something special about inlined blocks when we are - // at the beginning of an inlined function: - // FIXME: We probably also have to do something special if the PC is at - // the END of an inlined function, which coincides with the end of either - // its containing function or another inlined function. - - Block *block_ptr = m_frames[0]->GetFrameBlock(); - if (!block_ptr) - return; + m_current_inlined_pc = LLDB_INVALID_ADDRESS; + m_current_inlined_depth = UINT32_MAX; - Address pc_as_address; - lldb::addr_t curr_pc = m_thread.GetRegisterContext()->GetPC(); - pc_as_address.SetLoadAddress(curr_pc, &(m_thread.GetProcess()->GetTarget())); - AddressRange containing_range; - if (!block_ptr->GetRangeContainingAddress(pc_as_address, containing_range) || - pc_as_address != containing_range.GetBaseAddress()) - return; - - // If we got here because of a breakpoint hit, then set the inlined depth - // depending on where the breakpoint was set. If we got here because of a - // crash, then set the inlined depth to the deepest most block. 
Otherwise, - // we stopped here naturally as the result of a step, so set ourselves in the - // containing frame of the whole set of nested inlines, so the user can then - // "virtually" step into the frames one by one, or next over the whole mess. - // Note: We don't have to handle being somewhere in the middle of the stack - // here, since ResetCurrentInlinedDepth doesn't get called if there is a - // valid inlined depth set. StopInfoSP stop_info_sp = m_thread.GetStopInfo(); if (!stop_info_sp) return; - switch (stop_info_sp->GetStopReason()) { - case eStopReasonWatchpoint: - case eStopReasonException: - case eStopReasonExec: - case eStopReasonFork: - case eStopReasonVFork: - case eStopReasonVForkDone: - case eStopReasonSignal: - // In all these cases we want to stop in the deepest frame. - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = 0; - break; - case eStopReasonBreakpoint: { - // FIXME: Figure out what this break point is doing, and set the inline - // depth appropriately. Be careful to take into account breakpoints that - // implement step over prologue, since that should do the default - // calculation. For now, if the breakpoints corresponding to this hit are - // all internal, I set the stop location to the top of the inlined stack, - // since that will make things like stepping over prologues work right. - // But if there are any non-internal breakpoints I do to the bottom of the - // stack, since that was the old behavior. 
- uint32_t bp_site_id = stop_info_sp->GetValue(); - BreakpointSiteSP bp_site_sp( - m_thread.GetProcess()->GetBreakpointSiteList().FindByID(bp_site_id)); - bool all_internal = true; - if (bp_site_sp) { - uint32_t num_owners = bp_site_sp->GetNumberOfConstituents(); - for (uint32_t i = 0; i < num_owners; i++) { - Breakpoint &bp_ref = - bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint(); - if (!bp_ref.IsInternal()) { - all_internal = false; - } - } - } - if (!all_internal) { - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = 0; - break; - } - } - [[fallthrough]]; - default: { - // Otherwise, we should set ourselves at the container of the inlining, so - // that the user can descend into them. So first we check whether we have - // more than one inlined block sharing this PC: - int num_inlined_functions = 0; - - for (Block *container_ptr = block_ptr->GetInlinedParent(); - container_ptr != nullptr; - container_ptr = container_ptr->GetInlinedParent()) { - if (!container_ptr->GetRangeContainingAddress(pc_as_address, - containing_range)) - break; - if (pc_as_address != containing_range.GetBaseAddress()) - break; - num_inlined_functions++; - } - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = num_inlined_functions + 1; - Log *log = GetLog(LLDBLog::Step); + bool inlined = true; + auto inline_depth = stop_info_sp->GetSuggestedStackFrameIndex(inlined); + // We're only adjusting the inlined stack here. 
+ Log *log = GetLog(LLDBLog::Step); + if (inline_depth) { + m_current_inlined_depth = *inline_depth; + m_current_inlined_pc = m_thread.GetRegisterContext()->GetPC(); + if (log && log->GetVerbose()) LLDB_LOGF(log, "ResetCurrentInlinedDepth: setting inlined " "depth: %d 0x%" PRIx64 ".\n", - m_current_inlined_depth, curr_pc); - - break; - } + m_current_inlined_depth, m_current_inlined_pc); + } else { + if (log && log->GetVerbose()) + LLDB_LOGF( + log, + "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); } } @@ -816,19 +727,48 @@ void StackFrameList::SelectMostRelevantFrame() { RecognizedStackFrameSP recognized_frame_sp = frame_sp->GetRecognizedFrame(); - if (!recognized_frame_sp) { - LLDB_LOG(log, "Frame #0 not recognized"); - return; + if (recognized_frame_sp) { + if (StackFrameSP most_relevant_frame_sp = + recognized_frame_sp->GetMostRelevantFrame()) { + LLDB_LOG(log, "Found most relevant frame at index {0}", + most_relevant_frame_sp->GetFrameIndex()); + SetSelectedFrame(most_relevant_frame_sp.get()); + return; + } } + LLDB_LOG(log, "Frame #0 not recognized"); - if (StackFrameSP most_relevant_frame_sp = - recognized_frame_sp->GetMostRelevantFrame()) { - LLDB_LOG(log, "Found most relevant frame at index {0}", - most_relevant_frame_sp->GetFrameIndex()); - SetSelectedFrame(most_relevant_frame_sp.get()); - } else { - LLDB_LOG(log, "No relevant frame!"); + // If this thread has a non-trivial StopInof, then let it suggest + // a most relevant frame: + StopInfoSP stop_info_sp = m_thread.GetStopInfo(); + uint32_t stack_idx = 0; + bool found_relevant = false; + if (stop_info_sp) { + // Here we're only asking the stop info if it wants to adjust the real stack + // index. We have to ask about the m_inlined_stack_depth in + // Thread::ShouldStop since the plans need to reason with that info. 
+ bool inlined = false; + std::optional stack_opt = + stop_info_sp->GetSuggestedStackFrameIndex(inlined); + if (stack_opt) { + stack_idx = *stack_opt; + found_relevant = true; + } } + + frame_sp = GetFrameAtIndex(stack_idx); + if (!frame_sp) + LLDB_LOG(log, "Stop info suggested relevant frame {0} but it didn't exist", + stack_idx); + else if (found_relevant) + LLDB_LOG(log, "Setting selected frame from stop info to {0}", stack_idx); + // Note, we don't have to worry about "inlined" frames here, because we've + // already calculated the inlined frame in Thread::ShouldStop, and + // SetSelectedFrame will take care of that adjustment for us. + SetSelectedFrame(frame_sp.get()); + + if (!found_relevant) + LLDB_LOG(log, "No relevant frame!"); } uint32_t StackFrameList::GetSelectedFrameIndex( @@ -841,6 +781,7 @@ uint32_t StackFrameList::GetSelectedFrameIndex( // isn't set, then don't force a selection here, just return 0. if (!select_most_relevant) return 0; + // If the inlined stack frame is set, then use that: m_selected_frame_idx = 0; } return *m_selected_frame_idx; diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 60aa65ed38c74..f6387d47504e6 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -15,6 +15,7 @@ #include "lldb/Breakpoint/WatchpointResource.h" #include "lldb/Core/Debugger.h" #include "lldb/Expression/UserExpression.h" +#include "lldb/Symbol/Block.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -246,6 +247,22 @@ class StopInfoBreakpoint : public StopInfo { return m_description.c_str(); } + std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) override { + if (!inlined_stack) + return {}; + + ThreadSP thread_sp(m_thread_wp.lock()); + if (!thread_sp) + return {}; + BreakpointSiteSP bp_site_sp( + thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + if (!bp_site_sp) + return {}; + + return 
bp_site_sp->GetSuggestedStackFrameIndex(); + } + protected: bool ShouldStop(Event *event_ptr) override { // This just reports the work done by PerformAction or the synchronous @@ -1164,6 +1181,44 @@ class StopInfoTrace : public StopInfo { else return m_description.c_str(); } + + std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) override { + // Trace only knows how to adjust inlined stacks: + if (!inlined_stack) + return {}; + + ThreadSP thread_sp = GetThread(); + StackFrameSP frame_0_sp = thread_sp->GetStackFrameAtIndex(0); + if (!frame_0_sp) + return {}; + if (!frame_0_sp->IsInlined()) + return {}; + Block *block_ptr = frame_0_sp->GetFrameBlock(); + if (!block_ptr) + return {}; + Address pc_address = frame_0_sp->GetFrameCodeAddress(); + AddressRange containing_range; + if (!block_ptr->GetRangeContainingAddress(pc_address, containing_range) || + pc_address != containing_range.GetBaseAddress()) + return {}; + + int num_inlined_functions = 0; + + for (Block *container_ptr = block_ptr->GetInlinedParent(); + container_ptr != nullptr; + container_ptr = container_ptr->GetInlinedParent()) { + if (!container_ptr->GetRangeContainingAddress(pc_address, + containing_range)) + break; + if (pc_address != containing_range.GetBaseAddress()) + break; + + num_inlined_functions++; + } + inlined_stack = true; + return num_inlined_functions + 1; + } }; // StopInfoException diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 8373cdc36268f..735295e6f2593 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -619,6 +619,14 @@ void Thread::WillStop() { void Thread::SetupForResume() { if (GetResumeState() != eStateSuspended) { + // First check whether this thread is going to "actually" resume at all. + // For instance, if we're stepping from one level to the next of a + // virtual inlined call stack, we just change the inlined call stack index + // without actually running this thread.
In that case, for this thread we + // shouldn't push a step over breakpoint plan or do that work. + if (GetCurrentPlan()->IsVirtualStep()) + return; + // If we're at a breakpoint push the step-over breakpoint plan. Do this // before telling the current plan it will resume, since we might change // what the current plan is. diff --git a/lldb/source/Target/ThreadPlanStepInRange.cpp b/lldb/source/Target/ThreadPlanStepInRange.cpp index 567dcc26d0d37..325a70619908b 100644 --- a/lldb/source/Target/ThreadPlanStepInRange.cpp +++ b/lldb/source/Target/ThreadPlanStepInRange.cpp @@ -41,7 +41,7 @@ ThreadPlanStepInRange::ThreadPlanStepInRange( "Step Range stepping in", thread, range, addr_context, stop_others), ThreadPlanShouldStopHere(this), m_step_past_prologue(true), - m_virtual_step(false), m_step_into_target(step_into_target) { + m_virtual_step(eLazyBoolCalculate), m_step_into_target(step_into_target) { SetCallbacks(); SetFlagsToDefault(); SetupAvoidNoDebug(step_in_avoids_code_without_debug_info, @@ -149,7 +149,7 @@ bool ThreadPlanStepInRange::ShouldStop(Event *event_ptr) { m_sub_plan_sp.reset(); } - if (m_virtual_step) { + if (m_virtual_step == eLazyBoolYes) { // If we've just completed a virtual step, all we need to do is check for a // ShouldStopHere plan, and otherwise we're done. // FIXME - This can be both a step in and a step out. 
Probably should @@ -431,7 +431,7 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool return_value = false; - if (m_virtual_step) { + if (m_virtual_step == eLazyBoolYes) { return_value = true; } else { StopInfoSP stop_info_sp = GetPrivateStopInfo(); @@ -460,10 +460,13 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, bool current_plan) { - m_virtual_step = false; + m_virtual_step = eLazyBoolCalculate; if (resume_state == eStateStepping && current_plan) { Thread &thread = GetThread(); // See if we are about to step over a virtual inlined call. + // But if we already know we're virtual stepping, don't decrement the + // inlined depth again... + bool step_without_resume = thread.DecrementCurrentInlinedDepth(); if (step_without_resume) { Log *log = GetLog(LLDBLog::Step); @@ -476,11 +479,20 @@ bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, // FIXME: Maybe it would be better to create a InlineStep stop reason, but // then // the whole rest of the world would have to handle that stop reason. 
- m_virtual_step = true; + m_virtual_step = eLazyBoolYes; } return !step_without_resume; } return true; } -bool ThreadPlanStepInRange::IsVirtualStep() { return m_virtual_step; } +bool ThreadPlanStepInRange::IsVirtualStep() { + if (m_virtual_step == eLazyBoolCalculate) { + Thread &thread = GetThread(); + if (thread.GetCurrentInlinedDepth() == UINT32_MAX) + m_virtual_step = eLazyBoolNo; + else + m_virtual_step = eLazyBoolYes; + } + return m_virtual_step == eLazyBoolYes; +} diff --git a/lldb/source/Target/ThreadPlanStepOverRange.cpp b/lldb/source/Target/ThreadPlanStepOverRange.cpp index ef5b4b5c434d1..643ee827c865c 100644 --- a/lldb/source/Target/ThreadPlanStepOverRange.cpp +++ b/lldb/source/Target/ThreadPlanStepOverRange.cpp @@ -402,7 +402,7 @@ bool ThreadPlanStepOverRange::DoWillResume(lldb::StateType resume_state, if (in_inlined_stack) { Log *log = GetLog(LLDBLog::Step); LLDB_LOGF(log, - "ThreadPlanStepInRange::DoWillResume: adjusting range to " + "ThreadPlanStepOverRange::DoWillResume: adjusting range to " "the frame at inlined depth %d.", thread.GetCurrentInlinedDepth()); StackFrameSP stack_sp = thread.GetStackFrameAtIndex(0); diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py index 5eb3fc3cada92..08ac9290ee85a 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py @@ -132,12 +132,39 @@ def test_read_registers_using_g_packets(self): target = self.createTarget("a.yaml") process = self.connect(target) - self.assertEqual(1, self.server.responder.packetLog.count("g")) - self.server.responder.packetLog = [] + # We want to make sure that the process is using the g packet, but it's + # not required the "connect" should read all registers. However, it might + # have... So we need to wait till we explicitly 'read_registers' to do + # test. 
+ # Also, even with the use-g-packet-for-reading lldb will sometimes send p0 + # early on to see if the packet is supported. So we can't say that there + # will be NO p packets. + # But there certainly should be no p packets after the g packet. + self.read_registers(process) - # Reading registers should not cause any 'p' packets to be exchanged. + print(f"\nPACKET LOG:\n{self.server.responder.packetLog}\n") + g_pos = 0 + try: + g_pos = self.server.responder.packetLog.index("g") + except err: + self.fail("'g' packet not found after fetching registers") + + try: + second_g = self.server.responder.packetLog.index("g", g_pos) + self.fail("Found more than one 'g' packet") + except: + pass + + # Make sure there aren't any `p` packets after the `g` packet: self.assertEqual( - 0, len([p for p in self.server.responder.packetLog if p.startswith("p")]) + 0, + len( + [ + p + for p in self.server.responder.packetLog[g_pos:] + if p.startswith("p") + ] + ), ) def test_read_registers_using_p_packets(self): diff --git a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py index 752c3a9cbd286..f52e0f0fd5bcf 100644 --- a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py +++ b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py @@ -32,6 +32,12 @@ def test_step_in_template_with_python_api(self): self.build() self.step_in_template() + @add_test_categories(["pyapi"]) + def test_virtual_inline_stepping(self): + """Test stepping through a virtual inlined call stack""" + self.build() + self.virtual_inline_stepping() + def setUp(self): # Call super's setUp(). 
TestBase.setUp(self) @@ -357,3 +363,60 @@ def step_in_template(self): step_sequence = [["// In max_value specialized", "into"]] self.run_step_sequence(step_sequence) + + def run_to_call_site_and_step(self, source_regex, func_name, start_pos): + main_spec = lldb.SBFileSpec("calling.cpp") + # Set the breakpoint by file and line, not sourced regex because + # we want to make sure we can set breakpoints on call sites: + call_site_line_num = line_number(self.main_source, source_regex) + target, process, thread, bkpt = lldbutil.run_to_line_breakpoint( + self, main_spec, call_site_line_num + ) + + # Make sure that the location is at the call site (run_to_line_breakpoint already asserted + # that there's one location.): + bkpt_loc = bkpt.location[0] + strm = lldb.SBStream() + result = bkpt_loc.GetDescription(strm, lldb.eDescriptionLevelFull) + + self.assertTrue(result, "Got a location description") + desc = strm.GetData() + self.assertIn(f"calling.cpp:{call_site_line_num}", desc, "Right line listed") + # We don't get the function name right yet - so we omit it in printing. + # Turn on this test when that is working. + # self.assertIn(func_name, desc, "Right function listed") + + pc = thread.frame[0].pc + for i in range(start_pos, 3): + thread.StepInto() + frame_0 = thread.frame[0] + + trivial_line_num = line_number( + self.main_source, f"In caller_trivial_inline_{i}." + ) + self.assertEqual( + frame_0.line_entry.line, + trivial_line_num, + f"Stepped into the caller_trivial_inline_{i}", + ) + if pc != frame_0.pc: + # If we get here, we stepped to the expected line number, but + # the compiler on this system has decided to insert an instruction + # between the call site of an inlined function with no arguments, + # returning void, and its immediate call to another void inlined function + # with no arguments. We aren't going to be testing virtual inline + # stepping for this function... 
+ break + + process.Kill() + target.Clear() + + def virtual_inline_stepping(self): + """Use the Python API's to step through a virtual inlined stack""" + self.run_to_call_site_and_step("At caller_trivial_inline_1", "main", 1) + self.run_to_call_site_and_step( + "In caller_trivial_inline_1", "caller_trivial_inline_1", 2 + ) + self.run_to_call_site_and_step( + "In caller_trivial_inline_2", "caller_trivial_inline_2", 3 + ) diff --git a/lldb/test/API/functionalities/inline-stepping/calling.cpp b/lldb/test/API/functionalities/inline-stepping/calling.cpp index 49179ce7c9788..d7ee56b3c0790 100644 --- a/lldb/test/API/functionalities/inline-stepping/calling.cpp +++ b/lldb/test/API/functionalities/inline-stepping/calling.cpp @@ -13,6 +13,12 @@ int called_by_inline_ref (int &value); inline void inline_trivial_1 () __attribute__((always_inline)); inline void inline_trivial_2 () __attribute__((always_inline)); +// These three should share the same initial pc so we can test +// virtual inline stepping. +inline void caller_trivial_inline_1() __attribute__((always_inline)); +inline void caller_trivial_inline_2() __attribute__((always_inline)); +inline void caller_trivial_inline_3() __attribute__((always_inline)); + void caller_trivial_1 (); void caller_trivial_2 (); @@ -79,6 +85,23 @@ caller_trivial_2 () inline_value += 1; // At increment in caller_trivial_2. } +// When you call caller_trivial_inline_1, the inlined call-site +// should share a PC with all three of the following inlined +// functions, so we can exercise "virtual inline stepping". +void caller_trivial_inline_1() { + caller_trivial_inline_2(); // In caller_trivial_inline_1. + inline_value += 1; +} + +void caller_trivial_inline_2() { + caller_trivial_inline_3(); // In caller_trivial_inline_2. + inline_value += 1; +} + +void caller_trivial_inline_3() { + inline_value += 1; // In caller_trivial_inline_3. 
+} + void called_by_inline_trivial () { @@ -132,5 +155,7 @@ main (int argc, char **argv) max_value(123, 456); // Call max_value template max_value(std::string("abc"), std::string("0022")); // Call max_value specialized + caller_trivial_inline_1(); // At caller_trivial_inline_1. + return 0; // About to return from main. } From 49277253f016268e4a10109f1db2e53c60d35881 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 30 Oct 2024 09:31:32 -0700 Subject: [PATCH 18/69] [lldb] Use LLVM's helper for Unicode conversion (NFC) (#112582) The codecvt header has been deprecated in C++17. Use LLVM's unicode helpers to convert between UTF-8 and UTF-16. --- lldb/include/lldb/Host/Editline.h | 25 --------------- lldb/source/Host/common/Editline.cpp | 48 +++++++++++++++------------- 2 files changed, 26 insertions(+), 47 deletions(-) diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h index a02f90891599a..57e2c831e3499 100644 --- a/lldb/include/lldb/Host/Editline.h +++ b/lldb/include/lldb/Host/Editline.h @@ -30,9 +30,6 @@ #include "lldb/Host/Config.h" -#if LLDB_EDITLINE_USE_WCHAR -#include -#endif #include #include #include @@ -57,23 +54,6 @@ #include "llvm/ADT/FunctionExtras.h" -#if defined(__clang__) && defined(__has_warning) -#if __has_warning("-Wdeprecated-declarations") -#define LLDB_DEPRECATED_WARNING_DISABLE \ - _Pragma("clang diagnostic push") \ - _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#define LLDB_DEPRECATED_WARNING_RESTORE _Pragma("clang diagnostic pop") -#endif -#elif defined(__GNUC__) && __GNUC__ > 6 -#define LLDB_DEPRECATED_WARNING_DISABLE \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#define LLDB_DEPRECATED_WARNING_RESTORE _Pragma("GCC diagnostic pop") -#else -#define LLDB_DEPRECATED_WARNING_DISABLE -#define LLDB_DEPRECATED_WARNING_RESTORE -#endif - namespace lldb_private { namespace line_editor { @@ -383,11 +363,6 @@ class Editline { void 
SetEditLinePromptCallback(EditlinePromptCallbackType callbackFn); void SetGetCharacterFunction(EditlineGetCharCallbackType callbackFn); -#if LLDB_EDITLINE_USE_WCHAR - LLDB_DEPRECATED_WARNING_DISABLE - std::wstring_convert> m_utf8conv; - LLDB_DEPRECATED_WARNING_RESTORE -#endif ::EditLine *m_editline = nullptr; EditlineHistorySP m_history_sp; bool m_in_history = false; diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp index 60117cb5f0e61..f95f854c5f220 100644 --- a/lldb/source/Host/common/Editline.cpp +++ b/lldb/source/Host/common/Editline.cpp @@ -10,9 +10,8 @@ #include #include -#include "lldb/Host/Editline.h" - #include "lldb/Host/ConnectionFileDescriptor.h" +#include "lldb/Host/Editline.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/Host.h" #include "lldb/Utility/CompletionRequest.h" @@ -23,6 +22,7 @@ #include "lldb/Utility/StreamString.h" #include "lldb/Utility/StringList.h" #include "lldb/Utility/Timeout.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Locale.h" @@ -444,7 +444,9 @@ StringList Editline::GetInputAsStringList(int line_count) { if (line_count == 0) break; #if LLDB_EDITLINE_USE_WCHAR - lines.AppendString(m_utf8conv.to_bytes(line)); + std::string buffer; + llvm::convertWideToUTF8(line, buffer); + lines.AppendString(buffer); #else lines.AppendString(line); #endif @@ -636,7 +638,9 @@ unsigned char Editline::BreakLineCommand(int ch) { if (m_fix_indentation_callback) { StringList lines = GetInputAsStringList(m_current_line_index + 1); #if LLDB_EDITLINE_USE_WCHAR - lines.AppendString(m_utf8conv.to_bytes(new_line_fragment)); + std::string buffer; + llvm::convertWideToUTF8(new_line_fragment, buffer); + lines.AppendString(buffer); #else lines.AppendString(new_line_fragment); #endif @@ -684,8 +688,9 @@ unsigned char Editline::EndOrAddLineCommand(int ch) { m_input_lines.clear(); for (unsigned index = 0; index < lines.GetSize(); index++) { #if 
LLDB_EDITLINE_USE_WCHAR - m_input_lines.insert(m_input_lines.end(), - m_utf8conv.from_bytes(lines[index])); + std::wstring wbuffer; + llvm::ConvertUTF8toWide(lines[index], wbuffer); + m_input_lines.insert(m_input_lines.end(), wbuffer); #else m_input_lines.insert(m_input_lines.end(), lines[index]); #endif @@ -869,7 +874,9 @@ unsigned char Editline::FixIndentationCommand(int ch) { currentLine = currentLine.erase(0, -indent_correction); } #if LLDB_EDITLINE_USE_WCHAR - m_input_lines[m_current_line_index] = m_utf8conv.from_bytes(currentLine); + std::wstring wbuffer; + llvm::ConvertUTF8toWide(currentLine, wbuffer); + m_input_lines[m_current_line_index] = wbuffer; #else m_input_lines[m_current_line_index] = currentLine; #endif @@ -1502,7 +1509,7 @@ bool Editline::GetLine(std::string &line, bool &interrupted) { } else { m_history_sp->Enter(input); #if LLDB_EDITLINE_USE_WCHAR - line = m_utf8conv.to_bytes(SplitLines(input)[0]); + llvm::convertWideToUTF8(SplitLines(input)[0], line); #else line = SplitLines(input)[0]; #endif @@ -1574,25 +1581,22 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) { out = (unsigned char)ch; return true; #else - LLDB_DEPRECATED_WARNING_DISABLE - std::codecvt_utf8 cvt; - LLDB_DEPRECATED_WARNING_RESTORE llvm::SmallString<4> input; for (;;) { - const char *from_next; - wchar_t *to_next; - std::mbstate_t state = std::mbstate_t(); input.push_back(ch); - switch (cvt.in(state, input.begin(), input.end(), from_next, &out, &out + 1, - to_next)) { - case std::codecvt_base::ok: + auto *cur_ptr = reinterpret_cast(input.begin()); + auto *end_ptr = reinterpret_cast(input.end()); + llvm::UTF32 code_point = 0; + llvm::ConversionResult cr = llvm::convertUTF8Sequence( + &cur_ptr, end_ptr, &code_point, llvm::lenientConversion); + switch (cr) { + case llvm::conversionOK: + out = code_point; return out != (EditLineGetCharType)WEOF; - - case std::codecvt_base::error: - case std::codecvt_base::noconv: + case llvm::targetExhausted: + case 
llvm::sourceIllegal: return false; - - case std::codecvt_base::partial: + case llvm::sourceExhausted: lldb::ConnectionStatus status; size_t read_count = m_input_connection.Read( &ch, 1, std::chrono::seconds(0), status, nullptr); From 0c9a02355abc3b037be53c072fc46a13bb5aa2c1 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Wed, 30 Oct 2024 09:50:27 -0700 Subject: [PATCH 19/69] [flang][fir] always use memcpy for fir.box (#113949) @jeanPerier explained the importance of converting box loads and stores into `memcpy`s instead of aggregate loads and stores, and I'll do my best to explain it here. * [(godbolt link) Example comparing opt transformations on memcpys vs aggregate load/stores](https://godbolt.org/z/be7xM83cG) * LLVM can more effectively reason about memcpys compared to aggregate load/stores. * This came up when others were discussing array descriptors for assumed-rank arrays passed to `bind(c)` subroutines, with the implication that the array descriptors are known to have lower bounds of 1 and that they are not pointer/allocatable types. * [(godbolt link) Clang also uses memcpys so we should probably follow them, assuming the clang developers are generating what they know Opt will handle more effectively.](https://godbolt.org/z/YT4x7387W) * This currently may not help much without the `nocapture` attribute being propagated to function calls, but [it looks like someone may do this soon (discourse link)](https://discourse.llvm.org/t/applying-the-nocapture-attribute-to-reference-passed-arguments-in-fortran-subroutines/81401/23) or I can do this in a follow-up patch. Note on test `flang/test/Fir/embox-char.fir`: it looks like the original test was auto-generated. I wasn't too sure which parts were especially important to test, so I regenerated the test.
--- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 59 ++--- flang/test/Fir/box.fir | 19 +- .../Fir/convert-to-llvm-openmp-and-fir.fir | 4 +- flang/test/Fir/convert-to-llvm.fir | 28 +- flang/test/Fir/embox-char.fir | 239 +++++++++--------- flang/test/Fir/polymorphic.fir | 12 +- flang/test/Fir/tbaa.fir | 4 +- .../Integration/OpenMP/private-global.f90 | 5 +- ...privatization-allocatable-firstprivate.f90 | 3 +- .../Lower/OpenMP/parallel-reduction-mixed.f90 | 2 +- flang/test/Lower/allocatable-polymorphic.f90 | 18 +- 11 files changed, 188 insertions(+), 205 deletions(-) diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index e6eeb0d5db4a8..4c8c56e0f21ce 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2949,9 +2949,10 @@ struct LoadOpConversion : public fir::FIROpConversion { llvm::LogicalResult matchAndRewrite(fir::LoadOp load, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type llvmLoadTy = convertObjectType(load.getType()); if (auto boxTy = mlir::dyn_cast(load.getType())) { - // fir.box is a special case because it is considered as an ssa values in + // fir.box is a special case because it is considered an ssa value in // fir, but it is lowered as a pointer to a descriptor. So // fir.ref and fir.box end up being the same llvm types and // loading a fir.ref is implemented as taking a snapshot of the @@ -2960,30 +2961,17 @@ struct LoadOpConversion : public fir::FIROpConversion { mlir::Location loc = load.getLoc(); auto newBoxStorage = genAllocaAndAddrCastWithType(loc, llvmLoadTy, defaultAlign, rewriter); - // TODO: always generate llvm.memcpy, LLVM is better at optimizing it than - // aggregate loads + stores. 
- if (boxTy.isAssumedRank()) { - - TypePair boxTypePair{boxTy, llvmLoadTy}; - mlir::Value boxSize = - computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter); - auto memcpy = rewriter.create( - loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false); - if (std::optional optionalTag = load.getTbaa()) - memcpy.setTBAATags(*optionalTag); - else - attachTBAATag(memcpy, boxTy, boxTy, nullptr); - } else { - auto boxValue = rewriter.create(loc, llvmLoadTy, - inputBoxStorage); - if (std::optional optionalTag = load.getTbaa()) - boxValue.setTBAATags(*optionalTag); - else - attachTBAATag(boxValue, boxTy, boxTy, nullptr); - auto storeOp = - rewriter.create(loc, boxValue, newBoxStorage); - attachTBAATag(storeOp, boxTy, boxTy, nullptr); - } + + TypePair boxTypePair{boxTy, llvmLoadTy}; + mlir::Value boxSize = + computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter); + auto memcpy = rewriter.create( + loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false); + + if (std::optional optionalTag = load.getTbaa()) + memcpy.setTBAATags(*optionalTag); + else + attachTBAATag(memcpy, boxTy, boxTy, nullptr); rewriter.replaceOp(load, newBoxStorage); } else { auto loadOp = rewriter.create( @@ -3227,20 +3215,13 @@ struct StoreOpConversion : public fir::FIROpConversion { mlir::LLVM::AliasAnalysisOpInterface newOp; if (auto boxTy = mlir::dyn_cast(storeTy)) { mlir::Type llvmBoxTy = lowerTy().convertBoxTypeAsStruct(boxTy); - // fir.box value is actually in memory, load it first before storing it, - // or do a memcopy for assumed-rank descriptors. 
- if (boxTy.isAssumedRank()) { - TypePair boxTypePair{boxTy, llvmBoxTy}; - mlir::Value boxSize = - computeBoxSize(loc, boxTypePair, llvmValue, rewriter); - newOp = rewriter.create( - loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false); - } else { - auto val = - rewriter.create(loc, llvmBoxTy, llvmValue); - attachTBAATag(val, boxTy, boxTy, nullptr); - newOp = rewriter.create(loc, val, llvmMemref); - } + // Always use memcpy because LLVM is not as effective at optimizing + // aggregate loads/stores as it is optimizing memcpy. + TypePair boxTypePair{boxTy, llvmBoxTy}; + mlir::Value boxSize = + computeBoxSize(loc, boxTypePair, llvmValue, rewriter); + newOp = rewriter.create( + loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false); } else { newOp = rewriter.create(loc, llvmValue, llvmMemref); } diff --git a/flang/test/Fir/box.fir b/flang/test/Fir/box.fir index 81a4d8bc13bf0..fd9fa1f2b3aab 100644 --- a/flang/test/Fir/box.fir +++ b/flang/test/Fir/box.fir @@ -56,12 +56,14 @@ func.func @fa(%a : !fir.ref>) { // CHECK-LABEL: define void @b1( // CHECK-SAME: ptr %[[res:.*]], ptr %[[arg0:.*]], i64 %[[arg1:.*]]) func.func @b1(%arg0 : !fir.ref>, %arg1 : index) -> !fir.box> { + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 } // CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]] // CHECK: insertvalue {{.*}} undef, i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %x = fir.embox %arg0 typeparams %arg1 : (!fir.ref>, index) -> !fir.box> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 24, i1 false) return %x : !fir.box> } @@ -71,11 +73,13 @@ func.func @b1(%arg0 : !fir.ref>, %arg1 : index) -> !fir.box>>, %arg1 : index) -> !fir.box>> { %1 = fir.shape %arg1 : (index) -> !fir.shape<1> + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, 
i8, i8, [1 x [3 x i64]] } // CHECK: insertvalue {{.*}} { ptr undef, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), i32 20240719, i8 1, i8 40, i8 0, i8 0, {{.*}} }, i64 %[[arg1]], 7, 0, 1 // CHECK: insertvalue {{.*}} %{{.*}}, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), 7, 0, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %2 = fir.embox %arg0(%1) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false) return %2 : !fir.box>> } @@ -84,6 +88,7 @@ func.func @b2(%arg0 : !fir.ref>>, %arg1 : index) -> // CHECK-SAME: ptr %[[res:.*]], ptr %[[arg0:.*]], i64 %[[arg1:.*]], i64 %[[arg2:.*]]) func.func @b3(%arg0 : !fir.ref>>, %arg1 : index, %arg2 : index) -> !fir.box>> { %1 = fir.shape %arg2 : (index) -> !fir.shape<1> + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } // CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]] // CHECK: insertvalue {{.*}} i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 @@ -91,7 +96,8 @@ func.func @b3(%arg0 : !fir.ref>>, %arg1 : index, %ar // CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %2 = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.box>> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false) return %2 : !fir.box>> } @@ -101,6 +107,7 @@ func.func @b3(%arg0 : !fir.ref>>, %arg1 : index, %ar func.func @b4(%arg0 : !fir.ref>>, %arg1 : index) -> !fir.box>> { %c_7 = arith.constant 7 : index %1 = fir.shape %c_7 : (index) -> !fir.shape<1> + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } // CHECK: %[[size:.*]] = mul 
i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]] // CHECK: insertvalue {{.*}} i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 @@ -108,7 +115,8 @@ func.func @b4(%arg0 : !fir.ref>>, %arg1 : index) -> // CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %x = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.box>> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false) return %x : !fir.box>> } @@ -117,8 +125,7 @@ func.func @b4(%arg0 : !fir.ref>>, %arg1 : index) -> // CHECK-SAME: ptr %[[arg0:.*]], ptr %[[arg1:.*]]) func.func @b5(%arg0 : !fir.ref>>>, %arg1 : !fir.box>>) { fir.store %arg1 to %arg0 : !fir.ref>>> - // CHECK: %[[boxLoad:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] }, ptr %[[arg1]] - // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] } %[[boxLoad]], ptr %[[arg0]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %0, ptr %1, i32 72, i1 false) return } diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 335877e7c9a87..168526518865b 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -799,8 +799,8 @@ func.func @_QPs(%arg0: !fir.ref> {fir.bindc_name = "x"}) { //CHECK: omp.parallel { //CHECK: %[[CONST_1:.*]] = llvm.mlir.constant(1 : i32) : i32 //CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %[[CONST_1:.*]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr -//CHECK: %[[LOAD:.*]] = llvm.load %[[ALLOCA]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> -//CHECK: llvm.store %[[LOAD]], %[[ALLOCA_1]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr +//CHECK: %[[SIZE:.*]] = llvm.mlir.constant(24 : i32) : i32 +//CHECK: 
"llvm.intr.memcpy"(%[[ALLOCA_1]], %[[ALLOCA]], %[[SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () //CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA_1]][0, 0] : (!llvm.ptr) -> !llvm.ptr //CHECK: %[[LOAD_2:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> !llvm.ptr //CHECK: omp.terminator diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 1182a0a10f218..fa391fa6cc7a7 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -862,8 +862,8 @@ func.func @test_store_box(%array : !fir.ref>>, %box // CHECK-LABEL: llvm.func @test_store_box // CHECK-SAME: (%[[arg0:.*]]: !llvm.ptr, // CHECK-SAME: %[[arg1:.*]]: !llvm.ptr) { -// CHECK-NEXT: %[[box_to_store:.*]] = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)> -// CHECK-NEXT: llvm.store %[[box_to_store]], %[[arg0]] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>, !llvm.ptr +// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(72 : i32) : i32 +// CHECK-NEXT: "llvm.intr.memcpy"(%[[arg0]], %[[arg1]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () // CHECK-NEXT: llvm.return // CHECK-NEXT: } @@ -875,15 +875,17 @@ func.func @store_unlimited_polymorphic_box(%arg0 : !fir.class, %arg1 : !fi fir.store %arg3 to %arg3r : !fir.ref>> return } -// CHECK-LABEL: llvm.func @store_unlimited_polymorphic_box( -// CHECK: %[[VAL_8:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> -// CHECK: llvm.store %[[VAL_8]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr -// CHECK: %[[VAL_9:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x 
i{{.*}}>)> -// CHECK: llvm.store %[[VAL_9]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr -// CHECK: %[[VAL_10:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> -// CHECK: llvm.store %[[VAL_10]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr -// CHECK: %[[VAL_11:.*]] = llvm.load %{{.*}}: !llvm.ptr -// CHECK: llvm.store %[[VAL_11]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr +// CHECK: llvm.func @store_unlimited_polymorphic_box(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: !llvm.ptr, %[[VAL_2:.*]]: !llvm.ptr, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: !llvm.ptr, %[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr) { +// CHECK: %[[VAL_8:.*]] = llvm.mlir.constant(40 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_4]], %[[VAL_0]], %[[VAL_8]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(64 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_5]], %[[VAL_1]], %[[VAL_9]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(40 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_6]], %[[VAL_2]], %[[VAL_10]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(64 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_7]], %[[VAL_3]], %[[VAL_11]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: llvm.return +// CHECK: } // ----- @@ -935,8 +937,8 @@ func.func @test_load_box(%addr : !fir.ref>>) { // GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])> // AMDGPU-NEXT: 
%[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>{{.*}} : (i32) -> !llvm.ptr<5> // AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr<5> to !llvm.ptr -// CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr -> !llvm.struct<([[DESC_TYPE]])> -// CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.struct<([[DESC_TYPE]])>, !llvm.ptr +// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(48 : i32) : i32 +// CHECK-NEXT: "llvm.intr.memcpy"(%[[box_copy]], %[[arg0]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () // CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr) -> () // CHECK-NEXT: llvm.return // CHECK-NEXT: } diff --git a/flang/test/Fir/embox-char.fir b/flang/test/Fir/embox-char.fir index bf8344dbb60fc..efb069f96520d 100644 --- a/flang/test/Fir/embox-char.fir +++ b/flang/test/Fir/embox-char.fir @@ -1,3 +1,10 @@ +// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +// The script is designed to make adding checks to +// a test case fast, it is *not* designed to be authoritative +// about what constitutes a good test! The CHECK should be +// minimized and named to reflect the test intent. + // Test that the offset of the first element of the slice // is computed in elements of the type used for the GEP // computing the base of the slice. 
@@ -10,42 +17,40 @@ // print *, x(2,:) // end subroutine -// CHECK-LABEL: llvm.func @test_char4( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-SAME: %[[VAL_1_SLICE_LB0:.*]]: i64, %[[VAL_2_SLICE_EX0:.*]]: i64, %[[VAL_3_SLICE_ST0:.*]]: i64, %[[VAL_4_SLICE_LB1:.*]]: i64, %[[VAL_5_SLICE_EX1:.*]]: i64, %[[VAL_6_SLICE_ST1:.*]]: i64) { +// CHECK: llvm.func @test_char4(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: i64, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i64, %[[VAL_4:.*]]: i64, %[[VAL_5:.*]]: i64, %[[VAL_6:.*]]: i64) { // CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_10:.*]] = llvm.alloca %[[VAL_9]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[VAL_12:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[VAL_13_WIDTH:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[VAL_14:.*]] = llvm.load %[[VAL_0]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_14]], %[[VAL_10]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_13:.*]] = llvm.mlir.constant(4 : index) : i64 +// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(72 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_10]], %[[VAL_0]], %[[VAL_14]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () // CHECK: %[[VAL_15:.*]] = llvm.getelementptr %[[VAL_10]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_16_BYTESIZE:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_16:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr 
-> i64 // CHECK: %[[VAL_17:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_18_LB1:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_18:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64 // CHECK: %[[VAL_19:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_20_EX1:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_20:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64 // CHECK: %[[VAL_21:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_22_ST1:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_22:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64 // CHECK: %[[VAL_23:.*]] = llvm.getelementptr %[[VAL_10]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_24_BASEPTR:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr +// CHECK: %[[VAL_24:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[VAL_25:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_26_LB0:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_26:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64 // CHECK: %[[VAL_27:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_28_EX0:.*]] = llvm.load %[[VAL_27]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_28:.*]] = llvm.load %[[VAL_27]] : !llvm.ptr -> i64 // CHECK: %[[VAL_29:.*]] = 
llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_30_ST0:.*]] = llvm.load %[[VAL_29]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_31_LEN:.*]] = llvm.sdiv %[[VAL_16_BYTESIZE]], %[[VAL_13_WIDTH]] : i64 +// CHECK: %[[VAL_30:.*]] = llvm.load %[[VAL_29]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_31:.*]] = llvm.sdiv %[[VAL_16]], %[[VAL_13]] : i64 // CHECK: %[[VAL_32:.*]] = llvm.mlir.constant(44 : i32) : i32 // CHECK: %[[VAL_33:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[VAL_34:.*]] = llvm.getelementptr %[[VAL_33]][1] : (!llvm.ptr) -> !llvm.ptr, i32 // CHECK: %[[VAL_35:.*]] = llvm.ptrtoint %[[VAL_34]] : !llvm.ptr to i64 -// CHECK: %[[VAL_36_BYTESIZE:.*]] = llvm.mul %[[VAL_35]], %[[VAL_31_LEN]] : i64 +// CHECK: %[[VAL_36:.*]] = llvm.mul %[[VAL_35]], %[[VAL_31]] : i64 // CHECK: %[[VAL_37:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_36_BYTESIZE]], %[[VAL_37]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_37]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(20240719 : i32) : i32 // CHECK: %[[VAL_40:.*]] = llvm.insertvalue %[[VAL_39]], %[[VAL_38]][2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_41:.*]] = llvm.mlir.constant(2 : i32) : i32 @@ -59,39 +64,39 @@ // CHECK: %[[VAL_49:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[VAL_50:.*]] = llvm.trunc %[[VAL_49]] : i32 to i8 // CHECK: %[[VAL_51:.*]] = llvm.insertvalue %[[VAL_50]], %[[VAL_48]][6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_52_c0:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[VAL_52:.*]] = llvm.mlir.constant(0 : i64) : 
i64 // CHECK: %[[VAL_53:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_54:.*]] = llvm.sub %[[VAL_1_SLICE_LB0]], %[[VAL_26_LB0]] : i64 -// CHECK: %[[VAL_55:.*]] = llvm.mul %[[VAL_54]], %[[VAL_31_LEN]] : i64 -// CHECK: %[[VAL_56_SLICE_OFF0:.*]] = llvm.add %[[VAL_55]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_57:.*]] = llvm.sub %[[VAL_2_SLICE_EX0]], %[[VAL_1_SLICE_LB0]] : i64 -// CHECK: %[[VAL_58:.*]] = llvm.add %[[VAL_57]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_59:.*]] = llvm.sdiv %[[VAL_58]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_60:.*]] = llvm.icmp "sgt" %[[VAL_59]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_61:.*]] = llvm.select %[[VAL_60]], %[[VAL_59]], %[[VAL_52_c0]] : i1, i64 +// CHECK: %[[VAL_54:.*]] = llvm.sub %[[VAL_1]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_55:.*]] = llvm.mul %[[VAL_54]], %[[VAL_31]] : i64 +// CHECK: %[[VAL_56:.*]] = llvm.add %[[VAL_55]], %[[VAL_52]] : i64 +// CHECK: %[[VAL_57:.*]] = llvm.sub %[[VAL_2]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_58:.*]] = llvm.add %[[VAL_57]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_59:.*]] = llvm.sdiv %[[VAL_58]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_60:.*]] = llvm.icmp "sgt" %[[VAL_59]], %[[VAL_52]] : i64 +// CHECK: %[[VAL_61:.*]] = llvm.select %[[VAL_60]], %[[VAL_59]], %[[VAL_52]] : i1, i64 // CHECK: %[[VAL_62:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_51]][7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_61]], %[[VAL_62]][7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_3_SLICE_ST0]] : i64 +// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_36]], %[[VAL_3]] : i64 // CHECK: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_66:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_67:.*]] = 
llvm.mul %[[VAL_31_LEN]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_68:.*]] = llvm.sub %[[VAL_4_SLICE_LB1]], %[[VAL_18_LB1]] : i64 -// CHECK: %[[VAL_69_SLICE_OFF1:.*]] = llvm.mul %[[VAL_68]], %[[VAL_67]] : i64 -// CHECK: %[[VAL_70_OFFSET:.*]] = llvm.add %[[VAL_69_SLICE_OFF1]], %[[VAL_56_SLICE_OFF0]] : i64 -// CHECK: %[[VAL_71:.*]] = llvm.sub %[[VAL_5_SLICE_EX1]], %[[VAL_4_SLICE_LB1]] : i64 -// CHECK: %[[VAL_72:.*]] = llvm.add %[[VAL_71]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_73:.*]] = llvm.sdiv %[[VAL_72]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_74:.*]] = llvm.icmp "sgt" %[[VAL_73]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_75:.*]] = llvm.select %[[VAL_74]], %[[VAL_73]], %[[VAL_52_c0]] : i1, i64 +// CHECK: %[[VAL_66:.*]] = llvm.mul %[[VAL_36]], %[[VAL_28]] : i64 +// CHECK: %[[VAL_67:.*]] = llvm.mul %[[VAL_31]], %[[VAL_28]] : i64 +// CHECK: %[[VAL_68:.*]] = llvm.sub %[[VAL_4]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_69:.*]] = llvm.mul %[[VAL_68]], %[[VAL_67]] : i64 +// CHECK: %[[VAL_70:.*]] = llvm.add %[[VAL_69]], %[[VAL_56]] : i64 +// CHECK: %[[VAL_71:.*]] = llvm.sub %[[VAL_5]], %[[VAL_4]] : i64 +// CHECK: %[[VAL_72:.*]] = llvm.add %[[VAL_71]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_73:.*]] = llvm.sdiv %[[VAL_72]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_74:.*]] = llvm.icmp "sgt" %[[VAL_73]], %[[VAL_52]] : i64 +// CHECK: %[[VAL_75:.*]] = llvm.select %[[VAL_74]], %[[VAL_73]], %[[VAL_52]] : i1, i64 // CHECK: %[[VAL_76:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_65]][7, 1, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_75]], %[[VAL_76]][7, 1, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_66]], %[[VAL_6_SLICE_ST1]] : i64 +// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_66]], %[[VAL_6]] : i64 // CHECK: %[[VAL_79:.*]] = llvm.insertvalue %[[VAL_78]], %[[VAL_77]][7, 1, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, 
array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_80:.*]] = llvm.mul %[[VAL_66]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_81:.*]] = llvm.mul %[[VAL_67]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_82:.*]] = llvm.getelementptr %[[VAL_24_BASEPTR]]{{\[}}%[[VAL_70_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr, i32 -// CHECK: %[[VAL_84:.*]] = llvm.insertvalue %[[VAL_82]], %[[VAL_79]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_84]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_80:.*]] = llvm.mul %[[VAL_66]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_81:.*]] = llvm.mul %[[VAL_67]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_82:.*]] = llvm.getelementptr %[[VAL_24]]{{\[}}%[[VAL_70]]] : (!llvm.ptr, i64) -> !llvm.ptr, i32 +// CHECK: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_82]], %[[VAL_79]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: llvm.store %[[VAL_83]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr // CHECK: llvm.return // CHECK: } func.func @test_char4(%arg0: !fir.ref>>>>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) { @@ -108,86 +113,84 @@ func.func @test_char4(%arg0: !fir.ref>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_10:.*]] = llvm.alloca %[[VAL_9]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[VAL_12_c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[VAL_14:.*]] = llvm.load %[[VAL_0]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_14]], %[[VAL_10]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x 
i64>>)>, !llvm.ptr -// CHECK: %[[VAL_15:.*]] = llvm.getelementptr %[[VAL_10]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_16_BYTESIZE:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_17:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_18_LB1:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_19:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_20_EX1:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_21:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_22_ST1:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_23:.*]] = llvm.getelementptr %[[VAL_10]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_24_BASEPTR:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr -// CHECK: %[[VAL_25:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_26_LB0:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_27:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_28_EX0:.*]] = llvm.load %[[VAL_27]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_29:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> 
-// CHECK: %[[VAL_30_ST0:.*]] = llvm.load %[[VAL_29]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_32:.*]] = llvm.mlir.constant(40 : i32) : i32 -// CHECK: %[[VAL_33:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_34:.*]] = llvm.getelementptr %[[VAL_33]][1] : (!llvm.ptr) -> !llvm.ptr, i8 -// CHECK: %[[VAL_35:.*]] = llvm.ptrtoint %[[VAL_34]] : !llvm.ptr to i64 -// CHECK: %[[VAL_36_BYTESIZE:.*]] = llvm.mul %[[VAL_35]], %[[VAL_16_BYTESIZE]] : i64 -// CHECK: %[[VAL_37:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_36_BYTESIZE]], %[[VAL_37]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(20240719 : i32) : i32 -// CHECK: %[[VAL_40:.*]] = llvm.insertvalue %[[VAL_39]], %[[VAL_38]][2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_41:.*]] = llvm.mlir.constant(2 : i32) : i32 -// CHECK: %[[VAL_42:.*]] = llvm.trunc %[[VAL_41]] : i32 to i8 -// CHECK: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_42]], %[[VAL_40]][3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_44:.*]] = llvm.trunc %[[VAL_32]] : i32 to i8 -// CHECK: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_46:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[VAL_47:.*]] = llvm.trunc %[[VAL_46]] : i32 to i8 -// CHECK: %[[VAL_48:.*]] = llvm.insertvalue %[[VAL_47]], %[[VAL_45]][5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_49:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[VAL_50:.*]] = llvm.trunc %[[VAL_49]] : i32 to i8 -// CHECK: %[[VAL_51:.*]] = llvm.insertvalue %[[VAL_50]], %[[VAL_48]][6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_52_c0:.*]] = 
llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[VAL_53:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_54:.*]] = llvm.sub %[[VAL_1_SLICE_LB0]], %[[VAL_26_LB0]] : i64 -// CHECK: %[[VAL_55:.*]] = llvm.mul %[[VAL_54]], %[[VAL_16_BYTESIZE]] : i64 -// CHECK: %[[VAL_56_SLICE_OFF0:.*]] = llvm.add %[[VAL_55]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_57:.*]] = llvm.sub %[[VAL_2_SLICE_EX0]], %[[VAL_1_SLICE_LB0]] : i64 -// CHECK: %[[VAL_58:.*]] = llvm.add %[[VAL_57]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_59:.*]] = llvm.sdiv %[[VAL_58]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_60:.*]] = llvm.icmp "sgt" %[[VAL_59]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_61:.*]] = llvm.select %[[VAL_60]], %[[VAL_59]], %[[VAL_52_c0]] : i1, i64 -// CHECK: %[[VAL_62:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_51]][7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_61]], %[[VAL_62]][7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_66:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_67:.*]] = llvm.mul %[[VAL_16_BYTESIZE]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_68:.*]] = llvm.sub %[[VAL_4_SLICE_LB1]], %[[VAL_18_LB1]] : i64 -// CHECK: %[[VAL_69_SLICE_OFF1:.*]] = llvm.mul %[[VAL_68]], %[[VAL_67]] : i64 -// CHECK: %[[VAL_70_OFFSET:.*]] = llvm.add %[[VAL_69_SLICE_OFF1]], %[[VAL_56_SLICE_OFF0]] : i64 -// CHECK: %[[VAL_71:.*]] = llvm.sub %[[VAL_5_SLICE_EX1]], %[[VAL_4_SLICE_LB1]] : i64 -// CHECK: %[[VAL_72:.*]] = llvm.add %[[VAL_71]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_73:.*]] = llvm.sdiv %[[VAL_72]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_74:.*]] = llvm.icmp "sgt" 
%[[VAL_73]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_75:.*]] = llvm.select %[[VAL_74]], %[[VAL_73]], %[[VAL_52_c0]] : i1, i64 -// CHECK: %[[VAL_76:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_65]][7, 1, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_75]], %[[VAL_76]][7, 1, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_66]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_79:.*]] = llvm.insertvalue %[[VAL_78]], %[[VAL_77]][7, 1, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_80:.*]] = llvm.mul %[[VAL_66]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_81:.*]] = llvm.mul %[[VAL_67]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_82:.*]] = llvm.getelementptr %[[VAL_24_BASEPTR]]{{\[}}%[[VAL_70_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8 -// CHECK: %[[VAL_84:.*]] = llvm.insertvalue %[[VAL_82]], %[[VAL_79]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_84]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_12:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_13:.*]] = llvm.mlir.constant(72 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_10]], %[[VAL_0]], %[[VAL_13]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_14:.*]] = llvm.getelementptr %[[VAL_10]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_15:.*]] = llvm.load %[[VAL_14]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_16:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_17:.*]] = llvm.load %[[VAL_16]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_18:.*]] = 
llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_19:.*]] = llvm.load %[[VAL_18]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_20:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_21:.*]] = llvm.load %[[VAL_20]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_22:.*]] = llvm.getelementptr %[[VAL_10]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_23:.*]] = llvm.load %[[VAL_22]] : !llvm.ptr -> !llvm.ptr +// CHECK: %[[VAL_24:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_25:.*]] = llvm.load %[[VAL_24]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_26:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_27:.*]] = llvm.load %[[VAL_26]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_28:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_29:.*]] = llvm.load %[[VAL_28]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_30:.*]] = llvm.mlir.constant(40 : i32) : i32 +// CHECK: %[[VAL_31:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK: %[[VAL_32:.*]] = llvm.getelementptr %[[VAL_31]][1] : (!llvm.ptr) -> !llvm.ptr, i8 +// CHECK: %[[VAL_33:.*]] = llvm.ptrtoint %[[VAL_32]] : !llvm.ptr to i64 +// CHECK: %[[VAL_34:.*]] = llvm.mul %[[VAL_33]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_35:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_36:.*]] = 
llvm.insertvalue %[[VAL_34]], %[[VAL_35]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_37:.*]] = llvm.mlir.constant(20240719 : i32) : i32 +// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_37]], %[[VAL_36]][2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(2 : i32) : i32 +// CHECK: %[[VAL_40:.*]] = llvm.trunc %[[VAL_39]] : i32 to i8 +// CHECK: %[[VAL_41:.*]] = llvm.insertvalue %[[VAL_40]], %[[VAL_38]][3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_42:.*]] = llvm.trunc %[[VAL_30]] : i32 to i8 +// CHECK: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_42]], %[[VAL_41]][4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_44:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[VAL_45:.*]] = llvm.trunc %[[VAL_44]] : i32 to i8 +// CHECK: %[[VAL_46:.*]] = llvm.insertvalue %[[VAL_45]], %[[VAL_43]][5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_47:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[VAL_48:.*]] = llvm.trunc %[[VAL_47]] : i32 to i8 +// CHECK: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_46]][6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_50:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[VAL_51:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_52:.*]] = llvm.sub %[[VAL_1]], %[[VAL_25]] : i64 +// CHECK: %[[VAL_53:.*]] = llvm.mul %[[VAL_52]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_54:.*]] = llvm.add %[[VAL_53]], %[[VAL_50]] : i64 +// CHECK: %[[VAL_55:.*]] = llvm.sub %[[VAL_2]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_56:.*]] = llvm.add %[[VAL_55]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_57:.*]] = llvm.sdiv %[[VAL_56]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_58:.*]] = llvm.icmp "sgt" %[[VAL_57]], %[[VAL_50]] : i64 +// CHECK: %[[VAL_59:.*]] = 
llvm.select %[[VAL_58]], %[[VAL_57]], %[[VAL_50]] : i1, i64 +// CHECK: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_51]], %[[VAL_49]][7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_59]], %[[VAL_60]][7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_62:.*]] = llvm.mul %[[VAL_34]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_34]], %[[VAL_27]] : i64 +// CHECK: %[[VAL_65:.*]] = llvm.mul %[[VAL_15]], %[[VAL_27]] : i64 +// CHECK: %[[VAL_66:.*]] = llvm.sub %[[VAL_4]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_67:.*]] = llvm.mul %[[VAL_66]], %[[VAL_65]] : i64 +// CHECK: %[[VAL_68:.*]] = llvm.add %[[VAL_67]], %[[VAL_54]] : i64 +// CHECK: %[[VAL_69:.*]] = llvm.sub %[[VAL_5]], %[[VAL_4]] : i64 +// CHECK: %[[VAL_70:.*]] = llvm.add %[[VAL_69]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_71:.*]] = llvm.sdiv %[[VAL_70]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_72:.*]] = llvm.icmp "sgt" %[[VAL_71]], %[[VAL_50]] : i64 +// CHECK: %[[VAL_73:.*]] = llvm.select %[[VAL_72]], %[[VAL_71]], %[[VAL_50]] : i1, i64 +// CHECK: %[[VAL_74:.*]] = llvm.insertvalue %[[VAL_51]], %[[VAL_63]][7, 1, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_75:.*]] = llvm.insertvalue %[[VAL_73]], %[[VAL_74]][7, 1, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_76:.*]] = llvm.mul %[[VAL_64]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][7, 1, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_64]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_79:.*]] = llvm.mul %[[VAL_65]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_80:.*]] = 
llvm.getelementptr %[[VAL_23]]{{\[}}%[[VAL_68]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8 +// CHECK: %[[VAL_81:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_77]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: llvm.store %[[VAL_81]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr // CHECK: llvm.return // CHECK: } func.func @test_char1(%arg0: !fir.ref>>>>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) { diff --git a/flang/test/Fir/polymorphic.fir b/flang/test/Fir/polymorphic.fir index 40204314e8df7..78e5b8dcf84c7 100644 --- a/flang/test/Fir/polymorphic.fir +++ b/flang/test/Fir/polymorphic.fir @@ -14,8 +14,7 @@ func.func @_QMpolymorphic_testPtest_allocate_unlimited_polymorphic_non_derived() // CHECK: %[[MEM:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } // CHECK: %[[DESC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 0, i32 20240719, i8 0, i8 -1, i8 1, i8 1, ptr null, [1 x i64] zeroinitializer }, ptr %[[MEM]] -// CHECK: %[[LOADED:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[MEM]], align 8 -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED]], ptr %[[DESC]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[DESC]], ptr %[[MEM]], i32 40, i1 false) // CHECK: ret void // CHECK: } @@ -66,8 +65,7 @@ func.func @_QMpolymorphic_testPtest_embox() { // CHECK-LABEL: @_QMpolymorphic_testPtest_embox() // CHECK: %[[ALLOCA_DESC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } { ptr @_QFEy, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20240719, i8 1, i8 9, {{.*}}, ptr %[[ALLOCA_DESC]] -// CHECK: %[[LOADED_DESC:.*]] = load { ptr, i64, i32, i8, i8, i8, 
i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_DESC]], align 8 -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[LOADED_DESC]], ptr @_QFEx, align 8 +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr @_QFEx, ptr %[[ALLOCA_DESC]], i32 64, i1 false) // Test emboxing of an array element from an unlimited polymorphic array. @@ -158,8 +156,7 @@ func.func @_QQmain() { // CHECK: %[[CLASS_NONE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } // CHECK: %[[DESC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr @_QMmod1Ea, i64 ptrtoint (ptr getelementptr (%_QMmod1TtK2, ptr null, i32 1) to i64), i32 20240719, i8 0, i8 42, i8 1, i8 1, ptr @_QMmod1EXdtXtX2, [1 x i64] zeroinitializer }, ptr %[[CLASS_NONE]], align 8 -// CHECK: %[[LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS_NONE]] -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD]], ptr %[[DESC]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[DESC]], ptr %[[CLASS_NONE]], i32 40, i1 false) // CHECK: call void @_QMmod1Psub1(ptr %[[DESC]]) fir.global @_QMmod2Ep : !fir.class> { @@ -180,8 +177,7 @@ func.func private @_FortranAPointerAssociate(!fir.ref>, !fir.box< // CHECK-LABEL: define void @_QMmod2Pinitp( // CHECK-SAME: ptr %[[ARG0:.*]]){{.*}}{ // CHECK: %[[ALLOCA_CLASS_NONE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } -// CHECK: %[[LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG0]] -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD]], ptr %[[ALLOCA_CLASS_NONE]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA_CLASS_NONE]], ptr %[[ARG0]], i32 40, i1 false) // CHECK: %{{.*}} = call {} @_FortranAPointerAssociate(ptr @_QMmod2Ep, ptr %[[ALLOCA_CLASS_NONE]]) // CHECK: ret void diff --git a/flang/test/Fir/tbaa.fir b/flang/test/Fir/tbaa.fir index 
809ab3a922a0f..401ebbc8c49fe 100644 --- a/flang/test/Fir/tbaa.fir +++ b/flang/test/Fir/tbaa.fir @@ -137,8 +137,8 @@ module { // CHECK: %[[VAL_7:.*]] = llvm.mlir.addressof @_QFEx : !llvm.ptr // CHECK: %[[VAL_8:.*]] = llvm.mlir.addressof @_QQclX2E2F64756D6D792E66393000 : !llvm.ptr // CHECK: %[[VAL_10:.*]] = llvm.call @_FortranAioBeginExternalListOutput(%[[VAL_6]], %[[VAL_8]], %[[VAL_5]]) {fastmathFlags = #llvm.fastmath} : (i32, !llvm.ptr, i32) -> !llvm.ptr -// CHECK: %[[VAL_11:.*]] = llvm.load %[[VAL_7]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)> -// CHECK: llvm.store %[[VAL_11]], %[[VAL_3]] {tbaa = [#[[$BOXT]]]} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)>, !llvm.ptr +// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(64 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_3]], %[[VAL_7]], %[[VAL_11]]) <{isVolatile = false, tbaa = [#[[$BOXT]]]}> // CHECK: %[[VAL_12:.*]] = llvm.getelementptr %[[VAL_3]][0, 7, %[[VAL_4]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)> // CHECK: %[[VAL_13:.*]] = llvm.load %[[VAL_12]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i64 // CHECK: %[[VAL_14:.*]] = llvm.getelementptr %[[VAL_3]][0, 7, %[[VAL_4]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)> diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90 index 62d0a3faf0c59..63ac6fbe05ee0 100644 --- a/flang/test/Integration/OpenMP/private-global.f90 +++ b/flang/test/Integration/OpenMP/private-global.f90 @@ -31,8 +31,9 @@ program bug ! CHECK: %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 ! 
CHECK: %[[TABLE_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } { ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20240719, i8 1, i8 9, i8 0, i8 0, [1 x [3 x i64]] {{\[\[}}3 x i64] [i64 1, i64 10, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)]] }, ptr %[[PRIV_TABLE]], 0 ! CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL]], ptr %[[TABLE_BOX_ADDR]], align 8 -! CHECK: %[[TABLE_BOX_VAL2:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[TABLE_BOX_ADDR]], align 8 -! CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL2]], ptr %[[TABLE_BOX_ADDR2]], align 8 +! CHECK : %[[TABLE_BOX_VAL2:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[TABLE_BOX_ADDR]], align 8 +! CHECK : store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL2]], ptr %[[TABLE_BOX_ADDR2]], align 8 +! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[TABLE_BOX_ADDR]], i32 48, i1 false) ! CHECK: %[[VAL_26:.*]] = call {} @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9) ! ... ! check that we use the private copy of table for table/=50 diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 index 9c97c689dad70..b3a668018df1d 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 @@ -57,5 +57,4 @@ end program compilation_to_obj ! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global ! LLVM: define internal void @_QQmain..omp_par -! LLVM: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 -! 
LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8 +! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %{{.+}}, ptr @[[GLOB_VAR]]t, i32 48, i1 false) diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 index 262075ec9b25d..8e6f55abd5671 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -17,7 +17,7 @@ subroutine proc end subroutine proc !CHECK-LABEL: define void @proc_() -!CHECK: call void +!CHECK: call void (ptr, i32, ptr, ...) !CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}}) !CHECK: define internal void @[[OMP_PAR]](ptr {{.*}} %[[TID_ADDR:.*]], ptr noalias diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index e23e38ffb4b01..4d70e1ea4c739 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -603,10 +603,9 @@ program test_alloc ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 1, i32 0) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 20) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) -! LLVM-COUNT-2: call void %{{.*}}() +! LLVM-COUNT-2: call void %{{[0-9]*}}() -! LLVM: %[[C1_LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[C1_LOAD]], ptr %{{.*}} +! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C1:.*]] = load ptr, ptr %[[GEP_TDESC_C1]] ! 
LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -620,8 +619,7 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %[[TMP:.*]] ! LLVM: call void %{{.*}}(ptr %{{.*}}) -! LLVM: %[[LOAD_C2:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_C2]], ptr %{{.*}} +! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C2:.*]] = load ptr, ptr %[[GEP_TDESC_C2]] ! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -635,9 +633,7 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %{{.*}} ! LLVM: call void %{{.*}}(ptr %{{.*}}) -! LLVM: %[[C3_LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[C3_LOAD]], ptr %{{.*}} - +! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C3:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 8 ! LLVM: %[[TDESC_C3:.*]] = load ptr, ptr %[[GEP_TDESC_C3]] ! LLVM: %[[ELE_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -658,8 +654,7 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[BOX7]], ptr %{{.*}} ! LLVM: call void %{{.*}}(ptr %{{.*}}) -! LLVM: %[[C4_LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[C4_LOAD]], ptr %{{.*}} +! 
LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 8 ! LLVM: %[[TDESC_C4:.*]] = load ptr, ptr %[[GEP_TDESC_C4]] ! LLVM: %[[ELE_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -686,8 +681,7 @@ program test_alloc ! LLVM-LABEL: define void @_QMpolyPtest_deallocate() ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 ptrtoint (ptr getelementptr (%_QMpolyTp1, ptr null, i32 1) to i64), i32 20240719, i8 0, i8 42, i8 2, i8 1, ptr @_QMpolyEXdtXp1, [1 x i64] zeroinitializer }, ptr %[[ALLOCA1:[0-9]*]] -! LLVM: %[[LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ALLOCA1]] -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD]], ptr %[[ALLOCA2:[0-9]*]] +! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA2:[0-9]+]], ptr %[[ALLOCA1]], i32 40, i1 false) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %[[ALLOCA2]], ptr @_QMpolyEXdtXp1, i32 0, i32 0) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableDeallocatePolymorphic(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) From 0227b73b513154a2bde90ddf1e167b6257765d05 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Wed, 30 Oct 2024 12:50:40 -0400 Subject: [PATCH 20/69] [AMDGPU][True16][test] update VOP2 asm/dasm file with true16/fake16 (#113101) This is a non-functional change update GFX11/GFX12 VOP2 asm/dasm test for true16/fake16: 1. duplicate files to be true16/fake16 by adding "-mattr=+real-true16/-mattr=-real-true16" while true16 test file will be updated to true16 format when the true16 instructions are supported 2. 
sort "*t16_err.s" and "*t16_promote.s" tests to alphabetic order. This is for the upcoming true16 mc changes, and mainly trying to help repo maintainer to resolve conflicts in the tests quickly. A script is proposed to help for the sorting https://github.com/llvm/llvm-project/pull/111769. Since these two files are t16 only, it should not create conflicts in downstream branches 3. add -filetype=null to seperate stdout and stderr to avoid disordered output from llvm-mc --- llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s | 2554 ++++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop2.s | 8 +- .../MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s | 2114 ++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s | 8 +- .../MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s | 451 +++ llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s | 8 +- .../MC/AMDGPU/gfx11_asm_vop2_err-fake16.s | 13 + llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s | 2 +- llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s | 309 +- .../MC/AMDGPU/gfx11_asm_vop2_t16_promote.s | 261 +- llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s | 2560 +++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop2.s | 8 +- .../MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s | 19 + llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s | 2 +- .../MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s | 2006 +++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s | 8 +- .../MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s | 433 +++ llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s | 8 +- llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s | 295 +- .../MC/AMDGPU/gfx12_asm_vop2_t16_promote.s | 247 +- .../Disassembler/AMDGPU/gfx11_dasm_vop2.txt | 1480 +++++----- .../AMDGPU/gfx11_dasm_vop2_dpp16.txt | 1231 ++++---- .../AMDGPU/gfx11_dasm_vop2_dpp8.txt | 181 +- .../Disassembler/AMDGPU/gfx12_dasm_vop2.txt | 1584 +++++----- .../AMDGPU/gfx12_dasm_vop2_dpp16.txt | 1197 ++++---- .../AMDGPU/gfx12_dasm_vop2_dpp8.txt | 177 +- 26 files changed, 13826 insertions(+), 3338 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s create 
mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop2_err-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s new file mode 100644 index 0000000000000..96dd572089436 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s @@ -0,0 +1,2554 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x40] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, 
vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x64] + +v_add_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x64] + +v_add_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x64] + +v_add_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x64] + +v_add_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x64] + +v_add_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x64] + +v_add_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x64] + +v_add_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x64] + +v_add_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x64] + +v_add_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x64] + +v_add_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x64] + +v_add_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x64] + +v_add_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] + +v_add_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x06] + +v_add_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x06] + +v_add_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x06] + +v_add_f32 v5, s105, v2 +// 
GFX11: encoding: [0x69,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x06] + +v_add_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x06] + +v_add_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x06] + +v_add_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x06] + +v_add_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x06] + +v_add_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x06] + +v_add_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x06] + +v_add_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x06] + +v_add_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x06] + +v_add_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] + +v_add_nc_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x4a] + +v_add_nc_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] + +v_and_b32 v5, v1, v2 +// GFX11: encoding: 
[0x01,0x05,0x0a,0x36] + +v_and_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x36] + +v_and_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x36] + +v_and_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x36] + +v_and_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x36] + +v_and_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x36] + +v_and_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x36] + +v_and_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x36] + +v_and_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x36] + +v_and_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x36] + +v_and_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x36] + +v_and_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x36] + +v_and_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, 
src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x34] + +v_ashrrev_i32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] + +v_cndmask_b32 v5, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_hi, v2 +// GFX11: 
encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_dot2acc_f32_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x04] + +v_dot2acc_f32_f16 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x04] + +v_dot2acc_f32_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x04] + 
+v_dot2acc_f32_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v255, 0xfe0b, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] + +v_dot2c_f32_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x04] + +v_dot2c_f32_f16 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x04] + +v_dot2c_f32_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v255, 0xfe0b, v255 +// GFX11: encoding: 
[0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, v1, v2, 0xfe0b +// GFX11: encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, v127, v2, 0xfe0b +// GFX11: encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s1, v2, 0xfe0b +// GFX11: encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s105, v2, 0xfe0b +// GFX11: encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b +// GFX11: encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b +// GFX11: encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, ttmp15, v2, 0xfe0b +// GFX11: encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, m0, v2, 0xfe0b +// GFX11: encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_lo, v2, 0xfe0b +// GFX11: encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_hi, v2, 0xfe0b +// GFX11: encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, null, v2, 0xfe0b +// GFX11: encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, -1, v2, 0xfe0b +// GFX11: encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, 0.5, v2, 0xfe0b +// GFX11: encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, src_scc, v2, 0xfe0b +// GFX11: encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b +// GFX11: encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f32 v5, v1, v2, 0xaf123456 +// GFX11: encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, v255, v2, 0xaf123456 +// GFX11: encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s1, v2, 0xaf123456 +// GFX11: encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s105, v2, 0xaf123456 +// GFX11: encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 +// GFX11: 
encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 +// GFX11: encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 +// GFX11: encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, m0, v2, 0xaf123456 +// GFX11: encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 +// GFX11: encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 +// GFX11: encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, null, v2, 0xaf123456 +// GFX11: encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, -1, v2, 0xaf123456 +// GFX11: encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, 0.5, v2, 0xaf123456 +// GFX11: encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, src_scc, v2, 0xaf123456 +// GFX11: encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 +// GFX11: encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] + +v_fmac_dx9_zero_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 
v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] + +v_fmac_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x6c] + +v_fmac_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x6c] + +v_fmac_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x6c] + +v_fmac_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x6c] + +v_fmac_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x6c] + +v_fmac_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x6c] + +v_fmac_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x6c] + +v_fmac_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x6c] + +v_fmac_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] + +v_fmac_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x56] + +v_fmac_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x56] + +v_fmac_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x56] + +v_fmac_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x56] + +v_fmac_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x56] + +v_fmac_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_lo, v2 +// 
GFX11: encoding: [0x7e,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x56] + +v_fmac_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x56] + +v_fmac_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x56] + +v_fmac_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x56] + +v_fmac_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x56] + +v_fmac_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] + +v_fmac_legacy_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0c] + +v_fmac_legacy_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0c] + +v_fmac_legacy_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] + +v_fmamk_f16 v5, v1, 0xfe0b, v3 +// GFX11: encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, v127, 0xfe0b, v3 +// GFX11: encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, s1, 0xfe0b, v3 +// GFX11: encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, 
s105, 0xfe0b, v3 +// GFX11: encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 +// GFX11: encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 +// GFX11: encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 +// GFX11: encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, m0, 0xfe0b, v3 +// GFX11: encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 +// GFX11: encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 +// GFX11: encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, null, 0xfe0b, v3 +// GFX11: encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, -1, 0xfe0b, v3 +// GFX11: encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, 0.5, 0xfe0b, v3 +// GFX11: encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, src_scc, 0xfe0b, v3 +// GFX11: encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f32 v5, v1, 0xaf123456, v3 +// GFX11: encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, v255, 0xaf123456, v3 +// GFX11: encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s1, 0xaf123456, v3 +// GFX11: encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s105, 0xaf123456, v3 +// GFX11: encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 +// GFX11: encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 +// GFX11: encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 +// GFX11: encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, m0, 0xaf123456, v3 +// GFX11: encoding: 
[0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 +// GFX11: encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 +// GFX11: encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, null, 0xaf123456, v3 +// GFX11: encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, -1, 0xaf123456, v3 +// GFX11: encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, 0.5, 0xaf123456, v3 +// GFX11: encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, src_scc, 0xaf123456, v3 +// GFX11: encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] + +v_ldexp_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x76] + +v_ldexp_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x76] + +v_ldexp_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x76] + +v_ldexp_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x76] + +v_ldexp_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x76] + +v_ldexp_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x76] + +v_ldexp_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x76] + +v_ldexp_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x76] + +v_ldexp_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, v255, v2 +// GFX11: encoding: 
[0xff,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x30] + +v_lshlrev_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, src_scc, v2 +// 
GFX11: encoding: [0xfd,0x04,0x0a,0x32] + +v_lshrrev_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] + +v_max_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x72] + +v_max_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x72] + +v_max_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x72] + +v_max_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x72] + +v_max_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x72] + +v_max_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x72] + +v_max_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x72] + +v_max_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x72] + +v_max_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x72] + +v_max_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x72] + +v_max_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x72] + +v_max_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x72] + +v_max_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x72] + +v_max_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x72] + +v_max_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] + +v_max_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x20] + +v_max_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x20] + +v_max_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x20] + +v_max_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x20] + +v_max_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x20] + +v_max_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x20] + +v_max_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x20] + +v_max_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x20] + +v_max_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x20] + +v_max_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x20] + +v_max_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x20] + +v_max_f32 v5, -1, v2 
+// GFX11: encoding: [0xc1,0x04,0x0a,0x20] + +v_max_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x20] + +v_max_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x20] + +v_max_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf] + +v_max_i32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x24] + +v_max_i32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x24] + +v_max_i32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x24] + +v_max_i32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x24] + +v_max_i32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x24] + +v_max_i32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x24] + +v_max_i32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x24] + +v_max_i32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x24] + +v_max_i32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x24] + +v_max_i32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x24] + +v_max_i32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x24] + +v_max_i32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x24] + +v_max_i32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] + +v_max_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x28] + +v_max_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x28] + +v_max_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x28] + +v_max_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x28] + +v_max_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x28] + +v_max_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x28] + +v_max_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x28] + +v_max_u32 v5, 
exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x28] + +v_max_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x28] + +v_max_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x28] + +v_max_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x28] + +v_max_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x28] + +v_max_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] + +v_min_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x74] + +v_min_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x74] + +v_min_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x74] + +v_min_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x74] + +v_min_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x74] + +v_min_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x74] + +v_min_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x74] + +v_min_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x74] + +v_min_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x74] + +v_min_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x74] + +v_min_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x74] + +v_min_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x74] + +v_min_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x74] + +v_min_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x74] + +v_min_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] + +v_min_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x1e] + +v_min_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x1e] + +v_min_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x1e] + +v_min_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x1e] + +v_min_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x1e] + +v_min_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x1e] + +v_min_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x1e] + +v_min_f32 v5, 
m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x1e] + +v_min_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x1e] + +v_min_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x1e] + +v_min_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x1e] + +v_min_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x1e] + +v_min_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x1e] + +v_min_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x1e] + +v_min_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf] + +v_min_i32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x22] + +v_min_i32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x22] + +v_min_i32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x22] + +v_min_i32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x22] + +v_min_i32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x22] + +v_min_i32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x22] + +v_min_i32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x22] + +v_min_i32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x22] + +v_min_i32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x22] + +v_min_i32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x22] + +v_min_i32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x22] + +v_min_i32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x22] + +v_min_i32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] + +v_min_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x26] + +v_min_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x26] + +v_min_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x26] + +v_min_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x26] + +v_min_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x26] + +v_min_u32 v5, 
vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x26] + +v_min_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x26] + +v_min_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x26] + +v_min_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x26] + +v_min_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x26] + +v_min_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x26] + +v_min_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x26] + +v_min_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x26] + +v_min_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x26] + +v_min_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x6a] + +v_mul_f16 
v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x6a] + +v_mul_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x6a] + +v_mul_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x6a] + +v_mul_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x6a] + +v_mul_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x6a] + +v_mul_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x6a] + +v_mul_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x6a] + +v_mul_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x6a] + +v_mul_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x6a] + +v_mul_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] + +v_mul_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x10] + +v_mul_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x10] + +v_mul_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x10] + +v_mul_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x10] + +v_mul_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x10] + +v_mul_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x10] + +v_mul_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x10] + +v_mul_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x10] + +v_mul_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x10] + +v_mul_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x10] + +v_mul_f32 v255, 0xaf123456, v255 +// 
GFX11: encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32_u24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_hi, v2 +// GFX11: encoding: 
[0x7f,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x12] + +v_mul_i32_i24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_hi, v2 +// GFX11: encoding: 
[0x6b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x16] + +v_mul_u32_u24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] + +v_or_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x38] + +v_or_b32 v5, v255, v2 +// GFX11: encoding: 
[0xff,0x05,0x0a,0x38] + +v_or_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x38] + +v_or_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x38] + +v_or_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x38] + +v_or_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x38] + +v_or_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x38] + +v_or_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x38] + +v_or_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x38] + +v_or_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x38] + +v_or_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x38] + +v_or_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x38] + +v_or_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] + +v_pk_fmac_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x78] + +v_pk_fmac_f16 v255, 
0xfe0b, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_sub_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x66] + +v_sub_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x66] + +v_sub_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x66] + +v_sub_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x66] + +v_sub_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x66] + +v_sub_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_lo, v2 +// GFX11: 
encoding: [0x7e,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x66] + +v_sub_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x66] + +v_sub_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x66] + +v_sub_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x66] + +v_sub_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x66] + +v_sub_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] + +v_sub_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x08] + +v_sub_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x08] + +v_sub_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x08] + +v_sub_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x08] + +v_sub_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x08] + +v_sub_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x08] + +v_sub_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x08] + +v_sub_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x08] + +v_sub_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x08] + +v_sub_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x08] + +v_sub_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, 
ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x4c] + +v_sub_nc_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: 
encoding: [0x7d,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: 
encoding: [0x69,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x68] + +v_subrev_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x68] + +v_subrev_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x68] + +v_subrev_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x68] + +v_subrev_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x68] + +v_subrev_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x68] + +v_subrev_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x68] + +v_subrev_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x68] + +v_subrev_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x68] + +v_subrev_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x68] + +v_subrev_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] + +v_subrev_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0a] + +v_subrev_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0a] + +v_subrev_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0a] + +v_subrev_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0a] + +v_subrev_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0a] 
+ +v_subrev_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0a] + +v_subrev_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0a] + +v_subrev_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0a] + +v_subrev_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] + +v_xnor_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x3c] + +v_xnor_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x3c] + +v_xnor_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x3c] + +v_xnor_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, m0, v2 +// 
GFX11: encoding: [0x7d,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x3c] + +v_xnor_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x3c] + +v_xnor_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x3c] + +v_xnor_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x3c] + +v_xnor_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x3c] + +v_xnor_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] + +v_xor_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x3a] + +v_xor_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x3a] + +v_xor_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x3a] + +v_xor_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x3a] + +v_xor_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x3a] + +v_xor_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x3a] + +v_xor_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x3a] + +v_xor_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x3a] + +v_xor_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x3a] + +v_xor_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x3a] + +v_xor_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s index fb300b2e94972..2a4b3ea201701 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 
%s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo // W32: encoding: [0x01,0x05,0x0a,0x40] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s new file mode 100644 index 0000000000000..6b9092f501e5a --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s @@ -0,0 +1,2114 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null 
%s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, 
vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 
v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// 
W64: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] + +v_add_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] + +v_add_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] + +v_add_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] + +v_add_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x09,0x13] + +v_add_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xf5,0x30] + +v_add_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] + +v_add_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] + +v_add_f32 v5, v1, v2 row_mirror +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] + +v_add_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] + +v_add_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x09,0x13] + +v_add_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xf5,0x30] + +v_add_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] + +v_add_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x09,0x13] + +v_add_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x05,0x30] + +v_and_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] + +v_and_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] + +v_and_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] + +v_and_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] + +v_and_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x09,0x13] + +v_and_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x05,0x30] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] + +v_ashrrev_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x09,0x13] + +v_ashrrev_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 
fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x05,0x30] + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -v1, |v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, |v1|, -v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, 
v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:15 
row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + 
+v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pk_rtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pkrtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_dot2acc_f32_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] + +v_dot2acc_f32_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x60,0x09,0x13] + +v_dot2acc_f32_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xf5,0x30] + +v_dot2c_f32_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] + +v_dot2c_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] + +v_dot2c_f32_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x04,0x01,0x60,0x09,0x13] + +v_dot2c_f32_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xf5,0x30] + +v_fmac_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] + +v_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] + +v_fmac_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] + +v_fmac_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x09,0x13] + +v_fmac_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xf5,0x30] + +v_fmac_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] + +v_fmac_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] + 
+v_fmac_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] + +v_fmac_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x09,0x13] + +v_fmac_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xf5,0x30] + +v_ldexp_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 
row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] + +v_ldexp_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x09,0x13] + +v_ldexp_f16 v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x35,0x30] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] + +v_lshlrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x09,0x13] + +v_lshlrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x05,0x30] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] + +v_lshrrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x09,0x13] + 
+v_lshrrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x05,0x30] + +v_max_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] + +v_max_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] + +v_max_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] + +v_max_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] + +v_max_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] + +v_max_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] + +v_max_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] + +v_max_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] + +v_max_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x09,0x13] + +v_max_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xf5,0x30] + +v_max_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff] + +v_max_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff] + +v_max_f32 v5, v1, v2 row_mirror +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff] + +v_max_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff] + +v_max_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff] + +v_max_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff] + +v_max_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff] + +v_max_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01] + +v_max_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x60,0x09,0x13] + +v_max_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x21,0xff,0x6f,0xf5,0x30] + +v_max_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] + +v_max_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] + +v_max_i32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] + +v_max_i32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] + 
+v_max_i32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] + +v_max_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x09,0x13] + +v_max_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x05,0x30] + +v_max_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] + +v_max_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] + +v_max_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] + +v_max_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] + +v_max_u32 v5, v1, v2 
row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] + +v_max_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x09,0x13] + +v_max_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x05,0x30] + +v_min_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] + +v_min_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] + +v_min_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] + +v_min_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] + +v_min_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] + +v_min_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] + +v_min_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] + +v_min_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] + +v_min_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x09,0x13] + +v_min_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xf5,0x30] + +v_min_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// 
GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff] + +v_min_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff] + +v_min_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff] + +v_min_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff] + +v_min_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff] + +v_min_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff] + +v_min_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff] + +v_min_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01] + +v_min_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x60,0x09,0x13] + +v_min_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xf5,0x30] + +v_min_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] + +v_min_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] + +v_min_i32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] + +v_min_i32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:1 +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] + +v_min_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x09,0x13] + +v_min_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x05,0x30] + +v_min_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] + +v_min_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] + +v_min_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] + +v_min_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] + +v_min_u32 v5, 
v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] + +v_min_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x09,0x13] + +v_min_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x05,0x30] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + 
+v_mul_dx9_zero_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + +v_mul_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] + +v_mul_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] + +v_mul_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] + +v_mul_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x09,0x13] + +v_mul_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xf5,0x30] + +v_mul_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] + +v_mul_f32 v5, v1, 
v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] + +v_mul_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] + +v_mul_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x09,0x13] + +v_mul_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xf5,0x30] + +v_mul_hi_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] + +v_mul_hi_i32_i24 v5, 
v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] + +v_mul_hi_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x09,0x13] + +v_mul_hi_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x05,0x30] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:1 +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] + +v_mul_hi_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x09,0x13] + +v_mul_hi_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x05,0x30] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] + +v_mul_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x09,0x13] + +v_mul_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x05,0x30] + +v_mul_legacy_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_legacy_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + +v_mul_legacy_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_legacy_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + 
+v_mul_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] + +v_mul_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x09,0x13] + +v_mul_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x05,0x30] + +v_or_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] + +v_or_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] + +v_or_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] + +v_or_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] + +v_or_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x09,0x13] + +v_or_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x05,0x30] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 
v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] + +v_sub_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] + +v_sub_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: 
[0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] + +v_sub_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x09,0x13] + +v_sub_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xf5,0x30] + +v_sub_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] + +v_sub_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] + +v_sub_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] + +v_sub_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x09,0x13] + +v_sub_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xf5,0x30] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] + +v_sub_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x09,0x13] + +v_sub_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x05,0x30] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo 
quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: 
[0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: 
[0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] + +v_subrev_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] + +v_subrev_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] + +v_subrev_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x09,0x13] + +v_subrev_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xf5,0x30] + +v_subrev_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] + +v_subrev_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] + +v_subrev_f32 v5, v1, v2 row_mirror +// GFX11: 
encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] + +v_subrev_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x09,0x13] + +v_subrev_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xf5,0x30] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, 
v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] + +v_subrev_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x09,0x13] + +v_subrev_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x05,0x30] + +v_xnor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] + +v_xnor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] + +v_xnor_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] + +v_xnor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x09,0x13] + +v_xnor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x05,0x30] + +v_xor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] + +v_xor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] + +v_xor_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] + +v_xor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x09,0x13] + +v_xor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s index 62c0deaecd96a..3eff00bb96e47 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s new file mode 100644 index 0000000000000..a4fea037a4de7 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s @@ -0,0 +1,451 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: 
[0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: 
[0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_dot2acc_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2acc_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + 
+v_dot2acc_f32_f16 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] + +v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2c_f32_f16 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: 
[0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] + +v_max_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] + +v_max_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] + +v_max_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] + +v_max_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] + +v_max_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] + +v_max_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x21,0xff,0x00,0x00,0x00] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] + +v_min_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] + +v_min_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] + +v_min_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: 
[0xe9,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] + +v_min_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] + +v_min_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] + +v_min_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: 
[0xea,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] + +v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] + +v_mul_legacy_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] + +v_or_b32 v5, v1, 
v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] + 
+v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s index d235fcdeb526a..0f19cf0028525 100644 --- 
a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err-fake16.s new file mode 100644 index 0000000000000..2d52828d1e283 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err-fake16.s @@ -0,0 +1,13 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s + +v_fmaak_f32 v0, 0xff32, v0, 0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed + +v_fmaak_f16 v0, 
0xff32, v0, 0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed + +v_fmamk_f32 v0, 0xff32, 1, v0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed + +v_fmamk_f16 v0, 0xff32, 1, v0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s index 164a49dcdd47b..dedbcb55d7976 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s v_fmaak_f32 v0, 0xff32, v0, 0 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s index 76b1c38fad43d..dd619f3077f70 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s @@ -1,237 +1,238 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s -v_add_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v255, v1, v2, 0xfe0b -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v255.l, v1.l, v2.l -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v255, v1, v2, 0xfe0b 
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v5, v1, v255, 0xfe0b +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmaak_f16_e32 v5, v255, v2, 0xfe0b -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_fmac_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v5.l, v255.l, v2.l -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 
quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v5, v1, v255, 0xfe0b -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmamk_f16_e32 v5, v1, 0xfe0b, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v5.l, v1.l, v255.l -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction -v_min_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction -v_mul_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_sub_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_subrev_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction -v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction -v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v255.l, v1.l, v2.l +// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction -v_ldexp_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_ldexp_f16_e32 v5.l, v1.l, v255.l +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v5.l, v255.l, v2.l +// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction -v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode +v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_max_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode +v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v1, 
v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: 
error: operands are not valid for this GPU or mode -v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// 
GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode +v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s index a5b5f32e97622..a6dcce40fd0e0 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s @@ -1,201 +1,202 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s v_add_f16 v255, v1, v2 -// GFX11: v_add_f16_e64 +// GFX11: v_add_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v255, v1, v2 -// GFX11: v_fmac_f16_e64 - -v_ldexp_f16 v255, v1, v2 -// GFX11: v_ldexp_f16_e64 - -v_max_f16 v255, 
v1, v2 -// GFX11: v_max_f16_e64 +v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_f16 v255, v1, v2 -// GFX11: v_min_f16_e64 +v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 -// GFX11: v_mul_f16_e64 +v_add_f16 v5, v1, v255 +// GFX11: v_add_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0xff,0x03,0x00] -v_sub_f16 v255, v1, v2 -// GFX11: v_sub_f16_e64 +v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_subrev_f16 v255, v1, v2 -// GFX11: v_subrev_f16_e64 +v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] v_add_f16 v5, v255, v2 -// GFX11: v_add_f16_e64 - -v_fmac_f16 v5, v255, v2 -// GFX11: v_fmac_f16_e64 +// GFX11: v_add_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0x05,0x02,0x00] -v_ldexp_f16 v5, v255, v2 -// GFX11: v_ldexp_f16_e64 +v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_max_f16 v5, v255, v2 -// GFX11: v_max_f16_e64 +v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_min_f16 v5, v255, v2 -// GFX11: v_min_f16_e64 +v_fmac_f16 v255, v1, v2 +// GFX11: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] 
-v_mul_f16 v5, v255, v2 -// GFX11: v_mul_f16_e64 +v_fmac_f16 v5, v1, v255 +// GFX11: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] -v_sub_f16 v5, v255, v2 -// GFX11: v_sub_f16_e64 +v_fmac_f16 v5, v255, v2 +// GFX11: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] -v_subrev_f16 v5, v255, v2 -// GFX11: v_subrev_f16_e64 +v_ldexp_f16 v255, v1, v2 +// GFX11: v_ldexp_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] -v_add_f16 v5, v1, v255 -// GFX11: v_add_f16_e64 +v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_fmac_f16 v5, v1, v255 -// GFX11: v_fmac_f16_e64 +v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_ldexp_f16 v5, v1, v255 -// GFX11: v_ldexp_f16_e64 +// GFX11: v_ldexp_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x03,0x00] -v_max_f16 v5, v1, v255 -// GFX11: v_max_f16_e64 +v_ldexp_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_f16 v5, v1, v255 -// GFX11: v_min_f16_e64 +v_ldexp_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v5, v1, v255 -// GFX11: v_mul_f16_e64 +v_ldexp_f16 v5, v255, v2 +// GFX11: v_ldexp_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v5, v1, v255 -// GFX11: v_sub_f16_e64 +v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v5, v1, v255 -// GFX11: v_subrev_f16_e64 +v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_e64 +v_max_f16 v255, v1, v2 +// GFX11: v_max_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] -v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] v_max_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64 - -v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64 +// GFX11: v_max_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_mul_f16_e64 +v_max_f16 v5, v1, v255 +// GFX11: v_max_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0xff,0x03,0x00] -v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_sub_f16_e64 +v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_e64 +v_max_f16 v5, v255, v2 +// GFX11: v_max_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x39,0xd5,0xff,0x05,0x02,0x00] 
-v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] v_max_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64 +// GFX11: v_max_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64 +v_min_f16 v255, v1, v2 +// GFX11: v_min_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] -v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_mul_f16_e64 +v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_sub_f16_e64 +v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_min_f16 v5, v1, v255 +// GFX11: v_min_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0xff,0x03,0x00] -v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_e64 +v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_ldexp_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: 
v_max_f16_e64 +v_min_f16 v5, v255, v2 +// GFX11: v_min_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0x05,0x02,0x00] -v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64 +v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_mul_f16_e64 +v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_sub_f16_e64 +v_mul_f16 v255, v1, v2 +// GFX11: v_mul_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] -v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_e64 +v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_mul_f16 v5, v1, v255 +// GFX11: v_mul_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0xff,0x03,0x00] -v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64 +v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64 +v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v1, v255 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mul_f16_e64 +v_mul_f16 v5, v255, v2 +// GFX11: v_mul_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sub_f16_e64 +v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_e64 +v_sub_f16 v255, v1, v2 +// GFX11: v_sub_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] -v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64 +v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64 +v_sub_f16 v5, v1, v255 +// GFX11: v_sub_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0xff,0x03,0x00] -v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mul_f16_e64 +v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x33,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sub_f16_e64 +v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_sub_f16 v5, v255, v2 +// GFX11: v_sub_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0x05,0x02,0x00] -v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_e64 +v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_ldexp_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64 +v_subrev_f16 v255, v1, v2 +// GFX11: v_subrev_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] -v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64 +v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mul_f16_e64 +v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sub_f16_e64 +v_subrev_f16 v5, v1, v255 +// GFX11: v_subrev_f16_e64 v5, v1, v255 ; encoding: 
[0x05,0x00,0x34,0xd5,0x01,0xff,0x03,0x00] v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_subrev_f16_e64 +// GFX11: v_subrev_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_f16 v5, v255, v2 +// GFX11: v_subrev_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0x05,0x02,0x00] + +v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s new file mode 100644 index 0000000000000..4c37502e1b247 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s @@ -0,0 +1,2560 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x40] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_add_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x64] + +v_add_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x64] + +v_add_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x64] + +v_add_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x64] + +v_add_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x64] + +v_add_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x64] + +v_add_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x64] + +v_add_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x64] + +v_add_f16 v5, null, v2 +// GFX12: encoding: 
[0x7c,0x04,0x0a,0x64] + +v_add_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x64] + +v_add_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x64] + +v_add_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x64] + +v_add_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] + +v_add_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x06] + +v_add_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x06] + +v_add_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x06] + +v_add_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x06] + +v_add_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x06] + +v_add_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x06] + +v_add_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x06] + +v_add_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x06] + +v_add_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x06] + +v_add_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x06] + +v_add_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x06] + +v_add_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x06] + +v_add_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] + +v_add_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x04] + +v_add_f64 v[5:6], v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x04] + +v_add_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x04] + +v_add_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x04] + +v_add_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x04] + +v_add_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x04] + +v_add_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x04] + +v_add_f64 v[5:6], null, v[2:3] +// 
GFX12: encoding: [0x7c,0x04,0x0a,0x04] + +v_add_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x04] + +v_add_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x04] + +v_add_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x04] + +v_add_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf] + +v_add_nc_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x4a] + +v_add_nc_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] + +v_and_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x36] + +v_and_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x36] + +v_and_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x36] + +v_and_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x36] + +v_and_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x36] + +v_and_b32 v5, 
m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x36] + +v_and_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x36] + +v_and_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x36] + +v_and_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x36] + +v_and_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x36] + +v_and_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x36] + +v_and_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x36] + +v_and_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x34] + +v_ashrrev_i32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] + +v_cndmask_b32 v5, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x02] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc +// W64: encoding: 
[0x7c,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v255, 0xaf123456, v255 +// GFX12: encoding: 
[0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_fmaak_f16 v5, v1, v2, 0xfe0b +// GFX12: encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, v127, v2, 0xfe0b +// GFX12: encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s1, v2, 0xfe0b +// GFX12: encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s105, v2, 0xfe0b +// GFX12: encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b +// GFX12: encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b +// GFX12: encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, ttmp15, v2, 0xfe0b +// GFX12: encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, m0, v2, 0xfe0b +// 
GFX12: encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_lo, v2, 0xfe0b +// GFX12: encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_hi, v2, 0xfe0b +// GFX12: encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, null, v2, 0xfe0b +// GFX12: encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, -1, v2, 0xfe0b +// GFX12: encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, 0.5, v2, 0xfe0b +// GFX12: encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, src_scc, v2, 0xfe0b +// GFX12: encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b +// GFX12: encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f32 v5, v1, v2, 0xaf123456 +// GFX12: encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, v255, v2, 0xaf123456 +// GFX12: encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s1, v2, 0xaf123456 +// GFX12: encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s105, v2, 0xaf123456 +// GFX12: encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 +// GFX12: encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 +// GFX12: encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 +// GFX12: encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, m0, v2, 0xaf123456 +// GFX12: encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 +// GFX12: encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 +// GFX12: encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, null, v2, 0xaf123456 +// GFX12: encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, -1, v2, 0xaf123456 +// GFX12: encoding: 
[0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, 0.5, v2, 0xaf123456 +// GFX12: encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, src_scc, v2, 0xaf123456 +// GFX12: encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 +// GFX12: encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] + +v_fmac_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x6c] + +v_fmac_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x6c] + +v_fmac_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x6c] + +v_fmac_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x6c] + +v_fmac_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x6c] + +v_fmac_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x6c] + +v_fmac_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x6c] + +v_fmac_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x6c] + +v_fmac_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] + +v_fmac_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x56] + +v_fmac_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x56] + +v_fmac_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x56] + +v_fmac_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x56] + +v_fmac_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x56] + +v_fmac_f32 v5, m0, v2 +// GFX12: encoding: 
[0x7d,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x56] + +v_fmac_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x56] + +v_fmac_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x56] + +v_fmac_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x56] + +v_fmac_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x56] + +v_fmac_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] + +v_fmamk_f16 v5, v1, 0xfe0b, v3 +// GFX12: encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, v127, 0xfe0b, v3 +// GFX12: encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, s1, 0xfe0b, v3 +// GFX12: encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, s105, 0xfe0b, v3 +// GFX12: encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 +// GFX12: encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 +// GFX12: encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 +// GFX12: encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, m0, 0xfe0b, v3 +// GFX12: encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 +// GFX12: encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 +// GFX12: encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, null, 0xfe0b, v3 +// GFX12: encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, -1, 0xfe0b, v3 +// GFX12: encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, 0.5, 0xfe0b, v3 +// GFX12: encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, src_scc, 0xfe0b, v3 +// GFX12: encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 +// GFX12: 
encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f32 v5, v1, 0xaf123456, v3 +// GFX12: encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, v255, 0xaf123456, v3 +// GFX12: encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s1, 0xaf123456, v3 +// GFX12: encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s105, 0xaf123456, v3 +// GFX12: encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 +// GFX12: encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 +// GFX12: encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 +// GFX12: encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, m0, 0xaf123456, v3 +// GFX12: encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 +// GFX12: encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 +// GFX12: encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, null, 0xaf123456, v3 +// GFX12: encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, -1, 0xaf123456, v3 +// GFX12: encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, 0.5, 0xaf123456, v3 +// GFX12: encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, src_scc, 0xaf123456, v3 +// GFX12: encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] + +v_ldexp_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x76] + +v_ldexp_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x76] + +v_ldexp_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x76] + +v_ldexp_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_hi, v2 +// 
GFX12: encoding: [0x6b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x76] + +v_ldexp_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x76] + +v_ldexp_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x76] + +v_ldexp_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x76] + +v_ldexp_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x76] + +v_ldexp_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x30] + +v_lshlrev_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] + +v_lshlrev_b64 v[5:6], v1, v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], v255, v[2:3] +// GFX12: encoding: 
[0xff,0x05,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], s1, v[2:3] +// GFX12: encoding: [0x01,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], s105, v[2:3] +// GFX12: encoding: [0x69,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], vcc_lo, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], vcc_hi, v[2:3] +// GFX12: encoding: [0x6b,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], ttmp15, v[2:3] +// GFX12: encoding: [0x7b,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], exec_lo, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], exec_hi, v[2:3] +// GFX12: encoding: [0x7f,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x32] + 
+v_lshrrev_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x32] + +v_lshrrev_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] + +v_max_num_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x62] + +v_max_num_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x62] + +v_max_num_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x62] + +v_max_num_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x62] + +v_max_num_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x62] + +v_max_num_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x62] + +v_max_num_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x62] + +v_max_num_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x62] + +v_max_num_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x62] + +v_max_num_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x62] + +v_max_num_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x62] + +v_max_num_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x62] + +v_max_num_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x62] + +v_max_num_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x62] + +v_max_num_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] + +v_max_num_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x2c] + +v_max_num_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x2c] + +v_max_num_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x2c] + +v_max_num_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x2c] + +v_max_num_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x2c] + +v_max_num_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x2c] + +v_max_num_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x2c] + +v_max_num_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x2c] + +v_max_num_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x2c] + +v_max_num_f32 v5, exec_hi, v2 
+// GFX12: encoding: [0x7f,0x04,0x0a,0x2c] + +v_max_num_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x2c] + +v_max_num_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x2c] + +v_max_num_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x2c] + +v_max_num_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x2c] + +v_max_num_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf] + +v_max_num_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x1c] + +v_max_num_f64 v[5:6], v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x1c] + +v_max_num_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x1c] + +v_max_num_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf] + +v_max_i32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x24] + +v_max_i32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x24] + +v_max_i32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x24] + +v_max_i32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x24] + +v_max_i32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x24] + +v_max_i32 v5, m0, v2 +// GFX12: encoding: 
[0x7d,0x04,0x0a,0x24] + +v_max_i32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x24] + +v_max_i32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x24] + +v_max_i32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x24] + +v_max_i32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x24] + +v_max_i32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x24] + +v_max_i32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x24] + +v_max_i32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] + +v_max_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x28] + +v_max_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x28] + +v_max_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x28] + +v_max_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x28] + +v_max_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x28] + +v_max_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x28] + +v_max_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x28] + +v_max_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x28] + +v_max_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x28] + +v_max_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x28] + +v_max_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x28] + +v_max_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x28] + +v_max_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] + +v_min_num_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x60] + +v_min_num_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x60] + +v_min_num_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x60] + +v_min_num_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x60] + +v_min_num_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x60] + +v_min_num_f16 v5, 
vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x60] + +v_min_num_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x60] + +v_min_num_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x60] + +v_min_num_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x60] + +v_min_num_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x60] + +v_min_num_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x60] + +v_min_num_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x60] + +v_min_num_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x60] + +v_min_num_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x60] + +v_min_num_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] + +v_min_num_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x2a] + +v_min_num_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x2a] + +v_min_num_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x2a] + +v_min_num_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x2a] + +v_min_num_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x2a] + +v_min_num_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x2a] + +v_min_num_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x2a] + +v_min_num_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x2a] + +v_min_num_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x2a] + +v_min_num_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x2a] + +v_min_num_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x2a] + +v_min_num_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x2a] + +v_min_num_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x2a] + +v_min_num_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x2a] + +v_min_num_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf] + +v_min_num_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x1a] + +v_min_num_f64 v[5:6], v[254:255], 
v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x1a] + +v_min_num_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x1a] + +v_min_num_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf] + +v_min_i32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x22] + +v_min_i32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x22] + +v_min_i32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x22] + +v_min_i32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x22] + +v_min_i32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x22] + +v_min_i32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x22] + +v_min_i32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x22] + +v_min_i32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x22] + +v_min_i32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x22] + +v_min_i32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x22] + +v_min_i32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x22] + +v_min_i32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x22] + +v_min_i32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] + +v_min_u32 v5, v1, 
v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x26] + +v_min_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x26] + +v_min_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x26] + +v_min_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x26] + +v_min_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x26] + +v_min_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x26] + +v_min_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x26] + +v_min_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x26] + +v_min_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x26] + +v_min_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x26] + +v_min_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x26] + +v_min_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x26] + +v_min_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x26] + +v_min_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x26] + +v_min_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x0e] + 
+v_mul_dx9_zero_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x6a] + +v_mul_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x6a] + +v_mul_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x6a] + +v_mul_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x6a] + +v_mul_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x6a] + +v_mul_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x6a] + +v_mul_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x6a] + +v_mul_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x6a] + +v_mul_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x6a] + +v_mul_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x6a] + +v_mul_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] + +v_mul_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x10] + +v_mul_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x10] + +v_mul_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x10] + +v_mul_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x10] + +v_mul_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x10] + +v_mul_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_hi, v2 +// GFX12: 
encoding: [0x7f,0x04,0x0a,0x10] + +v_mul_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x10] + +v_mul_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x10] + +v_mul_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x10] + +v_mul_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x10] + +v_mul_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] + +v_mul_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x0c] + +v_mul_f64 v[5:6], v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x0c] + +v_mul_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x0c] + +v_mul_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x14] + 
+v_mul_hi_i32_i24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32_u24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, s105, v2 +// GFX12: 
encoding: [0x69,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x12] + +v_mul_i32_i24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v255, 0xaf123456, v255 +// GFX12: 
encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x16] + +v_mul_u32_u24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] + +v_or_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x38] + +v_or_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x38] + +v_or_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x38] + +v_or_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x38] + +v_or_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x38] + +v_or_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x38] + +v_or_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x38] + +v_or_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x38] + +v_or_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x38] + +v_or_b32 v5, -1, v2 +// GFX12: encoding: 
[0xc1,0x04,0x0a,0x38] + +v_or_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x38] + +v_or_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x38] + +v_or_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] + +v_pk_fmac_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x78] + +v_pk_fmac_f16 v255, 0xfe0b, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: 
[0x69,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: 
[0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x66] + +v_sub_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x66] + +v_sub_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x66] + +v_sub_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x66] + +v_sub_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x66] + +v_sub_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x66] + +v_sub_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x66] + +v_sub_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x66] + +v_sub_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x66] + +v_sub_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x66] + +v_sub_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] + +v_sub_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x08] + +v_sub_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x08] + +v_sub_f32 v5, s1, v2 +// GFX12: encoding: 
[0x01,0x04,0x0a,0x08] + +v_sub_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x08] + +v_sub_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x08] + +v_sub_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x08] + +v_sub_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x08] + +v_sub_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x08] + +v_sub_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x08] + +v_sub_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x08] + +v_sub_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x4c] + +v_sub_nc_u32 v255, 0xaf123456, v255 +// GFX12: encoding: 
[0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x68] + +v_subrev_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x68] + +v_subrev_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x68] + +v_subrev_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x68] + +v_subrev_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x68] + +v_subrev_f16 v5, m0, v2 +// 
GFX12: encoding: [0x7d,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x68] + +v_subrev_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x68] + +v_subrev_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x68] + +v_subrev_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x68] + +v_subrev_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x68] + +v_subrev_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] + +v_subrev_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x0a] + +v_subrev_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x0a] + +v_subrev_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x0a] + +v_subrev_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x0a] + +v_subrev_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x0a] + +v_subrev_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x0a] + +v_subrev_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x0a] + +v_subrev_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x0a] + +v_subrev_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x4e] + 
+v_subrev_nc_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] + +v_xnor_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x3c] + +v_xnor_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x3c] + +v_xnor_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x3c] + +v_xnor_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x3c] + +v_xnor_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x3c] + +v_xnor_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x3c] + +v_xnor_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x3c] + +v_xnor_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x3c] + +v_xnor_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] + +v_xor_b32 v5, v1, v2 +// GFX12: encoding: 
[0x01,0x05,0x0a,0x3a] + +v_xor_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x3a] + +v_xor_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x3a] + +v_xor_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x3a] + +v_xor_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x3a] + +v_xor_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x3a] + +v_xor_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x3a] + +v_xor_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x3a] + +v_xor_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x3a] + +v_xor_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x3a] + +v_xor_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s index 08d4be0881319..5593ea77d9424 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc 
-triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo // W32: encoding: [0x01,0x05,0x0a,0x40] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s new file mode 100644 index 0000000000000..ebab0859b3484 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s @@ -0,0 +1,19 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_min_f32 v5, v1, v2 +// GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] + +v_max_f32 v5, v1, v2 +// GFX12: v_max_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2c] + +v_min_f16 v5, v1, v2 +// GFX12: v_min_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x60] + +v_max_f16 v5, v1, v2 +// GFX12: v_max_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x62] + +v_max_f64 v[5:6], v[1:2], v[2:3] +// GFX12: v_max_num_f64_e32 v[5:6], v[1:2], v[2:3] ; encoding: [0x01,0x05,0x0a,0x1c] + +v_min_f64 v[5:6], v[1:2], v[2:3] +// GFX12: v_min_num_f64_e32 v[5:6], v[1:2], v[2:3] ; encoding: [0x01,0x05,0x0a,0x1a] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s index 3918dd48cfc06..b7e51cf270647 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// 
RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_min_f32 v5, v1, v2 // GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s new file mode 100644 index 0000000000000..53373d1f46973 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s @@ -0,0 +1,2006 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, 
v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] + +v_add_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] + +v_add_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] + +v_add_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] + +v_add_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x09,0x13] + +v_add_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xf5,0x30] + +v_add_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] + +v_add_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] + +v_add_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] + +v_add_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: 
encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] + +v_add_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x09,0x13] + +v_add_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xf5,0x30] + +v_add_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] + +v_add_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x09,0x13] + +v_add_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x05,0x30] + +v_and_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: 
encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] + +v_and_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] + +v_and_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] + +v_and_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] + +v_and_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x09,0x13] + +v_and_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x05,0x30] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:1 +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] + +v_ashrrev_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x09,0x13] + +v_ashrrev_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x05,0x30] + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + 
+v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pk_rtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:15 
row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pkrtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_fmac_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] + +v_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] + +v_fmac_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] + +v_fmac_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x09,0x13] + +v_fmac_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xf5,0x30] + +v_fmac_f32 v5, 
v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] + +v_fmac_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] + +v_fmac_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] + +v_fmac_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x09,0x13] + +v_fmac_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xf5,0x30] + +v_ldexp_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 
row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] + +v_ldexp_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x09,0x13] + +v_ldexp_f16 v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x35,0x30] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 
row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] + +v_lshlrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x09,0x13] + +v_lshlrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x05,0x30] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] + +v_lshrrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x09,0x13] + +v_lshrrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x05,0x30] + +v_max_num_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] + +v_max_num_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] + +v_max_num_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] + +v_max_num_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x09,0x13] + +v_max_num_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xf5,0x30] + +v_max_num_f32 v5, v1, v2 
quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff] + +v_max_num_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff] + +v_max_num_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01] + +v_max_num_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x60,0x09,0x13] + +v_max_num_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xf5,0x30] + +v_max_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] + +v_max_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] + +v_max_i32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] + +v_max_i32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] + 
+v_max_i32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] + +v_max_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x09,0x13] + +v_max_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x05,0x30] + +v_max_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] + +v_max_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] + +v_max_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] + +v_max_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] + +v_max_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] + +v_max_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x09,0x13] + +v_max_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x05,0x30] + +v_min_num_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] + +v_min_num_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] + +v_min_num_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] + 
+v_min_num_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x09,0x13] + +v_min_num_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xf5,0x30] + +v_min_num_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff] + +v_min_num_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff] + +v_min_num_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01] + +v_min_num_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x60,0x09,0x13] + +v_min_num_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xf5,0x30] + +v_min_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] + +v_min_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] + +v_min_i32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] + +v_min_i32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] + +v_min_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x09,0x13] + +v_min_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x05,0x30] + +v_min_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] + +v_min_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] + +v_min_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] + +v_min_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] + 
+v_min_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] + +v_min_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] + +v_min_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x09,0x13] + +v_min_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x05,0x30] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + +v_mul_dx9_zero_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + +v_mul_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] + +v_mul_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] + +v_mul_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] + 
+v_mul_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x09,0x13] + +v_mul_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xf5,0x30] + +v_mul_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] + +v_mul_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] + +v_mul_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] + +v_mul_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x09,0x13] + +v_mul_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xf5,0x30] + +v_mul_hi_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 
quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] + +v_mul_hi_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x09,0x13] + +v_mul_hi_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x05,0x30] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:1 +// GFX12: 
encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] + +v_mul_hi_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x09,0x13] + +v_mul_hi_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x05,0x30] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] + 
+v_mul_i32_i24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] + +v_mul_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x09,0x13] + +v_mul_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x05,0x30] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + 
+v_mul_dx9_zero_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + +v_mul_dx9_zero_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + +v_mul_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] + +v_mul_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x09,0x13] + +v_mul_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// 
GFX12: encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x05,0x30] + +v_or_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] + +v_or_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] + +v_or_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] + +v_or_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] + +v_or_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x09,0x13] + +v_or_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x05,0x30] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 
bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: 
[0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] + +v_sub_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] + +v_sub_f16 v5, v1, v2 row_mirror +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] + +v_sub_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x09,0x13] + +v_sub_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xf5,0x30] + +v_sub_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] + +v_sub_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] + +v_sub_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] + 
+v_sub_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] + +v_sub_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x09,0x13] + +v_sub_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xf5,0x30] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] + +v_sub_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x09,0x13] + +v_sub_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x05,0x30] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, 
v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: 
[0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] + +v_subrev_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] + +v_subrev_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] + +v_subrev_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x09,0x13] + +v_subrev_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xf5,0x30] + +v_subrev_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] + +v_subrev_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] + +v_subrev_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] + +v_subrev_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x09,0x13] + +v_subrev_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xf5,0x30] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] 
+// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] + +v_subrev_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x09,0x13] + +v_subrev_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x05,0x30] + +v_xnor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] + +v_xnor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] + +v_xnor_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] + +v_xnor_b32 
v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] + +v_xnor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x09,0x13] + +v_xnor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x05,0x30] + +v_xor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] + +v_xor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] + +v_xor_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: 
[0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] + +v_xor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x09,0x13] + +v_xor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s index 63ffdbe821af8..a0f93f459f915 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | 
FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s new file mode 100644 index 0000000000000..a7a035f4a9efc --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s @@ -0,0 +1,433 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] 
fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v5, v1, 
v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] + 
+v_max_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] + +v_max_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] + +v_max_num_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] + +v_max_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] + +v_max_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] + +v_max_num_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] + +v_min_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] + +v_min_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] + +v_min_num_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] + +v_min_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] + +v_min_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] + 
+v_min_num_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] + +v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + 
+v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] + +v_or_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: 
encoding: [0xe9,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] + +v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v255, v255, v255 
dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: 
[0xea,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s index 54baafb5366ff..81fcb323e2711 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc 
-triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s index 045d698bd504b..b339bc1960f3e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s @@ -1,226 +1,227 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s -v_add_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v255, v1, v2, 0xfe0b -// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v255, v1, v2, 0xfe0b +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v5, v1, v255, 0xfe0b +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmaak_f16_e32 v5, v255, v2, 0xfe0b -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] 
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v5, v1, v255, 0xfe0b -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmamk_f16_e32 v5, v1, 0xfe0b, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode +v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction v_max_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_max_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v5, v255, v2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_max_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// 
GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_min_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_mul_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_mul_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v5, v255, v2 +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s index 13939842f7303..e9e91fa70773d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s @@ -1,190 +1,191 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=_e32 %s v_add_f16 v255, v1, v2 -// GFX12: v_add_f16_e64 +// GFX12: v_add_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v255, v1, v2 -// GFX12: v_fmac_f16_e64 +v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_add_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_ldexp_f16 v255, v1, v2 -// GFX12: v_ldexp_f16_e64 +v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_add_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_num_f16 v255, v1, v2 -// GFX12: v_max_num_f16_e64 +v_add_f16 v5, v1, v255 +// GFX12: v_add_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0xff,0x03,0x00] -v_min_num_f16 v255, v1, v2 -// GFX12: v_min_num_f16_e64 +v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v255, v1, v2 -// GFX12: v_mul_f16_e64 +v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v255, v1, v2 -// GFX12: v_sub_f16_e64 +v_add_f16 v5, v255, v2 +// GFX12: v_add_f16_e64 
v5, v255, v2 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0x05,0x02,0x00] -v_subrev_f16 v255, v1, v2 -// GFX12: v_subrev_f16_e64 +v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_add_f16 v5, v255, v2 -// GFX12: v_add_f16_e64 +v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_fmac_f16 v255, v1, v2 +// GFX12: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f16 v5, v1, v255 +// GFX12: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] v_fmac_f16 v5, v255, v2 -// GFX12: v_fmac_f16_e64 +// GFX12: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] -v_ldexp_f16 v5, v255, v2 -// GFX12: v_ldexp_f16_e64 +v_ldexp_f16 v255, v1, v2 +// GFX12: v_ldexp_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] -v_max_num_f16 v5, v255, v2 -// GFX12: v_max_num_f16_e64 +v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_num_f16 v5, v255, v2 -// GFX12: v_min_num_f16_e64 +v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v5, v255, v2 -// GFX12: v_mul_f16_e64 +v_ldexp_f16 v5, v255, v2 +// GFX12: v_ldexp_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v5, v255, v2 -// GFX12: v_sub_f16_e64 +v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v5, v255, v2 -// GFX12: v_subrev_f16_e64 +v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_add_f16 v5, v1, v255 -// GFX12: v_add_f16_e64 +v_max_num_f16 v255, v1, v2 +// GFX12: v_max_num_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x31,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v5, v1, v255 -// GFX12: v_fmac_f16_e64 +v_max_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_max_num_f16 v5, v1, v255 -// GFX12: v_max_num_f16_e64 +// GFX12: v_max_num_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x31,0xd5,0x01,0xff,0x03,0x00] -v_min_num_f16 v5, v1, v255 -// GFX12: v_min_num_f16_e64 +v_max_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v5, v1, v255 -// GFX12: v_mul_f16_e64 +v_max_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v5, v1, v255 -// GFX12: v_sub_f16_e64 +v_max_num_f16 v5, v255, v2 +// GFX12: v_max_num_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x31,0xd5,0xff,0x05,0x02,0x00] -v_subrev_f16 v5, v1, v255 -// GFX12: v_subrev_f16_e64 +v_max_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_add_f16_e64 +v_max_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_min_num_f16 v255, v1, v2 +// GFX12: v_min_num_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x30,0xd5,0x01,0x05,0x02,0x00] -v_max_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_min_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] v_min_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64 +// GFX12: v_min_num_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_mul_f16_e64 - -v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_sub_f16_e64 +v_min_num_f16 v5, v1, v255 +// GFX12: v_min_num_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x30,0xd5,0x01,0xff,0x03,0x00] -v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_min_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_add_f16_e64 +v_min_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_min_num_f16 v5, v255, v2 +// GFX12: 
v_min_num_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x30,0xd5,0xff,0x05,0x02,0x00] -v_max_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_min_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] v_min_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64 +// GFX12: v_min_num_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_mul_f16_e64 - -v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_sub_f16_e64 +v_mul_f16 v255, v1, v2 +// GFX12: v_mul_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] -v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_add_f16_e64 +v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_mul_f16 v5, v1, v255 +// GFX12: v_mul_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0xff,0x03,0x00] -v_min_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_mul_f16_e64 - -v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_sub_f16_e64 +// GFX12: 
v_mul_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_mul_f16 v5, v255, v2 +// GFX12: v_mul_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0x05,0x02,0x00] -v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_add_f16_e64 +v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_max_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_sub_f16 v255, v1, v2 +// GFX12: v_sub_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] -v_min_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mul_f16_e64 +v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_sub_f16_e64 +v_sub_f16 v5, v1, v255 +// GFX12: v_sub_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0xff,0x03,0x00] -v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_add_f16_e64 +v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_sub_f16 v5, v255, v2 +// GFX12: v_sub_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0x05,0x02,0x00] -v_max_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_min_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mul_f16_e64 +v_subrev_f16 v255, v1, v2 +// GFX12: v_subrev_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] -v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_sub_f16_e64 +v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_add_f16_e64 +v_subrev_f16 v5, v1, v255 +// GFX12: v_subrev_f16_e64 
v5, v1, v255 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0xff,0x03,0x00] -v_max_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mul_f16_e64 +v_subrev_f16 v5, v255, v2 +// GFX12: v_subrev_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_sub_f16_e64 +v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt index 26ffd3a4e383b..4f638cd8ff54f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt @@ -1,2334 +1,2324 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-FAKE16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace 
-check-prefixes=GFX11,W32,GFX11-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64,GFX11-FAKE16 %s +0x01,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x40] -0x01,0x05,0x0a,0x40 +0xff,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x40] -0xff,0x05,0x0a,0x40 +0x01,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x40] -0x01,0x04,0x0a,0x40 +0x69,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x40] -0x69,0x04,0x0a,0x40 +0x6a,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x40] # W64: 
v_add_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x40] -0x6a,0x04,0x0a,0x40 +0x6b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x40] -0x6b,0x04,0x0a,0x40 +0x7b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x40] -0x7b,0x04,0x0a,0x40 +0x7d,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x40] -0x7d,0x04,0x0a,0x40 +0x7e,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x40] -0x7e,0x04,0x0a,0x40 +0x7f,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x40] -0x7f,0x04,0x0a,0x40 +0x7c,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x40] -0x7c,0x04,0x0a,0x40 +0xc1,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x40] -0xc1,0x04,0x0a,0x40 +0xf0,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x40] -0xf0,0x04,0x0a,0x40 +0xfd,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, src_scc, 
v2, vcc ; encoding: [0xfd,0x04,0x0a,0x40] -0xfd,0x04,0x0a,0x40 +0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf # W32: v_add_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] # W64: v_add_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf +0x01,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] -0x01,0x05,0x0a,0x64 +0x81,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x64] -0x81,0x05,0x0a,0x64 +0x7f,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x64] -0x7f,0x05,0x0a,0x64 +0xff,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x64] -0xff,0x05,0x0a,0x64 +0x01,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x64] -0x01,0x04,0x0a,0x64 +0x69,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x64] -0x69,0x04,0x0a,0x64 +0x6a,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x64] -0x6a,0x04,0x0a,0x64 +0x6b,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, vcc_hi, v2.l ; encoding: 
[0x6b,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x64] -0x6b,0x04,0x0a,0x64 +0x7b,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x64] -0x7b,0x04,0x0a,0x64 +0x7d,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x64] -0x7d,0x04,0x0a,0x64 +0x7e,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x64] -0x7e,0x04,0x0a,0x64 +0x7f,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x64] -0x7f,0x04,0x0a,0x64 +0x7c,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x64] -0x7c,0x04,0x0a,0x64 +0xc1,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x64] -0xc1,0x04,0x0a,0x64 +0xf0,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x64] -0xf0,0x04,0x0a,0x64 +0xfd,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x64] -0xfd,0x04,0x0a,0x64 -# GFX11-REAL16: v_add_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x65] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x65 0xfd,0x04,0x0b,0x65 +# GFX11-REAL16: v_add_f16_e32 v5.h, src_scc, v2.h ; encoding: 
[0xfd,0x04,0x0b,0x65] +0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_add_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_add_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_add_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_add_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00] -# GFX11: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] 0x01,0x05,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] 0xff,0x05,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] 0x01,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] 0x69,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] 0x6a,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] 0x6b,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] 0x7b,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] 0x7d,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, 
exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] 0x7e,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] 0x7f,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] 0x7c,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] 0xc1,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] 0xf0,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] 0xfd,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_add_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] -# GFX11: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] 0x01,0x05,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] 0xff,0x05,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] 0x01,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] 0x69,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4a] 0x6a,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, vcc_lo, v2 
; encoding: [0x6a,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] 0x6b,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] 0x7b,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] 0x7d,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] 0x7e,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] 0x7f,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] 0x7c,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] 0xc1,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] 0xf0,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] 0xfd,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf +# GFX11: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] -# GFX11: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] 0x01,0x05,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, 
v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] 0xff,0x05,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] 0x01,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] 0x69,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] 0x6a,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] 0x6b,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] 0x7b,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] 0x7d,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] 0x7e,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] 0x7f,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] 0x7c,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] 0xc1,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] 0xf0,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] 
0xfd,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf +# GFX11: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] -# GFX11: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] 0x01,0x05,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] 0xff,0x05,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x34] 0x01,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] 0x69,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] 0x6a,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] 0x6b,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] 0x7b,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] 0x7d,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] 0x7e,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] 0x7f,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 
v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] 0x7c,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] 0xc1,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] 0xf0,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x34] 0xfd,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf +# GFX11: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x02] -0x01,0x05,0x0a,0x02 +0xff,0x05,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x02] -0xff,0x05,0x0a,0x02 +0x01,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x02] -0x01,0x04,0x0a,0x02 +0x69,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x02] -0x69,0x04,0x0a,0x02 +0x6a,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x02] -0x6a,0x04,0x0a,0x02 +0x6b,0x04,0x0a,0x02 # W32: 
v_cndmask_b32_e32 v5, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x02] -0x6b,0x04,0x0a,0x02 +0x7b,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x02] -0x7b,0x04,0x0a,0x02 +0x7d,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x02] -0x7d,0x04,0x0a,0x02 +0x7e,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x02] -0x7e,0x04,0x0a,0x02 +0x7f,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x02] -0x7f,0x04,0x0a,0x02 +0x7c,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x02] -0x7c,0x04,0x0a,0x02 +0xc1,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x02] -0xc1,0x04,0x0a,0x02 +0xf0,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x02] -0xf0,0x04,0x0a,0x02 +0xfd,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x02] -0xfd,0x04,0x0a,0x02 +0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf # W32: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] # W64: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc ; encoding: 
[0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] 0x01,0x05,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] 0xff,0x05,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] 0x01,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] 0x69,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] 0x6a,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] 0x6b,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] 0x7b,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] 0x7d,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] 0x7e,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] 0x7f,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: 
[0x7c,0x04,0x0a,0x5e] 0x7c,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] 0xc1,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] 0xf0,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] 0xfd,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] -# GFX11: v_dot2acc_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04] 0x01,0x05,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x04] 0xff,0x05,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x04] 0x01,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x04] 0x69,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x04] 0x6a,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x04] 0x6b,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, ttmp15, 
v2 ; encoding: [0x7b,0x04,0x0a,0x04] 0x7b,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x04] 0x7d,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x04] 0x7e,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x04] 0x7f,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x04] 0x7c,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x04] 0xc1,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x04] 0xf0,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x04] 0xfd,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00 +# GFX11: v_dot2acc_f32_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: 
[0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: 
[0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: 
v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; 
encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0c] 0x01,0x05,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0c] 0xff,0x05,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0c] 0x01,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0c] 0x69,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0c] 0x6a,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0c] 0x6b,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0c] 0x7b,0x04,0x0a,0x0c +# 
GFX11: v_fmac_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0c] 0x7d,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0c] 0x7e,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0c] 0x7f,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0c] 0x7c,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0c] 0xc1,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0c] 0xf0,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0c] 0xfd,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf +# GFX11: v_fmac_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] -# GFX11: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] 0x01,0x05,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] 0x7f,0x05,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, s1, v2 ; encoding: 
[0x01,0x04,0x0a,0x6c] 0x01,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] 0x69,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] 0x6a,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] 0x6b,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] 0x7b,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] 0x7d,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] 0x7e,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] 0x7f,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] 0x7c,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] 0xc1,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] 0xf0,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] 0xfd,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: 
[0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmac_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x56] 0x01,0x05,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] 0xff,0x05,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] 0x01,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] 0x69,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] 0x6a,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] 0x6b,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] 0x7b,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] 0x7d,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] 0x7e,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] 0x7f,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] 0x7c,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] -# 
GFX11: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] 0xc1,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x56] 0xf0,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] 0xfd,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf +# GFX11: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 
0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: 
[0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: 
v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, v1, v2 ; 
encoding: [0x01,0x05,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x76] 0x01,0x05,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x76] 0x7f,0x05,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x76] 0x01,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x76] 0x69,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x76] 0x6a,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x76] 0x6b,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x76] +# GFX11-FAKE16: 
v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x76] 0x7b,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x76] 0x7d,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x76] 0x7e,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x76] 0x7f,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x76] 0x7c,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x76] 
0xc1,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x76] 0xf0,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x76] 0xfd,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] -# GFX11-REAL16: v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] +# GFX11-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] -# GFX11: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] 0x01,0x05,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] 0xff,0x05,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] 0x01,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x30] 0x69,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, s105, v2 ; 
encoding: [0x69,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] 0x6a,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] 0x6b,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] 0x7b,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] 0x7d,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] 0x7e,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] 0x7f,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] 0x7c,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] 0xc1,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] 0xf0,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] 0xfd,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf +# GFX11: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: 
[0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] -# GFX11: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] 0x01,0x05,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] 0xff,0x05,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] 0x01,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] 0x69,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] 0x6a,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] 0x6b,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] 0x7b,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] 0x7d,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] 0x7e,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] 0x7f,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] 0x7c,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] 
0xc1,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] 0xf0,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] 0xfd,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf +# GFX11: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x72] -0x01,0x05,0x0a,0x72 +0x81,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x72] -0x81,0x05,0x0a,0x72 +0x7f,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x72] -0x7f,0x05,0x0a,0x72 +0xff,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x72] -0xff,0x05,0x0a,0x72 +0x01,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x72] -0x01,0x04,0x0a,0x72 +0x69,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x72] 
-0x69,0x04,0x0a,0x72 +0x6a,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x72] -0x6a,0x04,0x0a,0x72 +0x6b,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x72] -0x6b,0x04,0x0a,0x72 +0x7b,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x72] -0x7b,0x04,0x0a,0x72 +0x7d,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x72] -0x7d,0x04,0x0a,0x72 +0x7e,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x72] -0x7e,0x04,0x0a,0x72 +0x7f,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x72] -0x7f,0x04,0x0a,0x72 +0x7c,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x72] -0x7c,0x04,0x0a,0x72 +0xc1,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x72] -0xc1,0x04,0x0a,0x72 +0xf0,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x72] -0xf0,0x04,0x0a,0x72 +0xfd,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, src_scc, v2 ; encoding: 
[0xfd,0x04,0x0a,0x72] -0xfd,0x04,0x0a,0x72 -# GFX11-REAL16: v_max_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x73] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x73 0xfd,0x04,0x0b,0x73 +# GFX11-REAL16: v_max_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x73] +0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_max_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_max_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_max_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_max_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00] -# GFX11: v_max_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x20] 0x01,0x05,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x20] 0xff,0x05,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x20] 0x01,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x20] 0x69,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x20] 0x6a,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x20] 0x6b,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, ttmp15, v2 ; encoding: 
[0x7b,0x04,0x0a,0x20] 0x7b,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x20] 0x7d,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x20] 0x7e,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x20] 0x7f,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x20] 0x7c,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x20] 0xc1,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x20] 0xf0,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x20] 0xfd,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf +# GFX11: v_max_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf] -# GFX11: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] 0x01,0x05,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] 0xff,0x05,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] 0x01,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] -# GFX11: 
v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] 0x69,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] 0x6a,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] 0x6b,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x24] 0x7b,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] 0x7d,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] 0x7e,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] 0x7f,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] 0x7c,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] 0xc1,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] 0xf0,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] 0xfd,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf +# GFX11: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: 
[0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] -# GFX11: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] 0x01,0x05,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] 0xff,0x05,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] 0x01,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] 0x69,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] 0x6a,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] 0x6b,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] 0x7b,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] 0x7d,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] 0x7e,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] 0x7f,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] 0x7c,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] 0xc1,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] -# 
GFX11: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] 0xf0,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] 0xfd,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf +# GFX11: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x74] -0x01,0x05,0x0a,0x74 +0x81,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x74] -0x81,0x05,0x0a,0x74 +0x7f,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x74] -0x7f,0x05,0x0a,0x74 +0xff,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x74] -0xff,0x05,0x0a,0x74 +0x01,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x74] -0x01,0x04,0x0a,0x74 +0x69,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x74] -0x69,0x04,0x0a,0x74 +0x6a,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x74] # 
GFX11-FAKE16: v_min_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x74] -0x6a,0x04,0x0a,0x74 +0x6b,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x74] -0x6b,0x04,0x0a,0x74 +0x7b,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x74] -0x7b,0x04,0x0a,0x74 +0x7d,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x74] -0x7d,0x04,0x0a,0x74 +0x7e,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x74] -0x7e,0x04,0x0a,0x74 +0x7f,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x74] -0x7f,0x04,0x0a,0x74 +0x7c,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x74] -0x7c,0x04,0x0a,0x74 +0xc1,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x74] -0xc1,0x04,0x0a,0x74 +0xf0,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x74] -0xf0,0x04,0x0a,0x74 +0xfd,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x74] -0xfd,0x04,0x0a,0x74 -# GFX11-REAL16: v_min_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x75] 
-# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x75 0xfd,0x04,0x0b,0x75 +# GFX11-REAL16: v_min_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x75] +0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_min_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_min_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_min_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_min_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00] -# GFX11: v_min_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1e] 0x01,0x05,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x1e] 0xff,0x05,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x1e] 0x01,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x1e] 0x69,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x1e] 0x6a,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x1e] 0x6b,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x1e] 0x7b,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x1e] -# GFX11: 
v_min_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x1e] 0x7d,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x1e] 0x7e,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x1e] 0x7f,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x1e] 0x7c,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x1e] 0xc1,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x1e] 0xf0,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x1e] 0xfd,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf +# GFX11: v_min_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf] -# GFX11: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] 0x01,0x05,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] 0xff,0x05,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] 0x01,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x22] 0x69,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, s105, v2 ; encoding: 
[0x69,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] 0x6a,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x22] 0x6b,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] 0x7b,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] 0x7d,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] 0x7e,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] 0x7f,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] 0x7c,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] 0xc1,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] 0xf0,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] 0xfd,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf +# GFX11: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] -# GFX11: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] 0x01,0x05,0x0a,0x26 +# 
GFX11: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] 0xff,0x05,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] 0x01,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] 0x69,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] 0x6a,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] 0x6b,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] 0x7b,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] 0x7d,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] 0x7e,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] 0x7f,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] 0x7c,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] 0xc1,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x26] 0xf0,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, 0.5, v2 ; encoding: 
[0xf0,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] 0xfd,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf +# GFX11: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] 0x01,0x05,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] 0xff,0x05,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] 0x01,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] 0x69,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] 0x6a,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] 0x6b,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] 0x7b,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] 0x7d,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] 0x7e,0x04,0x0a,0x0e +# GFX11: 
v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] 0x7f,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0e] 0x7c,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] 0xc1,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] 0xf0,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] 0xfd,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf +# GFX11: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6a] -0x01,0x05,0x0a,0x6a -# GFX11-REAL16: v_mul_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x6a] -# GFX11-FAKE16: v_mul_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x6a 0x81,0x05,0x0a,0x6a +# GFX11-REAL16: v_mul_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x6a] +# GFX11-FAKE16: v_mul_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x6a] +0x7f,0x05,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, v127.l, v2.l ; encoding: 
[0x7f,0x05,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6a] -0x7f,0x05,0x0a,0x6a +0xff,0x05,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x6a] -0xff,0x05,0x0a,0x6a +0x01,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6a] -0x01,0x04,0x0a,0x6a +0x69,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6a] -0x69,0x04,0x0a,0x6a +0x6a,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6a] -0x6a,0x04,0x0a,0x6a +0x6b,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6a] -0x6b,0x04,0x0a,0x6a +0x7b,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6a] -0x7b,0x04,0x0a,0x6a +0x7d,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6a] -0x7d,0x04,0x0a,0x6a +0x7e,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6a] -0x7e,0x04,0x0a,0x6a +0x7f,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6a] -0x7f,0x04,0x0a,0x6a 
+0x7c,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6a] -0x7c,0x04,0x0a,0x6a +0xc1,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6a] -0xc1,0x04,0x0a,0x6a +0xf0,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6a] -0xf0,0x04,0x0a,0x6a +0xfd,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6a] -0xfd,0x04,0x0a,0x6a -# GFX11-REAL16: v_mul_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x6b] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x6b 0xfd,0x04,0x0b,0x6b +# GFX11-REAL16: v_mul_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x6b] +0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_mul_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_mul_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_mul_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_mul_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00] -# GFX11: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] 0x01,0x05,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] 0xff,0x05,0x0a,0x10 +# GFX11: 
v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] 0x01,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] 0x69,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] 0x6a,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] 0x6b,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] 0x7b,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] 0x7d,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] 0x7e,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] 0x7f,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] 0x7c,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] 0xc1,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] 0xf0,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x10] 0xfd,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, src_scc, v2 ; encoding: 
[0xfd,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf +# GFX11: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] 0x01,0x05,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] 0xff,0x05,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] 0x01,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] 0x69,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] 0x6a,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] 0x6b,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] 0x7b,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] 0x7d,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] 0x7e,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x14] 0x7f,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, exec_hi, 
v2 ; encoding: [0x7f,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] 0x7c,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] 0xc1,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] 0xf0,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x14] 0xfd,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] 0x01,0x05,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] 0xff,0x05,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] 0x01,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] 0x69,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] 0x6a,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] 0x6b,0x04,0x0a,0x18 +# GFX11: 
v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] 0x7b,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] 0x7d,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] 0x7e,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] 0x7f,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] 0x7c,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] 0xc1,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] 0xf0,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] 0xfd,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] 0x01,0x05,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] 
0xff,0x05,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] 0x01,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] 0x69,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] 0x6a,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] 0x6b,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] 0x7b,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] 0x7d,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] 0x7e,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] 0x7f,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] 0x7c,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] 0xc1,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] 0xf0,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] -# GFX11: 
v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] 0xfd,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf +# GFX11: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] 0x01,0x05,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x16] 0xff,0x05,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] 0x01,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] 0x69,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] 0x6a,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] 0x6b,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] 0x7b,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] 0x7d,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] 0x7e,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, exec_hi, v2 ; 
encoding: [0x7f,0x04,0x0a,0x16] 0x7f,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] 0x7c,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] 0xc1,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x16] 0xf0,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] 0xfd,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf +# GFX11: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] -# GFX11: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] 0x01,0x05,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] 0xff,0x05,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] 0x01,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] 0x69,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] 0x6a,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x38] 0x6b,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: 
[0x6b,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] 0x7b,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] 0x7d,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] 0x7e,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] 0x7f,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] 0x7c,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] 0xc1,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] 0xf0,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] 0xfd,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf +# GFX11: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] -# GFX11: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] 0x01,0x05,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] 0xff,0x05,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] 0x01,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, s1, v2 ; 
encoding: [0x01,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] 0x69,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] 0x6a,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] 0x6b,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] 0x7b,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] 0x7d,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] 0x7e,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] 0x7f,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] 0x7c,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] 0xc1,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] 0xf0,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] 0xfd,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00 +# GFX11: v_pk_fmac_f16 
v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] +0x01,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x42] -0x01,0x05,0x0a,0x42 +0xff,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x42] -0xff,0x05,0x0a,0x42 +0x01,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x42] -0x01,0x04,0x0a,0x42 +0x69,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x42] -0x69,0x04,0x0a,0x42 +0x6a,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x42] -0x6a,0x04,0x0a,0x42 +0x6b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x42] -0x6b,0x04,0x0a,0x42 +0x7b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x42] -0x7b,0x04,0x0a,0x42 +0x7d,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x42] -0x7d,0x04,0x0a,0x42 +0x7e,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: 
[0x7e,0x04,0x0a,0x42] -0x7e,0x04,0x0a,0x42 +0x7f,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x42] -0x7f,0x04,0x0a,0x42 +0x7c,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x42] -0x7c,0x04,0x0a,0x42 +0xc1,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x42] -0xc1,0x04,0x0a,0x42 +0xf0,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x42] -0xf0,0x04,0x0a,0x42 +0xfd,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x42] -0xfd,0x04,0x0a,0x42 +0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf # W32: v_sub_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] # W64: v_sub_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf +0x01,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x66] -0x01,0x05,0x0a,0x66 +0x81,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x66] -0x81,0x05,0x0a,0x66 +0x7f,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v127.l, v2.l ; encoding: 
[0x7f,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x66] -0x7f,0x05,0x0a,0x66 +0xff,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x66] -0xff,0x05,0x0a,0x66 +0x01,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x66] -0x01,0x04,0x0a,0x66 +0x69,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x66] -0x69,0x04,0x0a,0x66 +0x6a,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x66] -0x6a,0x04,0x0a,0x66 +0x6b,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x66] -0x6b,0x04,0x0a,0x66 +0x7b,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x66] -0x7b,0x04,0x0a,0x66 +0x7d,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x66] -0x7d,0x04,0x0a,0x66 +0x7e,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x66] -0x7e,0x04,0x0a,0x66 +0x7f,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x66] -0x7f,0x04,0x0a,0x66 
+0x7c,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x66] -0x7c,0x04,0x0a,0x66 +0xc1,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x66] -0xc1,0x04,0x0a,0x66 +0xf0,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x66] -0xf0,0x04,0x0a,0x66 +0xfd,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x66] -0xfd,0x04,0x0a,0x66 -# GFX11-REAL16: v_sub_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x67] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x67 0xfd,0x04,0x0b,0x67 +# GFX11-REAL16: v_sub_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x67] +0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_sub_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_sub_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_sub_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_sub_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00] -# GFX11: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] 0x01,0x05,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] 0xff,0x05,0x0a,0x08 +# GFX11: 
v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] 0x01,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] 0x69,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] 0x6a,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] 0x6b,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] 0x7b,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x08] 0x7d,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] 0x7e,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] 0x7f,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] 0x7c,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] 0xc1,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] 0xf0,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x08] 0xfd,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, src_scc, v2 ; encoding: 
[0xfd,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf +# GFX11: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] -# GFX11: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] 0x01,0x05,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] 0xff,0x05,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] 0x01,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] 0x69,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] 0x6a,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] 0x6b,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] 0x7b,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] 0x7d,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] 0x7e,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] 0x7f,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, null, v2 ; 
encoding: [0x7c,0x04,0x0a,0x4c] 0x7c,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] 0xc1,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] 0xf0,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] 0xfd,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf +# GFX11: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x44] -0x01,0x05,0x0a,0x44 +0xff,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x44] -0xff,0x05,0x0a,0x44 +0x01,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x44] -0x01,0x04,0x0a,0x44 +0x69,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x44] -0x69,0x04,0x0a,0x44 +0x6a,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x44] -0x6a,0x04,0x0a,0x44 
+0x6b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x44] -0x6b,0x04,0x0a,0x44 +0x7b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x44] -0x7b,0x04,0x0a,0x44 +0x7d,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x44] -0x7d,0x04,0x0a,0x44 +0x7e,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x44] -0x7e,0x04,0x0a,0x44 +0x7f,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x44] -0x7f,0x04,0x0a,0x44 +0x7c,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x44] -0x7c,0x04,0x0a,0x44 +0xc1,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x44] -0xc1,0x04,0x0a,0x44 +0xf0,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x44] -0xf0,0x04,0x0a,0x44 +0xfd,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x44] 
-0xfd,0x04,0x0a,0x44 +0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf # W32: v_subrev_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] # W64: v_subrev_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf +0x01,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x68] -0x01,0x05,0x0a,0x68 +0x81,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x68] -0x81,0x05,0x0a,0x68 +0x7f,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x68] -0x7f,0x05,0x0a,0x68 +0xff,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x68] -0xff,0x05,0x0a,0x68 +0x01,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x68] -0x01,0x04,0x0a,0x68 +0x69,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x68] -0x69,0x04,0x0a,0x68 +0x6a,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x68] -0x6a,0x04,0x0a,0x68 +0x6b,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, vcc_hi, v2.l ; encoding: 
[0x6b,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x68] -0x6b,0x04,0x0a,0x68 +0x7b,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x68] -0x7b,0x04,0x0a,0x68 +0x7d,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x68] -0x7d,0x04,0x0a,0x68 +0x7e,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x68] -0x7e,0x04,0x0a,0x68 +0x7f,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x68] -0x7f,0x04,0x0a,0x68 +0x7c,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x68] -0x7c,0x04,0x0a,0x68 +0xc1,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x68] -0xc1,0x04,0x0a,0x68 +0xf0,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x68] -0xf0,0x04,0x0a,0x68 +0xfd,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x68] -0xfd,0x04,0x0a,0x68 -# GFX11-REAL16: v_subrev_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x69] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x69 0xfd,0x04,0x0b,0x69 +# GFX11-REAL16: 
v_subrev_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x69] +0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_subrev_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_subrev_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_subrev_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_subrev_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00] -# GFX11: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] 0x01,0x05,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] 0xff,0x05,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] 0x01,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0a] 0x69,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] 0x6a,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] 0x6b,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] 0x7b,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] 
0x7d,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] 0x7e,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] 0x7f,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] 0x7c,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] 0xc1,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] 0xf0,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] 0xfd,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf +# GFX11: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] -# GFX11: v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] 0x01,0x05,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] 0xff,0x05,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] 0x01,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] 0x69,0x04,0x0a,0x4e +# GFX11: 
v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] 0x6a,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] 0x6b,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] 0x7b,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] 0x7d,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] 0x7e,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] 0x7f,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] 0x7c,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] 0xc1,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] 0xf0,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] 0xfd,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf +# GFX11: 
v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] -# GFX11: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] 0x01,0x05,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] 0xff,0x05,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] 0x01,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] 0x69,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] 0x6a,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] 0x6b,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] 0x7b,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3c] 0x7d,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] 0x7e,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] 0x7f,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] 0x7c,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] 
0xc1,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] 0xf0,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] 0xfd,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf +# GFX11: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] -# GFX11: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] 0x01,0x05,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] 0xff,0x05,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] 0x01,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] 0x69,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] 0x6a,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] 0x6b,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] 0x7b,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] 0x7d,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, exec_lo, 
v2 ; encoding: [0x7e,0x04,0x0a,0x3a] 0x7e,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] 0x7f,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] 0x7c,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] 0xc1,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] 0xf0,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] 0xfd,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf +# GFX11: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt index eebf0cc13cee6..a8a40f883cc48 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt @@ -1,1750 +1,1851 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck 
-check-prefixes=GFX11,W32,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-FAKE16 %s +0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, 
v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 # W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff +# 
GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, 
v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] 
0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] -# GFX11: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] -# GFX11: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_add_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] -# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01 +# GFX11: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] -# GFX11: v_add_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13 +# GFX11: v_add_f32_dpp 
v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] -# GFX11: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30 +# GFX11: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] 
0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01 +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13 +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] -# GFX11: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30 +# GFX11: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] -# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff +# GFX11: 
v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01 +# GFX11: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] -# GFX11: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13 +# GFX11: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] -# GFX11: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30 +# GFX11: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] 
-# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff +# GFX11: 
v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01 +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13 +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] -# GFX11: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30 +# GFX11: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] 
-0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff # W32: v_cndmask_b32_dpp v5, -v1, |v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff] # W64: v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff # W32: v_cndmask_b32_dpp v5, |v1|, -v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff] # W64: v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff # W32: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] # W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] 
0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30 +# GFX11: 
v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01 +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x04,0x01,0x60,0x01,0x13 +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x60,0x01,0x13] -# GFX11: v_dot2acc_f32_f16_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x05,0xff,0x6f,0xfd,0x30 +# GFX11: v_dot2acc_f32_f16_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xfd,0x30] -# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] 
0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01 +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13 +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] -# GFX11: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30 +# GFX11: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] -# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] 
0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01 +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13 +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] -# GFX11: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30 +# GFX11: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] -# GFX11: 
v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] -# GFX11: v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] 0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30 +# GFX11-REAL16: v_ldexp_f16_dpp v127.l, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] +# GFX11-FAKE16: v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] -# GFX11: 
v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff +# 
GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01 +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13 +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] -# GFX11: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] 
-# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] 
0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01 +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13 +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] -# GFX11: 
v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] -# GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] -# GFX11: v_max_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13] -# GFX11: v_max_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_max_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_max_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30] -# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff] -# 
GFX11: v_max_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01 +# GFX11: v_max_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01] -# GFX11: v_max_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x20,0x01,0x60,0x01,0x13 +# GFX11: v_max_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x60,0x01,0x13] -# GFX11: v_max_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x21,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x21,0xff,0x6f,0xfd,0x30 +# GFX11: v_max_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x21,0xff,0x6f,0xfd,0x30] -# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] 
0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01 +# GFX11: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] -# GFX11: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13 +# GFX11: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] -# GFX11: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30 +# GFX11: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] -# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, 
v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] 
0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01 +# GFX11: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] -# GFX11: v_max_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13 +# GFX11: v_max_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] -# GFX11: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30 +# GFX11: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] -# GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] -# GFX11: v_min_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13] -# GFX11: v_min_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_min_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_min_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30] -# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff +# GFX11: 
v_min_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01 +# GFX11: v_min_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01] -# GFX11: v_min_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x1e,0x01,0x60,0x01,0x13 +# GFX11: v_min_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x60,0x01,0x13] -# GFX11: v_min_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xfd,0x30 +# GFX11: v_min_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; 
encoding: [0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xfd,0x30] -# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] 
0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01 +# GFX11: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] -# GFX11: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13 +# GFX11: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] -# GFX11: v_min_i32_dpp v255, v255, v255 row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30 +# GFX11: v_min_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] -# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff +# GFX11: v_min_u32_dpp 
v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01 +# GFX11: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] -# GFX11: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] 
0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13 +# GFX11: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] -# GFX11: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30 +# GFX11: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff +# GFX11: 
v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] -# 
GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13 +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] -# GFX11: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30 +# GFX11: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] -# GFX11: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding:
[0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] -# GFX11: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_mul_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] -# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] -# 
GFX11: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] -# GFX11: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30 +# GFX11: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff +# GFX11: 
v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13 +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] -# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13 +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, 
v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] -# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] 
0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13 +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] -# GFX11: v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13 +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] -# GFX11: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] -# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] 
0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01 +# GFX11: v_or_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] -# GFX11: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13 +# GFX11: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] -# GFX11: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30 +# GFX11: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] # W64: 
v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] 
-0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 # W32: v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] 
0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] -# GFX11: v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_sub_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] -# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] -# 
GFX11: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13 +# GFX11: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] -# GFX11: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30 +# GFX11: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01 +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13 +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] -# GFX11: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30 +# GFX11: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] 
-0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff # W32: 
v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 -# GFX11: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] -# GFX11: 
v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff +# GFX11-REAL16: 
v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] -# GFX11: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_subrev_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] -# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] 
0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] -# GFX11: 
v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01 +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13 +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] -# GFX11: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30 +# GFX11: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01 +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13 +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] -# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30 +# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] -# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff +# GFX11: 
v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01 +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13 +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] -# GFX11: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30 +# GFX11: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] -# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] 
0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01 +# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13 +# GFX11: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] -# GFX11: v_xor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30 +# GFX11: v_xor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt index 5f1d4d4b33cbd..a1d2c34f09f2b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt @@ -1,250 +1,267 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn 
-mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-FAKE16 %s +0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 # W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 -# GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] -# GFX11: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_add_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] -# GFX11: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05 +# GFX11: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] -# GFX11: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00 +# GFX11: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05 +# GFX11: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] -# GFX11: v_add_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00 +# GFX11: v_add_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] -# GFX11: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05 +# GFX11: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] -# GFX11: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00 +# GFX11: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05 +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] -# GFX11: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00 +# GFX11: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05 +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] -# GFX11: v_dot2acc_f32_f16_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x05,0xff,0x00,0x00,0x00 +# GFX11: v_dot2acc_f32_f16_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] -# GFX11: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05 +# GFX11: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] -# GFX11: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00 +# GFX11: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] -# GFX11: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05 +# GFX11: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] -# GFX11: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00 +# GFX11: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] -# GFX11: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_ldexp_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05 +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] -# 
GFX11: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00 +# GFX11: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05 +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] -# GFX11: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00 +# GFX11: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] -# GFX11: v_max_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] -# GFX11: v_max_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_max_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_max_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] -# GFX11: v_max_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05 +# GFX11: v_max_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] -# GFX11: v_max_f32_dpp v255, v255, v255 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x21,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x21,0xff,0x00,0x00,0x00 +# GFX11: v_max_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x21,0xff,0x00,0x00,0x00] -# GFX11: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05 +# GFX11: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] -# GFX11: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00 +# GFX11: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] -# GFX11: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05 +# GFX11: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] -# GFX11: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00 +# GFX11: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] -# GFX11: v_min_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] -# GFX11: v_min_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_min_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_min_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] -# GFX11: v_min_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05 +# GFX11: v_min_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] -# GFX11: v_min_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00 +# GFX11: v_min_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00] -# GFX11: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05 +# GFX11: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] -# GFX11: v_min_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00 +# GFX11: v_min_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] -# GFX11: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05 +# GFX11: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] -# GFX11: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00 +# GFX11: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05 +# GFX11: 
v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] -# GFX11: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00 +# GFX11: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] -# GFX11: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] -# GFX11: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_mul_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] -# GFX11: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05 +# GFX11: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] -# GFX11: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00 +# GFX11: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05 +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] -# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00 +# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05 +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] -# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00 +# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05 +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] -# GFX11: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00 +# GFX11: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05 +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] -# GFX11: v_mul_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00 +# GFX11: v_mul_u32_u24_dpp v255, v255, 
v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] -# GFX11: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05 +# GFX11: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] -# GFX11: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00 +# GFX11: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 # W32: v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 -# GFX11: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] -# GFX11: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_sub_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] -# GFX11: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05 +# GFX11: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] -# GFX11: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00 +# GFX11: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05 +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] -# GFX11: v_sub_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00 +# GFX11: v_sub_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 -# 
GFX11: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] -# GFX11: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_subrev_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] -# GFX11: v_subrev_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05 +# GFX11: v_subrev_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] -# GFX11: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00 +# GFX11: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05 +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] -# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00 +# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] -# GFX11: 
v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05 +# GFX11: v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] -# GFX11: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00 +# GFX11: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] -# GFX11: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05 +# GFX11: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] -# GFX11: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00 +# GFX11: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt index 673db0664fc6a..1276d898160b3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt @@ -1,2228 +1,2336 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 
-disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-FAKE16 %s +0x01,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x40] -0x01,0x05,0x0a,0x40 +0xff,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x40] -0xff,0x05,0x0a,0x40 +0x01,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x40] -0x01,0x04,0x0a,0x40 +0x69,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x40] -0x69,0x04,0x0a,0x40 +0x6a,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x40] -0x6a,0x04,0x0a,0x40 +0x6b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x40] -0x6b,0x04,0x0a,0x40 +0x7b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x40] -0x7b,0x04,0x0a,0x40 +0x7d,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo 
; encoding: [0x7d,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x40] -0x7d,0x04,0x0a,0x40 +0x7e,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x40] -0x7e,0x04,0x0a,0x40 +0x7f,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x40] -0x7f,0x04,0x0a,0x40 +0x7c,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x40] -0x7c,0x04,0x0a,0x40 +0xc1,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x40] -0xc1,0x04,0x0a,0x40 +0xf0,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x40] -0xf0,0x04,0x0a,0x40 +0xfd,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x40] -0xfd,0x04,0x0a,0x40 +0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf # W32: v_add_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] # W64: v_add_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf -# GFX12: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] 0x01,0x05,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, v1, v2 ; encoding: 
[0x01,0x05,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x64] 0x7f,0x05,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x64] 0x01,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x64] 0x69,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x64] 0x6a,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x64] 0x6b,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x64] 0x7b,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x64] 0x7d,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x64] 0x7e,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 
v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x64] 0x7f,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x64] 0x7c,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x64] 0xc1,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x64] 0xf0,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x64] 0xfd,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_add_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_add_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] -# GFX12: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] 0x01,0x05,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] 0xff,0x05,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] -# GFX12: 
v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] 0x01,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] 0x69,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] 0x6a,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] 0x6b,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] 0x7b,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] 0x7d,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] 0x7e,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] 0x7f,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] 0x7c,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] 0xc1,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] 0xf0,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] 0xfd,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v255, 0xaf123456, v255 ; 
encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf +# GFX12: v_add_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] -# GFX12: v_add_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x04] 0x01,0x07,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x04] 0xfe,0x05,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x04] 0x00,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x04] 0x68,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x04] 0x6a,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x04] 0x7a,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x04] 0x7e,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x04] 0x7c,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x04] 0xc1,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x04] 0xf0,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: 
[0xf0,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x04] 0xfd,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf +# GFX12: v_add_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf] -# GFX12: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] 0x01,0x05,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] 0xff,0x05,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] 0x01,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] 0x69,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4a] 0x6a,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] 0x6b,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] 0x7b,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] 0x7d,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] 0x7e,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] -# GFX12: 
v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] 0x7f,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] 0x7c,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] 0xc1,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] 0xf0,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] 0xfd,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf +# GFX12: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] -# GFX12: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] 0x01,0x05,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] 0xff,0x05,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] 0x01,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] 0x69,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] 0x6a,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] 0x6b,0x04,0x0a,0x36 +# GFX12: 
v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] 0x7b,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] 0x7d,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] 0x7e,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] 0x7f,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] 0x7c,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] 0xc1,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] 0xf0,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] 0xfd,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf +# GFX12: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] -# GFX12: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] 0x01,0x05,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] 0xff,0x05,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: 
[0x01,0x04,0x0a,0x34] 0x01,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] 0x69,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] 0x6a,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] 0x6b,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] 0x7b,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] 0x7d,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] 0x7e,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] 0x7f,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] 0x7c,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] 0xc1,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] 0xf0,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x34] 0xfd,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: 
[0xfd,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf +# GFX12: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] -# W32: v_cndmask_b32_e32 v5, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x02] 0x01,0x05,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x02] 0xff,0x05,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x02] 0x01,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x02] 0x69,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x02] 0x6a,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, vcc_hi, v2, 
vcc_lo ; encoding: [0x6b,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x02] 0x6b,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x02] 0x7b,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x02] 0x7d,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x02] 0x7e,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x02] 0x7f,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x02] 0x7c,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, null, v2, vcc ; encoding: 
[0x7c,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x02] 0xc1,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x02] 0xf0,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x02] 0xfd,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x02] +0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf # W32: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] # W64: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] 0x01,0x05,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] 0xff,0x05,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] 0x01,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] 0x69,0x04,0x0a,0x5e +# GFX12: 
v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] 0x6a,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] 0x6b,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] 0x7b,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] 0x7d,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] 0x7e,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] 0x7f,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x5e] 0x7c,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] 0xc1,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] 0xf0,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] 0xfd,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; 
encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: 
v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: 
v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: 
[0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] -# GFX12: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] 0x01,0x05,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] 0x7f,0x05,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6c] 0x01,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] 0x69,0x04,0x0a,0x6c +# 
GFX12: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] 0x6a,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] 0x6b,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] 0x7b,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] 0x7d,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] 0x7e,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] 0x7f,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] 0x7c,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] 0xc1,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] 0xf0,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] 0xfd,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmac_f32_e32 v5, v1, 
v2 ; encoding: [0x01,0x05,0x0a,0x56] 0x01,0x05,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] 0xff,0x05,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] 0x01,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] 0x69,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] 0x6a,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] 0x6b,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] 0x7b,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] 0x7d,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] 0x7e,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] 0x7f,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] 0x7c,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] 0xc1,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: 
[0xf0,0x04,0x0a,0x56] 0xf0,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] 0xfd,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf +# GFX12: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: 
[0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v127, 
0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: 
[0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] -# GFX12: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76] 0x01,0x05,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, v1, v2 ; encoding: 
[0x01,0x05,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] 0x7f,0x05,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] 0x01,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] 0x69,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] 0x6a,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] 0x6b,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] 0x7b,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] 0x7d,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] 0x7e,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, exec_lo, v2.l ; encoding: 
[0x7e,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] 0x7f,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] 0x7c,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] 0xc1,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] 0xf0,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] 0xfd,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] -# GFX12: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] 0x01,0x05,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] 
0xff,0x05,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] 0x01,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x30] 0x69,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] 0x6a,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] 0x6b,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] 0x7b,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] 0x7d,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] 0x7e,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] 0x7f,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] 0x7c,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] 0xc1,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] 0xf0,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] -# GFX12: 
v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] 0xfd,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf +# GFX12: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] -# GFX12: v_lshlrev_b64_e32 v[5:6], v1, v[3:4] ; encoding: [0x01,0x07,0x0a,0x3e] 0x01,0x07,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], v1, v[3:4] ; encoding: [0x01,0x07,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], v255, v[2:3] ; encoding: [0xff,0x05,0x0a,0x3e] 0xff,0x05,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], v255, v[2:3] ; encoding: [0xff,0x05,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], s1, v[2:3] ; encoding: [0x01,0x04,0x0a,0x3e] 0x01,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], s1, v[2:3] ; encoding: [0x01,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], s105, v[2:3] ; encoding: [0x69,0x04,0x0a,0x3e] 0x69,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], s105, v[2:3] ; encoding: [0x69,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_lo, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x3e] 0x6a,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_lo, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_hi, v[2:3] ; encoding: [0x6b,0x04,0x0a,0x3e] 0x6b,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_hi, v[2:3] ; encoding: [0x6b,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], ttmp15, v[2:3] ; encoding: [0x7b,0x04,0x0a,0x3e] 0x7b,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], ttmp15, v[2:3] ; encoding: [0x7b,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], exec_lo, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x3e] 0x7e,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], exec_lo, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], exec_hi, v[2:3] ; encoding: [0x7f,0x04,0x0a,0x3e] 
0x7f,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], exec_hi, v[2:3] ; encoding: [0x7f,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x3e] 0x7c,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x3e] 0xc1,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x3e] 0xf0,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x3e] 0xfd,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf +# GFX12: v_lshlrev_b64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf] -# GFX12: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] 0x01,0x05,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] 0xff,0x05,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] 0x01,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] 0x69,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] 0x6a,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 
v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] 0x6b,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] 0x7b,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] 0x7d,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] 0x7e,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] 0x7f,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] 0x7c,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] 0xc1,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] 0xf0,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] 0xfd,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf +# GFX12: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] -# GFX12: v_max_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x62] 0x01,0x05,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, v1, v2 ; 
encoding: [0x01,0x05,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x62] 0x7f,0x05,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x62] 0x01,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x62] 0x69,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x62] 0x6a,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x62] 0x6b,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x62] 0x7b,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x62] 0x7d,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x62] 0x7e,0x04,0x0a,0x62 +# GFX12-REAL16: 
v_max_num_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x62] 0x7f,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x62] 0x7c,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x62] 0xc1,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x62] 0xf0,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x62] 0xfd,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_max_num_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_max_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] -# GFX12: v_max_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2c] 0x01,0x05,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, v1, v2 ; encoding: 
[0x01,0x05,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2c] 0xff,0x05,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2c] 0x01,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2c] 0x69,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2c] 0x6a,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2c] 0x6b,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2c] 0x7b,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2c] 0x7d,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2c] 0x7e,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2c] 0x7f,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2c] 0x7c,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2c] 0xc1,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2c] 0xf0,0x04,0x0a,0x2c 
+# GFX12: v_max_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2c] 0xfd,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf +# GFX12: v_max_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf] -# GFX12: v_max_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1c] 0x01,0x07,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1c] 0xfe,0x05,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1c] 0x00,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1c] 0x68,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1c] 0x6a,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1c] 0x7a,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x1c] 0x7e,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x1c] 0x7c,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], null, v[2:3] ; 
encoding: [0x7c,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1c] 0xc1,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1c] 0xf0,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1c] 0xfd,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf +# GFX12: v_max_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf] -# GFX12: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] 0x01,0x05,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] 0xff,0x05,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] 0x01,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] 0x69,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] 0x6a,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] 0x6b,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x24] 0x7b,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, ttmp15, v2 ; encoding: 
[0x7b,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] 0x7d,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] 0x7e,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] 0x7f,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] 0x7c,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] 0xc1,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] 0xf0,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] 0xfd,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf +# GFX12: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] -# GFX12: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] 0x01,0x05,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] 0xff,0x05,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] 0x01,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] 0x69,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 
v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] 0x6a,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] 0x6b,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] 0x7b,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] 0x7d,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] 0x7e,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] 0x7f,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] 0x7c,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] 0xc1,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] 0xf0,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] 0xfd,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf +# GFX12: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] -# GFX12: v_min_num_f16_e32 v5, v1, v2 ; encoding: 
[0x01,0x05,0x0a,0x60] 0x01,0x05,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x60] 0x7f,0x05,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x60] 0x01,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x60] 0x69,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x60] 0x6a,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x60] 0x6b,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x60] 0x7b,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x60] 0x7d,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x60] +# GFX12-FAKE16: 
v_min_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x60] 0x7e,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x60] 0x7f,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x60] 0x7c,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x60] 0xc1,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x60] 0xf0,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x60] 0xfd,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_min_num_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_min_num_f16_e32 v127, 0xfe0b, v127 ; encoding: 
[0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] -# GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] 0x01,0x05,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2a] 0xff,0x05,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2a] 0x01,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2a] 0x69,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2a] 0x6a,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2a] 0x6b,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2a] 0x7b,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2a] 0x7d,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2a] 0x7e,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2a] 0x7f,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2a] 0x7c,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2a] 
0xc1,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2a] 0xf0,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2a] 0xfd,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf +# GFX12: v_min_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf] -# GFX12: v_min_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1a] 0x01,0x07,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1a] 0xfe,0x05,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1a] 0x00,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1a] 0x68,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1a] 0x6a,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1a] 0x7a,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x1a] 0x7e,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], exec, v[2:3] 
; encoding: [0x7e,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x1a] 0x7c,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1a] 0xc1,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1a] 0xf0,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1a] 0xfd,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf +# GFX12: v_min_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf] -# GFX12: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] 0x01,0x05,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] 0xff,0x05,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] 0x01,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x22] 0x69,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] 0x6a,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x22] 0x6b,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, vcc_hi, v2 ; 
encoding: [0x6b,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] 0x7b,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] 0x7d,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] 0x7e,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] 0x7f,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] 0x7c,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] 0xc1,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] 0xf0,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] 0xfd,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf +# GFX12: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] -# GFX12: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] 0x01,0x05,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] 0xff,0x05,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] 0x01,0x04,0x0a,0x26 +# 
GFX12: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] 0x69,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] 0x6a,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] 0x6b,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] 0x7b,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] 0x7d,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] 0x7e,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] 0x7f,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] 0x7c,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] 0xc1,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x26] 0xf0,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] 0xfd,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] 
0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf +# GFX12: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] 0x01,0x05,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] 0xff,0x05,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] 0x01,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] 0x69,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] 0x6a,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] 0x6b,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] 0x7b,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] 0x7d,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] 0x7e,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] 0x7f,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, null, v2 
; encoding: [0x7c,0x04,0x0a,0x0e] 0x7c,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] 0xc1,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] 0xf0,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] 0xfd,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf +# GFX12: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6a] 0x01,0x05,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6a] 0x7f,0x05,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6a] 0x01,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6a] 0x69,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, vcc_lo, v2 ; encoding: 
[0x6a,0x04,0x0a,0x6a] 0x6a,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6a] 0x6b,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6a] 0x7b,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6a] 0x7d,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6a] 0x7e,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6a] 0x7f,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6a] 0x7c,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6a] 0xc1,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6a] -# GFX12: 
v_mul_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6a] 0xf0,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6a] 0xfd,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_mul_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_mul_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] -# GFX12: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] 0x01,0x05,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] 0xff,0x05,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] 0x01,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] 0x69,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] 0x6a,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] 0x6b,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] 0x7b,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] -# GFX12: 
v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] 0x7d,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] 0x7e,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] 0x7f,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] 0x7c,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] 0xc1,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] 0xf0,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x10] 0xfd,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf +# GFX12: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x0c] 0x01,0x07,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x0c] 0xfe,0x05,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x0c] 0x00,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: 
[0x68,0x04,0x0a,0x0c] 0x68,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x0c] 0x6a,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x0c] 0x7a,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x0c] 0x7e,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x0c] 0x7c,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x0c] 0xc1,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x0c] 0xf0,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x0c] 0xfd,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf +# GFX12: v_mul_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] 0x01,0x05,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] 0xff,0x05,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] 
-# GFX12: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] 0x01,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] 0x69,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] 0x6a,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] 0x6b,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] 0x7b,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] 0x7d,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] 0x7e,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x14] 0x7f,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] 0x7c,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] 0xc1,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] 0xf0,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; 
encoding: [0xfd,0x04,0x0a,0x14] 0xfd,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf +# GFX12: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] 0x01,0x05,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] 0xff,0x05,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] 0x01,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] 0x69,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] 0x6a,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] 0x6b,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] 0x7b,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] 0x7d,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] 0x7e,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] -# GFX12: 
v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] 0x7f,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] 0x7c,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] 0xc1,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] 0xf0,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] 0xfd,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf +# GFX12: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] 0x01,0x05,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] 0xff,0x05,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] 0x01,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] 0x69,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] 0x6a,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] -# GFX12: 
v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] 0x6b,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] 0x7b,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] 0x7d,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] 0x7e,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] 0x7f,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] 0x7c,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] 0xc1,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] 0xf0,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] 0xfd,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf +# GFX12: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] 0x01,0x05,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, v255, v2 ; 
encoding: [0xff,0x05,0x0a,0x16] 0xff,0x05,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] 0x01,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] 0x69,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] 0x6a,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] 0x6b,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] 0x7b,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] 0x7d,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] 0x7e,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x16] 0x7f,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] 0x7c,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] 0xc1,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x16] 0xf0,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: 
[0xf0,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] 0xfd,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf +# GFX12: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] -# GFX12: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] 0x01,0x05,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] 0xff,0x05,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] 0x01,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] 0x69,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] 0x6a,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x38] 0x6b,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] 0x7b,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] 0x7d,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] 0x7e,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] 0x7f,0x04,0x0a,0x38 +# GFX12: 
v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] 0x7c,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] 0xc1,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] 0xf0,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] 0xfd,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf +# GFX12: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] -# GFX12: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] 0x01,0x05,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] 0xff,0x05,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] 0x01,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] 0x69,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] 0x6a,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] 0x6b,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] 
0x7b,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] 0x7d,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] 0x7e,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] 0x7f,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] 0x7c,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] 0xc1,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] 0xf0,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] 0xfd,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00 +# GFX12: v_pk_fmac_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] +0x01,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x42] -0x01,0x05,0x0a,0x42 +0xff,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x42] -0xff,0x05,0x0a,0x42 +0x01,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: 
[0x01,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x42] -0x01,0x04,0x0a,0x42 +0x69,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x42] -0x69,0x04,0x0a,0x42 +0x6a,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x42] -0x6a,0x04,0x0a,0x42 +0x6b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x42] -0x6b,0x04,0x0a,0x42 +0x7b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x42] -0x7b,0x04,0x0a,0x42 +0x7d,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x42] -0x7d,0x04,0x0a,0x42 +0x7e,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x42] -0x7e,0x04,0x0a,0x42 +0x7f,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x42] -0x7f,0x04,0x0a,0x42 +0x7c,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x42] -0x7c,0x04,0x0a,0x42 +0xc1,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x42] # W64: 
v_sub_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x42] -0xc1,0x04,0x0a,0x42 +0xf0,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x42] -0xf0,0x04,0x0a,0x42 +0xfd,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x42] -0xfd,0x04,0x0a,0x42 +0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf # W32: v_sub_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] # W64: v_sub_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf -# GFX12: v_sub_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x66] 0x01,0x05,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x66] 0x7f,0x05,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x66] 0x01,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x66] 0x69,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x66] 0x6a,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, vcc_lo, v2.l ; encoding: 
[0x6a,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x66] 0x6b,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x66] 0x7b,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x66] 0x7d,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x66] 0x7e,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x66] 0x7f,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x66] 0x7c,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x66] 0xc1,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x66] 0xf0,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 
v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x66] 0xfd,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_sub_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_sub_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] -# GFX12: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] 0x01,0x05,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] 0xff,0x05,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] 0x01,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] 0x69,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] 0x6a,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] 0x6b,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] 0x7b,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x08] 0x7d,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, m0, v2 ; 
encoding: [0x7d,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] 0x7e,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] 0x7f,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] 0x7c,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] 0xc1,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] 0xf0,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x08] 0xfd,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf +# GFX12: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] -# GFX12: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] 0x01,0x05,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] 0xff,0x05,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] 0x01,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] 0x69,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] 
0x6a,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] 0x6b,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] 0x7b,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] 0x7d,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] 0x7e,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] 0x7f,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4c] 0x7c,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] 0xc1,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] 0xf0,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] 0xfd,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf +# GFX12: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x44] # W64: 
v_subrev_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x44] -0x01,0x05,0x0a,0x44 +0xff,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x44] -0xff,0x05,0x0a,0x44 +0x01,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x44] -0x01,0x04,0x0a,0x44 +0x69,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x44] -0x69,0x04,0x0a,0x44 +0x6a,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x44] -0x6a,0x04,0x0a,0x44 +0x6b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x44] -0x6b,0x04,0x0a,0x44 +0x7b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x44] -0x7b,0x04,0x0a,0x44 +0x7d,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x44] -0x7d,0x04,0x0a,0x44 +0x7e,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x44] -0x7e,0x04,0x0a,0x44 +0x7f,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: 
[0x7f,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x44] -0x7f,0x04,0x0a,0x44 +0x7c,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x44] -0x7c,0x04,0x0a,0x44 +0xc1,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x44] -0xc1,0x04,0x0a,0x44 +0xf0,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x44] -0xf0,0x04,0x0a,0x44 +0xfd,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x44] -0xfd,0x04,0x0a,0x44 +0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf # W32: v_subrev_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] # W64: v_subrev_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf -# GFX12: v_subrev_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x68] 0x01,0x05,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x68] 0x7f,0x05,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x68] 0x01,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, s1, 
v2.l ; encoding: [0x01,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x68] 0x69,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x68] 0x6a,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x68] 0x6b,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x68] 0x7b,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x68] 0x7d,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x68] 0x7e,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x68] 0x7f,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, 
null, v2 ; encoding: [0x7c,0x04,0x0a,0x68] 0x7c,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x68] 0xc1,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x68] 0xf0,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x68] 0xfd,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_subrev_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_subrev_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] -# GFX12: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] 0x01,0x05,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] 0xff,0x05,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] 0x01,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0a] 0x69,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, s105, v2 ; 
encoding: [0x69,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] 0x6a,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] 0x6b,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] 0x7b,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] 0x7d,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] 0x7e,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] 0x7f,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] 0x7c,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] 0xc1,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] 0xf0,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] 0xfd,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf +# GFX12: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] -# GFX12: 
v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] 0x01,0x05,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] 0xff,0x05,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] 0x01,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] 0x69,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] 0x6a,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] 0x6b,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] 0x7b,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] 0x7d,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] 0x7e,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] 0x7f,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] 0x7c,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] 
0xc1,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] 0xf0,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] 0xfd,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf +# GFX12: v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] -# GFX12: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] 0x01,0x05,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] 0xff,0x05,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] 0x01,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] 0x69,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] 0x6a,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] 0x6b,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] 0x7b,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3c] 0x7d,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, m0, v2 ; encoding: 
[0x7d,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] 0x7e,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] 0x7f,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] 0x7c,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] 0xc1,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] 0xf0,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] 0xfd,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf +# GFX12: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] -# GFX12: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] 0x01,0x05,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] 0xff,0x05,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] 0x01,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] 0x69,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] 0x6a,0x04,0x0a,0x3a +# 
GFX12: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] 0x6b,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] 0x7b,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] 0x7d,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3a] 0x7e,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] 0x7f,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] 0x7c,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] 0xc1,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] 0xf0,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] 0xfd,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf +# GFX12: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt index 05c8dff02a40b..551fb0d311188 100644 --- 
a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt @@ -1,1696 +1,1797 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-FAKE16 %s +0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, 
vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, 
v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 
+0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 # W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 -# GFX12: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp 
v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] -# GFX12: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13 +# GFX12-REAL16: 
v_add_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] -# GFX12: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_add_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] -# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, 
v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01 +# GFX12: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] -# GFX12: v_add_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13 +# GFX12: v_add_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] -# GFX12: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30 +# GFX12: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] -# GFX12: 
v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, 
v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] -# GFX12: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] -# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] -# GFX12: 
v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01 +# GFX12: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] -# GFX12: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13 +# GFX12: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] -# GFX12: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30 +# GFX12: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01 +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13 +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] -# GFX12: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30 +# GFX12: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, 
v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] 
0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] -# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] 
0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01 +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13 +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] -# GFX12: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30 +# GFX12: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] -# GFX12: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff +# GFX12: 
v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01 +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13 +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] -# GFX12: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30 +# GFX12: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] -# GFX12: 
v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] 0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30 +# GFX12-REAL16: v_ldexp_f16_dpp v127.l, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] +# GFX12-FAKE16: v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01 +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13 +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] -# GFX12: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff +# 
GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01 +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13 +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] -# GFX12: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] -# GFX12: v_max_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] 
0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13] -# GFX12: v_max_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_max_num_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_max_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30] -# GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff +# GFX12: v_max_num_f32_dpp v5, 
v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01 +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x2c,0x01,0x60,0x01,0x13 +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x60,0x01,0x13] -# GFX12: v_max_num_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xfd,0x30 +# GFX12: v_max_num_f32_dpp 
v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xfd,0x30] -# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01 +# GFX12: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] -# GFX12: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13 +# GFX12: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] -# GFX12: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] -# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01 +# GFX12: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] -# GFX12: v_max_u32_dpp 
v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13 +# GFX12: v_max_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] -# GFX12: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] -# GFX12: v_min_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] 
0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] 
+# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13] -# GFX12: v_min_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_min_num_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_min_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30] -# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01 +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x2a,0x01,0x60,0x01,0x13 +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x60,0x01,0x13] -# GFX12: v_min_num_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xfd,0x30 +# GFX12: v_min_num_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xfd,0x30] -# GFX12: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff +# GFX12: 
v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01 +# GFX12: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] -# GFX12: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13 +# GFX12: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] -# GFX12: v_min_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] -# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] 
0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01 +# GFX12: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] -# GFX12: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13 +# GFX12: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] -# GFX12: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] -# GFX12: 
v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13 +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, 
v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] -# GFX12: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30 +# GFX12: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] -# GFX12: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] 
0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] 
0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] -# GFX12: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_mul_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] -# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] 
0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13 +# GFX12: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] -# GFX12: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30 +# GFX12: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13 +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] -# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13 +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] -# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] -# GFX12: 
v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] 
0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13 +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] -# GFX12: 
v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13 +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] -# GFX12: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] -# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01 +# GFX12: v_or_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] -# GFX12: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13 +# GFX12: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] -# GFX12: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30 +# GFX12: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] 
-0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 # W32: v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, 
v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 -# GFX12: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] -# 
GFX12: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] -# GFX12: 
v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_sub_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] -# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_f32_dpp 
v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13 +# GFX12: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] -# GFX12: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30 +# GFX12: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] 
0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] -# GFX12: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, 
v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] # W64: 
v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 -# GFX12: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] -# GFX12: 
v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] -# GFX12: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_subrev_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] -# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01 +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13 +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] -# GFX12: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30 +# GFX12: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, 
v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff +# GFX12: 
v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01 +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13 +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] -# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30 +# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] -# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] 
0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01 +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13 +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] -# GFX12: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30 +# GFX12: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] -# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] 
0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01 +# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13 +# GFX12: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] -# GFX12: v_xor_b32_dpp v255, v255, v255 row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30 +# GFX12: v_xor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt index 2e33df35af1f3..bbf494c153fd3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt @@ -1,244 +1,261 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-FAKE16 %s +0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 # 
W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 -# GFX12: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] -# GFX12: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_add_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] -# GFX12: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05 +# GFX12: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] -# GFX12: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00 +# GFX12: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05 +# GFX12: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] -# GFX12: v_add_nc_u32_dpp v255, 
v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00 +# GFX12: v_add_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] -# GFX12: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05 +# GFX12: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] -# GFX12: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00 +# GFX12: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05 +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] -# GFX12: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00 +# GFX12: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] 
-0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] -# GFX12: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05 +# GFX12: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] -# GFX12: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00 +# GFX12: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] -# GFX12: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05 +# GFX12: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] -# GFX12: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00 +# GFX12: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] -# GFX12: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_ldexp_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05 +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] -# GFX12: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00 +# GFX12: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05 +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] -# GFX12: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00 +# GFX12: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] -# GFX12: v_max_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] -# GFX12: v_max_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_max_num_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_max_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] -# GFX12: v_max_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05 +# GFX12: v_max_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] -# GFX12: v_max_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00 +# GFX12: v_max_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00] -# GFX12: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05 +# GFX12: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] -# GFX12: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00 +# GFX12: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] -# GFX12: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05 +# GFX12: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] -# GFX12: 
v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00 +# GFX12: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] -# GFX12: v_min_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] -# GFX12: v_min_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_min_num_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_min_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] -# GFX12: v_min_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05 +# GFX12: v_min_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] -# GFX12: v_min_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00 +# GFX12: v_min_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00] -# GFX12: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05 +# GFX12: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] -# GFX12: v_min_i32_dpp v255, v255, v255 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00 +# GFX12: v_min_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] -# GFX12: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05 +# GFX12: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] -# GFX12: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00 +# GFX12: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05 +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] -# GFX12: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00 +# GFX12: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] -# GFX12: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] -# GFX12: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_mul_f16_dpp v127.l, v127.l, v127.l 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] -# GFX12: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05 +# GFX12: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] -# GFX12: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00 +# GFX12: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] -# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00 +# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] -# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00 +# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05 +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] -# GFX12: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00 +# GFX12: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05 +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] -# GFX12: v_mul_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00 +# GFX12: v_mul_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] -# GFX12: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05 +# GFX12: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] -# GFX12: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00 +# GFX12: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 # W32: 
v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 -# GFX12: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] -# GFX12: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_sub_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] -# GFX12: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05 +# GFX12: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] -# GFX12: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00 +# GFX12: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05 +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] -# GFX12: v_sub_nc_u32_dpp v255, v255, 
v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00 +# GFX12: v_sub_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 -# GFX12: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] -# GFX12: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_subrev_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] -# GFX12: v_subrev_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_f32_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] -# GFX12: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00 +# GFX12: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] -# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00 +# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] -# GFX12: v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05 +# GFX12: v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] -# GFX12: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00 +# GFX12: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] -# GFX12: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05 +# GFX12: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] -# GFX12: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00 +# GFX12: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] From 311c0772f9e67a694f3038ab63ea4ec981ce6a9a Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 30 Oct 2024 16:50:54 +0000 Subject: [PATCH 21/69] [AMDGPU] Fix test failures after #114232 and #114200 --- .../AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll | 12 ++++++++---- .../AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll index c202476d85baf..ae309f3a614d5 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll @@ -671,7 +671,9 @@ define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 ; @@ -685,10 +687,12 @@ define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 
[[COPY5]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: nonuniform_callee diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll index a456f549174c9..90707e823c147 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll @@ -671,7 +671,9 @@ define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 ; @@ -685,10 +687,12 @@ define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 - ; 
GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: nonuniform_callee From 463a4c16ea9c1a3c1210d0ac39e56a75b43b5a8d Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 30 Oct 2024 16:53:08 +0000 Subject: [PATCH 22/69] [clang] Remove some uses of llvm::StructType::setBody. NFC. (#113691) It is simple to create the struct body up front, now that we have transitioned to opaque pointers. 
--- clang/lib/CodeGen/CGBlocks.cpp | 8 ++-- clang/lib/CodeGen/CGObjCGNU.cpp | 4 +- clang/lib/CodeGen/CGObjCMac.cpp | 57 +++++++++++++-------------- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 12 +++--- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 41bb8d19d161e..bfa9b0a2f836b 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -2590,10 +2590,6 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { if (it != BlockByrefInfos.end()) return it->second; - llvm::StructType *byrefType = - llvm::StructType::create(getLLVMContext(), - "struct.__block_byref_" + D->getNameAsString()); - QualType Ty = D->getType(); CharUnits size; @@ -2658,7 +2654,9 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { } types.push_back(varTy); - byrefType->setBody(types, packed); + llvm::StructType *byrefType = llvm::StructType::create( + getLLVMContext(), types, "struct.__block_byref_" + D->getNameAsString(), + packed); BlockByrefInfo info; info.Type = byrefType; diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 7a07284f8a8aa..d6f5f2a43cf51 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -1509,8 +1509,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { GetSectionBounds(StringRef Section) { if (CGM.getTriple().isOSBinFormatCOFF()) { if (emptyStruct == nullptr) { - emptyStruct = llvm::StructType::create(VMContext, ".objc_section_sentinel"); - emptyStruct->setBody({}, /*isPacked*/true); + emptyStruct = llvm::StructType::create( + VMContext, {}, ".objc_section_sentinel", /*isPacked=*/true); } auto ZeroInit = llvm::Constant::getNullValue(emptyStruct); auto Sym = [&](StringRef Prefix, StringRef SecSuffix) { diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 1c16d273a5535..47ea636c75643 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp 
+++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -5835,15 +5835,7 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_protocol_extension * ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy); - // Handle recursive construction of Protocol and ProtocolList types - - ProtocolTy = - llvm::StructType::create(VMContext, "struct._objc_protocol"); - - ProtocolListTy = - llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), LongTy, - llvm::ArrayType::get(ProtocolTy, 0)); + // Handle construction of Protocol and ProtocolList types // struct _objc_protocol { // struct _objc_protocol_extension *isa; @@ -5852,9 +5844,16 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_method_description_list *instance_methods; // struct _objc_method_description_list *class_methods; // } - ProtocolTy->setBody(ProtocolExtensionPtrTy, Int8PtrTy, - llvm::PointerType::getUnqual(ProtocolListTy), - MethodDescriptionListPtrTy, MethodDescriptionListPtrTy); + ProtocolTy = llvm::StructType::create( + {ProtocolExtensionPtrTy, Int8PtrTy, + llvm::PointerType::getUnqual(VMContext), MethodDescriptionListPtrTy, + MethodDescriptionListPtrTy}, + "struct._objc_protocol"); + + ProtocolListTy = + llvm::StructType::create({llvm::PointerType::getUnqual(VMContext), LongTy, + llvm::ArrayType::get(ProtocolTy, 0)}, + "struct._objc_protocol_list"); // struct _objc_protocol_list * ProtocolListPtrTy = llvm::PointerType::getUnqual(ProtocolListTy); @@ -5886,8 +5885,6 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) "struct._objc_class_extension", IntTy, Int8PtrTy, PropertyListPtrTy); ClassExtensionPtrTy = llvm::PointerType::getUnqual(ClassExtensionTy); - ClassTy = llvm::StructType::create(VMContext, "struct._objc_class"); - // struct _objc_class { // Class isa; // Class super_class; @@ -5902,10 +5899,12 @@ 
ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *ivar_layout; // struct _objc_class_ext *ext; // }; - ClassTy->setBody(llvm::PointerType::getUnqual(ClassTy), - llvm::PointerType::getUnqual(ClassTy), Int8PtrTy, LongTy, - LongTy, LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy, - ProtocolListPtrTy, Int8PtrTy, ClassExtensionPtrTy); + ClassTy = llvm::StructType::create( + {llvm::PointerType::getUnqual(VMContext), + llvm::PointerType::getUnqual(VMContext), Int8PtrTy, LongTy, LongTy, + LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy, ProtocolListPtrTy, + Int8PtrTy, ClassExtensionPtrTy}, + "struct._objc_class"); ClassPtrTy = llvm::PointerType::getUnqual(ClassTy); @@ -5988,13 +5987,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const struct _prop_list_t * class_properties; // } - // Holder for struct _protocol_list_t * - ProtocolListnfABITy = - llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolnfABITy = llvm::StructType::create( "struct._protocol_t", ObjectPtrTy, Int8PtrTy, - llvm::PointerType::getUnqual(ProtocolListnfABITy), MethodListnfABIPtrTy, + llvm::PointerType::getUnqual(VMContext), MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, Int8PtrTy, PropertyListPtrTy); @@ -6006,8 +6001,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // long protocol_count; // Note, this is 32/64 bit // struct _protocol_t *[protocol_count]; // } - ProtocolListnfABITy->setBody(LongTy, - llvm::ArrayType::get(ProtocolnfABIPtrTy, 0)); + ProtocolListnfABITy = llvm::StructType::create( + {LongTy, llvm::ArrayType::get(ProtocolnfABIPtrTy, 0)}, + "struct._objc_protocol_list"); // struct _objc_protocol_list* ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy); @@ -6067,11 +6063,12 @@ 
ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct class_ro_t *ro; // } - ClassnfABITy = llvm::StructType::create(VMContext, "struct._class_t"); - ClassnfABITy->setBody(llvm::PointerType::getUnqual(ClassnfABITy), - llvm::PointerType::getUnqual(ClassnfABITy), CachePtrTy, - llvm::PointerType::getUnqual(ImpnfABITy), - llvm::PointerType::getUnqual(ClassRonfABITy)); + ClassnfABITy = llvm::StructType::create( + {llvm::PointerType::getUnqual(VMContext), + llvm::PointerType::getUnqual(VMContext), CachePtrTy, + llvm::PointerType::getUnqual(ImpnfABITy), + llvm::PointerType::getUnqual(ClassRonfABITy)}, + "struct._class_t"); // LLVM for struct _class_t * ClassnfABIPtrTy = llvm::PointerType::getUnqual(ClassnfABITy); diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 0b0b45ffead92..3802dc8bcafc4 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -529,31 +529,29 @@ class MicrosoftCXXABI : public CGCXXABI { if (ClassHierarchyDescriptorType) return ClassHierarchyDescriptorType; // Forward-declare RTTIClassHierarchyDescriptor to break a cycle. 
- ClassHierarchyDescriptorType = llvm::StructType::create( - CGM.getLLVMContext(), "rtti.ClassHierarchyDescriptor"); llvm::Type *FieldTypes[] = {CGM.IntTy, CGM.IntTy, CGM.IntTy, getImageRelativeType(CGM.UnqualPtrTy)}; - ClassHierarchyDescriptorType->setBody(FieldTypes); + ClassHierarchyDescriptorType = + llvm::StructType::create(FieldTypes, "rtti.ClassHierarchyDescriptor"); return ClassHierarchyDescriptorType; } llvm::StructType *getCompleteObjectLocatorType() { if (CompleteObjectLocatorType) return CompleteObjectLocatorType; - CompleteObjectLocatorType = llvm::StructType::create( - CGM.getLLVMContext(), "rtti.CompleteObjectLocator"); llvm::Type *FieldTypes[] = { CGM.IntTy, CGM.IntTy, CGM.IntTy, getImageRelativeType(CGM.Int8PtrTy), getImageRelativeType(CGM.UnqualPtrTy), - getImageRelativeType(CompleteObjectLocatorType), + getImageRelativeType(CGM.VoidTy), }; llvm::ArrayRef FieldTypesRef(FieldTypes); if (!isImageRelative()) FieldTypesRef = FieldTypesRef.drop_back(); - CompleteObjectLocatorType->setBody(FieldTypesRef); + CompleteObjectLocatorType = + llvm::StructType::create(FieldTypesRef, "rtti.CompleteObjectLocator"); return CompleteObjectLocatorType; } From cc2d8e7616762710b284aa9af44a297b633b270a Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 30 Oct 2024 16:54:09 +0000 Subject: [PATCH 23/69] [AArch64] Add assembly/disassembly of FMOP4{A,S} (non-widening) single-precision instructions (#113344) The new instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 3 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 37 +++ .../fmop4as-fp32-non-widening-diagnostics.s | 245 ++++++++++++++++++ .../SME2p2/fmop4as-fp32-non-widening.s | 179 +++++++++++++ 4 files changed, 464 insertions(+) create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening.s diff --git 
a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index e78cd7146df2a..e7389b533354d 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1017,6 +1017,9 @@ let Predicates = [HasSME2p2] in { defm FMUL_2Z2Z : sme2_multi2_fmul_mm< "fmul">; defm FMUL_4ZZ : sme2_multi4_fmul_sm<"fmul">; defm FMUL_4Z4Z : sme2_multi4_fmul_mm< "fmul">; + + defm FMOP4A : sme2_fmop4as_fp32_non_widening<0, "fmop4a">; + defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s">; } // [HasSME2p2] let Predicates = [HasSME2p2, HasSMEB16B16] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index b31bea712a76d..2740ac814f9ca 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5454,3 +5454,40 @@ multiclass sme2_bfmop4as_non_widening { // Multiple vectors def _M2Z2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; } + +class sme2_fp32_quarter_tile_outer_product + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000000000; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-2} = 0b00; + let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4as_fp32_non_widening { + // Single vectors + def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_S : sme2_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR32Mul2_Lo, 
ZZ_s_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_S : sme2_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZZ_s_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening-diagnostics.s new file mode 100644 index 0000000000000..c9c59128f4206 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening-diagnostics.s @@ -0,0 +1,245 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s + +// FMOP4A + +// Single vectors + +fmop4a za0.d, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z15.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z16.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, z12.s, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, z12.s, z14.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, z12.s, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Single and multiple vectors + +fmop4a za0.d, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction + +fmop4a za0.s, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z1.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z16.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.s, {z17.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, z0.s, {z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.s, {z12.s-z13.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.d, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4a za0.s, {z1.s-z2.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z2.s-z4.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z16.s-z17.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid 
vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.s-z1.s}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, {z0.s-z1.s}, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, {z0.s-z1.s}, z12.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Multiple vectors + +fmop4a za0.d, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z1.s-z2.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z2.s-z4.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z18.s-z19.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.s-z1.s}, {z19.s-z20.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.s-z1.s}, 
{z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.s-z1.s}, {z10.s-z11.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + + +// FMOP4S + +// Single vectors + +fmop4s za0.d, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z15.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z16.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, z12.s, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, z12.s, z14.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, z12.s, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Single and multiple vectors + +fmop4s za0.d, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + 
+fmop4s za0.s, z1.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z16.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.s, {z17.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, z0.s, {z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.s, {z12.s-z13.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4s za0.d, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4s za0.s, {z1.s-z2.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z2.s-z4.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z16.s-z17.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.s-z1.s}, z16.d +// 
CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, {z0.s-z1.s}, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, {z0.s-z1.s}, z12.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Multiple vectors + +fmop4s za0.d, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z1.s-z2.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z2.s-z4.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z18.s-z19.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.s-z1.s}, {z19.s-z20.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.s-z1.s}, {z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.s-z1.s}, {z10.s-z11.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, 
expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening.s new file mode 100644 index 0000000000000..e65def17cd1b3 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening.s @@ -0,0 +1,179 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +// FMOP4A + +// Single vectors + +fmop4a za0.s, z0.s, z16.s // 10000000-00000000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.s, z16.s +// CHECK-ENCODING: [0x00,0x00,0x00,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80000000 + +fmop4a za3.s, z12.s, z24.s // 10000000-00001000-00000001-10000011 +// CHECK-INST: fmop4a za3.s, z12.s, z24.s +// CHECK-ENCODING: [0x83,0x01,0x08,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80080183 + +fmop4a za3.s, z14.s, z30.s // 10000000-00001110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.s, z30.s +// CHECK-ENCODING: [0xc3,0x01,0x0e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e01c3 + +// Single and multiple vectors + +fmop4a za0.s, z0.s, {z16.s-z17.s} // 10000000-00010000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.s, { z16.s, z17.s } +// CHECK-ENCODING: [0x00,0x00,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100000 + +fmop4a za1.s, z10.s, {z20.s-z21.s} // 10000000-00010100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.s, { z20.s, z21.s } +// CHECK-ENCODING: [0x41,0x01,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140141 + +fmop4a za3.s, z14.s, {z30.s-z31.s} // 10000000-00011110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.s, { z30.s, z31.s } +// CHECK-ENCODING: [0xc3,0x01,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e01c3 + +// Multiple and single vectors + +fmop4a za0.s, {z0.s-z1.s}, z16.s // 10000000-00000000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.s, z1.s }, z16.s +// CHECK-ENCODING: [0x00,0x02,0x00,0x80] +// CHECK-ERROR: instruction 
requires: sme2p2 +// CHECK-UNKNOWN: 80000200 + +fmop4a za1.s, {z10.s-z11.s}, z20.s // 10000000-00000100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.s, z11.s }, z20.s +// CHECK-ENCODING: [0x41,0x03,0x04,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80040341 + +fmop4a za3.s, {z14.s-z15.s}, z30.s // 10000000-00001110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.s, z15.s }, z30.s +// CHECK-ENCODING: [0xc3,0x03,0x0e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e03c3 + +// Multiple vectors + +fmop4a za0.s, {z0.s-z1.s}, {z16.s-z17.s} // 10000000-00010000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.s, z1.s }, { z16.s, z17.s } +// CHECK-ENCODING: [0x00,0x02,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100200 + +fmop4a za1.s, {z10.s-z11.s}, {z20.s-z21.s} // 10000000-00010100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.s, z11.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x41,0x03,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140341 + +fmop4a za3.s, {z14.s-z15.s}, {z30.s-z31.s} // 10000000-00011110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.s, z15.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xc3,0x03,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e03c3 + +// FMOP4S + +// Single vectors + +fmop4s za0.s, z0.s, z16.s // 10000000-00000000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.s, z16.s +// CHECK-ENCODING: [0x10,0x00,0x00,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80000010 + +fmop4s za3.s, z12.s, z24.s // 10000000-00001000-00000001-10010011 +// CHECK-INST: fmop4s za3.s, z12.s, z24.s +// CHECK-ENCODING: [0x93,0x01,0x08,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80080193 + +fmop4s za3.s, z14.s, z30.s // 10000000-00001110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.s, z30.s +// CHECK-ENCODING: [0xd3,0x01,0x0e,0x80] 
+// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e01d3 + +// Single and multiple vectors + +fmop4s za0.s, z0.s, {z16.s-z17.s} // 10000000-00010000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.s, { z16.s, z17.s } +// CHECK-ENCODING: [0x10,0x00,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100010 + +fmop4s za1.s, z10.s, {z20.s-z21.s} // 10000000-00010100-00000001-01010001 +// CHECK-INST: fmop4s za1.s, z10.s, { z20.s, z21.s } +// CHECK-ENCODING: [0x51,0x01,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140151 + +fmop4s za3.s, z14.s, {z30.s-z31.s} // 10000000-00011110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.s, { z30.s, z31.s } +// CHECK-ENCODING: [0xd3,0x01,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e01d3 + +// Multiple and single vectors + +fmop4s za0.s, {z0.s-z1.s}, z16.s // 10000000-00000000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.s, z1.s }, z16.s +// CHECK-ENCODING: [0x10,0x02,0x00,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80000210 + +fmop4s za1.s, {z10.s-z11.s}, z20.s // 10000000-00000100-00000011-01010001 +// CHECK-INST: fmop4s za1.s, { z10.s, z11.s }, z20.s +// CHECK-ENCODING: [0x51,0x03,0x04,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80040351 + +fmop4s za3.s, {z14.s-z15.s}, z30.s // 10000000-00001110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.s, z15.s }, z30.s +// CHECK-ENCODING: [0xd3,0x03,0x0e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e03d3 + +// Multiple vectors + +fmop4s za0.s, {z0.s-z1.s}, {z16.s-z17.s} // 10000000-00010000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.s, z1.s }, { z16.s, z17.s } +// CHECK-ENCODING: [0x10,0x02,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100210 + +fmop4s za1.s, {z10.s-z11.s}, {z20.s-z21.s} // 10000000-00010100-00000011-01010001 +// 
CHECK-INST: fmop4s za1.s, { z10.s, z11.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x51,0x03,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140351 + +fmop4s za3.s, {z14.s-z15.s}, {z30.s-z31.s} // 10000000-00011110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.s, z15.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xd3,0x03,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e03d3 From d7e6cba030f34162ea45aef1dc18f708b5d2ec70 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Oct 2024 09:55:14 -0700 Subject: [PATCH 24/69] [RISCV] Use bit or bits for some tablegen class arguments. NFC These eventually end up in TSFlags so we should use the same types. --- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 6ffdae1d7df2a..5554fda760ebb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -877,7 +877,7 @@ class VPseudoILoadNoMask LMUL, bit Ordered, bit EarlyClobber, - int TargetConstraintType = 1> : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -899,7 +899,7 @@ class VPseudoILoadMask LMUL, bit Ordered, bit EarlyClobber, - int TargetConstraintType = 1> : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, IdxClass:$rs2, @@ -1021,7 +1021,7 @@ class VPseudoNullaryPseudoM : class VPseudoUnaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1039,7 +1039,7 @@ class VPseudoUnaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins OpClass:$rs2, AVL:$vl, 
sew:$sew), []>, RISCVVPseudo { @@ -1055,7 +1055,7 @@ class VPseudoUnaryNoMaskNoPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1075,7 +1075,7 @@ class VPseudoUnaryNoMaskRoundingMode : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1094,7 +1094,7 @@ class VPseudoUnaryMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$rm, @@ -1133,7 +1133,7 @@ class VPseudoUnaryMask_NoExcept : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$frm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1152,7 +1152,7 @@ class VPseudoUnaryNoMask_FRM : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$frm, @@ -1211,7 +1211,7 @@ class VPseudoBinaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew), []>, RISCVVPseudo { @@ -1228,7 +1228,7 @@ class VPseudoBinaryNoMaskPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1247,8 +1247,8 @@ class VPseudoBinaryNoMaskRoundingMode : + bit UsesVXRM_ = 1, + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1268,8 +1268,8 @@ class VPseudoBinaryMaskPolicyRoundingMode : + bit UsesVXRM_, + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, @@ -1294,7 
+1294,7 @@ class VPseudoBinaryMaskPolicyRoundingMode : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1314,7 +1314,7 @@ class VPseudoTiedBinaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs2, Op2Class:$rs1, ixlenimm:$rm, @@ -1367,7 +1367,7 @@ class VPseudoBinaryMaskPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, @@ -1427,7 +1427,7 @@ class VPseudoBinaryMOutMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, @@ -1449,7 +1449,7 @@ class VPseudoBinaryMOutMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op2Class:$rs1, @@ -1470,7 +1470,7 @@ class VPseudoTiedBinaryMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op2Class:$rs1, @@ -1498,7 +1498,7 @@ class VPseudoBinaryCarry : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), !if(CarryIn, (ins Op1Class:$rs2, Op2Class:$rs1, @@ -1520,7 +1520,7 @@ class VPseudoTiedBinaryCarryIn : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl, sew:$sew), []>, @@ -1556,7 +1556,7 @@ class VPseudoTernaryNoMaskWithPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -1575,7 +1575,7 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, ixlenimm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, @@ -2105,7 +2105,7 @@ multiclass VPseudoBinary TargetConstraintType = 1, bit Commutable 
= 0> { let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); @@ -2123,8 +2123,8 @@ multiclass VPseudoBinaryRoundingMode TargetConstraintType = 1, bit Commutable = 0> { let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); @@ -2147,7 +2147,7 @@ multiclass VPseudoBinaryM TargetConstraintType = 1, bit Commutable = 0> { let VLMul = MInfo.value, isCommutable = Commutable in { def "_" # MInfo.MX : VPseudoBinaryNoMask { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value in { def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask; @@ -2195,7 +2195,7 @@ multiclass VPseudoTiedBinaryRoundingMode { + bits<2> TargetConstraintType = 1> { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); let VLMul = MInfo.value in { def suffix # "_TIED": @@ -2417,7 +2417,7 @@ multiclass VPseudoBinaryV_WI_RM { multiclass VPseudoBinaryV_VM { + bits<2> TargetConstraintType = 1> { let isCommutable = Commutable in def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarry { } multiclass VPseudoBinaryV_XM { + string Constraint = "", bits<2> TargetConstraintType = 1> { def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarry { + string Constraint = "", bits<2> TargetConstraintType = 1> { def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarry TargetConstraintType = 1, bit Commutable = 0> { defm _VV : VPseudoBinaryM; } -multiclass VPseudoBinaryM_VX { +multiclass VPseudoBinaryM_VX TargetConstraintType = 1> { defm "_VX" : VPseudoBinaryM; } -multiclass VPseudoBinaryM_VF { +multiclass VPseudoBinaryM_VF TargetConstraintType = 1> { defm "_V" # f.FX : VPseudoBinaryM; } -multiclass VPseudoBinaryM_VI { +multiclass VPseudoBinaryM_VI TargetConstraintType = 1> { defm _VI : VPseudoBinaryM; } @@ -3202,7 +3202,7 @@ multiclass VPseudoTernaryWithPolicy { + bits<2> TargetConstraintType = 1> { let 
VLMul = MInfo.value in { let isCommutable = Commutable in def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy; @@ -3218,7 +3218,7 @@ multiclass VPseudoTernaryWithPolicyRoundingMode { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); let isCommutable = Commutable in @@ -3548,7 +3548,7 @@ multiclass VPseudoConversion { + bits<2> TargetConstraintType = 1> { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); let VLMul = MInfo.value, SEW=sew in { def suffix : VPseudoUnaryNoMask; @@ -3563,7 +3563,7 @@ multiclass VPseudoConversionRoundingMode { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value, SEW=sew in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoUnaryNoMaskRoundingMode; @@ -3580,7 +3580,7 @@ multiclass VPseudoConversionRM { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value, SEW=sew in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoUnaryNoMask_FRM Date: Wed, 30 Oct 2024 17:03:35 +0000 Subject: [PATCH 25/69] [Clang] Add and use mangleVendorType helper. NFC. 
(#108970) --- clang/lib/AST/ItaniumMangle.cpp | 52 +++++++++++++++------------------ 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index b3e46508cf596..14bc260d0245f 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -468,6 +468,7 @@ class CXXNameMangler { void mangleLambdaSig(const CXXRecordDecl *Lambda); void mangleModuleNamePrefix(StringRef Name, bool IsPartition = false); void mangleVendorQualifier(StringRef Name); + void mangleVendorType(StringRef Name); private: @@ -2891,6 +2892,10 @@ void CXXNameMangler::mangleVendorQualifier(StringRef name) { Out << 'U' << name.size() << name; } +void CXXNameMangler::mangleVendorType(StringRef name) { + Out << 'u' << name.size() << name; +} + void CXXNameMangler::mangleRefQualifier(RefQualifierKind RefQualifier) { // ::= R # lvalue reference // ::= O # rvalue-reference @@ -3413,8 +3418,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { if (T->getKind() == BuiltinType::SveBFloat16 && \ isCompatibleWith(LangOptions::ClangABI::Ver17)) { \ /* Prior to Clang 18.0 we used this incorrect mangled name */ \ - type_name = "__SVBFloat16_t"; \ - Out << "u" << type_name.size() << type_name; \ + mangleVendorType("__SVBFloat16_t"); \ } else { \ type_name = MangledName; \ Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \ @@ -3436,35 +3440,30 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \ break; #include "clang/Basic/AArch64SVEACLETypes.def" -#define PPC_VECTOR_TYPE(Name, Id, Size) \ - case BuiltinType::Id: \ - type_name = #Name; \ - Out << 'u' << type_name.size() << type_name; \ +#define PPC_VECTOR_TYPE(Name, Id, Size) \ + case BuiltinType::Id: \ + mangleVendorType(#Name); \ break; #include "clang/Basic/PPCTypes.def" // TODO: Check the mangling scheme for RISC-V V. 
#define RVV_TYPE(Name, Id, SingletonId) \ case BuiltinType::Id: \ - type_name = Name; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(Name); \ break; #include "clang/Basic/RISCVVTypes.def" #define WASM_REF_TYPE(InternalName, MangledName, Id, SingletonId, AS) \ case BuiltinType::Id: \ - type_name = MangledName; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(MangledName); \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" #define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \ case BuiltinType::Id: \ - type_name = Name; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(Name); \ break; #include "clang/Basic/AMDGPUTypes.def" #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) \ case BuiltinType::Id: \ - type_name = #Name; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(#Name); \ break; #include "clang/Basic/HLSLIntangibleTypes.def" } @@ -4035,8 +4034,9 @@ void CXXNameMangler::mangleAArch64FixedSveVectorType(const VectorType *T) { if (T->getVectorKind() == VectorKind::SveFixedLengthPredicate) VecSizeInBits *= 8; - Out << "9__SVE_VLSI" << 'u' << TypeName.size() << TypeName << "Lj" - << VecSizeInBits << "EE"; + Out << "9__SVE_VLSI"; + mangleVendorType(TypeName); + Out << "Lj" << VecSizeInBits << "EE"; } void CXXNameMangler::mangleAArch64FixedSveVectorType( @@ -4136,8 +4136,9 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) { } TypeNameOS << "_t"; - Out << "9__RVV_VLSI" << 'u' << TypeNameStr.size() << TypeNameStr << "Lj" - << VecSizeInBits << "EE"; + Out << "9__RVV_VLSI"; + mangleVendorType(TypeNameStr); + Out << "Lj" << VecSizeInBits << "EE"; } void CXXNameMangler::mangleRISCVFixedRVVVectorType( @@ -4236,8 +4237,7 @@ void CXXNameMangler::mangleType(const ConstantMatrixType *T) { // Mangle matrix types as a vendor extended type: // umatrix_typeIE - StringRef VendorQualifier = "matrix_type"; - Out << "u" << VendorQualifier.size() << VendorQualifier; 
+ mangleVendorType("matrix_type"); Out << "I"; auto &ASTCtx = getASTContext(); @@ -4255,8 +4255,7 @@ void CXXNameMangler::mangleType(const ConstantMatrixType *T) { void CXXNameMangler::mangleType(const DependentSizedMatrixType *T) { // Mangle matrix types as a vendor extended type: // umatrix_typeIE - StringRef VendorQualifier = "matrix_type"; - Out << "u" << VendorQualifier.size() << VendorQualifier; + mangleVendorType("matrix_type"); Out << "I"; mangleTemplateArgExpr(T->getRowExpr()); @@ -4302,7 +4301,7 @@ void CXXNameMangler::mangleType(const ObjCObjectType *T) { StringRef name = I->getName(); QualOS << name.size() << name; } - Out << 'U' << QualStr.size() << QualStr; + mangleVendorQualifier(QualStr); } mangleType(T->getBaseType()); @@ -4436,8 +4435,6 @@ void CXXNameMangler::mangleType(const UnaryTransformType *T) { // If this is dependent, we need to record that. If not, we simply // mangle it as the underlying type since they are equivalent. if (T->isDependentType()) { - Out << "u"; - StringRef BuiltinName; switch (T->getUTTKind()) { #define TRANSFORM_TYPE_TRAIT_DEF(Enum, Trait) \ @@ -4446,7 +4443,7 @@ void CXXNameMangler::mangleType(const UnaryTransformType *T) { break; #include "clang/Basic/TransformTypeTraits.def" } - Out << BuiltinName.size() << BuiltinName; + mangleVendorType(BuiltinName); } Out << "I"; @@ -5311,9 +5308,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, // ::= u * E # vendor extension const TypeTraitExpr *TTE = cast(E); NotPrimaryExpr(); - Out << 'u'; llvm::StringRef Spelling = getTraitSpelling(TTE->getTrait()); - Out << Spelling.size() << Spelling; + mangleVendorType(Spelling); for (TypeSourceInfo *TSI : TTE->getArgs()) { mangleType(TSI->getType()); } From 0c8e12fc64073a889956e790881cdf0d58018372 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 30 Oct 2024 10:09:34 -0700 Subject: [PATCH 26/69] [libc] Fix fexcept_t type to match canonical ABI and API (#113666) In glibc and musl, fexcept_t is unsigned short 
int on x86 and unsigned int on other machines that llvm-libc supports. Match that ABI (only different from before on x86) and API (different everywhere as it was previously signed). --- libc/include/llvm-libc-types/fexcept_t.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libc/include/llvm-libc-types/fexcept_t.h b/libc/include/llvm-libc-types/fexcept_t.h index 60687bd1318aa..5aa09fbbaffc7 100644 --- a/libc/include/llvm-libc-types/fexcept_t.h +++ b/libc/include/llvm-libc-types/fexcept_t.h @@ -9,6 +9,10 @@ #ifndef LLVM_LIBC_TYPES_FEXCEPT_T_H #define LLVM_LIBC_TYPES_FEXCEPT_T_H -typedef int fexcept_t; +#if defined(__x86_64__) || defined(__i386__) +typedef unsigned short int fexcept_t; +#else +typedef unsigned int fexcept_t; +#endif #endif // LLVM_LIBC_TYPES_FEXCEPT_T_H From 5192cb772ad58af4b557539791ff8de60ab450a3 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Wed, 30 Oct 2024 16:55:40 +0000 Subject: [PATCH 27/69] [AArch64] Add hidden option to enable subreg liveness tracking. Subreg liveness tracking is disabled by default for now until all issues are ironed out. This option allows the feature to be used in tests. --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 9 +++++++++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 7fb2a961e0313..736d57e6ae2fd 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -86,6 +86,13 @@ static cl::alias AArch64StreamingStackHazardSize( cl::desc("alias for -aarch64-streaming-hazard-size"), cl::aliasopt(AArch64StreamingHazardSize)); +// Subreg liveness tracking is disabled by default for now until all issues +// are ironed out. This option allows the feature to be used in tests. 
+static cl::opt + EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking", + cl::init(false), cl::Hidden, + cl::desc("Enable subreg liveness tracking")); + unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) return OverrideVectorInsertExtractBaseCost; @@ -380,6 +387,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, ReserveXRegisterForRA.set(29); AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM)); + + EnableSubregLiveness = EnableSubregLivenessTracking.getValue(); } const CallLowering *AArch64Subtarget::getCallLowering() const { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 50adb7cbf69a8..f3dcce3f3994b 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -90,6 +90,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { unsigned VScaleForTuning = 2; TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled; + bool EnableSubregLiveness; + /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -153,6 +155,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } + bool enableSubRegLiveness() const override { return EnableSubregLiveness; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } From 70d35fbdb6c01e2ccd76ce5c5fe7610ab77d0ea1 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 30 Oct 2024 10:25:26 +1100 Subject: [PATCH 28/69] [ORC] Fix include guard names. NFC. 
--- .../ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h index ef42cc5f798fd..8a4740c1dd9cb 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H -#define LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H +#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/RedirectionManager.h" @@ -103,4 +103,4 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager, } // namespace orc } // namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H From b94762d5a7fbf883707c4018dbf43d7525a06e12 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 30 Oct 2024 10:26:26 +1100 Subject: [PATCH 29/69] [ORC] Add comment on include guard #endif --- llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h index 4adc3efad5573..cd185d54b2e7c 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h @@ -178,4 +178,4 @@ class ReOptimizeLayer : public IRLayer, public ResourceManager { } // namespace orc } // namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_ORC_REOPTIMIZELAYER_H From feb2d867fac3b6339c169fff97ddf0716fce6f0a 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kenji=20Mouri=20/=20=E6=AF=9B=E5=88=A9=20=E7=A0=94?= =?UTF-8?q?=E4=BA=8C?= Date: Thu, 31 Oct 2024 01:34:32 +0800 Subject: [PATCH 30/69] [TLI] Add support for hypot libcall. (#113724) This patch adds basic support for `hypot`. Constant folding support will be submitted in a subsequent patch. Related issue: https://github.com/llvm/llvm-project/issues/113711 Note: It's my first time contributing to the LLVM with encouragement from one of my friends, @fawdlstty. I learned a lot from https://github.com/llvm/llvm-project/pull/99611, and thanks for that. Kenji Mouri --- llvm/include/llvm/Analysis/TargetLibraryInfo.def | 15 +++++++++++++++ llvm/lib/Analysis/TargetLibraryInfo.cpp | 2 ++ llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 3 +++ .../Transforms/InferFunctionAttrs/annotate.ll | 9 +++++++++ .../tools/llvm-tli-checker/ps4-tli-check.yaml | 12 ++++++++++++ llvm/unittests/Analysis/TargetLibraryInfoTest.cpp | 3 +++ 6 files changed, 44 insertions(+) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 3e23e398f6a79..fd53a26ef8fc1 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1671,6 +1671,21 @@ TLI_DEFINE_ENUM_INTERNAL(htons) TLI_DEFINE_STRING_INTERNAL("htons") TLI_DEFINE_SIG_INTERNAL(Int16, Int16) +/// double hypot(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(hypot) +TLI_DEFINE_STRING_INTERNAL("hypot") +TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl, Dbl) + +/// float hypotf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(hypotf) +TLI_DEFINE_STRING_INTERNAL("hypotf") +TLI_DEFINE_SIG_INTERNAL(Flt, Flt, Flt) + +/// long double hypotl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(hypotl) +TLI_DEFINE_STRING_INTERNAL("hypotl") +TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl) + /// int iprintf(const char *format, ...); TLI_DEFINE_ENUM_INTERNAL(iprintf) TLI_DEFINE_STRING_INTERNAL("iprintf") diff --git 
a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 0ee83d217a500..7f0b98ab3c151 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -300,6 +300,7 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_expf); TLI.setUnavailable(LibFunc_floorf); TLI.setUnavailable(LibFunc_fmodf); + TLI.setUnavailable(LibFunc_hypotf); TLI.setUnavailable(LibFunc_log10f); TLI.setUnavailable(LibFunc_logf); TLI.setUnavailable(LibFunc_modff); @@ -331,6 +332,7 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_floorl); TLI.setUnavailable(LibFunc_fmodl); TLI.setUnavailable(LibFunc_frexpl); + TLI.setUnavailable(LibFunc_hypotl); TLI.setUnavailable(LibFunc_ldexpl); TLI.setUnavailable(LibFunc_log10l); TLI.setUnavailable(LibFunc_logl); diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 5fd4fd78c28a9..e039457f313b2 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1215,6 +1215,9 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_fmod: case LibFunc_fmodf: case LibFunc_fmodl: + case LibFunc_hypot: + case LibFunc_hypotf: + case LibFunc_hypotl: case LibFunc_isascii: case LibFunc_isdigit: case LibFunc_labs: diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index d8266f4c6703d..452d90aa98d88 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -589,6 +589,15 @@ declare ptr @gets(ptr) ; CHECK: declare noundef i32 @gettimeofday(ptr nocapture noundef, ptr nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @gettimeofday(ptr, ptr) +; CHECK: declare double @hypot(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare double 
@hypot(double, double) + +; CHECK: declare float @hypotf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare float @hypotf(float, float) + +; CHECK: declare x86_fp80 @hypotl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare x86_fp80 @hypotl(x86_fp80, x86_fp80) + ; CHECK: declare i32 @isascii(i32) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i32 @isascii(i32) diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index 408b9c3993428..d52f3c751b066 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -602,6 +602,18 @@ DynamicSymbols: Type: STT_FUNC Section: .text Binding: STB_GLOBAL + - Name: hypot + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: hypotf + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: hypotl + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL - Name: isdigit Type: STT_FUNC Section: .text diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 98f8989d4e6e9..982d00c5d3359 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -249,6 +249,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare %struct* @getpwnam(i8*)\n" "declare i8* @gets(i8*)\n" "declare i32 @gettimeofday(%struct*, i8*)\n" + "declare double @hypot(double, double)\n" + "declare float @hypotf(float, float)\n" + "declare x86_fp80 @hypotl(x86_fp80, x86_fp80)\n" "declare i32 @_Z7isasciii(i32)\n" "declare i32 @_Z7isdigiti(i32)\n" "declare i64 @labs(i64)\n" From 04549500562783b01db262de62fe324c7ee471c4 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 30 Oct 2024 17:42:13 +0000 Subject: [PATCH 31/69] [AArch64] Add assembly/disassembly for FMOP4{A,S} (non-widening) double-precision instructions (#113345) The new instructions are described in 
https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 5 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 37 +++ .../fmop4as-fp64-non-widening-diagnostics.s | 243 ++++++++++++++++++ .../SME2p2/fmop4as-fp64-non-widening.s | 180 +++++++++++++ 4 files changed, 465 insertions(+) create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index e7389b533354d..d77219fa7a305 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1055,3 +1055,8 @@ let Predicates = [HasSME2p2, HasSMEB16B16] in { defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a">; defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s">; } + +let Predicates = [HasSME2p2, HasSMEF64F64] in { + defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a">; + defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s">; +} diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 2740ac814f9ca..1c5ec09692456 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5491,3 +5491,40 @@ multiclass sme2_fmop4as_fp32_non_widening { // Multiple vectors def _M2Z2Z_S : sme2_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZZ_s_mul_r_Hi>; } + +class sme2_fp64_quarter_tile_outer_product + : I<(outs TileOp64:$ZAda), + (ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<3> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000000110; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3} = 0b1; + let 
Inst{2-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4as_fp64_non_widening { + // Single vectors + def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_D : sme2_fp64_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR64Mul2_Lo, ZZ_d_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_D : sme2_fp64_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZZ_d_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening-diagnostics.s new file mode 100644 index 0000000000000..ff9602bc12afc --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening-diagnostics.s @@ -0,0 +1,243 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f64f64 < %s 2>&1 | FileCheck %s + +// FMOP4A + +// Single vectors + +fmop4a za0.s, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za8.d, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z15.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z16.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, z12.d, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector 
register, expected even register in z16.d..z30.d + +fmop4a za0.d, z12.d, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, z12.d, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Single and multiple vectors + +fmop4a za0.s, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za8.d, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z1.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z16.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.d, {z17.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, z0.d, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.d, {z12.d-z13.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.s, {z0.d-z1.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za8.d, {z0.d-z1.d}, z16.d +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.s-z1.s}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z1.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z16.d-z17.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z1.d}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, {z0.d-z1.d}, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, {z0.d-z1.d}, z12.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Multiple vectors + +fmop4a za0.s, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za8.d, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z1.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z18.d-z19.d}, 
{z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.d-z1.d}, {z19.d-z20.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z1.d}, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.d-z1.d}, {z10.d-z11.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// FMOP4S + +// Single vectors + +fmop4s za0.s, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za8.d, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z15.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z16.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, z12.d, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, z12.d, z14.d +// 
CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, z12.d, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Single and multiple vectors + +fmop4s za0.s, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za8.d, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z1.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z16.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.d, {z17.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, z0.d, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.d, {z12.d-z13.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4s za0.s, {z0.d-z1.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za8.d, {z0.d-z1.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, 
{z0.s-z1.s}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z1.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z16.d-z17.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z1.d}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, {z0.d-z1.d}, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, {z0.d-z1.d}, z12.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Multiple vectors + +fmop4s za0.s, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za8.d, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z1.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z18.d-z19.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, 
expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z0.d-z1.d}, {z19.d-z20.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z1.d}, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z0.d-z1.d}, {z10.d-z11.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening.s new file mode 100644 index 0000000000000..b0ad2984ad5ac --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening.s @@ -0,0 +1,180 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-f64f64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f64f64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +// FMOP4A + +// Single vectors + +fmop4a za0.d, z0.d, z16.d // 10000000-11000000-00000000-00001000 +// CHECK-INST: fmop4a za0.d, z0.d, z16.d +// CHECK-ENCODING: [0x08,0x00,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00008 + +fmop4a za5.d, z10.d, z20.d // 10000000-11000100-00000001-01001101 +// CHECK-INST: fmop4a za5.d, z10.d, z20.d +// CHECK-ENCODING: [0x4d,0x01,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c4014d + +fmop4a za7.d, z14.d, z30.d // 10000000-11001110-00000001-11001111 +// CHECK-INST: fmop4a za7.d, z14.d, z30.d +// CHECK-ENCODING: [0xcf,0x01,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce01cf + +// Single and multiple vectors + +fmop4a za0.d, z0.d, {z16.d-z17.d} // 10000000-11010000-00000000-00001000 +// CHECK-INST: fmop4a za0.d, z0.d, { z16.d, z17.d } +// CHECK-ENCODING: [0x08,0x00,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00008 + +fmop4a za5.d, z10.d, {z20.d-z21.d} // 10000000-11010100-00000001-01001101 +// CHECK-INST: fmop4a za5.d, z10.d, { z20.d, z21.d } +// CHECK-ENCODING: [0x4d,0x01,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4014d + +fmop4a za7.d, z14.d, {z30.d-z31.d} // 10000000-11011110-00000001-11001111 +// CHECK-INST: fmop4a za7.d, z14.d, { z30.d, z31.d } +// CHECK-ENCODING: [0xcf,0x01,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de01cf + +// Multiple and single vectors + +fmop4a za0.d, {z0.d-z1.d}, z16.d // 10000000-11000000-00000010-00001000 +// CHECK-INST: fmop4a za0.d, { z0.d, 
z1.d }, z16.d +// CHECK-ENCODING: [0x08,0x02,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00208 + +fmop4a za5.d, {z10.d-z11.d}, z20.d // 10000000-11000100-00000011-01001101 +// CHECK-INST: fmop4a za5.d, { z10.d, z11.d }, z20.d +// CHECK-ENCODING: [0x4d,0x03,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c4034d + +fmop4a za7.d, {z14.d-z15.d}, z30.d // 10000000-11001110-00000011-11001111 +// CHECK-INST: fmop4a za7.d, { z14.d, z15.d }, z30.d +// CHECK-ENCODING: [0xcf,0x03,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce03cf + +// Multiple vectors + +fmop4a za0.d, {z0.d-z1.d}, {z16.d-z17.d} // 10000000-11010000-00000010-00001000 +// CHECK-INST: fmop4a za0.d, { z0.d, z1.d }, { z16.d, z17.d } +// CHECK-ENCODING: [0x08,0x02,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00208 + +fmop4a za5.d, {z10.d-z11.d}, {z20.d-z21.d} // 10000000-11010100-00000011-01001101 +// CHECK-INST: fmop4a za5.d, { z10.d, z11.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x4d,0x03,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4034d + +fmop4a za7.d, {z14.d-z15.d}, {z30.d-z31.d} // 10000000-11011110-00000011-11001111 +// CHECK-INST: fmop4a za7.d, { z14.d, z15.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0xcf,0x03,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de03cf + + +// FMOP4S + +// Single vectors + +fmop4s za0.d, z0.d, z16.d // 10000000-11000000-00000000-00011000 +// CHECK-INST: fmop4s za0.d, z0.d, z16.d +// CHECK-ENCODING: [0x18,0x00,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00018 + +fmop4s za5.d, z10.d, z20.d // 10000000-11000100-00000001-01011101 +// CHECK-INST: fmop4s za5.d, z10.d, z20.d +// CHECK-ENCODING: [0x5d,0x01,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// 
CHECK-UNKNOWN: 80c4015d + +fmop4s za7.d, z14.d, z30.d // 10000000-11001110-00000001-11011111 +// CHECK-INST: fmop4s za7.d, z14.d, z30.d +// CHECK-ENCODING: [0xdf,0x01,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce01df + +// Single and multiple vectors + +fmop4s za0.d, z0.d, {z16.d-z17.d} // 10000000-11010000-00000000-00011000 +// CHECK-INST: fmop4s za0.d, z0.d, { z16.d, z17.d } +// CHECK-ENCODING: [0x18,0x00,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00018 + +fmop4s za5.d, z10.d, {z20.d-z21.d} // 10000000-11010100-00000001-01011101 +// CHECK-INST: fmop4s za5.d, z10.d, { z20.d, z21.d } +// CHECK-ENCODING: [0x5d,0x01,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4015d + +fmop4s za7.d, z14.d, {z30.d-z31.d} // 10000000-11011110-00000001-11011111 +// CHECK-INST: fmop4s za7.d, z14.d, { z30.d, z31.d } +// CHECK-ENCODING: [0xdf,0x01,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de01df + +// Multiple and single vectors + +fmop4s za0.d, {z0.d-z1.d}, z16.d // 10000000-11000000-00000010-00011000 +// CHECK-INST: fmop4s za0.d, { z0.d, z1.d }, z16.d +// CHECK-ENCODING: [0x18,0x02,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00218 + +fmop4s za5.d, {z10.d-z11.d}, z20.d // 10000000-11000100-00000011-01011101 +// CHECK-INST: fmop4s za5.d, { z10.d, z11.d }, z20.d +// CHECK-ENCODING: [0x5d,0x03,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c4035d + +fmop4s za7.d, {z14.d-z15.d}, z30.d // 10000000-11001110-00000011-11011111 +// CHECK-INST: fmop4s za7.d, { z14.d, z15.d }, z30.d +// CHECK-ENCODING: [0xdf,0x03,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce03df + +// Multiple vectors + +fmop4s za0.d, {z0.d-z1.d}, {z16.d-z17.d} // 10000000-11010000-00000010-00011000 +// CHECK-INST: 
fmop4s za0.d, { z0.d, z1.d }, { z16.d, z17.d } +// CHECK-ENCODING: [0x18,0x02,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00218 + +fmop4s za5.d, {z10.d-z11.d}, {z20.d-z21.d} // 10000000-11010100-00000011-01011101 +// CHECK-INST: fmop4s za5.d, { z10.d, z11.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x5d,0x03,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4035d + +fmop4s za7.d, {z14.d-z15.d}, {z30.d-z31.d} // 10000000-11011110-00000011-11011111 +// CHECK-INST: fmop4s za7.d, { z14.d, z15.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0xdf,0x03,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de03df From 705f3ebf1458c154fe63552ca984be6a16711661 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Wed, 30 Oct 2024 10:42:40 -0700 Subject: [PATCH 32/69] [rtsan][NFC] Add documentation link to Function Effects (#113979) --- clang/docs/RealtimeSanitizer.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/clang/docs/RealtimeSanitizer.rst b/clang/docs/RealtimeSanitizer.rst index 41b8bbb33baf1..e5f5abfcd9b47 100644 --- a/clang/docs/RealtimeSanitizer.rst +++ b/clang/docs/RealtimeSanitizer.rst @@ -11,11 +11,16 @@ RealtimeSanitizer (a.k.a. RTSan) is a real-time safety testing tool for C and C+ projects. RTSan can be used to detect real-time violations, i.e. calls to methods that are not safe for use in functions with deterministic run time requirements. RTSan considers any function marked with the ``[[clang::nonblocking]]`` attribute -to be a real-time function. If RTSan detects a call to ``malloc``, ``free``, -``pthread_mutex_lock``, or anything else that could have a non-deterministic -execution time in a function marked ``[[clang::nonblocking]]`` +to be a real-time function. 
At run-time, if RTSan detects a call to ``malloc``, +``free``, ``pthread_mutex_lock``, or anything else that could have a +non-deterministic execution time in a function marked ``[[clang::nonblocking]]`` RTSan raises an error. +RTSan performs its analysis at run-time but shares the ``[[clang::nonblocking]]`` +attribute with the :doc:`FunctionEffectAnalysis` system, which operates at +compile-time to detect potential real-time safety violations. For comprehensive +detection of real-time safety issues, it is recommended to use both systems together. + The runtime slowdown introduced by RealtimeSanitizer is negligible. How to build From 5545f76dc94e76ef6800823bdd1e107ad2264717 Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Wed, 30 Oct 2024 13:48:00 -0400 Subject: [PATCH 33/69] Pass the executable name as arg[0] when calling ExecuteAndWait() (#114067) PR https://github.com/llvm/llvm-project/pull/111976 was enabling the tests updated in the PR to run on all systems. We found a few didn't run on z/OS. I tracked the problem down to: 1. the ExecuteToolChainProgram() function wasn't passing the executable name as the first arg. That was causing exec on z/OS to fail. 2. the temp file needs to be a text file so codepage conversion happens. 
--- clang/lib/Driver/ToolChain.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 34de0043ca012..bdf3da0c96adc 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -109,7 +109,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, llvm::Expected> ToolChain::executeToolChainProgram(StringRef Executable) const { llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile); + llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile, + llvm::sys::fs::OF_Text); llvm::FileRemover OutputRemover(OutputFile.c_str()); std::optional Redirects[] = { {""}, @@ -128,7 +129,8 @@ ToolChain::executeToolChainProgram(StringRef Executable) const { *Str + "'"); SecondsToWait = std::max(SecondsToWait, 0); // infinite } - if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, + if (llvm::sys::ExecuteAndWait(Executable, {Executable}, {}, Redirects, + SecondsToWait, /*MemoryLimit=*/0, &ErrorMessage)) return llvm::createStringError(std::error_code(), Executable + ": " + ErrorMessage); From ca1154d1d41c75db6594428a8cdf263cf7041896 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Wed, 30 Oct 2024 11:07:15 -0700 Subject: [PATCH 34/69] AMDGPU: Disable pattern matching "x<<32-y>>32-y" to "bfe x, 0, y" (#114279) It is not correct to lower "x<<32-y>>32-y" to "bfe x, 0, y". When y equals to 32, the left-hand side is still x (unchanged), however, the right-hand side will be evaluated to 0. So it is not always correct to do such transformation. We may be able to keep the pattern for immediate y while y is within [0, 31]. However, the immediate operands of the sub (32 - y) are easily folded, and "(x << imm) >> imm" will be lowered to "and x, (2^(32-imm))-1" anyway. So no bfe matching is needed. 
--- llvm/lib/Target/AMDGPU/SIInstructions.td | 13 ------------- llvm/test/CodeGen/AMDGPU/bfe-patterns.ll | 16 ++++++++++++---- llvm/test/CodeGen/AMDGPU/extract-lowbits.ll | 20 +++++++++++++++----- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index faa0b6d6c3f50..c8a46217190a1 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3553,19 +3553,6 @@ def : AMDGPUPat < (V_BFE_U32_e64 $src, (i32 0), $width) >; -// x << (bitwidth - y) >> (bitwidth - y) -def : AMDGPUPat < - (DivergentBinFrag (shl_oneuse i32:$src, (sub 32, i32:$width)), - (sub 32, i32:$width)), - (V_BFE_U32_e64 $src, (i32 0), $width) ->; - -def : AMDGPUPat < - (DivergentBinFrag (shl_oneuse i32:$src, (sub 32, i32:$width)), - (sub 32, i32:$width)), - (V_BFE_I32_e64 $src, (i32 0), $width) ->; - // SHA-256 Ma patterns // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll index f54ea615ca664..c57a35aa1880d 100644 --- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -17,7 +17,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_bfe_u32 v2, v2, 0, v3 +; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 +; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 +; SI-NEXT: v_lshrrev_b32_e32 v2, v3, v2 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -36,7 +38,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_bfe_u32 v2, v3, 0, v4 +; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 +; VI-NEXT: 
v_lshlrev_b32_e32 v3, v2, v3 +; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -215,7 +219,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_bfe_i32 v2, v2, 0, v3 +; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 +; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 +; SI-NEXT: v_ashrrev_i32_e32 v2, v3, v2 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -234,7 +240,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_bfe_i32 v2, v3, 0, v4 +; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 +; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 +; VI-NEXT: v_ashrrev_i32_e32 v2, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll index 9677ec41ce268..3d9616f02d52d 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -150,11 +150,21 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ---------------------------------------------------------------------------- ; define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { -; GCN-LABEL: bzhi32_d0: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SI-LABEL: bzhi32_d0: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 +; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, v1, 
v0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: bzhi32_d0: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 +; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 +; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; VI-NEXT: s_setpc_b64 s[30:31] %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %val, %numhighbits %masked = lshr i32 %highbitscleared, %numhighbits From a518ed2d815c16010a6262edd0414a5f60a63a39 Mon Sep 17 00:00:00 2001 From: Dana Jansens Date: Wed, 30 Oct 2024 14:30:53 -0400 Subject: [PATCH 35/69] Respect the [[clang::unsafe_buffer_usage]] attribute for field and constructor initializers (#91991) CXXCtorInitializers are not statements , but they point to an initializer expression which is. When visiting a FunctionDecl, also walk through any constructor initializers and run the warning checks/matchers against their initializer expressions. This catches warnings for initializing fields and calling other constructors, such as: struct C { C(P* Ptr) : AnUnsafeCtor(Ptr) {} } Field initializers can be found by traversing CXXDefaultInitExprs. This catches warnings in places such as: struct C { P* Ptr; AnUnsafeCtor U{Ptr}; }; We add tests for explicit construction, for field initialization, base class constructor calls, delegated constructor calls, and aggregate initialization. Note that aggregate initialization is not fully covered where a field specifies an initializer and it's not overridden in the aggregate initialization, such as in: struct AggregateViaValueInit { UnsafeMembers f1; // FIXME: A construction of this class does initialize the field // through this initializer, so it should warn. Ideally it should // also point to where the site of the construction is in // testAggregateViaValueInit(). UnsafeMembers f2{3}; }; void testAggregateViaValueInit() { auto A = AggregateViaValueInit(); }; There are 3 tests for different types of aggregate initialization with FIXMEs documenting this future work. 
One attempt to fix this involved returning true from MatchDescendantVisitor::shouldVisitImplicitCode(), however, it breaks expectations for field in-class initializers by moving the SourceLocation, possibly to inside the implicit ctor instead of on the line where the field initialization happens. struct C { P* Ptr; AnUnsafeCtor U{Ptr}; // expected-warning{{this is never seen then}} }; Tests are also added for std::span(ptr, size) constructor being called from a field initializer and a constructor initializer. Issue #80482 --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 127 +++++++++++------- ...warn-unsafe-buffer-usage-function-attr.cpp | 122 +++++++++++++++++ ...ffer-usage-in-container-span-construct.cpp | 20 +++ 3 files changed, 224 insertions(+), 45 deletions(-) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index fad2f52e89ef1..2c68409b846bc 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -171,6 +171,12 @@ class MatchDescendantVisitor return VisitorBase::TraverseCXXTypeidExpr(Node); } + bool TraverseCXXDefaultInitExpr(CXXDefaultInitExpr *Node) { + if (!TraverseStmt(Node->getExpr())) + return false; + return VisitorBase::TraverseCXXDefaultInitExpr(Node); + } + bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { if (!Node) return true; @@ -1972,14 +1978,18 @@ class DerefSimplePtrArithFixableGadget : public FixableGadget { }; /// Scan the function and return a list of gadgets found with provided kits. 
-static std::tuple -findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, - bool EmitSuggestions) { +static void findGadgets(const Stmt *S, ASTContext &Ctx, + const UnsafeBufferUsageHandler &Handler, + bool EmitSuggestions, FixableGadgetList &FixableGadgets, + WarningGadgetList &WarningGadgets, + DeclUseTracker &Tracker) { struct GadgetFinderCallback : MatchFinder::MatchCallback { - FixableGadgetList FixableGadgets; - WarningGadgetList WarningGadgets; - DeclUseTracker Tracker; + GadgetFinderCallback(FixableGadgetList &FixableGadgets, + WarningGadgetList &WarningGadgets, + DeclUseTracker &Tracker) + : FixableGadgets(FixableGadgets), WarningGadgets(WarningGadgets), + Tracker(Tracker) {} void run(const MatchFinder::MatchResult &Result) override { // In debug mode, assert that we've found exactly one gadget. @@ -2020,10 +2030,14 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, assert(numFound >= 1 && "Gadgets not found in match result!"); assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); } + + FixableGadgetList &FixableGadgets; + WarningGadgetList &WarningGadgets; + DeclUseTracker &Tracker; }; MatchFinder M; - GadgetFinderCallback CB; + GadgetFinderCallback CB{FixableGadgets, WarningGadgets, Tracker}; // clang-format off M.addMatcher( @@ -2068,9 +2082,7 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, // clang-format on } - M.match(*D->getBody(), D->getASTContext()); - return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), - std::move(CB.Tracker)}; + M.match(*S, Ctx); } // Compares AST nodes by source locations. 
@@ -3614,39 +3626,9 @@ class VariableGroupsManagerImpl : public VariableGroupsManager { } }; -void clang::checkUnsafeBufferUsage(const Decl *D, - UnsafeBufferUsageHandler &Handler, - bool EmitSuggestions) { -#ifndef NDEBUG - Handler.clearDebugNotes(); -#endif - - assert(D && D->getBody()); - // We do not want to visit a Lambda expression defined inside a method - // independently. Instead, it should be visited along with the outer method. - // FIXME: do we want to do the same thing for `BlockDecl`s? - if (const auto *fd = dyn_cast(D)) { - if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass()) - return; - } - - // Do not emit fixit suggestions for functions declared in an - // extern "C" block. - if (const auto *FD = dyn_cast(D)) { - for (FunctionDecl *FReDecl : FD->redecls()) { - if (FReDecl->isExternC()) { - EmitSuggestions = false; - break; - } - } - } - - WarningGadgetSets UnsafeOps; - FixableGadgetSets FixablesForAllVars; - - auto [FixableGadgets, WarningGadgets, Tracker] = - findGadgets(D, Handler, EmitSuggestions); - +void applyGadgets(const Decl *D, FixableGadgetList FixableGadgets, + WarningGadgetList WarningGadgets, DeclUseTracker Tracker, + UnsafeBufferUsageHandler &Handler, bool EmitSuggestions) { if (!EmitSuggestions) { // Our job is very easy without suggestions. Just warn about // every problematic operation and consider it done. 
No need to deal @@ -3690,8 +3672,10 @@ void clang::checkUnsafeBufferUsage(const Decl *D, if (WarningGadgets.empty()) return; - UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); - FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets)); + WarningGadgetSets UnsafeOps = + groupWarningGadgetsByVar(std::move(WarningGadgets)); + FixableGadgetSets FixablesForAllVars = + groupFixablesByVar(std::move(FixableGadgets)); std::map FixItsForVariableGroup; @@ -3912,3 +3896,56 @@ void clang::checkUnsafeBufferUsage(const Decl *D, } } } + +void clang::checkUnsafeBufferUsage(const Decl *D, + UnsafeBufferUsageHandler &Handler, + bool EmitSuggestions) { +#ifndef NDEBUG + Handler.clearDebugNotes(); +#endif + + assert(D); + + SmallVector Stmts; + + if (const auto *FD = dyn_cast(D)) { + // We do not want to visit a Lambda expression defined inside a method + // independently. Instead, it should be visited along with the outer method. + // FIXME: do we want to do the same thing for `BlockDecl`s? + if (const auto *MD = dyn_cast(D)) { + if (MD->getParent()->isLambda() && MD->getParent()->isLocalClass()) + return; + } + + for (FunctionDecl *FReDecl : FD->redecls()) { + if (FReDecl->isExternC()) { + // Do not emit fixit suggestions for functions declared in an + // extern "C" block. 
+ EmitSuggestions = false; + break; + } + } + + Stmts.push_back(FD->getBody()); + + if (const auto *ID = dyn_cast(D)) { + for (const CXXCtorInitializer *CI : ID->inits()) { + Stmts.push_back(CI->getInit()); + } + } + } else if (isa(D) || isa(D)) { + Stmts.push_back(D->getBody()); + } + + assert(!Stmts.empty()); + + FixableGadgetList FixableGadgets; + WarningGadgetList WarningGadgets; + DeclUseTracker Tracker; + for (Stmt *S : Stmts) { + findGadgets(S, D->getASTContext(), Handler, EmitSuggestions, FixableGadgets, + WarningGadgets, Tracker); + } + applyGadgets(D, std::move(FixableGadgets), std::move(WarningGadgets), + std::move(Tracker), Handler, EmitSuggestions); +} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp index bfc34b55c1f66..724d444638b57 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp @@ -111,6 +111,37 @@ int testFoldExpression(Vs&&... v) { return (... 
+ v); // expected-warning{{function introduces unsafe buffer manipulation}} } +struct HoldsUnsafeMembers { + HoldsUnsafeMembers() + : FromCtor(3), // expected-warning{{function introduces unsafe buffer manipulation}} + FromCtor2{3} // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + [[clang::unsafe_buffer_usage]] + HoldsUnsafeMembers(int i) + : FromCtor(i), // expected-warning{{function introduces unsafe buffer manipulation}} + FromCtor2{i} // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + HoldsUnsafeMembers(float f) + : HoldsUnsafeMembers(0) {} // expected-warning{{function introduces unsafe buffer manipulation}} + + UnsafeMembers FromCtor; + UnsafeMembers FromCtor2; + UnsafeMembers FromField{3}; // expected-warning 2{{function introduces unsafe buffer manipulation}} +}; + +struct SubclassUnsafeMembers : public UnsafeMembers { + SubclassUnsafeMembers() + : UnsafeMembers(3) // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + [[clang::unsafe_buffer_usage]] + SubclassUnsafeMembers(int i) + : UnsafeMembers(i) // expected-warning{{function introduces unsafe buffer manipulation}} + {} +}; + // https://github.com/llvm/llvm-project/issues/80482 void testClassMembers() { UnsafeMembers(3); // expected-warning{{function introduces unsafe buffer manipulation}} @@ -122,4 +153,95 @@ void testClassMembers() { UnsafeMembers()(); // expected-warning{{function introduces unsafe buffer manipulation}} testFoldExpression(UnsafeMembers(), UnsafeMembers()); + + HoldsUnsafeMembers(); + HoldsUnsafeMembers(3); // expected-warning{{function introduces unsafe buffer manipulation}} + + SubclassUnsafeMembers(); + SubclassUnsafeMembers(3); // expected-warning{{function introduces unsafe buffer manipulation}} +} + +// Not an aggregate, so its constructor is not implicit code and will be +// visited/checked for warnings. 
+struct NotCalledHoldsUnsafeMembers { + NotCalledHoldsUnsafeMembers() + : FromCtor(3), // expected-warning{{function introduces unsafe buffer manipulation}} + FromCtor2{3} // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + UnsafeMembers FromCtor; + UnsafeMembers FromCtor2; + UnsafeMembers FromField{3}; // expected-warning{{function introduces unsafe buffer manipulation}} +}; + +// An aggregate, so its constructor is implicit code. Since it's not called, it +// is never generated. +struct AggregateUnused { + UnsafeMembers f1; + // While this field would trigger the warning during initialization, since + // it's unused, there's no code generated that does the initialization, so + // no warning. + UnsafeMembers f2{3}; +}; + +struct AggregateExplicitlyInitializedSafe { + UnsafeMembers f1; + // The warning is not fired as the field is always explicltly initialized + // elsewhere. This initializer is never used. + UnsafeMembers f2{3}; +}; + +void testAggregateExplicitlyInitializedSafe() { + AggregateExplicitlyInitializedSafe A{ + .f2 = UnsafeMembers(), // A safe constructor. + }; } + +struct AggregateExplicitlyInitializedUnsafe { + UnsafeMembers f1; + // The warning is not fired as the field is always explicltly initialized + // elsewhere. This initializer is never used. + UnsafeMembers f2{3}; +}; + +void testAggregateExplicitlyInitializedUnsafe() { + AggregateExplicitlyInitializedUnsafe A{ + .f2 = UnsafeMembers(3), // expected-warning{{function introduces unsafe buffer manipulation}} + }; +} + +struct AggregateViaAggregateInit { + UnsafeMembers f1; + // FIXME: A construction of this class does initialize the field through + // this initializer, so it should warn. Ideally it should also point to + // where the site of the construction is in testAggregateViaAggregateInit(). 
+ UnsafeMembers f2{3}; +}; + +void testAggregateViaAggregateInit() { + AggregateViaAggregateInit A{}; +}; + +struct AggregateViaValueInit { + UnsafeMembers f1; + // FIXME: A construction of this class does initialize the field through + // this initializer, so it should warn. Ideally it should also point to + // where the site of the construction is in testAggregateViaValueInit(). + UnsafeMembers f2{3}; +}; + +void testAggregateViaValueInit() { + auto A = AggregateViaValueInit(); +}; + +struct AggregateViaDefaultInit { + UnsafeMembers f1; + // FIXME: A construction of this class does initialize the field through + // this initializer, so it should warn. Ideally it should also point to + // where the site of the construction is in testAggregateViaValueInit(). + UnsafeMembers f2{3}; +}; + +void testAggregateViaDefaultInit() { + AggregateViaDefaultInit A; +}; diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp index e97511593bbd8..c138fe088b3ba 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp @@ -157,3 +157,23 @@ namespace test_flag { } } //namespace test_flag + +struct HoldsStdSpanAndInitializedInCtor { + char* Ptr; + unsigned Size; + std::span Span{Ptr, Size}; // no-warning (this code is unreachable) + + HoldsStdSpanAndInitializedInCtor(char* P, unsigned S) + : Span(P, S) // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} + {} +}; + +struct HoldsStdSpanAndNotInitializedInCtor { + char* Ptr; + unsigned Size; + std::span Span{Ptr, Size}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} + + HoldsStdSpanAndNotInitializedInCtor(char* P, 
unsigned S) + : Ptr(P), Size(S) + {} +}; From 2bc5302706e710d125752c215392043fd5bf80fa Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Wed, 30 Oct 2024 11:36:10 -0700 Subject: [PATCH 36/69] Revert "[lldb] Use Py_InitializeFromConfig with Python >= 3.8 (NFC)" (#114290) Reverts llvm/llvm-project#114112 because this triggers a compile error: ``` no known conversion from 'str_type' (aka 'wchar_t *') to 'const char *' for 3rd argument 221 | PyAPI_FUNC(PyStatus) PyConfig_SetBytesString( | ^ 222 | PyConfig *config, 223 | wchar_t **config_str, 224 | const char *str); | ~~~~~~~~~~~~~~~ 1 error generated. ``` --- .../Python/ScriptInterpreterPython.cpp | 68 ++++++++----------- 1 file changed, 28 insertions(+), 40 deletions(-) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index 6158083a98280..7cc38da6a6a94 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -92,38 +92,7 @@ namespace { struct InitializePythonRAII { public: InitializePythonRAII() { -#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) || (PY_MAJOR_VERSION > 3) - PyConfig config; - PyConfig_InitPythonConfig(&config); -#endif - -#if LLDB_EMBED_PYTHON_HOME - typedef wchar_t *str_type; - static str_type g_python_home = []() -> str_type { - const char *lldb_python_home = LLDB_PYTHON_HOME; - const char *absolute_python_home = nullptr; - llvm::SmallString<64> path; - if (llvm::sys::path::is_absolute(lldb_python_home)) { - absolute_python_home = lldb_python_home; - } else { - FileSpec spec = HostInfo::GetShlibDir(); - if (!spec) - return nullptr; - spec.GetPath(path); - llvm::sys::path::append(path, lldb_python_home); - absolute_python_home = path.c_str(); - } - size_t size = 0; - return Py_DecodeLocale(absolute_python_home, &size); - }(); - if (g_python_home != nullptr) { -#if 
(PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) || (PY_MAJOR_VERSION > 3) - PyConfig_SetBytesString(&config, &config.home, g_python_home); -#else - Py_SetPythonHome(g_python_home); -#endif - } -#endif + InitializePythonHome(); // The table of built-in modules can only be extended before Python is // initialized. @@ -148,22 +117,15 @@ struct InitializePythonRAII { PyImport_AppendInittab("_lldb", LLDBSwigPyInit); } -#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) || (PY_MAJOR_VERSION > 3) - config.install_signal_handlers = 0; - Py_InitializeFromConfig(&config); - PyConfig_Clear(&config); - InitializeThreadsPrivate(); -#else // Python < 3.2 and Python >= 3.2 reversed the ordering requirements for // calling `Py_Initialize` and `PyEval_InitThreads`. < 3.2 requires that you // call `PyEval_InitThreads` first, and >= 3.2 requires that you call it last. -#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 2) +#if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 2) || (PY_MAJOR_VERSION > 3) Py_InitializeEx(0); InitializeThreadsPrivate(); #else InitializeThreadsPrivate(); Py_InitializeEx(0); -#endif #endif } @@ -180,6 +142,32 @@ struct InitializePythonRAII { } private: + void InitializePythonHome() { +#if LLDB_EMBED_PYTHON_HOME + typedef wchar_t *str_type; + static str_type g_python_home = []() -> str_type { + const char *lldb_python_home = LLDB_PYTHON_HOME; + const char *absolute_python_home = nullptr; + llvm::SmallString<64> path; + if (llvm::sys::path::is_absolute(lldb_python_home)) { + absolute_python_home = lldb_python_home; + } else { + FileSpec spec = HostInfo::GetShlibDir(); + if (!spec) + return nullptr; + spec.GetPath(path); + llvm::sys::path::append(path, lldb_python_home); + absolute_python_home = path.c_str(); + } + size_t size = 0; + return Py_DecodeLocale(absolute_python_home, &size); + }(); + if (g_python_home != nullptr) { + Py_SetPythonHome(g_python_home); + } +#endif + } + void InitializeThreadsPrivate() { // Since Python 3.7 `Py_Initialize` calls 
`PyEval_InitThreads` inside itself, // so there is no way to determine whether the embedded interpreter From 5bd1af5abcb7b9f92741dd7209e84b5607f7e88a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 30 Oct 2024 18:39:49 +0000 Subject: [PATCH 37/69] [LV] Directly store VPlan in InnerLoopVectorizer (NFC). The current VPlan is already passed to multiple functions and more in the future. Store it once directly in InnerLoopVectorizer. --- .../Transforms/Vectorize/LoopVectorize.cpp | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 150fc4a42b484..3d638e52328b5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -467,11 +467,12 @@ class InnerLoopVectorizer { ElementCount MinProfitableTripCount, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks) + ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks, + VPlan &Plan) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI), - PSI(PSI), RTChecks(RTChecks) { + PSI(PSI), RTChecks(RTChecks), Plan(Plan) { // Query this against the original loop and save it here because the profile // of the original loop header may change as the transformation happens. OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize( @@ -498,7 +499,7 @@ class InnerLoopVectorizer { createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. - void fixVectorizedLoop(VPTransformState &State, VPlan &Plan); + void fixVectorizedLoop(VPTransformState &State); // Return true if any runtime check is added. 
bool areSafetyChecksAdded() { return AddedSafetyChecks; } @@ -513,7 +514,7 @@ class InnerLoopVectorizer { VPTransformState &State); /// Fix the non-induction PHIs in \p Plan. - void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State); + void fixNonInductionPHIs(VPTransformState &State); /// Create a new phi node for the induction variable \p OrigPhi to resume /// iteration count in the scalar epilogue, from where the vectorized loop @@ -541,8 +542,7 @@ class InnerLoopVectorizer { /// Set up the values of the IVs correctly when exiting the vector loop. virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, VPlan &Plan, - VPTransformState &State); + BasicBlock *MiddleBlock, VPTransformState &State); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. @@ -674,6 +674,8 @@ class InnerLoopVectorizer { /// Structure to hold information about generated runtime checks, responsible /// for cleaning the checks, if vectorization turns out unprofitable. GeneratedRTChecks &RTChecks; + + VPlan &Plan; }; /// Encapsulate information regarding vectorization of a loop and its epilogue. 
@@ -715,10 +717,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer { OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &Checks) + GeneratedRTChecks &Checks, VPlan &Plan) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL, - CM, BFI, PSI, Checks), + CM, BFI, PSI, Checks, Plan), EPI(EPI) {} // Override this function to handle the more complex control flow around the @@ -755,9 +757,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &Check) + GeneratedRTChecks &Check, VPlan &Plan) : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, LVL, CM, BFI, PSI, Check) {} + EPI, LVL, CM, BFI, PSI, Check, Plan) {} /// Implements the interface for creating a vectorized skeleton using the /// *main loop* strategy (ie the first pass of vplan execution). 
std::pair @@ -773,7 +775,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, VPlan &Plan, + BasicBlock *MiddleBlock, VPTransformState &State) override {}; }; @@ -789,9 +791,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer { OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &Checks) + GeneratedRTChecks &Checks, VPlan &Plan) : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, LVL, CM, BFI, PSI, Checks) { + EPI, LVL, CM, BFI, PSI, Checks, Plan) { TripCount = EPI.TripCount; } /// Implements the interface for creating a vectorized skeleton using the @@ -2751,7 +2753,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, VPlan &Plan, + BasicBlock *MiddleBlock, VPTransformState &State) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate @@ -2931,11 +2933,10 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, TargetTransformInfo::TCK_RecipThroughput); } -void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, - VPlan &Plan) { +void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Fix widened non-induction PHIs by setting up the PHI operands. if (EnableVPlanNativePath) - fixNonInductionPHIs(Plan, State); + fixNonInductionPHIs(State); // Forget the original basic block. 
PSE.getSE()->forgetLoop(OrigLoop); @@ -2966,7 +2967,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (const auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(nullptr), - IVEndValues[Entry.first], LoopMiddleBlock, Plan, State); + IVEndValues[Entry.first], LoopMiddleBlock, State); } // Fix live-out phis not already fixed earlier. @@ -3077,8 +3078,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { } while (Changed); } -void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan, - VPTransformState &State) { +void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { auto Iter = vp_depth_first_deep(Plan.getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (VPRecipeBase &P : VPBB->phis()) { @@ -7744,7 +7744,7 @@ DenseMap LoopVectorizationPlanner::executePlan( // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. - ILV.fixVectorizedLoop(State, BestVPlan); + ILV.fixVectorizedLoop(State); ILV.printDebugTracesAtEnd(); @@ -9727,7 +9727,7 @@ static bool processLoopInVPlanNativePath( GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), AddBranchWeights); InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, - VF.Width, 1, LVL, &CM, BFI, PSI, Checks); + VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false); @@ -10215,11 +10215,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. 
+ VPlan &BestPlan = LVP.getPlanFor(VF.Width); InnerLoopVectorizer Unroller( L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1), - ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks); + ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan); - VPlan &BestPlan = LVP.getPlanFor(VF.Width); LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false); ORE->emit([&]() { @@ -10236,15 +10236,16 @@ bool LoopVectorizePass::processLoop(Loop *L) { VectorizationFactor EpilogueVF = LVP.selectEpilogueVectorizationFactor(VF.Width, IC); if (EpilogueVF.Width.isVector()) { + std::unique_ptr BestMainPlan(BestPlan.duplicate()); // The first pass vectorizes the main loop and creates a scalar epilogue // to be vectorized by executing the plan (potentially with a different // factor) again shortly afterwards. EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1); EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, &LVL, &CM, BFI, PSI, Checks); + EPI, &LVL, &CM, BFI, PSI, Checks, + *BestMainPlan); - std::unique_ptr BestMainPlan(BestPlan.duplicate()); auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; @@ -10253,11 +10254,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { // edges from the first pass. 
EPI.MainLoopVF = EPI.EpilogueVF; EPI.MainLoopUF = EPI.EpilogueUF; + VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF); EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, - Checks); + Checks, BestEpiPlan); - VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF); VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion(); VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); @@ -10340,7 +10341,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, - PSI, Checks); + PSI, Checks, BestPlan); LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; From c3724ba8667c695f29d5af93f2b0d1b23c1b41e7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Oct 2024 11:46:15 -0700 Subject: [PATCH 38/69] [RISCV] Add OperandType for vector rounding mode operands. (#114179) Use TSFlags to distinguish which type of rounding mode it is. We use the same tablegen base classes for vxrm and frm sometimes so it's hard to have different types for different instructions. --- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 4 +++- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 7 ++++++ .../Target/RISCV/RISCVInstrInfoVPseudos.td | 24 +++++++++++-------- llvm/test/CodeGen/RISCV/rvv/frm-insert.ll | 6 ++--- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index b3a6cd40ea039..19103e219cb80 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -341,7 +341,9 @@ enum OperandType : unsigned { OPERAND_VEC_POLICY, // Vector SEW operand. OPERAND_SEW, - OPERAND_LAST_RISCV_IMM = OPERAND_SEW, + // Vector rounding mode for VXRM or FRM. 
+ OPERAND_VEC_RM, + OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM, // Operand is either a register or uimm5, this is used by V extension pseudo // instructions to represent a value that be passed as AVL to either vsetvli // or vsetivli. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index d5b086861d71e..688da1ee1b33f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2551,6 +2551,13 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_SEW: Ok = Imm == 0 || (Imm >= 3 && Imm <= 6); break; + case RISCVOp::OPERAND_VEC_RM: + assert(RISCVII::hasRoundModeOp(Desc.TSFlags)); + if (RISCVII::usesVXRM(Desc.TSFlags)) + Ok = isUInt<2>(Imm); + else + Ok = RISCVFPRndMode::isValidRoundingMode(Imm); + break; } if (!Ok) { ErrInfo = "Invalid immediate"; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 5554fda760ebb..399a2386d493e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -92,6 +92,10 @@ def sew : RISCVOp { let OperandType = "OPERAND_SEW"; } +def vec_rm : RISCVOp { + let OperandType = "OPERAND_VEC_RM"; +} + // X0 has special meaning for vsetvl/vsetvli. 
// rd | rs1 | AVL value | Effect on vl //-------------------------------------------------------------- @@ -1057,7 +1061,7 @@ class VPseudoUnaryNoMaskRoundingMode TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$rm, + (ins RetClass:$passthru, OpClass:$rs2, vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1097,7 +1101,7 @@ class VPseudoUnaryMaskRoundingMode TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, ixlenimm:$rm, + VMaskOp:$vm, vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1135,7 +1139,7 @@ class VPseudoUnaryNoMask_FRM TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$frm, + (ins RetClass:$passthru, OpClass:$rs2, vec_rm:$frm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1155,7 +1159,7 @@ class VPseudoUnaryMask_FRM TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, ixlenimm:$frm, + VMaskOp:$vm, vec_rm:$frm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1250,7 +1254,7 @@ class VPseudoBinaryNoMaskRoundingMode TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, + (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1273,7 +1277,7 @@ class VPseudoBinaryMaskPolicyRoundingMode.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, ixlenimm:$rm, AVL:$vl, + VMaskOp:$vm, vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1317,7 +1321,7 @@ class VPseudoTiedBinaryNoMaskRoundingMode TargetConstraintType = 1> : Pseudo<(outs 
RetClass:$rd), (ins RetClass:$rs2, Op2Class:$rs1, - ixlenimm:$rm, + vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { @@ -1408,7 +1412,7 @@ class VPseudoTernaryMaskPolicyRoundingMode.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, VMaskOp:$vm, - ixlenimm:$rm, + vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1475,7 +1479,7 @@ class VPseudoTiedBinaryMaskRoundingMode.R:$passthru, Op2Class:$rs1, VMaskOp:$vm, - ixlenimm:$rm, + vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; @@ -1578,7 +1582,7 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - ixlenimm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, + vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll index ccfe94ecad286..54f56eadf0034 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll @@ -559,7 +559,7 @@ define @after_fsrm3( %0, @after_fsrm3( %0, undef, %0, %1, - i64 5, i64 %2) + i64 3, i64 %2) ret %a } From 71b6f6b8a1cd9a63b9d382fe15f40bbb427939b9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Oct 2024 11:47:40 -0700 Subject: [PATCH 39/69] [RISCV] Add missing hasPostISelHook = 1 to vector pseudos that might read FRM. (#114186) We need an implicit FRM read operand anytime the rounding mode is dynamic. The post isel hook is responsible for this when isel creates an instruction with dynamic rounding mode. Add a MachineVerifier check to verify the operand is present. 
--- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 7 +++++++ llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 17 ++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 688da1ee1b33f..04bb964bfc48c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2630,6 +2630,13 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (int Idx = RISCVII::getFRMOpNum(Desc); + Idx >= 0 && MI.getOperand(Idx).getImm() == RISCVFPRndMode::DYN && + !MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) { + ErrInfo = "dynamic rounding mode should read FRM"; + return false; + } + return true; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 399a2386d493e..d5b0fa340684b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -6483,7 +6483,7 @@ defm PseudoVFRDIV : VPseudoVFRDIV_VF_RM; //===----------------------------------------------------------------------===// // 13.5. Vector Widening Floating-Point Multiply //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, hasSideEffects = 0 in { +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFWMUL : VPseudoVWMUL_VV_VF_RM; } @@ -6516,7 +6516,7 @@ defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM; //===----------------------------------------------------------------------===// // 13.8. 
Vector Floating-Point Square-Root Instruction //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, hasSideEffects = 0 in +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in defm PseudoVFSQRT : VPseudoVSQR_V_RM; //===----------------------------------------------------------------------===// @@ -6528,7 +6528,7 @@ defm PseudoVFRSQRT7 : VPseudoVRCP_V; //===----------------------------------------------------------------------===// // 13.10. Vector Floating-Point Reciprocal Estimate Instruction //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, hasSideEffects = 0 in +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in defm PseudoVFREC7 : VPseudoVRCP_V_RM; //===----------------------------------------------------------------------===// @@ -6640,9 +6640,10 @@ defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W_RM; defm PseudoVFNCVT_RM_F_XU : VPseudoVNCVTF_RM_W; defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W; -let hasSideEffects = 0, hasPostISelHook = 1 in +let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM; +} defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true @@ -6678,8 +6679,7 @@ let Predicates = [HasVInstructionsAnyF] in { //===----------------------------------------------------------------------===// // 14.3. 
Vector Single-Width Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, - hasSideEffects = 0 in { +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFREDOSUM : VPseudoVFREDO_VS_RM; defm PseudoVFREDUSUM : VPseudoVFRED_VS_RM; } @@ -6691,9 +6691,8 @@ defm PseudoVFREDMAX : VPseudoVFREDMINMAX_VS; //===----------------------------------------------------------------------===// // 14.4. Vector Widening Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// -let IsRVVWideningReduction = 1, - hasSideEffects = 0, - mayRaiseFPException = true in { +let IsRVVWideningReduction = 1, hasSideEffects = 0, mayRaiseFPException = true, + hasPostISelHook = 1 in { defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM; defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM; } From 90786adade22784a52856a0e8b545ec6710b47f6 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Wed, 30 Oct 2024 12:50:40 -0600 Subject: [PATCH 40/69] [Clang][Sema] Always use latest redeclaration of primary template (#114258) This patch fixes a couple of regressions introduced in #111852. Consider: ``` template struct A { template static constexpr bool f() requires U { return true; } }; template<> template constexpr bool A::f() requires U { return A::f(); } template<> template constexpr bool A::f() requires U { return true; } static_assert(A::f()); // crash here ``` This crashes because when collecting template arguments from the _first_ declaration of `A::f` for constraint checking, we don't add the template arguments from the enclosing class template specialization because there exists another redeclaration that is a member specialization. 
This also fixes the following example, which happens for a similar reason: ``` // input.cppm export module input; export template constexpr int f(); template struct A { template friend constexpr int f(); }; template struct A<0>; template constexpr int f() { return N; } ``` ``` // input.cpp import input; static_assert(f<1>() == 1); // error: static assertion failed ``` --- clang/include/clang/AST/DeclTemplate.h | 52 ++--------- clang/lib/AST/Decl.cpp | 10 +-- clang/lib/AST/DeclCXX.cpp | 4 +- clang/lib/AST/DeclTemplate.cpp | 56 +++++++++++- clang/lib/Sema/SemaDecl.cpp | 4 +- clang/lib/Sema/SemaInit.cpp | 2 +- clang/lib/Sema/SemaTemplateInstantiate.cpp | 14 +-- clang/test/AST/ast-dump-decl.cpp | 2 +- .../CXX/temp/temp.spec/temp.expl.spec/p7.cpp | 87 +++++++++++++++++++ 9 files changed, 165 insertions(+), 66 deletions(-) diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index a572e3380f165..0ca3fd48e81cf 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -857,16 +857,6 @@ class RedeclarableTemplateDecl : public TemplateDecl, /// \endcode bool isMemberSpecialization() const { return Common.getInt(); } - /// Determines whether any redeclaration of this template was - /// a specialization of a member template. - bool hasMemberSpecialization() const { - for (const auto *D : redecls()) { - if (D->isMemberSpecialization()) - return true; - } - return false; - } - /// Note that this member template is a specialization. void setMemberSpecialization() { assert(!isMemberSpecialization() && "already a member specialization"); @@ -1965,13 +1955,7 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, /// specialization which was specialized by this. 
llvm::PointerUnion - getSpecializedTemplateOrPartial() const { - if (const auto *PartialSpec = - SpecializedTemplate.dyn_cast()) - return PartialSpec->PartialSpecialization; - - return SpecializedTemplate.get(); - } + getSpecializedTemplateOrPartial() const; /// Retrieve the set of template arguments that should be used /// to instantiate members of the class template or class template partial @@ -2208,17 +2192,6 @@ class ClassTemplatePartialSpecializationDecl return InstantiatedFromMember.getInt(); } - /// Determines whether any redeclaration of this this class template partial - /// specialization was a specialization of a member partial specialization. - bool hasMemberSpecialization() const { - for (const auto *D : redecls()) { - if (cast(D) - ->isMemberSpecialization()) - return true; - } - return false; - } - /// Note that this member template is a specialization. void setMemberSpecialization() { return InstantiatedFromMember.setInt(true); } @@ -2740,13 +2713,7 @@ class VarTemplateSpecializationDecl : public VarDecl, /// Retrieve the variable template or variable template partial /// specialization which was specialized by this. llvm::PointerUnion - getSpecializedTemplateOrPartial() const { - if (const auto *PartialSpec = - SpecializedTemplate.dyn_cast()) - return PartialSpec->PartialSpecialization; - - return SpecializedTemplate.get(); - } + getSpecializedTemplateOrPartial() const; /// Retrieve the set of template arguments that should be used /// to instantiate the initializer of the variable template or variable @@ -2980,18 +2947,6 @@ class VarTemplatePartialSpecializationDecl return InstantiatedFromMember.getInt(); } - /// Determines whether any redeclaration of this this variable template - /// partial specialization was a specialization of a member partial - /// specialization. 
- bool hasMemberSpecialization() const { - for (const auto *D : redecls()) { - if (cast(D) - ->isMemberSpecialization()) - return true; - } - return false; - } - /// Note that this member template is a specialization. void setMemberSpecialization() { return InstantiatedFromMember.setInt(true); } @@ -3164,6 +3119,9 @@ class VarTemplateDecl : public RedeclarableTemplateDecl { return makeSpecIterator(getSpecializations(), true); } + /// Merge \p Prev with our RedeclarableTemplateDecl::Common. + void mergePrevDecl(VarTemplateDecl *Prev); + // Implement isa/cast/dyncast support static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == VarTemplate; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 86913763ef9ff..cd173d1726379 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2708,7 +2708,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { if (isTemplateInstantiation(VDTemplSpec->getTemplateSpecializationKind())) { auto From = VDTemplSpec->getInstantiatedFrom(); if (auto *VTD = From.dyn_cast()) { - while (!VTD->hasMemberSpecialization()) { + while (!VTD->isMemberSpecialization()) { if (auto *NewVTD = VTD->getInstantiatedFromMemberTemplate()) VTD = NewVTD; else @@ -2718,7 +2718,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { } if (auto *VTPSD = From.dyn_cast()) { - while (!VTPSD->hasMemberSpecialization()) { + while (!VTPSD->isMemberSpecialization()) { if (auto *NewVTPSD = VTPSD->getInstantiatedFromMember()) VTPSD = NewVTPSD; else @@ -2732,7 +2732,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { // If this is the pattern of a variable template, find where it was // instantiated from. FIXME: Is this necessary? 
if (VarTemplateDecl *VTD = VD->getDescribedVarTemplate()) { - while (!VTD->hasMemberSpecialization()) { + while (!VTD->isMemberSpecialization()) { if (auto *NewVTD = VTD->getInstantiatedFromMemberTemplate()) VTD = NewVTD; else @@ -4153,7 +4153,7 @@ FunctionDecl::getTemplateInstantiationPattern(bool ForDefinition) const { if (FunctionTemplateDecl *Primary = getPrimaryTemplate()) { // If we hit a point where the user provided a specialization of this // template, we're done looking. - while (!ForDefinition || !Primary->hasMemberSpecialization()) { + while (!ForDefinition || !Primary->isMemberSpecialization()) { if (auto *NewPrimary = Primary->getInstantiatedFromMemberTemplate()) Primary = NewPrimary; else @@ -4170,7 +4170,7 @@ FunctionTemplateDecl *FunctionDecl::getPrimaryTemplate() const { if (FunctionTemplateSpecializationInfo *Info = TemplateOrSpecialization .dyn_cast()) { - return Info->getTemplate(); + return Info->getTemplate()->getMostRecentDecl(); } return nullptr; } diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index db0ea62a2323e..1c92fd9e3ff06 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2030,7 +2030,7 @@ const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { if (auto *TD = dyn_cast(this)) { auto From = TD->getInstantiatedFrom(); if (auto *CTD = From.dyn_cast()) { - while (!CTD->hasMemberSpecialization()) { + while (!CTD->isMemberSpecialization()) { if (auto *NewCTD = CTD->getInstantiatedFromMemberTemplate()) CTD = NewCTD; else @@ -2040,7 +2040,7 @@ const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { } if (auto *CTPSD = From.dyn_cast()) { - while (!CTPSD->hasMemberSpecialization()) { + while (!CTPSD->isMemberSpecialization()) { if (auto *NewCTPSD = CTPSD->getInstantiatedFromMemberTemplate()) CTPSD = NewCTPSD; else diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index 755ec72f00bf7..1db02d0d04448 100644 --- 
a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -993,7 +993,17 @@ ClassTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return SpecializedTemplate.get()->getMostRecentDecl(); +} + +llvm::PointerUnion +ClassTemplateSpecializationDecl::getSpecializedTemplateOrPartial() const { + if (const auto *PartialSpec = + SpecializedTemplate.dyn_cast()) + return PartialSpec->PartialSpecialization->getMostRecentDecl(); + + return SpecializedTemplate.get()->getMostRecentDecl(); } SourceRange @@ -1283,6 +1293,39 @@ VarTemplateDecl::newCommon(ASTContext &C) const { return CommonPtr; } +void VarTemplateDecl::mergePrevDecl(VarTemplateDecl *Prev) { + // If we haven't created a common pointer yet, then it can just be created + // with the usual method. + if (!getCommonPtrInternal()) + return; + + Common *ThisCommon = static_cast(getCommonPtrInternal()); + Common *PrevCommon = nullptr; + SmallVector PreviousDecls; + for (; Prev; Prev = Prev->getPreviousDecl()) { + if (CommonBase *C = Prev->getCommonPtrInternal()) { + PrevCommon = static_cast(C); + break; + } + PreviousDecls.push_back(Prev); + } + + // If the previous redecl chain hasn't created a common pointer yet, then just + // use this common pointer. + if (!PrevCommon) { + for (auto *D : PreviousDecls) + D->setCommonPtr(ThisCommon); + return; + } + + // Ensure we don't leak any important state. 
+ assert(ThisCommon->Specializations.empty() && + ThisCommon->PartialSpecializations.empty() && + "Can't merge incompatible declarations!"); + + setCommonPtr(PrevCommon); +} + VarTemplateSpecializationDecl * VarTemplateDecl::findSpecialization(ArrayRef Args, void *&InsertPos) { @@ -1405,7 +1448,16 @@ VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get(); + return SpecializedTemplate.get()->getMostRecentDecl(); +} + +llvm::PointerUnion +VarTemplateSpecializationDecl::getSpecializedTemplateOrPartial() const { + if (const auto *PartialSpec = + SpecializedTemplate.dyn_cast()) + return PartialSpec->PartialSpecialization->getMostRecentDecl(); + + return SpecializedTemplate.get()->getMostRecentDecl(); } SourceRange VarTemplateSpecializationDecl::getSourceRange() const { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f8e5f3c6d309d..3e8b76e8dfd62 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -4696,8 +4696,10 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) { // Keep a chain of previous declarations. New->setPreviousDecl(Old); - if (NewTemplate) + if (NewTemplate) { + NewTemplate->mergePrevDecl(OldTemplate); NewTemplate->setPreviousDecl(OldTemplate); + } // Inherit access appropriately. 
New->setAccess(Old->getAccess()); diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 573e90aced3ee..e2a59f63ccf58 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -9954,7 +9954,7 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer( auto SynthesizeAggrGuide = [&](InitListExpr *ListInit) { auto *Pattern = Template; while (Pattern->getInstantiatedFromMemberTemplate()) { - if (Pattern->hasMemberSpecialization()) + if (Pattern->isMemberSpecialization()) break; Pattern = Pattern->getInstantiatedFromMemberTemplate(); } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index b63063813f1b5..de0ec0128905f 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -343,7 +343,7 @@ struct TemplateInstantiationArgumentCollecter // If this function was instantiated from a specialized member that is // a function template, we're done. assert(FD->getPrimaryTemplate() && "No function template?"); - if (FD->getPrimaryTemplate()->hasMemberSpecialization()) + if (FD->getPrimaryTemplate()->isMemberSpecialization()) return Done(); // If this function is a generic lambda specialization, we are done. 
@@ -442,11 +442,11 @@ struct TemplateInstantiationArgumentCollecter Specialized = CTSD->getSpecializedTemplateOrPartial(); if (auto *CTPSD = Specialized.dyn_cast()) { - if (CTPSD->hasMemberSpecialization()) + if (CTPSD->isMemberSpecialization()) return Done(); } else { auto *CTD = Specialized.get(); - if (CTD->hasMemberSpecialization()) + if (CTD->isMemberSpecialization()) return Done(); } return UseNextDecl(CTSD); @@ -478,11 +478,11 @@ struct TemplateInstantiationArgumentCollecter Specialized = VTSD->getSpecializedTemplateOrPartial(); if (auto *VTPSD = Specialized.dyn_cast()) { - if (VTPSD->hasMemberSpecialization()) + if (VTPSD->isMemberSpecialization()) return Done(); } else { auto *VTD = Specialized.get(); - if (VTD->hasMemberSpecialization()) + if (VTD->isMemberSpecialization()) return Done(); } return UseNextDecl(VTSD); @@ -4141,7 +4141,7 @@ getPatternForClassTemplateSpecialization( CXXRecordDecl *Pattern = nullptr; Specialized = ClassTemplateSpec->getSpecializedTemplateOrPartial(); if (auto *CTD = Specialized.dyn_cast()) { - while (!CTD->hasMemberSpecialization()) { + while (!CTD->isMemberSpecialization()) { if (auto *NewCTD = CTD->getInstantiatedFromMemberTemplate()) CTD = NewCTD; else @@ -4151,7 +4151,7 @@ getPatternForClassTemplateSpecialization( } else if (auto *CTPSD = Specialized .dyn_cast()) { - while (!CTPSD->hasMemberSpecialization()) { + while (!CTPSD->isMemberSpecialization()) { if (auto *NewCTPSD = CTPSD->getInstantiatedFromMemberTemplate()) CTPSD = NewCTPSD; else diff --git a/clang/test/AST/ast-dump-decl.cpp b/clang/test/AST/ast-dump-decl.cpp index e84241cee922f..7b998f20944f4 100644 --- a/clang/test/AST/ast-dump-decl.cpp +++ b/clang/test/AST/ast-dump-decl.cpp @@ -530,7 +530,7 @@ namespace testCanonicalTemplate { // CHECK-NEXT: | `-ClassTemplateDecl 0x{{.+}} parent 0x{{.+}} col:40 friend_undeclared TestClassTemplate{{$}} // CHECK-NEXT: | |-TemplateTypeParmDecl 0x{{.+}} col:23 typename depth 1 index 0 T2{{$}} // CHECK-NEXT: | `-CXXRecordDecl 
0x{{.+}} parent 0x{{.+}} col:40 class TestClassTemplate{{$}} - // CHECK-NEXT: `-ClassTemplateSpecializationDecl 0x{{.+}} line:[[@LINE-19]]:31 class TestClassTemplate definition implicit_instantiation{{$}} + // CHECK-NEXT: `-ClassTemplateSpecializationDecl 0x{{.+}} line:[[@LINE-19]]:31 class TestClassTemplate definition implicit_instantiation{{$}} // CHECK-NEXT: |-DefinitionData pass_in_registers empty aggregate standard_layout trivially_copyable pod trivial literal has_constexpr_non_copy_move_ctor can_const_default_init{{$}} // CHECK-NEXT: | |-DefaultConstructor exists trivial constexpr defaulted_is_constexpr{{$}} // CHECK-NEXT: | |-CopyConstructor simple trivial has_const_param implicit_has_const_param{{$}} diff --git a/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp index 87127366eb58a..e7e4738032f64 100644 --- a/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp +++ b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp @@ -177,6 +177,93 @@ namespace Defined { static_assert(A::B::y == 2); } // namespace Defined +namespace Constrained { + template + struct A { + template requires V + static constexpr int f(); // expected-note {{declared here}} + + template requires V + static const int x; // expected-note {{declared here}} + + template requires V + static const int x; // expected-note {{declared here}} + + template requires V + struct B; // expected-note {{template is declared here}} + + template requires V + struct B; // expected-note {{template is declared here}} + }; + + template<> + template requires V + constexpr int A::f() { + return A::f(); + } + + template<> + template requires V + constexpr int A::x = A::x; + + template<> + template requires V + constexpr int A::x = A::x; + + template<> + template requires V + struct A::B { + static constexpr int y = A::B::y; + }; + + template<> + template requires V + struct A::B { + static constexpr int y = A::B::y; + }; + + template<> + template requires V + 
constexpr int A::f() { + return 1; + } + + template<> + template requires V + constexpr int A::x = 1; + + template<> + template requires V + constexpr int A::x = 2; + + template<> + template requires V + struct A::B { + static constexpr int y = 1; + }; + + template<> + template requires V + struct A::B { + static constexpr int y = 2; + }; + + static_assert(A::f() == 0); // expected-error {{static assertion expression is not an integral constant expression}} + // expected-note@-1 {{undefined function 'f' cannot be used in a constant expression}} + static_assert(A::x == 0); // expected-error {{static assertion expression is not an integral constant expression}} + // expected-note@-1 {{initializer of 'x' is unknown}} + static_assert(A::x == 0); // expected-error {{static assertion expression is not an integral constant expression}} + // expected-note@-1 {{initializer of 'x' is unknown}} + static_assert(A::B::y == 0); // expected-error {{implicit instantiation of undefined template 'Constrained::A::B'}} + static_assert(A::B::y == 0); // expected-error {{implicit instantiation of undefined template 'Constrained::A::B'}} + + static_assert(A::f() == 1); + static_assert(A::x == 1); + static_assert(A::x == 2); + static_assert(A::B::y == 1); + static_assert(A::B::y == 2); +} // namespace Constrained + namespace Dependent { template struct A { From 47d9db762484afadeca1acb60534b6b88784464a Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 30 Oct 2024 18:51:16 +0000 Subject: [PATCH 41/69] [AArch64] Add assembly/disassembly for FMOP4{A,S} (widening, 2-way, FP16 to FP32) instructions (#113346) The new instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 3 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 37 +++ .../fmop4as-fp16-fp32-widening-diagnostics.s | 243 ++++++++++++++++++ .../SME2p2/fmop4as-fp16-fp32-widening.s | 177 +++++++++++++ 4 files changed, 460 insertions(+)
create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index d77219fa7a305..7357aa3c1f0d5 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1020,6 +1020,9 @@ let Predicates = [HasSME2p2] in { defm FMOP4A : sme2_fmop4as_fp32_non_widening<0, "fmop4a">; defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s">; + + defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a">; + defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s">; } // [HasSME2p2] let Predicates = [HasSME2p2, HasSMEB16B16] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 1c5ec09692456..867901ac5d903 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5528,3 +5528,40 @@ multiclass sme2_fmop4as_fp64_non_widening { // Multiple vectors def _M2Z2Z_D : sme2_fp64_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZZ_d_mul_r_Hi>; } + +class sme2_fp16_fp32_quarter_tile_outer_product + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000001001; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-2} = 0b00; + let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4as_fp16_fp32_widening { + // Single vectors + def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_HtoS : 
sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening-diagnostics.s new file mode 100644 index 0000000000000..457add20355e8 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening-diagnostics.s @@ -0,0 +1,243 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s + +// FMOP4A + +// Single vectors + +fmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z0.h, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +fmop4a za0.d, z0.h, {z16.h-z17.h} +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z0.h, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, z0.h, {z16.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4a za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za4.s, {z0.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: 
Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +fmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.d}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z1.h}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z1.h}, 
{z18.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// FMOP4S + +// Single vectors + +fmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z0.h, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +fmop4s za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z16.h, {z16.h-z17.h} +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z0.h, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, z0.h, {z16.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4s za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4s za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even 
register in z16.h..z30.h + +fmop4s za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +fmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.d}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z1.h}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z1.h}, {z18.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening.s 
b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening.s new file mode 100644 index 0000000000000..d615fb85b4fd7 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening.s @@ -0,0 +1,177 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +// FMOP4A + +// Single vectors +fmop4a za0.s, z0.h, z16.h // 10000001-00100000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x00,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200000 + +fmop4a za1.s, z10.h, z20.h // 10000001-00100100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x41,0x01,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240141 + +fmop4a za3.s, z14.h, z30.h // 10000001-00101110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xc3,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e01c3 + +// Single and multiple vectors + +fmop4a za0.s, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.h, { z16.h, 
z17.h } +// CHECK-ENCODING: [0x00,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300000 + +fmop4a za1.s, z10.h, {z20.h-z21.h} // 10000001-00110100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.h, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x01,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340141 + +fmop4a za3.s, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e01c3 + +// Multiple and single vectors + +fmop4a za0.s, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x00,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200200 + +fmop4a za1.s, {z10.h-z11.h}, z20.h // 10000001-00100100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.h, z11.h }, z20.h +// CHECK-ENCODING: [0x41,0x03,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240341 + +fmop4a za3.s, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xc3,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e03c3 + +// Multiple vectors + +fmop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00110000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x00,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300200 + +fmop4a za1.s, {z10.h-z11.h}, {z20.h-z21.h} // 10000001-00110100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x03,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340341 + +fmop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 
10000001-00111110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e03c3 + +// FMOP4S + +// Single vectors +fmop4s za0.s, z0.h, z16.h // 10000001-00100000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x10,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200010 + +fmop4s za1.s, z10.h, z20.h // 10000001-00100100-00000001-01010001 +// CHECK-INST: fmop4s za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x51,0x01,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240151 + +fmop4s za3.s, z14.h, z30.h // 10000001-00101110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xd3,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e01d3 + +// Single and multiple vectors + +fmop4s za0.s, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300010 + +fmop4s za1.s, z10.h, {z20.h-z21.h} // 10000001-00110100-00000001-01010001 +// CHECK-INST: fmop4s za1.s, z10.h, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x01,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340151 + +fmop4s za3.s, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e01d3 + +// Multiple and single vectors + +fmop4s za0.s, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x10,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200210 + +fmop4s za1.s, 
{z10.h-z11.h}, z20.h // 10000001-00100100-00000011-01010001 +// CHECK-INST: fmop4s za1.s, { z10.h, z11.h }, z20.h +// CHECK-ENCODING: [0x51,0x03,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240351 + +fmop4s za3.s, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xd3,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e03d3 + +// Multiple vectors + +fmop4s za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00110000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300210 + +fmop4s za1.s, {z10.h-z11.h}, {z20.h-z21.h} // 10000001-00110100-00000011-01010001 +// CHECK-INST: fmop4s za1.s, { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x03,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340351 + +fmop4s za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00111110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e03d3 From 408c84f35b8b0338b630a6ee313c14238e62b5e6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Oct 2024 11:52:49 -0700 Subject: [PATCH 42/69] [RISCV] Add hasPostISelHook to sf.vfnrclip pseudo instructions. (#114274) Add Uses = [FRM] to the underlying MC instructions. Tweak a couple test cases so the MachineVerifier would have caught this. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 5 +++-- llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_x_f_qf.ll | 4 +--- llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll | 4 +--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 81467ada00448..1ad3e1b681466 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -222,7 +222,8 @@ let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq", def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">; } -let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf" in { +let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf", + Uses = [FRM] in { def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">; def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">; } @@ -405,7 +406,7 @@ multiclass VPseudoSiFiveVFWMACC { multiclass VPseudoSiFiveVFNRCLIP { foreach i = 0-4 in - let hasSideEffects = 0 in + let hasSideEffects = 0, hasPostISelHook = 1 in defm "Pseudo" # NAME : VPseudoBinaryRoundingMode @llvm.riscv.sf.vfnrclip.x.f.qf.nxv1i8.nxv1f32.iXLen( define @intrinsic_sf_vfnrclip_x_f_qf_nxv1i8_nxv1f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_sf_vfnrclip_x_f_qf_nxv1i8_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: sf.vfnrclip.x.f.qf v9, v8, fa0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -24,7 +22,7 @@ entry: undef, %0, float %1, - iXLen 0, iXLen %2) + iXLen 7, iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll b/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll index dbcee311c6e35..dfb0ccd982e84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll @@ -13,10 +13,8 @@ declare @llvm.riscv.sf.vfnrclip.xu.f.qf.nxv1i8.nxv1f32.iXLen( define @intrinsic_sf_vfnrclip_xu_f_qf_nxv1i8_nxv1f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_sf_vfnrclip_xu_f_qf_nxv1i8_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: sf.vfnrclip.xu.f.qf v9, v8, fa0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -24,7 +22,7 @@ entry: undef, %0, float %1, - iXLen 0, iXLen %2) + iXLen 7, iXLen %2) ret %a } From b3bb6f18bb5b2b8756b585b80d46d13ab3636a18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Wed, 30 Oct 2024 19:56:25 +0100 Subject: [PATCH 43/69] [GlobalISel] Import samesign flag (#114267) Credits: https://github.com/llvm/llvm-project/pull/111419 Fixes icmp-flags.mir First attempt: https://github.com/llvm/llvm-project/pull/113090 Revert: https://github.com/llvm/llvm-project/pull/114256 --- .../CodeGen/GlobalISel/GenericMachineInstrs.h | 2 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 3 +- llvm/include/llvm/CodeGen/MachineInstr.h | 1 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 9 +-- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 5 +- llvm/lib/CodeGen/MIRParser/MILexer.cpp | 1 + llvm/lib/CodeGen/MIRParser/MILexer.h | 1 + llvm/lib/CodeGen/MIRParser/MIParser.cpp | 5 +- llvm/lib/CodeGen/MIRPrinter.cpp | 2 + llvm/lib/CodeGen/MachineInstr.cpp | 7 ++ .../CodeGen/AArch64/GlobalISel/icmp-flags.mir | 45 ++++++++++++ .../GlobalISel/irtranslater-samesign.ll | 69 +++++++++++++++++++ 12 files changed, 139 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/icmp-flags.mir create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslater-samesign.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index b6309a9ea0ec7..cd7ebcf54c9e1 
100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -28,7 +28,7 @@ namespace llvm { class GenericMachineInstr : public MachineInstr { constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap | IsExact | Disjoint | NonNeg | - FmNoNans | FmNoInfs; + FmNoNans | FmNoInfs | SameSign; public: GenericMachineInstr() = delete; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index c41e74ec7ebdc..14a641512a67d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1266,7 +1266,8 @@ class MachineIRBuilder { /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + std::optional Flgs = std::nullopt); /// Build and insert a \p Res = G_FCMP \p Pred\p Op0, \p Op1 /// diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 3605173247463..ead6bbe1d5f64 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -119,6 +119,7 @@ class MachineInstr Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs. NoUSWrap = 1 << 20, // Instruction supports geps // no unsigned signed wrap. + SameSign = 1 << 21 // Both operands have the same sign. 
}; private: diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 5381dce58f9e6..a87754389cc8e 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -340,20 +340,17 @@ bool IRTranslator::translateCompare(const User &U, Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); CmpInst::Predicate Pred = CI->getPredicate(); + uint32_t Flags = MachineInstr::copyFlagsFromInstruction(*CI); if (CmpInst::isIntPredicate(Pred)) - MIRBuilder.buildICmp(Pred, Res, Op0, Op1); + MIRBuilder.buildICmp(Pred, Res, Op0, Op1, Flags); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); - else { - uint32_t Flags = 0; - if (CI) - Flags = MachineInstr::copyFlagsFromInstruction(*CI); + else MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags); - } return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 59f2fc633f5de..15b9164247846 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -898,8 +898,9 @@ MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op, MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { - return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}); + const SrcOp &Op1, + std::optional Flags) { + return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 5a3806ce57335..1c450b05f49e9 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp 
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -216,6 +216,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("exact", MIToken::kw_exact) .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) + .Case("samesign", MIToken::kw_samesign) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 3931da3eaae1d..d7cd06759cfbb 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -77,6 +77,7 @@ struct MIToken { kw_unpredictable, kw_nneg, kw_disjoint, + kw_samesign, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 45847b5830da6..059814c70f828 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1476,7 +1476,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_noconvergent) || Token.is(MIToken::kw_unpredictable) || Token.is(MIToken::kw_nneg) || - Token.is(MIToken::kw_disjoint)) { + Token.is(MIToken::kw_disjoint) || + Token.is(MIToken::kw_samesign)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1513,6 +1514,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NonNeg; if (Token.is(MIToken::kw_disjoint)) Flags |= MachineInstr::Disjoint; + if (Token.is(MIToken::kw_samesign)) + Flags |= MachineInstr::SameSign; lex(); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index a015cd3c2a55f..658bbe0e577e5 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -837,6 +837,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "disjoint "; if 
(MI.getFlag(MachineInstr::NoUSWrap)) OS << "nusw "; + if (MI.getFlag(MachineInstr::SameSign)) + OS << "samesign "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index c1bd0bb5b7162..941861da5c569 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -596,6 +596,11 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::Disjoint; } + // Copy the samesign flag. + if (const ICmpInst *ICmp = dyn_cast(&I)) + if (ICmp->hasSameSign()) + MIFlags |= MachineInstr::MIFlag::SameSign; + // Copy the exact flag. if (const PossiblyExactOperator *PE = dyn_cast(&I)) if (PE->isExact()) @@ -1770,6 +1775,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nneg "; if (getFlag(MachineInstr::Disjoint)) OS << "disjoint "; + if (getFlag(MachineInstr::SameSign)) + OS << "samesign "; // Print the opcode name. if (TII) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/icmp-flags.mir b/llvm/test/CodeGen/AArch64/GlobalISel/icmp-flags.mir new file mode 100644 index 0000000000000..59e4de9440416 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/icmp-flags.mir @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=none -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: icmp_samesign +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: icmp_samesign + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %cmp:_(s1) = samesign G_ICMP intpred(eq), %y(s32), %y + ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $w0 = COPY %zext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s1) = samesign G_ICMP intpred(eq), %y:_(s32), %y:_ + %zext:_(s32) = G_ZEXT %cmp:_(s1) + 
$w0 = COPY %zext + RET_ReallyLR implicit $w0 +... +--- +name: icmp_differentsign +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: icmp_differentsign + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %y(s32), %y + ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $w0 = COPY %zext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s1) = G_ICMP intpred(eq), %y:_(s32), %y:_ + %zext:_(s32) = G_ZEXT %cmp:_(s1) + $w0 = COPY %zext + RET_ReallyLR implicit $w0 +--- diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslater-samesign.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslater-samesign.ll new file mode 100644 index 0000000000000..0173f92c98220 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslater-samesign.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator < %s | FileCheck %s + + +define <2 x i1> @call_icmp_samesign_vector(<2 x i32> %a, <2 x i32> %b) { + ; CHECK-LABEL: name: call_icmp_samesign_vector + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: %2:_(<2 x s1>) = samesign G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT %2(<2 x s1>) + ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 +entry: + %result = icmp samesign ult <2 x i32> %a, %b + ret <2 x i1> %result +} + +define <2 x i1> @call_icmp_vector(<2 x i32> %a, <2 x i32> %b) { + ; CHECK-LABEL: name: call_icmp_vector + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1 + ; CHECK-NEXT: {{ 
$}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s1>) = G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[ICMP]](<2 x s1>) + ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 +entry: + %result = icmp ult <2 x i32> %a, %b + ret <2 x i1> %result +} + +define i1 @call_icmp(i32 %a) { + ; CHECK-LABEL: name: call_icmp + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %result = icmp ult i32 %a, 3 + ret i1 %result +} + +define i1 @call_icmp_samesign(i32 %a) { + ; CHECK-LABEL: name: call_icmp_samesign + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %2:_(s1) = samesign G_ICMP intpred(ult), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %2(s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %result = icmp samesign ult i32 %a, 3 + ret i1 %result +} From dc1ff883caf687f00bd916ea997321ac411c73fd Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 30 Oct 2024 11:56:41 -0700 Subject: [PATCH 44/69] [libc][i386] define MINSIGSTKSZ & SIGSTKSZ (#114249) Link: #93709 --- libc/include/llvm-libc-macros/linux/signal-macros.h | 5 +---- 1 file changed, 1 insertion(+), 
4 deletions(-) diff --git a/libc/include/llvm-libc-macros/linux/signal-macros.h b/libc/include/llvm-libc-macros/linux/signal-macros.h index e379fc41efd02..0b7317ebc9b80 100644 --- a/libc/include/llvm-libc-macros/linux/signal-macros.h +++ b/libc/include/llvm-libc-macros/linux/signal-macros.h @@ -76,15 +76,12 @@ #define SS_ONSTACK 0x1 #define SS_DISABLE 0x2 -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__i386__) || defined(__riscv) #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 #elif defined(__aarch64__) #define MINSIGSTKSZ 5120 #define SIGSTKSZ 16384 -#elif defined(__riscv) -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 #else #error "Signal stack sizes not defined for your platform." #endif From bc79ec0c5bc3fce31448419846c343017ae1c5ad Mon Sep 17 00:00:00 2001 From: Fred Tingaud <95592999+frederic-tingaud-sonarsource@users.noreply.github.com> Date: Wed, 30 Oct 2024 19:57:09 +0100 Subject: [PATCH 45/69] [clang][ASTMatcher] Handle variable templates in `isInstantiated` and `isInTemplateInstantiation` matchers (#110666) Fix `isInstantiated` and `isInTemplateInstantiation` matchers, so they return true for instantiations of variable templates, and any declaration in statements contained in such instantiations. --- clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/ASTMatchers/ASTMatchers.h | 9 +++-- .../ASTMatchers/ASTMatchersNarrowingTest.cpp | 39 +++++++++++++++++++ 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1a179e63f902f..402203f89e23a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -745,6 +745,8 @@ AST Matchers - Fixed a crash when traverse lambda expr with invalid captures. (#GH106444) +- Fixed ``isInstantiated`` and ``isInTemplateInstantiation`` to also match for variable templates. 
(#GH110666) + - Ensure ``hasName`` matches template specializations across inline namespaces, making `matchesNodeFullSlow` and `matchesNodeFullFast` consistent. diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index 54e484d41fb1c..c77140842d7a6 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -6750,7 +6750,8 @@ AST_POLYMORPHIC_MATCHER(isTemplateInstantiation, /// matches 'A(int) {...};' and 'A(unsigned) {...}'. AST_MATCHER_FUNCTION(internal::Matcher, isInstantiated) { auto IsInstantiation = decl(anyOf(cxxRecordDecl(isTemplateInstantiation()), - functionDecl(isTemplateInstantiation()))); + functionDecl(isTemplateInstantiation()), + varDecl(isTemplateInstantiation()))); return decl(anyOf(IsInstantiation, hasAncestor(IsInstantiation))); } @@ -6769,9 +6770,9 @@ AST_MATCHER_FUNCTION(internal::Matcher, isInstantiated) { /// will NOT match j += 42; as it's shared between the template definition and /// instantiation. 
AST_MATCHER_FUNCTION(internal::Matcher, isInTemplateInstantiation) { - return stmt( - hasAncestor(decl(anyOf(cxxRecordDecl(isTemplateInstantiation()), - functionDecl(isTemplateInstantiation()))))); + return stmt(hasAncestor(decl(anyOf(cxxRecordDecl(isTemplateInstantiation()), + functionDecl(isTemplateInstantiation()), + varDecl(isTemplateInstantiation()))))); } /// Matches explicit template specializations of function, class, or diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp index d696375547acc..056b7c7b571ef 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp @@ -3342,6 +3342,45 @@ TEST_P(ASTMatchersTest, declStmt(isInTemplateInstantiation()))); } +TEST_P(ASTMatchersTest, IsInstantiated_MatchesVariableInstantiation) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(matches("template int V = 10; void x() { V; }", + varDecl(isInstantiated()))); +} + +TEST_P(ASTMatchersTest, IsInstantiated_NotMatchesVariableDefinition) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(notMatches("template int V = 10;", + varDecl(isInstantiated()))); +} + +TEST_P(ASTMatchersTest, + IsInTemplateInstantiation_MatchesVariableInstantiationStmt) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(matches( + "template auto V = []() { T i; }; void x() { V(); }", + declStmt(isInTemplateInstantiation()))); +} + +TEST_P(ASTMatchersTest, + IsInTemplateInstantiation_NotMatchesVariableDefinitionStmt) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(notMatches("template auto V = []() { T i; };", + declStmt(isInTemplateInstantiation()))); +} + TEST_P(ASTMatchersTest, IsInTemplateInstantiation_Sharing) { if (!GetParam().isCXX()) { return; From d8295e2eeceef37bfd9e0f84918735eff6cfc659 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Wed, 30 Oct 2024 15:01:02 
-0400 Subject: [PATCH 46/69] [SPIRV][HLSL] Handle arrays of resources (#111564) This commit adds the ability to get a particular resource from an array of resources using the handle_fromBinding intrinsic. The main changes are: 1. Create an array when generating the type. 2. Add capabilities from [SPV_EXT_descriptor_indexing](https://htmlpreview.github.io/?https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/EXT/SPV_EXT_descriptor_indexing.html). We are still missing the ability to declare a runtime array. That will be done in a follow up PR. --- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp | 43 ++++- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 43 ++++- llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 164 +++++++++++++++++- .../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 4 +- .../CombinedSamplerImageDynIdx.ll | 41 +++++ .../CombinedSamplerImageNonUniformIdx.ll | 48 +++++ .../{ => hlsl-resources}/HlslBufferLoad.ll | 0 .../InputAttachmentImageDynIdx.ll | 40 +++++ .../InputAttachmentImageNonUniformIdx.ll | 47 +++++ .../hlsl-resources/SampledImageDynIdx.ll | 66 +++++++ .../SampledImageNonUniformIdx.ll | 47 +++++ .../hlsl-resources/SamplerArrayDynIdx.ll | 39 +++++ .../SamplerArrayNonUniformIdx.ll | 46 +++++ .../hlsl-resources/StorageImageDynIdx.ll | 40 +++++ .../StorageImageNonUniformIdx.ll | 47 +++++ .../StorageTexelBufferDynIdx.ll | 40 +++++ .../StorageTexelBufferNonUniformIdx.ll | 47 +++++ .../UniformTexelBufferDynIdx.ll | 40 +++++ .../UniformTexelBufferNonUniformIdx.ll | 47 +++++ 19 files changed, 871 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll rename llvm/test/CodeGen/SPIRV/{ => hlsl-resources}/HlslBufferLoad.ll (100%) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll create mode 100644 
llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 64fde8bf67ab9..62bd8d1f9d243 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -713,21 +713,36 @@ Register SPIRVGlobalRegistry::buildGlobalVariable( return Reg; } +static std::string GetSpirvImageTypeName(const SPIRVType *Type, + MachineIRBuilder &MIRBuilder, + const std::string &Prefix); + static std::string buildSpirvTypeName(const SPIRVType *Type, MachineIRBuilder &MIRBuilder) { switch (Type->getOpcode()) { + case SPIRV::OpTypeSampledImage: { + return GetSpirvImageTypeName(Type, MIRBuilder, "sampled_image_"); + } case SPIRV::OpTypeImage: { - Register SampledTypeReg = Type->getOperand(1).getReg(); - auto *SampledType = MIRBuilder.getMRI()->getUniqueVRegDef(SampledTypeReg); - std::string TypeName = - "image_" + buildSpirvTypeName(SampledType, MIRBuilder); - for (uint32_t I = 2; I < Type->getNumOperands(); ++I) { - TypeName = (TypeName + '_' + 
Twine(Type->getOperand(I).getImm())).str(); - } - return TypeName; + return GetSpirvImageTypeName(Type, MIRBuilder, "image_"); + } + case SPIRV::OpTypeArray: { + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + Register ElementTypeReg = Type->getOperand(1).getReg(); + auto *ElementType = MRI->getUniqueVRegDef(ElementTypeReg); + const SPIRVType *TypeInst = MRI->getVRegDef(Type->getOperand(2).getReg()); + assert(TypeInst->getOpcode() != SPIRV::OpConstantI); + MachineInstr *ImmInst = MRI->getVRegDef(TypeInst->getOperand(1).getReg()); + assert(ImmInst->getOpcode() == TargetOpcode::G_CONSTANT); + uint32_t ArraySize = ImmInst->getOperand(1).getCImm()->getZExtValue(); + return (buildSpirvTypeName(ElementType, MIRBuilder) + Twine("[") + + Twine(ArraySize) + Twine("]")) + .str(); } case SPIRV::OpTypeFloat: return ("f" + Twine(Type->getOperand(1).getImm())).str(); + case SPIRV::OpTypeSampler: + return ("sampler"); case SPIRV::OpTypeInt: if (Type->getOperand(2).getImm()) return ("i" + Twine(Type->getOperand(1).getImm())).str(); @@ -737,6 +752,18 @@ static std::string buildSpirvTypeName(const SPIRVType *Type, } } +static std::string GetSpirvImageTypeName(const SPIRVType *Type, + MachineIRBuilder &MIRBuilder, + const std::string &Prefix) { + Register SampledTypeReg = Type->getOperand(1).getReg(); + auto *SampledType = MIRBuilder.getMRI()->getUniqueVRegDef(SampledTypeReg); + std::string TypeName = Prefix + buildSpirvTypeName(SampledType, MIRBuilder); + for (uint32_t I = 2; I < Type->getNumOperands(); ++I) { + TypeName = (TypeName + '_' + Twine(Type->getOperand(I).getImm())).str(); + } + return TypeName; +} + Register SPIRVGlobalRegistry::getOrCreateGlobalVariableWithBinding( const SPIRVType *VarType, uint32_t Set, uint32_t Binding, MachineIRBuilder &MIRBuilder) { diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 11ed7d660be09..526305d7ed28a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp 
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -260,6 +260,7 @@ class SPIRVInstructionSelector : public InstructionSelector { SPIRVType *SrcPtrTy) const; Register buildPointerToResource(const SPIRVType *ResType, uint32_t Set, uint32_t Binding, uint32_t ArraySize, + Register IndexReg, bool IsNonUniform, MachineIRBuilder MIRBuilder) const; }; @@ -2616,10 +2617,15 @@ void SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg, uint32_t Set = foldImm(I.getOperand(2), MRI); uint32_t Binding = foldImm(I.getOperand(3), MRI); uint32_t ArraySize = foldImm(I.getOperand(4), MRI); + Register IndexReg = I.getOperand(5).getReg(); + bool IsNonUniform = ArraySize > 1 && foldImm(I.getOperand(6), MRI); MachineIRBuilder MIRBuilder(I); - Register VarReg = - buildPointerToResource(ResType, Set, Binding, ArraySize, MIRBuilder); + Register VarReg = buildPointerToResource(ResType, Set, Binding, ArraySize, + IndexReg, IsNonUniform, MIRBuilder); + + if (IsNonUniform) + buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {}); // TODO: For now we assume the resource is an image, which needs to be // loaded to get the handle. That will not be true for storage buffers. 
@@ -2631,10 +2637,35 @@ void SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg, Register SPIRVInstructionSelector::buildPointerToResource( const SPIRVType *ResType, uint32_t Set, uint32_t Binding, - uint32_t ArraySize, MachineIRBuilder MIRBuilder) const { - assert(ArraySize == 1 && "Resource arrays are not implemented yet."); - return GR.getOrCreateGlobalVariableWithBinding(ResType, Set, Binding, - MIRBuilder); + uint32_t ArraySize, Register IndexReg, bool IsNonUniform, + MachineIRBuilder MIRBuilder) const { + if (ArraySize == 1) + return GR.getOrCreateGlobalVariableWithBinding(ResType, Set, Binding, + MIRBuilder); + + const SPIRVType *VarType = GR.getOrCreateSPIRVArrayType( + ResType, ArraySize, *MIRBuilder.getInsertPt(), TII); + Register VarReg = GR.getOrCreateGlobalVariableWithBinding( + VarType, Set, Binding, MIRBuilder); + + SPIRVType *ResPointerType = GR.getOrCreateSPIRVPointerType( + ResType, MIRBuilder, SPIRV::StorageClass::UniformConstant); + + Register AcReg = MRI->createVirtualRegister(&SPIRV::iIDRegClass); + if (IsNonUniform) { + // It is unclear which value needs to be marked as non-uniform, so both + // the index and the access chain are decorated as non-uniform.
+ buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); + buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); + } + + MIRBuilder.buildInstr(SPIRV::OpAccessChain) + .addDef(AcReg) + .addUse(GR.getSPIRVTypeID(ResPointerType)) + .addUse(VarReg) + .addUse(IndexReg); + + return AcReg; } bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg, diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index db5463f5c7abb..29ce60d9983e3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -689,11 +689,31 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan( const SPIRVSubtarget &ST) { addAvailableCaps({Capability::Shader, Capability::Linkage}); - // Provided by all supported Vulkan versions. + // Core in Vulkan 1.1 and earlier. addAvailableCaps({Capability::Int16, Capability::Int64, Capability::Float16, Capability::Float64, Capability::GroupNonUniform, Capability::Image1D, Capability::SampledBuffer, - Capability::ImageBuffer}); + Capability::ImageBuffer, + Capability::UniformBufferArrayDynamicIndexing, + Capability::SampledImageArrayDynamicIndexing, + Capability::StorageBufferArrayDynamicIndexing, + Capability::StorageImageArrayDynamicIndexing}); + + // Became core in Vulkan 1.2 + if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) { + addAvailableCaps( + {Capability::ShaderNonUniformEXT, Capability::RuntimeDescriptorArrayEXT, + Capability::InputAttachmentArrayDynamicIndexingEXT, + Capability::UniformTexelBufferArrayDynamicIndexingEXT, + Capability::StorageTexelBufferArrayDynamicIndexingEXT, + Capability::UniformBufferArrayNonUniformIndexingEXT, + Capability::SampledImageArrayNonUniformIndexingEXT, + Capability::StorageBufferArrayNonUniformIndexingEXT, + Capability::StorageImageArrayNonUniformIndexingEXT, + Capability::InputAttachmentArrayNonUniformIndexingEXT, + 
Capability::UniformTexelBufferArrayNonUniformIndexingEXT, + Capability::StorageTexelBufferArrayNonUniformIndexingEXT}); + } } } // namespace SPIRV @@ -729,6 +749,8 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex, Dec == SPIRV::Decoration::ImplementInRegisterMapINTEL) { Reqs.addExtension( SPIRV::Extension::SPV_INTEL_global_variable_fpga_decorations); + } else if (Dec == SPIRV::Decoration::NonUniformEXT) { + Reqs.addRequirements(SPIRV::Capability::ShaderNonUniformEXT); } } @@ -848,6 +870,136 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI, } } +bool isUniformTexelBuffer(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim == SPIRV::Dim::DIM_Buffer && Sampled == 1; +} + +bool isStorageTexelBuffer(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim == SPIRV::Dim::DIM_Buffer && Sampled == 2; +} + +bool isSampledImage(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim != SPIRV::Dim::DIM_Buffer && Sampled == 1; +} + +bool isInputAttachment(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim == SPIRV::Dim::DIM_SubpassData && Sampled == 2; +} + +bool isStorageImage(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim != 
SPIRV::Dim::DIM_Buffer && Sampled == 2; +} + +bool isCombinedImageSampler(MachineInstr *SampledImageInst) { + if (SampledImageInst->getOpcode() != SPIRV::OpTypeSampledImage) + return false; + + const MachineRegisterInfo &MRI = SampledImageInst->getMF()->getRegInfo(); + Register ImageReg = SampledImageInst->getOperand(1).getReg(); + auto *ImageInst = MRI.getUniqueVRegDef(ImageReg); + return isSampledImage(ImageInst); +} + +bool hasNonUniformDecoration(Register Reg, const MachineRegisterInfo &MRI) { + for (const auto &MI : MRI.reg_instructions(Reg)) { + if (MI.getOpcode() != SPIRV::OpDecorate) + continue; + + uint32_t Dec = MI.getOperand(1).getImm(); + if (Dec == SPIRV::Decoration::NonUniformEXT) + return true; + } + return false; +} + +void addOpAccessChainReqs(const MachineInstr &Instr, + SPIRV::RequirementHandler &Handler, + const SPIRVSubtarget &Subtarget) { + const MachineRegisterInfo &MRI = Instr.getMF()->getRegInfo(); + // Get the result type. If it is an image type, then the shader uses + // descriptor indexing. The appropriate capabilities will be added based + // on the specifics of the image. 
+ Register ResTypeReg = Instr.getOperand(1).getReg(); + MachineInstr *ResTypeInst = MRI.getUniqueVRegDef(ResTypeReg); + + assert(ResTypeInst->getOpcode() == SPIRV::OpTypePointer); + uint32_t StorageClass = ResTypeInst->getOperand(1).getImm(); + if (StorageClass != SPIRV::StorageClass::StorageClass::UniformConstant && + StorageClass != SPIRV::StorageClass::StorageClass::Uniform && + StorageClass != SPIRV::StorageClass::StorageClass::StorageBuffer) { + return; + } + + Register PointeeTypeReg = ResTypeInst->getOperand(2).getReg(); + MachineInstr *PointeeType = MRI.getUniqueVRegDef(PointeeTypeReg); + if (PointeeType->getOpcode() != SPIRV::OpTypeImage && + PointeeType->getOpcode() != SPIRV::OpTypeSampledImage && + PointeeType->getOpcode() != SPIRV::OpTypeSampler) { + return; + } + + bool IsNonUniform = + hasNonUniformDecoration(Instr.getOperand(0).getReg(), MRI); + if (isUniformTexelBuffer(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::UniformTexelBufferArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::UniformTexelBufferArrayDynamicIndexingEXT); + } else if (isInputAttachment(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::InputAttachmentArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::InputAttachmentArrayDynamicIndexingEXT); + } else if (isStorageTexelBuffer(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::StorageTexelBufferArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::StorageTexelBufferArrayDynamicIndexingEXT); + } else if (isSampledImage(PointeeType) || + isCombinedImageSampler(PointeeType) || + PointeeType->getOpcode() == SPIRV::OpTypeSampler) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::SampledImageArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::SampledImageArrayDynamicIndexing); + } else if 
(isStorageImage(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::StorageImageArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::StorageImageArrayDynamicIndexing); + } +} + void addInstrRequirements(const MachineInstr &MI, SPIRV::RequirementHandler &Reqs, const SPIRVSubtarget &ST) { @@ -967,11 +1119,17 @@ void addInstrRequirements(const MachineInstr &MI, case SPIRV::OpConstantSampler: Reqs.addCapability(SPIRV::Capability::LiteralSampler); break; + case SPIRV::OpInBoundsAccessChain: + case SPIRV::OpAccessChain: + addOpAccessChainReqs(MI, Reqs, ST); + break; case SPIRV::OpTypeImage: addOpTypeImageReqs(MI, Reqs, ST); break; case SPIRV::OpTypeSampler: - Reqs.addCapability(SPIRV::Capability::ImageBasic); + if (!ST.isVulkanEnv()) { + Reqs.addCapability(SPIRV::Capability::ImageBasic); + } break; case SPIRV::OpTypeForwardPointer: // TODO: check if it's OpenCL's kernel. diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 13ad1eb8e8b33..d63438baca7e7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -355,7 +355,9 @@ defm GeometryPointSize : CapabilityOperand<24, 0, 0, [], [Geometry]>; defm ImageGatherExtended : CapabilityOperand<25, 0, 0, [], [Shader]>; defm StorageImageMultisample : CapabilityOperand<27, 0, 0, [], [Shader]>; defm UniformBufferArrayDynamicIndexing : CapabilityOperand<28, 0, 0, [], [Shader]>; -defm SampledImageArrayDymnamicIndexing : CapabilityOperand<29, 0, 0, [], [Shader]>; +defm SampledImageArrayDynamicIndexing : CapabilityOperand<29, 0, 0, [], [Shader]>; +defm StorageBufferArrayDynamicIndexing : CapabilityOperand<30, 0, 0, [], [Shader]>; +defm StorageImageArrayDynamicIndexing : CapabilityOperand<31, 0, 0, [], [Shader]>; defm ClipDistance : CapabilityOperand<32, 0, 0, [], [Shader]>; defm CullDistance : CapabilityOperand<33, 0, 0, [], [Shader]>; defm 
SampleRateShading : CapabilityOperand<35, 0, 0, [], [Shader]>; diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll new file mode 100644 index 0000000000000..d5e95c7824144 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll @@ -0,0 +1,41 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[CombindedType:%[0-9]+]] = OpTypeSampledImage [[BufferType]] +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[CombindedType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[CombindedType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[CombindedType]] [[ac]] + %buffer0 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + 
i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[CombindedType]] [[ac]] + %buffer1 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll new file mode 100644 index 0000000000000..68bf3478fa9af --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll @@ -0,0 +1,48 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK: OpCapability ShaderNonUniform +; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[CombindedType:%[0-9]+]] = OpTypeSampledImage [[BufferType]] +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer 
UniformConstant [[CombindedType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[CombindedType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0:%[0-9]+]] = OpLoad [[CombindedType]] [[ac0]] + %buffer0 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[CombindedType]] [[ac1]] + %buffer1 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/HlslBufferLoad.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/HlslBufferLoad.ll similarity index 100% rename from llvm/test/CodeGen/SPIRV/HlslBufferLoad.ll rename to llvm/test/CodeGen/SPIRV/hlsl-resources/HlslBufferLoad.ll diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll new file mode 100644 index 0000000000000..39fdc866af7ff --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" 
+; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability InputAttachmentArrayDynamicIndexing +; SCHECK-NEXT: OpCapability InputAttachment +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] SubpassData 2 0 0 2 Unknown {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll new file mode 100644 index 0000000000000..b05b7eb885b42 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability InputAttachmentArrayNonUniformIndexing +; SCHECK-NEXT: OpCapability InputAttachment +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] SubpassData 2 0 0 2 Unknown {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] 
= OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll new file mode 100644 index 0000000000000..0c47eeb606e80 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll @@ -0,0 +1,66 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK-DAG: OpDecorate [[OtherVar:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[OtherVar]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 
+; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK-DAG: [[OtherArraySize:%[0-9]+]] = OpConstant [[int]] 5 +; CHECK-DAG: [[OtherBufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[OtherArraySize]] +; CHECK-DAG: [[OtherArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[OtherBufferArrayType]] +; CHECK-DAG: [[OtherVar]] = OpVariable [[OtherArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @DifferentArraySizesAreDifferentVariables() #0 { +; Make sure we use different variables when the array sizes are different +; same in case one function calls the other. 
+; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[OtherVar]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 5, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll new file mode 100644 index 0000000000000..ec94a8eeac2e4 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: 
[[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll new file mode 100644 index 0000000000000..9371a792f84b2 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll @@ -0,0 +1,39 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[SamplerType:%[0-9]+]] = OpTypeSampler +; CHECK-DAG: [[SamplerPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[SamplerArrayType:%[0-9]+]] = OpTypeArray [[SamplerType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[SamplerType]] [[ac]] + %buffer0 = call target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[SamplerType]] [[ac]] + %buffer1 = call target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll new file mode 100644 index 0000000000000..151c4aa6d4365 --- /dev/null +++ 
b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll @@ -0,0 +1,46 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: ShaderNonUniform +; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[SamplerType:%[0-9]+]] = OpTypeSampler +; CHECK-DAG: [[SamplerPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[SamplerArrayType:%[0-9]+]] = OpTypeArray [[SamplerType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[SamplerType]] [[ac0]] + %buffer0 = call target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[SamplerType]] [[ac1]] + %buffer1 = call 
target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll new file mode 100644 index 0000000000000..908a81777a04a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability StorageImageArrayDynamicIndexing +; CHECK-NEXT: OpCapability Image1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + 
%buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll new file mode 100644 index 0000000000000..4a582b31d60f1 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability StorageImageArrayNonUniformIndexing +; CHECK-NEXT: OpCapability Image1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = 
OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll new file mode 100644 index 0000000000000..d144dcf505fa1 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability ImageBuffer +; CHECK-NEXT: 
OpCapability StorageTexelBufferArrayDynamicIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @void() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll new file mode 100644 index 0000000000000..2f96eda4518f0 --- /dev/null +++ 
b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability ImageBuffer +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability StorageTexelBufferArrayNonUniformIndexingEXT +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: 
[[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll new file mode 100644 index 0000000000000..117363241bd96 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability SampledBuffer +; CHECK-NEXT: OpCapability UniformTexelBufferArrayDynamicIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} 
DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll new file mode 100644 index 0000000000000..cec16a8e7c8b4 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability SampledBuffer +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability UniformTexelBufferArrayNonUniformIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate 
[[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } From b03c8c4fdda6e58cb1afe3aa90bf9f2df08a7970 Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Wed, 30 Oct 2024 13:28:32 -0600 Subject: [PATCH 47/69] libc: strlcpy/strlcat shouldn't bzero the rest of `buf` (#114259) When running Bionic's testsuite over llvm-libc, tests broke because e.g., ``` const char *str = "abc"; char buf[7]{"111111"}; strlcpy(buf, str, 7); ASSERT_EQ(buf, {'1', '1', '1', '\0', '\0', 
'\0', '\0'}); ``` On my machine (Debian w/ glibc and clang-16), a `printf` loop over `buf` gets unrolled into a series of const `printf` at compile-time: ``` printf("%d\n", '1'); printf("%d\n", '1'); printf("%d\n", '1'); printf("%d\n", 0); printf("%d\n", '1'); printf("%d\n", '1'); printf("%d\n", 0); ``` Seems best to match existing precedent here. --- libc/src/string/string_utils.h | 2 +- libc/test/src/string/strlcat_test.cpp | 9 +++++++++ libc/test/src/string/strlcpy_test.cpp | 3 +-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 78381e46e480d..240b28f15718a 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -221,7 +221,7 @@ LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src, return len; size_t n = len < size - 1 ? len : size - 1; inline_memcpy(dst, src, n); - inline_bzero(dst + n, size - n); + dst[n] = '\0'; return len; } diff --git a/libc/test/src/string/strlcat_test.cpp b/libc/test/src/string/strlcat_test.cpp index 1ffa4b0e921e2..5757fc92b39d2 100644 --- a/libc/test/src/string/strlcat_test.cpp +++ b/libc/test/src/string/strlcat_test.cpp @@ -27,6 +27,15 @@ TEST(LlvmLibcStrlcatTest, Smaller) { EXPECT_STREQ(buf, "abcd"); } +TEST(LlvmLibcStrlcatTest, SmallerNoOverwriteAfter0) { + const char *str = "cd"; + char buf[8]{"ab\0\0efg"}; + + EXPECT_EQ(LIBC_NAMESPACE::strlcat(buf, str, 8), size_t(4)); + EXPECT_STREQ(buf, "abcd"); + EXPECT_STREQ(buf + 5, "fg"); +} + TEST(LlvmLibcStrlcatTest, No0) { const char *str = "cd"; char buf[7]{"ab"}; diff --git a/libc/test/src/string/strlcpy_test.cpp b/libc/test/src/string/strlcpy_test.cpp index 5a1e30c12963f..ecf0e925a265c 100644 --- a/libc/test/src/string/strlcpy_test.cpp +++ b/libc/test/src/string/strlcpy_test.cpp @@ -25,6 +25,5 @@ TEST(LlvmLibcStrlcpyTest, Smaller) { EXPECT_EQ(LIBC_NAMESPACE::strlcpy(buf, str, 7), size_t(3)); EXPECT_STREQ(buf, "abc"); - for (const char *p = buf + 3; p < 
buf + 7; p++) - EXPECT_EQ(*p, '\0'); + EXPECT_STREQ(buf + 4, "11"); } From e4dfb51da4cd16cbb3ab18944a43ff5518d9f548 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 30 Oct 2024 15:30:34 -0400 Subject: [PATCH 48/69] Fix documentation build This fixes the build after the removal of the clang-format status page. --- clang/docs/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/docs/index.rst b/clang/docs/index.rst index 1096432813fac..66a4540a0bcac 100644 --- a/clang/docs/index.rst +++ b/clang/docs/index.rst @@ -93,7 +93,6 @@ Using Clang Tools ClangCheck ClangFormat ClangFormatStyleOptions - ClangFormattedStatus ClangLinkerWrapper ClangNVLinkWrapper ClangOffloadBundler From d2109640a3e352b49a698edc232eeaac648fe590 Mon Sep 17 00:00:00 2001 From: Ilya Enkovich Date: Wed, 30 Oct 2024 14:41:28 -0500 Subject: [PATCH 49/69] [MLIR] [AMX] Fix strides used by AMX lowering for tile loads and stores. (#113476) --- .../AMX/Transforms/LegalizeForLLVMExport.cpp | 62 +++++++++---------- mlir/test/Dialect/AMX/legalize-for-llvm.mlir | 28 +++++++++ 2 files changed, 58 insertions(+), 32 deletions(-) diff --git a/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp index c8cfcc3d945be..46c7bfbf3ffcc 100644 --- a/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp +++ b/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp @@ -37,40 +37,38 @@ std::pair getTileSizes(ConversionPatternRewriter &rewriter, rewriter.create(loc, llvmInt16Type, nattr)); } -/// Verifies if the stride matches proper tile access. -LogicalResult verifyStride(MemRefType mType) { - if (mType.getRank() < 2) - return failure(); - int64_t last = mType.getRank() - 1; - int64_t offset; - SmallVector strides; - if (failed(getStridesAndOffset(mType, strides, offset)) || strides[last] != 1) - return failure(); - return success(); -} - /// Maps the 2-dim memref shape to the 64-bit stride. 
Note that the buffer /// shape may "envelop" the actual tile shape, and may be dynamically sized. -Value getStride(ConversionPatternRewriter &rewriter, - const LLVMTypeConverter &typeConverter, MemRefType mType, - Value base, Location loc) { - assert(mType.getRank() >= 2); - int64_t last = mType.getRank() - 1; +/// Returns failure if proper stride couldn't be found. +FailureOr getStride(ConversionPatternRewriter &rewriter, + const LLVMTypeConverter &typeConverter, + MemRefType mType, Value base, Location loc) { + if (mType.getRank() < 2) + return failure(); + int64_t preLast = mType.getRank() - 2; Type llvmInt64Type = IntegerType::get(&typeConverter.getContext(), 64); unsigned width = mType.getElementType().getIntOrFloatBitWidth(); assert(llvm::isPowerOf2_64(width) && width >= 8); unsigned bytes = width >> 3; - if (mType.isDynamicDim(last)) { - // Dynamic size needs code to compute the stride at runtime. + int64_t offset; + SmallVector strides; + if (failed(getStridesAndOffset(mType, strides, offset)) || + strides.back() != 1) + return failure(); + if (strides[preLast] == ShapedType::kDynamic) { + // Dynamic stride needs code to compute the stride at runtime. MemRefDescriptor memrefDescriptor(base); auto attr = rewriter.getI64IntegerAttr(bytes); Value scale = rewriter.create(loc, llvmInt64Type, attr); - return rewriter.create( - loc, llvmInt64Type, scale, memrefDescriptor.size(rewriter, loc, last)); + return rewriter + .create(loc, llvmInt64Type, scale, + memrefDescriptor.stride(rewriter, loc, preLast)) + .getResult(); } - // Use direct constant for static size. - auto attr = rewriter.getI64IntegerAttr(mType.getDimSize(last) * bytes); - return rewriter.create(loc, llvmInt64Type, attr); + // Use direct constant for static stride. 
+ auto attr = rewriter.getI64IntegerAttr(strides[preLast] * bytes); + return rewriter.create(loc, llvmInt64Type, attr) + .getResult(); } struct TileZeroConversion : public ConvertOpToLLVMPattern { @@ -102,16 +100,16 @@ struct TileLoadConversion : public ConvertOpToLLVMPattern { std::pair tsz = getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc()); // Determine stride. - if (failed(verifyStride(mType))) + auto stride = getStride(rewriter, *getTypeConverter(), mType, + adaptor.getBase(), op.getLoc()); + if (failed(stride)) return failure(); - Value stride = getStride(rewriter, *getTypeConverter(), mType, - adaptor.getBase(), op.getLoc()); // Replace operation with intrinsic. Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(), adaptor.getIndices(), rewriter); Type resType = typeConverter->convertType(vType); rewriter.replaceOpWithNewOp( - op, resType, tsz.first, tsz.second, ptr, stride); + op, resType, tsz.first, tsz.second, ptr, stride.value()); return success(); } }; @@ -128,15 +126,15 @@ struct TileStoreConversion : public ConvertOpToLLVMPattern { std::pair tsz = getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc()); // Determine stride. - if (failed(verifyStride(mType))) + auto stride = getStride(rewriter, *getTypeConverter(), mType, + adaptor.getBase(), op.getLoc()); + if (failed(stride)) return failure(); - Value stride = getStride(rewriter, *getTypeConverter(), mType, - adaptor.getBase(), op.getLoc()); // Replace operation with intrinsic. 
Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(), adaptor.getIndices(), rewriter); rewriter.replaceOpWithNewOp( - op, tsz.first, tsz.second, ptr, stride, adaptor.getVal()); + op, tsz.first, tsz.second, ptr, stride.value(), adaptor.getVal()); return success(); } }; diff --git a/mlir/test/Dialect/AMX/legalize-for-llvm.mlir b/mlir/test/Dialect/AMX/legalize-for-llvm.mlir index 992203153939f..3cacbd0044f82 100644 --- a/mlir/test/Dialect/AMX/legalize-for-llvm.mlir +++ b/mlir/test/Dialect/AMX/legalize-for-llvm.mlir @@ -43,3 +43,31 @@ func.func @mulf(%arg0: memref, %arg1: memref) { amx.tile_store %arg1[%0, %0], %4 : memref, vector<16x16xf32> return } + +// CHECK-LABEL: strides( +// CHECK: %[[CST_64_1:.+]] = llvm.mlir.constant(64 : i64) : i64 +// CHECK: "amx.tileloadd64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_64_1]] +// CHECK: %[[CST_128_1:.+]] = llvm.mlir.constant(128 : i64) : i64 +// CHECK: "amx.tileloadd64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_128_1]] +// CHECK: llvm.mlir.constant(2 : i64) : i64 +// CHECK: llvm.extractvalue %{{.+}}[4, 0] +// CHECK: %[[STRIDE_1:.+]] = llvm.mul +// CHECK: "amx.tileloadd64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STRIDE_1]] +// CHECK: %[[CST_64_2:.+]] = llvm.mlir.constant(64 : i64) : i64 +// CHECK: "amx.tilestored64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_64_2]] +// CHECK: %[[CST_128_2:.+]] = llvm.mlir.constant(128 : i64) : i64 +// CHECK: "amx.tilestored64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_128_2]] +// CHECK: llvm.mlir.constant(2 : i64) : i64 +// CHECK: llvm.extractvalue %{{.+}}[4, 0] +// CHECK: %[[STRIDE_2:.+]] = llvm.mul +// CHECK: "amx.tilestored64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STRIDE_2]] +func.func @strides(%arg0: memref<16x32xbf16>, %arg1: memref<16x32xbf16, strided<[64, 1]>>, %arg2: memref<16x32xbf16, strided<[?, 1]>>) { + %0 = arith.constant 0 : index + %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xbf16> into vector<16x32xbf16> + %2 = amx.tile_load %arg1[%0, %0] : memref<16x32xbf16, strided<[64, 1]>> into vector<16x32xbf16> + %3 = 
amx.tile_load %arg2[%0, %0] : memref<16x32xbf16, strided<[?, 1]>> into vector<16x32xbf16> + amx.tile_store %arg0[%0, %0], %3 : memref<16x32xbf16>, vector<16x32xbf16> + amx.tile_store %arg1[%0, %0], %1 : memref<16x32xbf16, strided<[64, 1]>>, vector<16x32xbf16> + amx.tile_store %arg2[%0, %0], %2 : memref<16x32xbf16, strided<[?, 1]>>, vector<16x32xbf16> + return +} From c616f24bcb00150fedc999d47933603e099dd659 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Wed, 30 Oct 2024 20:49:21 +0100 Subject: [PATCH 50/69] [SPIR-V] Do instruction selection for G_BITCAST on an earlier stage (#114216) This PR implements instruction selection for G_BITCAST on an earlier stage to avoid MachineVerifier complaints about the subtle semantic difference between G_BITCAST and OpBitcast. We do instruction selections for OpBitcast after IR Translation instead of calling MIB.buildBitcast() generating the general op code G_BITCAST, because when MachineVerifier validates G_BITCAST we see a check of a kind: 'if Source Type is equal to Destination Type then report error "bitcast must change the type"'. This doesn't take into account the notion of a typed pointer that is important for SPIR-V where a user may and should use bitcast between pointers with different pointee types (https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast). It's important for correct lowering in SPIR-V, because interpretation of the data type is not left to instructions that utilize the pointer, but encoded by the pointer declaration, and the SPIRV target can and must handle the declaration and use of pointers that specify the type of data they point to. It's not feasible to improve validation of G_BITCAST using just information provided by low level types of source and destination. Therefore we don't produce G_BITCAST as the general op code with semantics different from OpBitcast, but rather lower to OpBitcast immediately.
See discussion in https://github.com/llvm/llvm-project/pull/110270 for even more context. --- llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp | 61 ++++++++++++++++--- .../pointers/phi-valid-operand-types-rev.ll | 5 +- .../SPIRV/pointers/phi-valid-operand-types.ll | 5 +- 3 files changed, 54 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 3c2af34dd5523..cc34cf877dea9 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -165,6 +165,57 @@ static MachineInstr *findAssignTypeInstr(Register Reg, return nullptr; } +static void buildOpBitcast(SPIRVGlobalRegistry *GR, MachineIRBuilder &MIB, + Register ResVReg, Register OpReg) { + SPIRVType *ResType = GR->getSPIRVTypeForVReg(ResVReg); + SPIRVType *OpType = GR->getSPIRVTypeForVReg(OpReg); + assert(ResType && OpType && "Operand types are expected"); + if (!GR->isBitcastCompatible(ResType, OpType)) + report_fatal_error("incompatible result and operand types in a bitcast"); + MachineRegisterInfo *MRI = MIB.getMRI(); + if (!MRI->getRegClassOrNull(ResVReg)) + MRI->setRegClass(ResVReg, GR->getRegClass(ResType)); + MIB.buildInstr(SPIRV::OpBitcast) + .addDef(ResVReg) + .addUse(GR->getSPIRVTypeID(ResType)) + .addUse(OpReg); +} + +// We do instruction selections early instead of calling MIB.buildBitcast() +// generating the general op code G_BITCAST. When MachineVerifier validates +// G_BITCAST we see a check of a kind: if Source Type is equal to Destination +// Type then report error "bitcast must change the type". This doesn't take into +// account the notion of a typed pointer that is important for SPIR-V where a +// user may and should use bitcast between pointers with different pointee types +// (https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast). 
+// It's important for correct lowering in SPIR-V, because interpretation of the +// data type is not left to instructions that utilize the pointer, but encoded +// by the pointer declaration, and the SPIRV target can and must handle the +// declaration and use of pointers that specify the type of data they point to. +// It's not feasible to improve validation of G_BITCAST using just information +// provided by low level types of source and destination. Therefore we don't +// produce G_BITCAST as the general op code with semantics different from +// OpBitcast, but rather lower to OpBitcast immediately. As for now, the only +// difference would be that CombinerHelper couldn't transform known patterns +// around G_BUILD_VECTOR. See discussion +// in https://github.com/llvm/llvm-project/pull/110270 for even more context. +static void selectOpBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + SmallVector ToErase; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() != TargetOpcode::G_BITCAST) + continue; + MIB.setInsertPt(*MI.getParent(), MI); + buildOpBitcast(GR, MIB, MI.getOperand(0).getReg(), + MI.getOperand(1).getReg()); + ToErase.push_back(&MI); + } + } + for (MachineInstr *MI : ToErase) + MI->eraseFromParent(); +} + static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, MachineIRBuilder MIB) { // Get access to information about available extensions @@ -202,15 +253,6 @@ static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, } else { GR->assignSPIRVTypeToVReg(AssignedPtrType, Def, MF); MIB.buildBitcast(Def, Source); - // MachineVerifier requires that bitcast must change the type. - // Change AddressSpace if needed to hint that Def and Source points to - // different types: this doesn't change actual code generation. 
- LLT DefType = MRI->getType(Def); - if (DefType == MRI->getType(Source)) - MRI->setType(Def, - LLT::pointer((DefType.getAddressSpace() + 1) % - SPIRVSubtarget::MaxLegalAddressSpace, - GR->getPointerSize())); } } } @@ -1007,6 +1049,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) { removeImplicitFallthroughs(MF, MIB); insertSpirvDecorations(MF, MIB); insertInlineAsm(MF, GR, ST, MIB); + selectOpBitcasts(MF, GR, MIB); return true; } diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll index 6fa3f4e53cc59..8d14c3a359963 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll @@ -1,7 +1,4 @@ -; The goal of the test case is to ensure that OpPhi is consistent with respect to operand types. -; -verify-machineinstrs is not available due to mutually exclusive requirements for G_BITCAST and G_PHI. - -; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Char:]] = OpTypeInt 8 0 diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll index 4fbaae2556730..07824d4ed6cd8 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll @@ -1,7 +1,4 @@ -; The goal of the test case is to ensure that OpPhi is consistent with respect to operand types. -; -verify-machineinstrs is not available due to mutually exclusive requirements for G_BITCAST and G_PHI. 
- -; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Char:]] = OpTypeInt 8 0 From b1320d36339e38b073088fd45013a3c692adb301 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 30 Oct 2024 12:59:59 -0700 Subject: [PATCH 51/69] [libc][i386] setjmp/longjmp (#112437) Link: #93709 --- libc/include/llvm-libc-types/jmp_buf.h | 7 +++++++ libc/src/setjmp/x86_64/longjmp.cpp | 25 ++++++++++++++++++++++- libc/src/setjmp/x86_64/setjmp.cpp | 28 +++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/libc/include/llvm-libc-types/jmp_buf.h b/libc/include/llvm-libc-types/jmp_buf.h index 60e033c6c65a9..f246e6491cf55 100644 --- a/libc/include/llvm-libc-types/jmp_buf.h +++ b/libc/include/llvm-libc-types/jmp_buf.h @@ -19,6 +19,13 @@ typedef struct { __UINT64_TYPE__ r15; __UINTPTR_TYPE__ rsp; __UINTPTR_TYPE__ rip; +#elif defined(__i386__) + long ebx; + long esi; + long edi; + long ebp; + long esp; + long eip; #elif defined(__riscv) /* Program counter. 
*/ long int __pc; diff --git a/libc/src/setjmp/x86_64/longjmp.cpp b/libc/src/setjmp/x86_64/longjmp.cpp index c293c55a6f9fb..143c9deb11e9a 100644 --- a/libc/src/setjmp/x86_64/longjmp.cpp +++ b/libc/src/setjmp/x86_64/longjmp.cpp @@ -11,12 +11,34 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" -#if !defined(LIBC_TARGET_ARCH_IS_X86_64) +#if !defined(LIBC_TARGET_ARCH_IS_X86) #error "Invalid file include" #endif namespace LIBC_NAMESPACE_DECL { +#ifdef __i386__ +[[gnu::naked]] +LLVM_LIBC_FUNCTION(void, longjmp, (jmp_buf, int)) { + asm(R"( + mov 0x4(%%esp), %%ecx + mov 0x8(%%esp), %%eax + cmpl $0x1, %%eax + adcl $0x0, %%eax + + mov %c[ebx](%%ecx), %%ebx + mov %c[esi](%%ecx), %%esi + mov %c[edi](%%ecx), %%edi + mov %c[ebp](%%ecx), %%ebp + mov %c[esp](%%ecx), %%esp + + jmp *%c[eip](%%ecx) + )" ::[ebx] "i"(offsetof(__jmp_buf, ebx)), + [esi] "i"(offsetof(__jmp_buf, esi)), [edi] "i"(offsetof(__jmp_buf, edi)), + [ebp] "i"(offsetof(__jmp_buf, ebp)), [esp] "i"(offsetof(__jmp_buf, esp)), + [eip] "i"(offsetof(__jmp_buf, eip))); +} +#else [[gnu::naked]] LLVM_LIBC_FUNCTION(void, longjmp, (jmp_buf, int)) { asm(R"( @@ -38,5 +60,6 @@ LLVM_LIBC_FUNCTION(void, longjmp, (jmp_buf, int)) { [r15] "i"(offsetof(__jmp_buf, r15)), [rsp] "i"(offsetof(__jmp_buf, rsp)), [rip] "i"(offsetof(__jmp_buf, rip))); } +#endif } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/setjmp/x86_64/setjmp.cpp b/libc/src/setjmp/x86_64/setjmp.cpp index f6e82642edd7d..5ac10fa87b39a 100644 --- a/libc/src/setjmp/x86_64/setjmp.cpp +++ b/libc/src/setjmp/x86_64/setjmp.cpp @@ -11,12 +11,37 @@ #include "src/__support/macros/config.h" #include "src/setjmp/setjmp_impl.h" -#if !defined(LIBC_TARGET_ARCH_IS_X86_64) +#if !defined(LIBC_TARGET_ARCH_IS_X86) #error "Invalid file include" #endif namespace LIBC_NAMESPACE_DECL { +#ifdef __i386__ +[[gnu::naked]] +LLVM_LIBC_FUNCTION(int, setjmp, (jmp_buf buf)) { + asm(R"( + mov 4(%%esp), %%eax + + mov %%ebx, %c[ebx](%%eax) + mov %%esi, %c[esi](%%eax) + mov 
%%edi, %c[edi](%%eax) + mov %%ebp, %c[ebp](%%eax) + + lea 4(%%esp), %%ecx + mov %%ecx, %c[esp](%%eax) + + mov (%%esp), %%ecx + mov %%ecx, %c[eip](%%eax) + + xorl %%eax, %%eax + retl)" ::[ebx] "i"(offsetof(__jmp_buf, ebx)), + [esi] "i"(offsetof(__jmp_buf, esi)), [edi] "i"(offsetof(__jmp_buf, edi)), + [ebp] "i"(offsetof(__jmp_buf, ebp)), [esp] "i"(offsetof(__jmp_buf, esp)), + [eip] "i"(offsetof(__jmp_buf, eip)) + : "eax", "ecx"); +} +#else [[gnu::naked]] LLVM_LIBC_FUNCTION(int, setjmp, (jmp_buf buf)) { asm(R"( @@ -41,5 +66,6 @@ LLVM_LIBC_FUNCTION(int, setjmp, (jmp_buf buf)) { [rip] "i"(offsetof(__jmp_buf, rip)) : "rax"); } +#endif } // namespace LIBC_NAMESPACE_DECL From e89f8212333ea8e9b534fb32382bb5cacae71b35 Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Wed, 30 Oct 2024 13:05:40 -0700 Subject: [PATCH 52/69] [NFC][NVPTX] Cleanup getPreferredVectorAction() (#114115) `v2*16` is a legal type in NVPTX. Thus, this is dead code. --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index a95cba586b8fc..01abf9591e342 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1335,8 +1335,6 @@ NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const { if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) return TypeSplitVector; - if (Isv2x16VT(VT)) - return TypeLegal; return TargetLoweringBase::getPreferredVectorAction(VT); } From 0167a92e28d5c8eac00595300a1366bdce28678d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Oct 2024 13:06:58 -0700 Subject: [PATCH 53/69] [RISCV] Use unsigned instead of int64_t for two small positive shift amounts. 
NFC --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index dc3f8254cb4e0..6291842e071a3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -693,7 +693,7 @@ bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { // The constants that can be encoded in the THeadMemIdx instructions // are of the form (sign_extend(imm5) << imm2). - int64_t Shift; + unsigned Shift; for (Shift = 0; Shift < 4; Shift++) if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) break; @@ -3366,7 +3366,7 @@ bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2) { if (auto *C = dyn_cast(N)) { int64_t Offset = C->getSExtValue(); - int64_t Shift; + unsigned Shift; for (Shift = 0; Shift < 4; Shift++) if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) break; From 1c2824e3a44f6c7cfd3e236597c4af671ce7c95e Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Wed, 30 Oct 2024 20:23:20 +0000 Subject: [PATCH 54/69] [NFC][Coro] Add helpers for coro cloning with a TimeTraceScope (#112948) A helper (2 overloads) that consolidates corocloner creation and the actual cloning. The helpers create a TimeTraceScope to make it easier to see how long the cloning takes. 
Extracted from #109032 (commit 1) --- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 58 +++++++++++++------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 0395ee62ae988..070df429bfc26 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -60,6 +60,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Coroutines/ABI.h" #include "llvm/Transforms/Coroutines/CoroInstr.h" @@ -118,7 +119,6 @@ class CoroCloner { TargetTransformInfo &TTI; -public: /// Create a cloner for a switch lowering. CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, Kind FKind, TargetTransformInfo &TTI) @@ -140,6 +140,30 @@ class CoroCloner { assert(ActiveSuspend && "need active suspend point for continuation"); } +public: + /// Create a clone for a switch lowering. + static Function *createClone(Function &OrigF, const Twine &Suffix, + coro::Shape &Shape, Kind FKind, + TargetTransformInfo &TTI) { + TimeTraceScope FunctionScope("CoroCloner"); + + CoroCloner Cloner(OrigF, Suffix, Shape, FKind, TTI); + Cloner.create(); + return Cloner.getFunction(); + } + + /// Create a clone for a continuation lowering. 
+ static Function *createClone(Function &OrigF, const Twine &Suffix, + coro::Shape &Shape, Function *NewF, + AnyCoroSuspendInst *ActiveSuspend, + TargetTransformInfo &TTI) { + TimeTraceScope FunctionScope("CoroCloner"); + + CoroCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI); + Cloner.create(); + return Cloner.getFunction(); + } + Function *getFunction() const { assert(NewF != nullptr && "declaration not yet set"); return NewF; @@ -1466,13 +1490,16 @@ struct SwitchCoroutineSplitter { TargetTransformInfo &TTI) { assert(Shape.ABI == coro::ABI::Switch); + // Create a resume clone by cloning the body of the original function, + // setting new entry block and replacing coro.suspend an appropriate value + // to force resume or cleanup pass for every suspend point. createResumeEntryBlock(F, Shape); - auto *ResumeClone = - createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI); - auto *DestroyClone = - createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI); - auto *CleanupClone = - createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI); + auto *ResumeClone = CoroCloner::createClone( + F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI); + auto *DestroyClone = CoroCloner::createClone( + F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI); + auto *CleanupClone = CoroCloner::createClone( + F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI); postSplitCleanup(*ResumeClone); postSplitCleanup(*DestroyClone); @@ -1562,17 +1589,6 @@ struct SwitchCoroutineSplitter { } private: - // Create a resume clone by cloning the body of the original function, setting - // new entry block and replacing coro.suspend an appropriate value to force - // resume or cleanup pass for every suspend point. 
- static Function *createClone(Function &F, const Twine &Suffix, - coro::Shape &Shape, CoroCloner::Kind FKind, - TargetTransformInfo &TTI) { - CoroCloner Cloner(F, Suffix, Shape, FKind, TTI); - Cloner.create(); - return Cloner.getFunction(); - } - // Create an entry block for a resume function with a switch that will jump to // suspend points. static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { @@ -1872,7 +1888,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape, auto *Suspend = Shape.CoroSuspends[Idx]; auto *Clone = Clones[Idx]; - CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create(); + CoroCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone, Suspend, + TTI); } } @@ -2001,7 +2018,8 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, auto Suspend = Shape.CoroSuspends[i]; auto Clone = Clones[i]; - CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create(); + CoroCloner::createClone(F, "resume." + Twine(i), Shape, Clone, Suspend, + TTI); } } From 84a78abdf5999e58e4120e20594ac2ad37472295 Mon Sep 17 00:00:00 2001 From: Artem Pianykh Date: Wed, 30 Oct 2024 20:23:43 +0000 Subject: [PATCH 55/69] [NFC][Utils] Extract CloneFunctionAttributesInto from CloneFunctionInto (#112976) This patch is a part of step-by-step refactoring of CloneFunctionInto. The goal is to extract reusable pieces out of it that will be later used to optimize function cloning e.g. in coroutine processing. 
Extracted from #109032 (commit 2) --- llvm/include/llvm/Transforms/Utils/Cloning.h | 8 +++ llvm/lib/Transforms/Utils/CloneFunction.cpp | 54 ++++++++++++-------- 2 files changed, 40 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index a4be24e32c527..1e8ef0102450e 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -175,6 +175,14 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); +/// Clone OldFunc's attributes into NewFunc, transforming values based on the +/// mappings in VMap. +void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + ValueMapTypeRemapper *TypeMapper = nullptr, + ValueMaterializer *Materializer = nullptr); + void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 5dc82a8dfb2db..a2d38717f38d1 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -87,28 +87,14 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, return NewBB; } -// Clone OldFunc into NewFunc, transforming the old arguments into references to -// VMap values. 
-// -void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - CloneFunctionChangeType Changes, - SmallVectorImpl &Returns, - const char *NameSuffix, ClonedCodeInfo *CodeInfo, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat); - assert(NameSuffix && "NameSuffix cannot be null!"); - -#ifndef NDEBUG - for (const Argument &I : OldFunc->args()) - assert(VMap.count(&I) && "No mapping from source argument specified!"); -#endif - - bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly; - - // Copy all attributes other than those stored in the AttributeList. We need - // to remap the parameter indices of the AttributeList. +void llvm::CloneFunctionAttributesInto(Function *NewFunc, + const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + // Copy all attributes other than those stored in Function's AttributeList + // which holds e.g. parameters and return value attributes. AttributeList NewAttrs = NewFunc->getAttributes(); NewFunc->copyAttributesFrom(OldFunc); NewFunc->setAttributes(NewAttrs); @@ -140,6 +126,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Clone any argument attributes that are present in the VMap. for (const Argument &OldArg : OldFunc->args()) { if (Argument *NewArg = dyn_cast(VMap[&OldArg])) { + // Remap the parameter indices. NewArgAttrs[NewArg->getArgNo()] = OldAttrs.getParamAttrs(OldArg.getArgNo()); } @@ -148,6 +135,29 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, NewFunc->setAttributes( AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(), NewArgAttrs)); +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// VMap values. 
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + CloneFunctionChangeType Changes, + SmallVectorImpl &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat); + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (const Argument &I : OldFunc->args()) + assert(VMap.count(&I) && "No mapping from source argument specified!"); +#endif + + bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly; + + CloneFunctionAttributesInto(NewFunc, OldFunc, VMap, ModuleLevelChanges, + TypeMapper, Materializer); // Everything else beyond this point deals with function instructions, // so if we are dealing with a function declaration, we're done. From bfe486fe764667d514124faf2b39afb7e7322640 Mon Sep 17 00:00:00 2001 From: Renaud Kauffmann Date: Wed, 30 Oct 2024 13:24:47 -0700 Subject: [PATCH 56/69] Passing descriptors by reference to CUDA runtime calls (#114288) Passing a descriptor as a `const Descriptor &` or a `const Descriptor *` generates a FIR signature where the box is passed by value. This is an issue, as it requires a load of the box to be passed. But since, ultimately, all boxes are passed by reference, a temporary is generated in LLVM and the reference to the temporary is passed. The boxes' addresses are registered with the CUDA runtime but the temporaries are not, thus preventing the runtime from properly mapping a host side address to its device side counterpart. To address this issue, this PR changes the signatures of the transfer functions to pass a descriptor as a `Descriptor *`, which will in turn generate a FIR signature that takes a box reference as an argument.
--- flang/include/flang/Runtime/CUDA/memory.h | 9 +++--- .../Optimizer/Transforms/CUFOpConversion.cpp | 11 +++----- flang/runtime/CUDA/memory.cpp | 9 +++--- flang/test/Fir/CUDA/cuda-data-transfer.fir | 28 ++++++++----------- 4 files changed, 23 insertions(+), 34 deletions(-) diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h index 3c3ae73d4ad7a..fb48152d70718 100644 --- a/flang/include/flang/Runtime/CUDA/memory.h +++ b/flang/include/flang/Runtime/CUDA/memory.h @@ -36,19 +36,18 @@ void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a pointer to a descriptor. -void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src, +void RTDECL(CUFDataTransferDescPtr)(Descriptor *dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a descriptor to a pointer. -void RTDECL(CUFDataTransferPtrDesc)(void *dst, const Descriptor &src, +void RTDECL(CUFDataTransferPtrDesc)(void *dst, Descriptor *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a descriptor to a descriptor. 
-void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dst, - const Descriptor &src, unsigned mode, const char *sourceFile = nullptr, - int sourceLine = 0); +void RTDECL(CUFDataTransferDescDesc)(Descriptor *dst, Descriptor *src, + unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index f1f3a95b220df..e3e441360e949 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -529,8 +529,8 @@ struct CUFDataTransferOpConversion mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); mlir::Value sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); - mlir::Value dst = builder.loadIfRef(loc, op.getDst()); - mlir::Value src = builder.loadIfRef(loc, op.getSrc()); + mlir::Value dst = op.getDst(); + mlir::Value src = op.getSrc(); llvm::SmallVector args{fir::runtime::createArguments( builder, loc, fTy, dst, src, modeValue, sourceFile, sourceLine)}; builder.create(loc, func, args); @@ -603,11 +603,8 @@ struct CUFDataTransferOpConversion mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); mlir::Value sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(5)); - mlir::Value dst = - dstIsDesc ? builder.loadIfRef(loc, op.getDst()) : op.getDst(); - mlir::Value src = mlir::isa(srcTy) - ? 
builder.loadIfRef(loc, op.getSrc()) - : op.getSrc(); + mlir::Value dst = op.getDst(); + mlir::Value src = op.getSrc(); llvm::SmallVector args{ fir::runtime::createArguments(builder, loc, fTy, dst, src, bytes, modeValue, sourceFile, sourceLine)}; diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index fc48b4343eea9..4778a4ae77683 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -73,23 +73,22 @@ void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); } -void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr, +void RTDEF(CUFDataTransferDescPtr)(Descriptor *desc, void *addr, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a pointer to a descriptor"); } -void RTDEF(CUFDataTransferPtrDesc)(void *addr, const Descriptor &desc, +void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a descriptor to a pointer"); } -void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dstDesc, - const Descriptor &srcDesc, unsigned mode, const char *sourceFile, - int sourceLine) { +void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, + unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer between two descriptors"); diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index c33c50115b9fc..b99e09fb76468 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -15,11 +15,9 @@ func.func 
@_QPsub1() { // CHECK-LABEL: func.func @_QPsub1() // CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Eadev"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Eahost"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) -// CHECK: %[[AHOST_LOAD:.*]] = fir.load %[[AHOST]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[AHOST_BOX:.*]] = fir.convert %[[AHOST_LOAD]] : (!fir.box>>) -> !fir.box -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box -// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[AHOST_BOX]], %[[ADEV_BOX]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.box, i32, !fir.ref, i32) -> none +// CHECK: %[[AHOST_BOX:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> +// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[AHOST_BOX]], %[[ADEV_BOX]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref>, i32, !fir.ref, i32) -> none func.func @_QPsub2() { %0 = cuf.alloc !fir.box>> {bindc_name = "adev", data_attr = #cuf.cuda, uniq_name = "_QFsub2Eadev"} -> !fir.ref>>> @@ -76,19 +74,17 @@ func.func @_QPsub4() { // CHECK: %[[NBELEM:.*]] = arith.constant 10 : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>) -> !fir.llvm_ptr // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call 
@_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none // CHECK: %[[NBELEM:.*]] = arith.constant 10 : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>) -> !fir.llvm_ptr -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.box, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.ref>, i64, i32, !fir.ref, i32) -> none func.func @_QPsub5(%arg0: !fir.ref {fir.bindc_name = "n"}) { %0 = fir.dummy_scope : !fir.dscope @@ -122,19 +118,17 @@ func.func @_QPsub5(%arg0: !fir.ref {fir.bindc_name = "n"}) { // CHECK: %[[NBELEM:.*]] = arith.muli %[[I1]], %[[I2]] : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#1 : (!fir.ref>) -> !fir.llvm_ptr // CHECK: 
%[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none // CHECK: %[[NBELEM:.*]] = arith.muli %[[I1]], %[[I2]] : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#1 : (!fir.ref>) -> !fir.llvm_ptr -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.box, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.ref>, i64, i32, !fir.ref, i32) -> none func.func @_QPsub6() { %0 = cuf.alloc i32 {bindc_name = "idev", data_attr = #cuf.cuda, uniq_name = "_QFsub6Eidev"} -> !fir.ref From f7c36d2f88e05a1747fa7916ad2fefdd9d459a55 Mon Sep 17 00:00:00 2001 From: Wanyi Date: Wed, 30 Oct 2024 17:00:40 -0400 Subject: [PATCH 57/69] [lldb] Fix API test for file redirection to existing files (#114119) API test failed for remote platform in [#112657](https://github.com/llvm/llvm-project/pull/112657) Previously when putting files onto remote platform, I used `platform file write -d ` which actually required a `platform file open ` first in order to obtain a 
file descriptor. eg. in file [TestGDBRemotePlatformFile.py](https://github.com/llvm/llvm-project/blob/94e7d9c0bfe517507ea08b00fb00c32fb2837a82/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemotePlatformFile.py#L24-L32) To fix this, use the `platform put-file` method, which is used in the `redirect_stdin` from this test already. --- .../python_api/process/io/TestProcessIO.py | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/lldb/test/API/python_api/process/io/TestProcessIO.py b/lldb/test/API/python_api/process/io/TestProcessIO.py index 3b5c7c48c51f4..5d9727add399b 100644 --- a/lldb/test/API/python_api/process/io/TestProcessIO.py +++ b/lldb/test/API/python_api/process/io/TestProcessIO.py @@ -99,31 +99,38 @@ def test_stdout_stderr_redirection(self): @expectedFlakeyLinux(bugnumber="llvm.org/pr26437") @skipIfDarwinEmbedded # debugserver can't create/write files on the device def test_stdout_stderr_redirection_to_existing_files(self): - """Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR without redirecting STDIN to output files already exist.""" + """Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR redirect to output files already exist.""" self.setup_test() self.build() self.create_target() - self.write_file_with_placeholder(self.output_file) - self.write_file_with_placeholder(self.error_file) - self.redirect_stdout() - self.redirect_stderr() - self.run_process(True) - output = self.read_output_file_and_delete() - error = self.read_error_file_and_delete() - self.check_process_output(output, error) - def write_file_with_placeholder(self, target_file): + # Create the output and error files with placeholder placeholder = "This content should be overwritten." 
+ # Local file directory and working directory are the same for local debugging + f = open(self.local_output_file, "w") + f.write(placeholder) + f.close() + f = open(self.local_error_file, "w") + f.write(placeholder) + f.close() if lldb.remote_platform: self.runCmd( - 'platform file write "{target}" -d "{data}"'.format( - target=target_file, data=placeholder + 'platform put-file "{local}" "{remote}"'.format( + local=self.local_output_file, remote=self.output_file + ) + ) + self.runCmd( + 'platform put-file "{local}" "{remote}"'.format( + local=self.local_error_file, remote=self.error_file ) ) - else: - f = open(target_file, "w") - f.write(placeholder) - f.close() + + self.redirect_stdout() + self.redirect_stderr() + self.run_process(True) + output = self.read_output_file_and_delete() + error = self.read_error_file_and_delete() + self.check_process_output(output, error) # target_file - path on local file system or remote file system if running remote # local_file - path on local system From 4afa9787560d00474c6ab600be70d59fa7eae87f Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Wed, 30 Oct 2024 18:04:26 -0300 Subject: [PATCH 58/69] Revert "[Clang][Sema] Always use latest redeclaration of primary template" (#114304) Clang importer doesn't seem to work well with this change, see discussion in the original PR. 
Reverts llvm/llvm-project#114258 --- clang/include/clang/AST/DeclTemplate.h | 52 +++++++++-- clang/lib/AST/Decl.cpp | 10 +-- clang/lib/AST/DeclCXX.cpp | 4 +- clang/lib/AST/DeclTemplate.cpp | 56 +----------- clang/lib/Sema/SemaDecl.cpp | 4 +- clang/lib/Sema/SemaInit.cpp | 2 +- clang/lib/Sema/SemaTemplateInstantiate.cpp | 14 +-- clang/test/AST/ast-dump-decl.cpp | 2 +- .../CXX/temp/temp.spec/temp.expl.spec/p7.cpp | 87 ------------------- 9 files changed, 66 insertions(+), 165 deletions(-) diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 0ca3fd48e81cf..a572e3380f165 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -857,6 +857,16 @@ class RedeclarableTemplateDecl : public TemplateDecl, /// \endcode bool isMemberSpecialization() const { return Common.getInt(); } + /// Determines whether any redeclaration of this template was + /// a specialization of a member template. + bool hasMemberSpecialization() const { + for (const auto *D : redecls()) { + if (D->isMemberSpecialization()) + return true; + } + return false; + } + /// Note that this member template is a specialization. void setMemberSpecialization() { assert(!isMemberSpecialization() && "already a member specialization"); @@ -1955,7 +1965,13 @@ class ClassTemplateSpecializationDecl : public CXXRecordDecl, /// specialization which was specialized by this. 
llvm::PointerUnion - getSpecializedTemplateOrPartial() const; + getSpecializedTemplateOrPartial() const { + if (const auto *PartialSpec = + SpecializedTemplate.dyn_cast()) + return PartialSpec->PartialSpecialization; + + return SpecializedTemplate.get(); + } /// Retrieve the set of template arguments that should be used /// to instantiate members of the class template or class template partial @@ -2192,6 +2208,17 @@ class ClassTemplatePartialSpecializationDecl return InstantiatedFromMember.getInt(); } + /// Determines whether any redeclaration of this this class template partial + /// specialization was a specialization of a member partial specialization. + bool hasMemberSpecialization() const { + for (const auto *D : redecls()) { + if (cast(D) + ->isMemberSpecialization()) + return true; + } + return false; + } + /// Note that this member template is a specialization. void setMemberSpecialization() { return InstantiatedFromMember.setInt(true); } @@ -2713,7 +2740,13 @@ class VarTemplateSpecializationDecl : public VarDecl, /// Retrieve the variable template or variable template partial /// specialization which was specialized by this. llvm::PointerUnion - getSpecializedTemplateOrPartial() const; + getSpecializedTemplateOrPartial() const { + if (const auto *PartialSpec = + SpecializedTemplate.dyn_cast()) + return PartialSpec->PartialSpecialization; + + return SpecializedTemplate.get(); + } /// Retrieve the set of template arguments that should be used /// to instantiate the initializer of the variable template or variable @@ -2947,6 +2980,18 @@ class VarTemplatePartialSpecializationDecl return InstantiatedFromMember.getInt(); } + /// Determines whether any redeclaration of this this variable template + /// partial specialization was a specialization of a member partial + /// specialization. 
+ bool hasMemberSpecialization() const { + for (const auto *D : redecls()) { + if (cast(D) + ->isMemberSpecialization()) + return true; + } + return false; + } + /// Note that this member template is a specialization. void setMemberSpecialization() { return InstantiatedFromMember.setInt(true); } @@ -3119,9 +3164,6 @@ class VarTemplateDecl : public RedeclarableTemplateDecl { return makeSpecIterator(getSpecializations(), true); } - /// Merge \p Prev with our RedeclarableTemplateDecl::Common. - void mergePrevDecl(VarTemplateDecl *Prev); - // Implement isa/cast/dyncast support static bool classof(const Decl *D) { return classofKind(D->getKind()); } static bool classofKind(Kind K) { return K == VarTemplate; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index cd173d1726379..86913763ef9ff 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2708,7 +2708,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { if (isTemplateInstantiation(VDTemplSpec->getTemplateSpecializationKind())) { auto From = VDTemplSpec->getInstantiatedFrom(); if (auto *VTD = From.dyn_cast()) { - while (!VTD->isMemberSpecialization()) { + while (!VTD->hasMemberSpecialization()) { if (auto *NewVTD = VTD->getInstantiatedFromMemberTemplate()) VTD = NewVTD; else @@ -2718,7 +2718,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { } if (auto *VTPSD = From.dyn_cast()) { - while (!VTPSD->isMemberSpecialization()) { + while (!VTPSD->hasMemberSpecialization()) { if (auto *NewVTPSD = VTPSD->getInstantiatedFromMember()) VTPSD = NewVTPSD; else @@ -2732,7 +2732,7 @@ VarDecl *VarDecl::getTemplateInstantiationPattern() const { // If this is the pattern of a variable template, find where it was // instantiated from. FIXME: Is this necessary? 
if (VarTemplateDecl *VTD = VD->getDescribedVarTemplate()) { - while (!VTD->isMemberSpecialization()) { + while (!VTD->hasMemberSpecialization()) { if (auto *NewVTD = VTD->getInstantiatedFromMemberTemplate()) VTD = NewVTD; else @@ -4153,7 +4153,7 @@ FunctionDecl::getTemplateInstantiationPattern(bool ForDefinition) const { if (FunctionTemplateDecl *Primary = getPrimaryTemplate()) { // If we hit a point where the user provided a specialization of this // template, we're done looking. - while (!ForDefinition || !Primary->isMemberSpecialization()) { + while (!ForDefinition || !Primary->hasMemberSpecialization()) { if (auto *NewPrimary = Primary->getInstantiatedFromMemberTemplate()) Primary = NewPrimary; else @@ -4170,7 +4170,7 @@ FunctionTemplateDecl *FunctionDecl::getPrimaryTemplate() const { if (FunctionTemplateSpecializationInfo *Info = TemplateOrSpecialization .dyn_cast()) { - return Info->getTemplate()->getMostRecentDecl(); + return Info->getTemplate(); } return nullptr; } diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 1c92fd9e3ff06..db0ea62a2323e 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -2030,7 +2030,7 @@ const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { if (auto *TD = dyn_cast(this)) { auto From = TD->getInstantiatedFrom(); if (auto *CTD = From.dyn_cast()) { - while (!CTD->isMemberSpecialization()) { + while (!CTD->hasMemberSpecialization()) { if (auto *NewCTD = CTD->getInstantiatedFromMemberTemplate()) CTD = NewCTD; else @@ -2040,7 +2040,7 @@ const CXXRecordDecl *CXXRecordDecl::getTemplateInstantiationPattern() const { } if (auto *CTPSD = From.dyn_cast()) { - while (!CTPSD->isMemberSpecialization()) { + while (!CTPSD->hasMemberSpecialization()) { if (auto *NewCTPSD = CTPSD->getInstantiatedFromMemberTemplate()) CTPSD = NewCTPSD; else diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index 1db02d0d04448..755ec72f00bf7 100644 --- 
a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -993,17 +993,7 @@ ClassTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get()->getMostRecentDecl(); -} - -llvm::PointerUnion -ClassTemplateSpecializationDecl::getSpecializedTemplateOrPartial() const { - if (const auto *PartialSpec = - SpecializedTemplate.dyn_cast()) - return PartialSpec->PartialSpecialization->getMostRecentDecl(); - - return SpecializedTemplate.get()->getMostRecentDecl(); + return SpecializedTemplate.get(); } SourceRange @@ -1293,39 +1283,6 @@ VarTemplateDecl::newCommon(ASTContext &C) const { return CommonPtr; } -void VarTemplateDecl::mergePrevDecl(VarTemplateDecl *Prev) { - // If we haven't created a common pointer yet, then it can just be created - // with the usual method. - if (!getCommonPtrInternal()) - return; - - Common *ThisCommon = static_cast(getCommonPtrInternal()); - Common *PrevCommon = nullptr; - SmallVector PreviousDecls; - for (; Prev; Prev = Prev->getPreviousDecl()) { - if (CommonBase *C = Prev->getCommonPtrInternal()) { - PrevCommon = static_cast(C); - break; - } - PreviousDecls.push_back(Prev); - } - - // If the previous redecl chain hasn't created a common pointer yet, then just - // use this common pointer. - if (!PrevCommon) { - for (auto *D : PreviousDecls) - D->setCommonPtr(ThisCommon); - return; - } - - // Ensure we don't leak any important state. 
- assert(ThisCommon->Specializations.empty() && - ThisCommon->PartialSpecializations.empty() && - "Can't merge incompatible declarations!"); - - setCommonPtr(PrevCommon); -} - VarTemplateSpecializationDecl * VarTemplateDecl::findSpecialization(ArrayRef Args, void *&InsertPos) { @@ -1448,16 +1405,7 @@ VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const { if (const auto *PartialSpec = SpecializedTemplate.dyn_cast()) return PartialSpec->PartialSpecialization->getSpecializedTemplate(); - return SpecializedTemplate.get()->getMostRecentDecl(); -} - -llvm::PointerUnion -VarTemplateSpecializationDecl::getSpecializedTemplateOrPartial() const { - if (const auto *PartialSpec = - SpecializedTemplate.dyn_cast()) - return PartialSpec->PartialSpecialization->getMostRecentDecl(); - - return SpecializedTemplate.get()->getMostRecentDecl(); + return SpecializedTemplate.get(); } SourceRange VarTemplateSpecializationDecl::getSourceRange() const { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 3e8b76e8dfd62..f8e5f3c6d309d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -4696,10 +4696,8 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) { // Keep a chain of previous declarations. New->setPreviousDecl(Old); - if (NewTemplate) { - NewTemplate->mergePrevDecl(OldTemplate); + if (NewTemplate) NewTemplate->setPreviousDecl(OldTemplate); - } // Inherit access appropriately. 
New->setAccess(Old->getAccess()); diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index e2a59f63ccf58..573e90aced3ee 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -9954,7 +9954,7 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer( auto SynthesizeAggrGuide = [&](InitListExpr *ListInit) { auto *Pattern = Template; while (Pattern->getInstantiatedFromMemberTemplate()) { - if (Pattern->isMemberSpecialization()) + if (Pattern->hasMemberSpecialization()) break; Pattern = Pattern->getInstantiatedFromMemberTemplate(); } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index de0ec0128905f..b63063813f1b5 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -343,7 +343,7 @@ struct TemplateInstantiationArgumentCollecter // If this function was instantiated from a specialized member that is // a function template, we're done. assert(FD->getPrimaryTemplate() && "No function template?"); - if (FD->getPrimaryTemplate()->isMemberSpecialization()) + if (FD->getPrimaryTemplate()->hasMemberSpecialization()) return Done(); // If this function is a generic lambda specialization, we are done. 
@@ -442,11 +442,11 @@ struct TemplateInstantiationArgumentCollecter Specialized = CTSD->getSpecializedTemplateOrPartial(); if (auto *CTPSD = Specialized.dyn_cast()) { - if (CTPSD->isMemberSpecialization()) + if (CTPSD->hasMemberSpecialization()) return Done(); } else { auto *CTD = Specialized.get(); - if (CTD->isMemberSpecialization()) + if (CTD->hasMemberSpecialization()) return Done(); } return UseNextDecl(CTSD); @@ -478,11 +478,11 @@ struct TemplateInstantiationArgumentCollecter Specialized = VTSD->getSpecializedTemplateOrPartial(); if (auto *VTPSD = Specialized.dyn_cast()) { - if (VTPSD->isMemberSpecialization()) + if (VTPSD->hasMemberSpecialization()) return Done(); } else { auto *VTD = Specialized.get(); - if (VTD->isMemberSpecialization()) + if (VTD->hasMemberSpecialization()) return Done(); } return UseNextDecl(VTSD); @@ -4141,7 +4141,7 @@ getPatternForClassTemplateSpecialization( CXXRecordDecl *Pattern = nullptr; Specialized = ClassTemplateSpec->getSpecializedTemplateOrPartial(); if (auto *CTD = Specialized.dyn_cast()) { - while (!CTD->isMemberSpecialization()) { + while (!CTD->hasMemberSpecialization()) { if (auto *NewCTD = CTD->getInstantiatedFromMemberTemplate()) CTD = NewCTD; else @@ -4151,7 +4151,7 @@ getPatternForClassTemplateSpecialization( } else if (auto *CTPSD = Specialized .dyn_cast()) { - while (!CTPSD->isMemberSpecialization()) { + while (!CTPSD->hasMemberSpecialization()) { if (auto *NewCTPSD = CTPSD->getInstantiatedFromMemberTemplate()) CTPSD = NewCTPSD; else diff --git a/clang/test/AST/ast-dump-decl.cpp b/clang/test/AST/ast-dump-decl.cpp index 7b998f20944f4..e84241cee922f 100644 --- a/clang/test/AST/ast-dump-decl.cpp +++ b/clang/test/AST/ast-dump-decl.cpp @@ -530,7 +530,7 @@ namespace testCanonicalTemplate { // CHECK-NEXT: | `-ClassTemplateDecl 0x{{.+}} parent 0x{{.+}} col:40 friend_undeclared TestClassTemplate{{$}} // CHECK-NEXT: | |-TemplateTypeParmDecl 0x{{.+}} col:23 typename depth 1 index 0 T2{{$}} // CHECK-NEXT: | `-CXXRecordDecl 
0x{{.+}} parent 0x{{.+}} col:40 class TestClassTemplate{{$}} - // CHECK-NEXT: `-ClassTemplateSpecializationDecl 0x{{.+}} line:[[@LINE-19]]:31 class TestClassTemplate definition implicit_instantiation{{$}} + // CHECK-NEXT: `-ClassTemplateSpecializationDecl 0x{{.+}} line:[[@LINE-19]]:31 class TestClassTemplate definition implicit_instantiation{{$}} // CHECK-NEXT: |-DefinitionData pass_in_registers empty aggregate standard_layout trivially_copyable pod trivial literal has_constexpr_non_copy_move_ctor can_const_default_init{{$}} // CHECK-NEXT: | |-DefaultConstructor exists trivial constexpr defaulted_is_constexpr{{$}} // CHECK-NEXT: | |-CopyConstructor simple trivial has_const_param implicit_has_const_param{{$}} diff --git a/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp index e7e4738032f64..87127366eb58a 100644 --- a/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp +++ b/clang/test/CXX/temp/temp.spec/temp.expl.spec/p7.cpp @@ -177,93 +177,6 @@ namespace Defined { static_assert(A::B::y == 2); } // namespace Defined -namespace Constrained { - template - struct A { - template requires V - static constexpr int f(); // expected-note {{declared here}} - - template requires V - static const int x; // expected-note {{declared here}} - - template requires V - static const int x; // expected-note {{declared here}} - - template requires V - struct B; // expected-note {{template is declared here}} - - template requires V - struct B; // expected-note {{template is declared here}} - }; - - template<> - template requires V - constexpr int A::f() { - return A::f(); - } - - template<> - template requires V - constexpr int A::x = A::x; - - template<> - template requires V - constexpr int A::x = A::x; - - template<> - template requires V - struct A::B { - static constexpr int y = A::B::y; - }; - - template<> - template requires V - struct A::B { - static constexpr int y = A::B::y; - }; - - template<> - template requires V - 
constexpr int A::f() { - return 1; - } - - template<> - template requires V - constexpr int A::x = 1; - - template<> - template requires V - constexpr int A::x = 2; - - template<> - template requires V - struct A::B { - static constexpr int y = 1; - }; - - template<> - template requires V - struct A::B { - static constexpr int y = 2; - }; - - static_assert(A::f() == 0); // expected-error {{static assertion expression is not an integral constant expression}} - // expected-note@-1 {{undefined function 'f' cannot be used in a constant expression}} - static_assert(A::x == 0); // expected-error {{static assertion expression is not an integral constant expression}} - // expected-note@-1 {{initializer of 'x' is unknown}} - static_assert(A::x == 0); // expected-error {{static assertion expression is not an integral constant expression}} - // expected-note@-1 {{initializer of 'x' is unknown}} - static_assert(A::B::y == 0); // expected-error {{implicit instantiation of undefined template 'Constrained::A::B'}} - static_assert(A::B::y == 0); // expected-error {{implicit instantiation of undefined template 'Constrained::A::B'}} - - static_assert(A::f() == 1); - static_assert(A::x == 1); - static_assert(A::x == 2); - static_assert(A::B::y == 1); - static_assert(A::B::y == 2); -} // namespace Constrained - namespace Dependent { template struct A { From 74d8f3952c4acf6d57948983d7c5b0d0a7763c28 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Wed, 30 Oct 2024 14:06:42 -0700 Subject: [PATCH 59/69] [HLSL] Remove old resource annotations for UAVs and SRVs (#114139) UAVs and SRVs have already been converted to use LLVM target types and we can disable generating of the !hlsl.uavs and !hlsl.srvs! annotations. This will enable adding tests for structured buffers with user defined types that this old resource annotations code does not handle (it crashes). 
Part 1 of #114126 --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 10 ++++++++ .../builtins/RWBuffer-annotations.hlsl | 24 ------------------- .../builtins/RWBuffer-elementtype.hlsl | 14 ----------- .../RWStructuredBuffer-elementtype.hlsl | 14 ----------- .../RasterizerOrderedBuffer-annotations.hlsl | 20 ---------------- .../StructuredBuffer-annotations.hlsl | 22 ----------------- .../StructuredBuffer-elementtype.hlsl | 14 ----------- clang/test/CodeGenHLSL/cbuf.hlsl | 2 -- 8 files changed, 10 insertions(+), 110 deletions(-) delete mode 100644 clang/test/CodeGenHLSL/builtins/RWBuffer-annotations.hlsl delete mode 100644 clang/test/CodeGenHLSL/builtins/RasterizerOrderedBuffer-annotations.hlsl delete mode 100644 clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 06558ce796f2e..7ba0d61501818 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -306,6 +306,16 @@ void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) { continue; llvm::hlsl::ResourceClass RC = AttrResType->getAttrs().ResourceClass; + if (RC == llvm::hlsl::ResourceClass::UAV || + RC == llvm::hlsl::ResourceClass::SRV) + // UAVs and SRVs have already been converted to use LLVM target types, + // we can disable generating of these resource annotations. This will + // enable progress on structured buffers with user defined types this + // resource annotations code does not handle and it crashes. + // This whole function is going to be removed as soon as cbuffers are + // converted to target types (llvm/llvm-project #114126). 
+ return; + bool IsROV = AttrResType->getAttrs().IsROV; llvm::hlsl::ResourceKind RK = HLSLResAttr->getResourceKind(); llvm::hlsl::ElementType ET = calculateElementType(CGM.getContext(), Ty); diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-annotations.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-annotations.hlsl deleted file mode 100644 index e1e047485e4df..0000000000000 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-annotations.hlsl +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s - -RWBuffer Buffer1; -RWBuffer > BufferArray[4]; - -RWBuffer Buffer2 : register(u3); -RWBuffer > BufferArray2[4] : register(u4); - -RWBuffer Buffer3 : register(u3, space1); -RWBuffer > BufferArray3[4] : register(u4, space1); - - - -[numthreads(1,1,1)] -void main() { -} - -// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]} -// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 false, i32 3, i32 0} -// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 false, i32 4, i32 0} -// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 false, i32 3, i32 1} -// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 false, i32 4, i32 1} diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl index fa81b53fd9bdd..16120a44a9e4d 100644 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl @@ -54,17 +54,3 @@ void main(int GI : SV_GroupIndex) { BufF16x2[GI] = 0; BufF32x3[GI] = 0; } - -// CHECK: 
!{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3, -// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6, -// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9, -// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10, -// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9, diff --git a/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl index 727f416cde57f..71b5b7a75fa43 100644 --- a/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl @@ -54,17 +54,3 @@ void main(int GI : SV_GroupIndex) { BufF16x2[GI] = 0; BufF32x3[GI] = 0; } - -// CHECK: !{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3, -// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6, -// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9, -// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10, -// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9, diff --git a/clang/test/CodeGenHLSL/builtins/RasterizerOrderedBuffer-annotations.hlsl 
b/clang/test/CodeGenHLSL/builtins/RasterizerOrderedBuffer-annotations.hlsl deleted file mode 100644 index 5155f12902597..0000000000000 --- a/clang/test/CodeGenHLSL/builtins/RasterizerOrderedBuffer-annotations.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s - -RasterizerOrderedBuffer Buffer1; -RasterizerOrderedBuffer > BufferArray[4]; - -RasterizerOrderedBuffer Buffer2 : register(u3); -RasterizerOrderedBuffer > BufferArray2[4] : register(u4); - -RasterizerOrderedBuffer Buffer3 : register(u3, space1); -RasterizerOrderedBuffer > BufferArray3[4] : register(u4, space1); - -void main() {} - -// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]} -// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 true, i32 -1, i32 0} -// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 true, i32 -1, i32 0} -// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 true, i32 3, i32 0} -// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 true, i32 4, i32 0} -// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 true, i32 3, i32 1} -// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 true, i32 4, i32 1} diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl deleted file mode 100644 index a88ea774f3320..0000000000000 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s - -StructuredBuffer Buffer1; -StructuredBuffer > BufferArray[4]; - -StructuredBuffer Buffer2 : register(t3); -StructuredBuffer > BufferArray2[4] : 
register(t4); - -StructuredBuffer Buffer3 : register(t3, space1); -StructuredBuffer > BufferArray3[4] : register(t4, space1); - -[numthreads(1,1,1)] -void main() { -} - -// CHECK: !hlsl.srvs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]} -// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 false, i32 3, i32 0} -// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 false, i32 4, i32 0} -// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 false, i32 3, i32 1} -// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 false, i32 4, i32 1} diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl index 4c30119498ff1..205e13b4de394 100644 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl @@ -54,17 +54,3 @@ void main(int GI : SV_GroupIndex) { half2 v12 = BufF16x2[GI]; float3 v13 = BufF32x3[GI]; } - -// CHECK: !{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3, -// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6, -// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9, -// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10, -// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8, 
-// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9, diff --git a/clang/test/CodeGenHLSL/cbuf.hlsl b/clang/test/CodeGenHLSL/cbuf.hlsl index 78d9768b22fc8..3f9d4514967dd 100644 --- a/clang/test/CodeGenHLSL/cbuf.hlsl +++ b/clang/test/CodeGenHLSL/cbuf.hlsl @@ -23,6 +23,4 @@ float foo() { } // CHECK: !hlsl.cbufs = !{![[CBMD:[0-9]+]]} -// CHECK: !hlsl.srvs = !{![[TBMD:[0-9]+]]} // CHECK: ![[CBMD]] = !{ptr @[[CB]], i32 13, i32 0, i1 false, i32 0, i32 2} -// CHECK: ![[TBMD]] = !{ptr @[[TB]], i32 15, i32 0, i1 false, i32 2, i32 1} From 14045de250ea126029d43ff8f2f68e9614c394bc Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 30 Oct 2024 23:36:46 +0200 Subject: [PATCH 60/69] [RISCV] Account for factor in interleave memory op costs (#111511) Currently we cost an interleaved memory op as if it were a load/store of the widened vector type, but this was undercosting in all cases when compared to the measured performance of todays hardware. On the x280 at NF=2 and spacemit-x60 at NF=2,3 and 4, a segmented load is carried out as a wide load and NF LMUL shuffle ops: https://github.com/preames/bp3-microarch#vlseg_lmul_x_sew_throughput All other NFs go through a slow path. On the spacemit-x60 this is proportional to VLMAX * NF, and on the x280 proportional to the number of segments. This patch increases the cost by implementing a wide load + NF LMUL shuffle op cost for the lowest common denominator NF=2, and then a slower cost proportional to VL for the other NFs. In a follow up patch we can add a tuning flag to use the faster cost model for NF=3 and 4 on the spacemit-x60. 
Note that the FIXME about illegal vectors seems to have been fixed in #100436 --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 31 ++-- .../LoopVectorize/RISCV/dead-ops-cost.ll | 74 ++++----- .../LoopVectorize/RISCV/interleaved-cost.ll | 144 +++++++++--------- 3 files changed, 130 insertions(+), 119 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 988cb194cd603..f050fb569946d 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -723,8 +723,7 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( // The interleaved memory access pass will lower interleaved memory ops (i.e // a load and store followed by a specific shuffle) to vlseg/vsseg - // intrinsics. In those cases then we can treat it as if it's just one (legal) - // memory op + // intrinsics. if (!UseMaskForCond && !UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) { auto *VTy = cast(VecTy); @@ -734,19 +733,27 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( auto *SubVecTy = VectorType::get(VTy->getElementType(), VTy->getElementCount().divideCoefficientBy(Factor)); - if (VTy->getElementCount().isKnownMultipleOf(Factor) && TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment, AddressSpace, DL)) { - // FIXME: We use the memory op cost of the *legalized* type here, - // because it's getMemoryOpCost returns a really expensive cost for - // types like <6 x i8>, which show up when doing interleaves of - // Factor=3 etc. Should the memory op cost of these be cheaper? - auto *LegalVTy = VectorType::get(VTy->getElementType(), - LT.second.getVectorElementCount()); - InstructionCost LegalMemCost = getMemoryOpCost( - Opcode, LegalVTy, Alignment, AddressSpace, CostKind); - return LT.first + LegalMemCost; + + // Most available hardware today optimizes NF=2 as as one wide memory op + // + Factor * LMUL shuffle ops. 
+ if (Factor == 2) { + InstructionCost Cost = + getMemoryOpCost(Opcode, VTy, Alignment, AddressSpace, CostKind); + MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT(); + Cost += Factor * TLI->getLMULCost(SubVecVT); + return LT.first * Cost; + } + + // Otherwise, the cost is proportional to the number of elements (VL * + // Factor ops). + InstructionCost MemOpCost = + getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, + CostKind, {TTI::OK_AnyValue, TTI::OP_None}); + unsigned NumLoads = getEstimatedVLFor(VTy); + return NumLoads * MemOpCost; } } } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 0e55ad65cdb2c..6724afd6ca10f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -410,45 +410,49 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) -; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 16, i64 [[N_MOD_VF]] -; 
CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP10:%.*]] = add [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP10]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP8]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: 
[[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[STRIDED_VEC4]] to <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[STRIDED_VEC5]] to <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[VEC_IND]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[STEP_ADD]] -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP11]], <8 x ptr> [[TMP13]], i32 4, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP12]], <8 x ptr> [[TMP14]], i32 4, <8 x i1> ) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP14]], align 1 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i8( [[WIDE_VEC]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = zext [[TMP16]] to +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP17]], [[TMP18]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 
0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -462,9 +466,9 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_1]] to i32 ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i32 [[EXT]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 4 +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -481,7 +485,7 @@ loop: %ext = zext i8 %l.1 to i32 %gep.dst = getelementptr i32, ptr %dst, i64 %iv store i32 %ext, ptr %gep.dst, align 4 - %iv.next = add nsw i64 %iv, 4 + %iv.next = add nsw i64 %iv, 2 %ec = icmp slt i64 %iv, %N br i1 %ec, label %loop, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll index fa346b4eac02d..6477f14e3c698 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll @@ -6,26 +6,26 @@ define void 
@i8_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_2' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 3 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 5 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with 
factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 @@ -49,16 +49,16 @@ define void @i8_factor_3(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_3' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 8: 
INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0 @@ -86,16 +86,16 @@ define void @i8_factor_4(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_4' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost 
of 9 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0 @@ -127,14 +127,14 @@ define void @i8_factor_5(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_5' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, 
ir<%p0> +; CHECK: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 0 @@ -170,14 +170,14 @@ define void @i8_factor_6(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_6' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 0 @@ 
-217,14 +217,14 @@ define void @i8_factor_7(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_7' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 0 @@ -268,14 +268,14 @@ define void @i8_factor_8(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_8' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, 
ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 0 From 56dcfbef453d6cc390fc7a734db417e047616526 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Oct 2024 14:47:29 -0700 Subject: [PATCH 61/69] [RISCV] Remove duplicate vector conversion pseudos. (#114287) These pseudos used to be handled by CustomInserter to insert the rounding mode change for vector ceil, floor, etc. At some point they were changed to use the InsertReadWriteCSR pass instead of the custom inserter. I believe that makes them redundant with the pseudos used by the RVV intrinsics with rounding mode operand. 
--- .../Target/RISCV/RISCVInstrInfoVPseudos.td | 114 ------------------ .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 20 +-- 2 files changed, 10 insertions(+), 124 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index d5b0fa340684b..19557d424d1be 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1134,46 +1134,6 @@ class VPseudoUnaryMask_NoExcept TargetConstraintType = 1> : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$passthru, OpClass:$rs2, vec_rm:$frm, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, - RISCVVPseudo { - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let Constraints = !interleave([Constraint, "$rd = $passthru"], ","); - let TargetOverlapConstraintType = TargetConstraintType; - let HasVLOp = 1; - let HasSEWOp = 1; - let HasVecPolicyOp = 1; - let HasRoundModeOp = 1; -} - -class VPseudoUnaryMask_FRM TargetConstraintType = 1> : - Pseudo<(outs GetVRegNoV0.R:$rd), - (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, vec_rm:$frm, - AVL:$vl, sew:$sew, vec_policy:$policy), []>, - RISCVVPseudo { - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let Constraints = !interleave([Constraint, "$rd = $passthru"], ","); - let TargetOverlapConstraintType = TargetConstraintType; - let HasVLOp = 1; - let HasSEWOp = 1; - let HasVecPolicyOp = 1; - let UsesMaskPolicy = 1; - let HasRoundModeOp = 1; -} - class VPseudoUnaryNoMaskGPROut : Pseudo<(outs GPR:$rd), (ins VR:$rs2, AVL:$vl, sew:$sew), []>, @@ -3578,23 +3538,6 @@ multiclass VPseudoConversionRoundingMode TargetConstraintType = 1> { - let VLMul = MInfo.value, SEW=sew in { - defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); - def suffix : VPseudoUnaryNoMask_FRM; - def suffix # "_MASK" : VPseudoUnaryMask_FRM, - RISCVMaskedPseudo; - } -} - multiclass VPseudoConversionNoExcept, - SchedUnary<"WriteVFCvtFToIV", 
"ReadVFCvtFToIV", m.MX, - forcePassthruRead=true>; - } -} - multiclass VPseudoVFROUND_NOEXCEPT_V { foreach m = MxListF in { defm _V : VPseudoConversionNoExcept, @@ -3645,15 +3580,6 @@ multiclass VPseudoVCVTF_V_RM { } } -multiclass VPseudoVCVTF_RM_V { - foreach m = MxListF in { - foreach e = SchedSEWSet.val in - defm _V : VPseudoConversionRM, - SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e, - forcePassthruRead=true>; - } -} - multiclass VPseudoVWCVTI_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { @@ -3672,15 +3598,6 @@ multiclass VPseudoVWCVTI_V_RM { } } -multiclass VPseudoVWCVTI_RM_V { - defvar constraint = "@earlyclobber $rd"; - foreach m = MxListFW in { - defm _V : VPseudoConversionRM, - SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX, - forcePassthruRead=true>; - } -} - multiclass VPseudoVWCVTF_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { @@ -3721,15 +3638,6 @@ multiclass VPseudoVNCVTI_W_RM { } } -multiclass VPseudoVNCVTI_RM_W { - defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in { - defm _W : VPseudoConversionRM, - SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX, - forcePassthruRead=true>; - } -} - multiclass VPseudoVNCVTF_W_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { @@ -3742,17 +3650,6 @@ multiclass VPseudoVNCVTF_W_RM { } } -multiclass VPseudoVNCVTF_RM_W { - defvar constraint = "@earlyclobber $rd"; - foreach m = MxListFW in { - foreach e = SchedSEWSet.val in - defm _W : VPseudoConversionRM, - SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e, - forcePassthruRead=true>; - } -} - multiclass VPseudoVNCVTD_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { @@ -6583,9 +6480,6 @@ defm PseudoVFCVT_XU_F : VPseudoVCVTI_V_RM; defm PseudoVFCVT_X_F : VPseudoVCVTI_V_RM; } -defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V; -defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V; - defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V; defm 
PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V; @@ -6594,8 +6488,6 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFCVT_F_XU : VPseudoVCVTF_V_RM; defm PseudoVFCVT_F_X : VPseudoVCVTF_V_RM; } -defm PseudoVFCVT_RM_F_XU : VPseudoVCVTF_RM_V; -defm PseudoVFCVT_RM_F_X : VPseudoVCVTF_RM_V; } // mayRaiseFPException = true //===----------------------------------------------------------------------===// @@ -6606,8 +6498,6 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V_RM; defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V_RM; } -defm PseudoVFWCVT_RM_XU_F : VPseudoVWCVTI_RM_V; -defm PseudoVFWCVT_RM_X_F : VPseudoVWCVTI_RM_V; defm PseudoVFWCVT_RTZ_XU_F : VPseudoVWCVTI_V; defm PseudoVFWCVT_RTZ_X_F : VPseudoVWCVTI_V; @@ -6627,8 +6517,6 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W_RM; defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W_RM; } -defm PseudoVFNCVT_RM_XU_F : VPseudoVNCVTI_RM_W; -defm PseudoVFNCVT_RM_X_F : VPseudoVNCVTI_RM_W; defm PseudoVFNCVT_RTZ_XU_F : VPseudoVNCVTI_W; defm PseudoVFNCVT_RTZ_X_F : VPseudoVNCVTI_W; @@ -6637,8 +6525,6 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W_RM; defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W_RM; } -defm PseudoVFNCVT_RM_F_XU : VPseudoVNCVTF_RM_W; -defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W; let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 18749f00a10a5..33e1ed120cd08 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2639,8 +2639,8 @@ foreach fvti = AllFloatVectors in { // 13.17. 
Vector Single-Width Floating-Point/Integer Type-Convert Instructions defm : VPatConvertFP2IVL_V_RM; defm : VPatConvertFP2IVL_V_RM; -defm : VPatConvertFP2I_RM_VL_V; -defm : VPatConvertFP2I_RM_VL_V; +defm : VPatConvertFP2I_RM_VL_V; +defm : VPatConvertFP2I_RM_VL_V; defm : VPatConvertFP2IVL_V; defm : VPatConvertFP2IVL_V; @@ -2648,14 +2648,14 @@ defm : VPatConvertFP2IVL_V; defm : VPatConvertI2FPVL_V_RM; defm : VPatConvertI2FPVL_V_RM; -defm : VPatConvertI2FP_RM_VL_V; -defm : VPatConvertI2FP_RM_VL_V; +defm : VPatConvertI2FP_RM_VL_V; +defm : VPatConvertI2FP_RM_VL_V; // 13.18. Widening Floating-Point/Integer Type-Convert Instructions defm : VPatWConvertFP2IVL_V_RM; defm : VPatWConvertFP2IVL_V_RM; -defm : VPatWConvertFP2I_RM_VL_V; -defm : VPatWConvertFP2I_RM_VL_V; +defm : VPatWConvertFP2I_RM_VL_V; +defm : VPatWConvertFP2I_RM_VL_V; defm : VPatWConvertFP2IVL_V; defm : VPatWConvertFP2IVL_V; @@ -2696,8 +2696,8 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions defm : VPatNConvertFP2IVL_W_RM; defm : VPatNConvertFP2IVL_W_RM; -defm : VPatNConvertFP2I_RM_VL_W; -defm : VPatNConvertFP2I_RM_VL_W; +defm : VPatNConvertFP2I_RM_VL_W; +defm : VPatNConvertFP2I_RM_VL_W; defm : VPatNConvertFP2IVL_W; defm : VPatNConvertFP2IVL_W; @@ -2705,8 +2705,8 @@ defm : VPatNConvertFP2IVL_W; defm : VPatNConvertI2FPVL_W_RM; -defm : VPatNConvertI2FP_RM_VL_W; -defm : VPatNConvertI2FP_RM_VL_W; +defm : VPatNConvertI2FP_RM_VL_W; +defm : VPatNConvertI2FP_RM_VL_W; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; From 5d35747f6de9295400327744b389f303e3e2b13d Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Wed, 30 Oct 2024 15:05:13 -0700 Subject: [PATCH 62/69] [libc] Refactor statvfs tests (#114147) The previous statvfs tests had several issues, this patch updates them to meet current standards. 
--- .../test/src/sys/statvfs/linux/CMakeLists.txt | 6 +- .../src/sys/statvfs/linux/fstatvfs_test.cpp | 81 ++++++++++--------- .../src/sys/statvfs/linux/statvfs_test.cpp | 75 ++++++++--------- 3 files changed, 80 insertions(+), 82 deletions(-) diff --git a/libc/test/src/sys/statvfs/linux/CMakeLists.txt b/libc/test/src/sys/statvfs/linux/CMakeLists.txt index 1f8688868e043..fa1e9052d1cac 100644 --- a/libc/test/src/sys/statvfs/linux/CMakeLists.txt +++ b/libc/test/src/sys/statvfs/linux/CMakeLists.txt @@ -8,8 +8,9 @@ add_libc_unittest( statvfs_test.cpp DEPENDS libc.src.errno.errno - libc.src.sys.statvfs.linux.statfs_utils libc.src.sys.statvfs.statvfs + libc.src.sys.stat.mkdirat + libc.src.sys.stat.rmdir libc.test.UnitTest.ErrnoSetterMatcher ) @@ -21,8 +22,9 @@ add_libc_unittest( fstatvfs_test.cpp DEPENDS libc.src.errno.errno - libc.src.sys.statvfs.linux.statfs_utils libc.src.sys.statvfs.fstatvfs + libc.src.sys.stat.mkdirat + libc.src.sys.stat.rmdir libc.src.fcntl.open libc.src.unistd.close libc.test.UnitTest.ErrnoSetterMatcher diff --git a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp index 2f3e0b96ff095..efd1e688280b5 100644 --- a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp +++ b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp @@ -1,49 +1,56 @@ +//===-- Unittests for fstatvfs --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/fcntl/open.h" +#include "src/sys/stat/mkdirat.h" #include "src/sys/statvfs/fstatvfs.h" -#include "src/sys/statvfs/linux/statfs_utils.h" #include "src/unistd/close.h" +#include "src/unistd/rmdir.h" #include "test/UnitTest/ErrnoSetterMatcher.h" -#include "test/UnitTest/LibcTest.h" -#include +#include "test/UnitTest/Test.h" + using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; -#ifdef SYS_statfs64 -using StatFs = statfs64; -#else -using StatFs = statfs; -#endif - -namespace LIBC_NAMESPACE_DECL { -static int fstatfs(int fd, StatFs *buf) { - using namespace statfs_utils; - if (cpp::optional result = linux_fstatfs(fd)) { - *buf = *result; - return 0; - } - return -1; -} -} // namespace LIBC_NAMESPACE_DECL - -struct PathFD { - int fd; - explicit PathFD(const char *path) - : fd(LIBC_NAMESPACE::open(path, O_CLOEXEC | O_PATH)) {} - ~PathFD() { LIBC_NAMESPACE::close(fd); } - operator int() const { return fd; } -}; - -TEST(LlvmLibcSysStatvfsTest, FstatfsBasic) { - StatFs buf; - ASSERT_THAT(LIBC_NAMESPACE::fstatfs(PathFD("/"), &buf), Succeeds()); - ASSERT_THAT(LIBC_NAMESPACE::fstatfs(PathFD("/proc"), &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(PROC_SUPER_MAGIC)); - ASSERT_THAT(LIBC_NAMESPACE::fstatfs(PathFD("/sys"), &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(SYSFS_MAGIC)); +TEST(LlvmLibcSysFStatvfsTest, FStatvfsBasic) { + struct statvfs buf; + + int fd = LIBC_NAMESPACE::open("/", O_PATH); + ASSERT_ERRNO_SUCCESS(); + ASSERT_GT(fd, 0); + + // The root of the file directory must always exist + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Succeeds()); + ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0)); } -TEST(LlvmLibcSysStatvfsTest, FstatvfsInvalidFD) { +TEST(LlvmLibcSysFStatvfsTest, FStatvfsInvalidPath) { struct 
statvfs buf; - ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(-1, &buf), Fails(EBADF)); + + constexpr const char *FILENAME = "testdata/statvfs.testdir"; + auto TEST_DIR = libc_make_test_file_path(FILENAME); + + ASSERT_THAT(LIBC_NAMESPACE::mkdirat(AT_FDCWD, TEST_DIR, S_IRWXU), + Succeeds(0)); + + int fd = LIBC_NAMESPACE::open(TEST_DIR, O_PATH); + ASSERT_ERRNO_SUCCESS(); + ASSERT_GT(fd, 0); + + // create the file, assert it exists, then delete it and assert it doesn't + // exist anymore. + + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Succeeds()); + + ASSERT_THAT(LIBC_NAMESPACE::rmdir(TEST_DIR), Succeeds(0)); + + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Fails(ENOENT)); + ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0)); + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Fails(ENOENT)); } diff --git a/libc/test/src/sys/statvfs/linux/statvfs_test.cpp b/libc/test/src/sys/statvfs/linux/statvfs_test.cpp index 5329adb54d64d..0b154e7aa3fb7 100644 --- a/libc/test/src/sys/statvfs/linux/statvfs_test.cpp +++ b/libc/test/src/sys/statvfs/linux/statvfs_test.cpp @@ -1,54 +1,43 @@ +//===-- Unittests for statvfs ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include "src/sys/statvfs/linux/statfs_utils.h" +#include "src/sys/stat/mkdirat.h" #include "src/sys/statvfs/statvfs.h" +#include "src/unistd/rmdir.h" #include "test/UnitTest/ErrnoSetterMatcher.h" -#include "test/UnitTest/LibcTest.h" -#include +#include "test/UnitTest/Test.h" + using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; -#ifdef SYS_statfs64 -using StatFs = statfs64; -#else -using StatFs = statfs; -#endif - -namespace LIBC_NAMESPACE_DECL { -static int statfs(const char *path, StatFs *buf) { - using namespace statfs_utils; - if (cpp::optional result = linux_statfs(path)) { - *buf = *result; - return 0; - } - return -1; -} -} // namespace LIBC_NAMESPACE_DECL - -TEST(LlvmLibcSysStatfsTest, StatfsBasic) { - StatFs buf; - ASSERT_THAT(LIBC_NAMESPACE::statfs("/", &buf), Succeeds()); - ASSERT_THAT(LIBC_NAMESPACE::statfs("/proc", &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(PROC_SUPER_MAGIC)); - ASSERT_THAT(LIBC_NAMESPACE::statfs("/sys", &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(SYSFS_MAGIC)); +TEST(LlvmLibcSysStatvfsTest, StatvfsBasic) { + struct statvfs buf; + // The root of the file directory must always exist + ASSERT_THAT(LIBC_NAMESPACE::statvfs("/", &buf), Succeeds()); } -TEST(LlvmLibcSysStatfsTest, StatvfsInvalidPath) { +TEST(LlvmLibcSysStatvfsTest, StatvfsInvalidPath) { struct statvfs buf; + ASSERT_THAT(LIBC_NAMESPACE::statvfs("", &buf), Fails(ENOENT)); - ASSERT_THAT(LIBC_NAMESPACE::statvfs("/nonexistent", &buf), Fails(ENOENT)); - ASSERT_THAT(LIBC_NAMESPACE::statvfs("/dev/null/whatever", &buf), - Fails(ENOTDIR)); - ASSERT_THAT(LIBC_NAMESPACE::statvfs(nullptr, &buf), Fails(EFAULT)); -} -TEST(LlvmLibcSysStatfsTest, StatvfsNameTooLong) { - struct statvfs buf; - ASSERT_THAT(LIBC_NAMESPACE::statvfs("/", &buf), 
Succeeds()); - char *name = static_cast(__builtin_alloca(buf.f_namemax + 3)); - name[0] = '/'; - name[buf.f_namemax + 2] = '\0'; - for (unsigned i = 1; i < buf.f_namemax + 2; ++i) { - name[i] = 'a'; - } - ASSERT_THAT(LIBC_NAMESPACE::statvfs(name, &buf), Fails(ENAMETOOLONG)); + // create the file, assert it exists, then delete it and assert it doesn't + // exist anymore. + constexpr const char *FILENAME = "testdata/statvfs.testdir"; + auto TEST_DIR = libc_make_test_file_path(FILENAME); + + ASSERT_THAT(LIBC_NAMESPACE::mkdirat(AT_FDCWD, TEST_DIR, S_IRWXU), + Succeeds(0)); + + ASSERT_THAT(LIBC_NAMESPACE::statvfs(TEST_DIR, &buf), Succeeds()); + + ASSERT_THAT(LIBC_NAMESPACE::rmdir(TEST_DIR), Succeeds(0)); + + ASSERT_THAT(LIBC_NAMESPACE::statvfs(TEST_DIR, &buf), Fails(ENOENT)); } From 50c44478fe3f680374edf1363d2a3617b8ff2a0b Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Wed, 30 Oct 2024 16:08:03 -0600 Subject: [PATCH 63/69] [libc] fix behavior of strrchr(x, '\0') (#112620) `strrchr("foo", '\0')` is defined to point to the end of `foo`, rather than returning NULL. This wasn't caught by tests, since llvm-libc's `ASSERT_STREQ(nullptr, "");` is not an assertion error. While I'm here, refactor the test slightly to check for NULL more specifically. I considered adding fancier `ASSERT`s (and changing the semantics of `ASSERT_STREQ`), but opted for a more local fix by fair dice roll. 
--- libc/src/string/string_utils.h | 6 ++++-- libc/test/UnitTest/LibcTest.h | 8 +++++++ libc/test/src/string/StrchrTest.h | 36 +++++++++++++++++++------------ 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 240b28f15718a..22a1876da5369 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -239,11 +239,13 @@ LIBC_INLINE constexpr static char *strrchr_implementation(const char *src, int c) { char ch = static_cast(c); char *last_occurrence = nullptr; - for (; *src; ++src) { + while (true) { if (*src == ch) last_occurrence = const_cast(src); + if (!*src) + return last_occurrence; + ++src; } - return last_occurrence; } } // namespace internal diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h index 2b972004e9eea..1707c3c0fdcfa 100644 --- a/libc/test/UnitTest/LibcTest.h +++ b/libc/test/UnitTest/LibcTest.h @@ -162,6 +162,14 @@ class Test { (unsigned long long)RHS, LHSStr, RHSStr, Loc); } + // Helper to allow macro invocations like `ASSERT_EQ(foo, nullptr)`. + template , ValType> = nullptr> + bool test(TestCond Cond, ValType LHS, std::nullptr_t, const char *LHSStr, + const char *RHSStr, internal::Location Loc) { + return test(Cond, LHS, static_cast(nullptr), LHSStr, RHSStr, Loc); + } + template < typename ValType, cpp::enable_if_t< diff --git a/libc/test/src/string/StrchrTest.h b/libc/test/src/string/StrchrTest.h index 74e172de95953..8c3fe5293008a 100644 --- a/libc/test/src/string/StrchrTest.h +++ b/libc/test/src/string/StrchrTest.h @@ -40,14 +40,16 @@ template struct StrchrTest : public LIBC_NAMESPACE::testing::Test { const char *src = "abcde"; // Should return null terminator. - ASSERT_STREQ(Func(src, '\0'), ""); + const char *nul_terminator = Func(src, '\0'); + ASSERT_NE(nul_terminator, nullptr); + ASSERT_STREQ(nul_terminator, ""); // Source string should not change. 
ASSERT_STREQ(src, "abcde"); } void characterNotWithinStringShouldReturnNullptr() { // Since 'z' is not within the string, should return nullptr. - ASSERT_STREQ(Func("123?", 'z'), nullptr); + ASSERT_EQ(Func("123?", 'z'), nullptr); } void theSourceShouldNotChange() { @@ -74,11 +76,13 @@ template struct StrchrTest : public LIBC_NAMESPACE::testing::Test { void emptyStringShouldOnlyMatchNullTerminator() { // Null terminator should match. - ASSERT_STREQ(Func("", '\0'), ""); + const char empty_string[] = ""; + ASSERT_EQ(static_cast(Func(empty_string, '\0')), + empty_string); // All other characters should not match. - ASSERT_STREQ(Func("", 'Z'), nullptr); - ASSERT_STREQ(Func("", '3'), nullptr); - ASSERT_STREQ(Func("", '*'), nullptr); + ASSERT_EQ(Func("", 'Z'), nullptr); + ASSERT_EQ(Func("", '3'), nullptr); + ASSERT_EQ(Func("", '*'), nullptr); } }; @@ -114,7 +118,9 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { const char *src = "abcde"; // Should return null terminator. - ASSERT_STREQ(Func(src, '\0'), ""); + const char *nul_terminator = Func(src, '\0'); + ASSERT_NE(nul_terminator, nullptr); + ASSERT_STREQ(nul_terminator, ""); // Source string should not change. ASSERT_STREQ(src, "abcde"); } @@ -122,9 +128,9 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { void findsLastBehindFirstNullTerminator() { static const char src[6] = {'a', 'a', '\0', 'b', '\0', 'c'}; // 'b' is behind a null terminator, so should not be found. - ASSERT_STREQ(Func(src, 'b'), nullptr); + ASSERT_EQ(Func(src, 'b'), nullptr); // Same goes for 'c'. - ASSERT_STREQ(Func(src, 'c'), nullptr); + ASSERT_EQ(Func(src, 'c'), nullptr); // Should find the second of the two a's. ASSERT_STREQ(Func(src, 'a'), "a"); @@ -132,7 +138,7 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { void characterNotWithinStringShouldReturnNullptr() { // Since 'z' is not within the string, should return nullptr. 
- ASSERT_STREQ(Func("123?", 'z'), nullptr); + ASSERT_EQ(Func("123?", 'z'), nullptr); } void shouldFindLastOfDuplicates() { @@ -146,11 +152,13 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { void emptyStringShouldOnlyMatchNullTerminator() { // Null terminator should match. - ASSERT_STREQ(Func("", '\0'), ""); + const char empty_string[] = ""; + ASSERT_EQ(static_cast(Func(empty_string, '\0')), + empty_string); // All other characters should not match. - ASSERT_STREQ(Func("", 'A'), nullptr); - ASSERT_STREQ(Func("", '2'), nullptr); - ASSERT_STREQ(Func("", '*'), nullptr); + ASSERT_EQ(Func("", 'A'), nullptr); + ASSERT_EQ(Func("", '2'), nullptr); + ASSERT_EQ(Func("", '*'), nullptr); } }; From 36d56925706a32a065ec50d5a6b418e1f29a27b3 Mon Sep 17 00:00:00 2001 From: gulfemsavrun Date: Wed, 30 Oct 2024 15:10:29 -0700 Subject: [PATCH 64/69] Revert "[TLI] Add support for hypot libcall." (#114312) Reverts llvm/llvm-project#113724 --- llvm/include/llvm/Analysis/TargetLibraryInfo.def | 15 --------------- llvm/lib/Analysis/TargetLibraryInfo.cpp | 2 -- llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 3 --- .../Transforms/InferFunctionAttrs/annotate.ll | 9 --------- .../tools/llvm-tli-checker/ps4-tli-check.yaml | 12 ------------ llvm/unittests/Analysis/TargetLibraryInfoTest.cpp | 3 --- 6 files changed, 44 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index fd53a26ef8fc1..3e23e398f6a79 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1671,21 +1671,6 @@ TLI_DEFINE_ENUM_INTERNAL(htons) TLI_DEFINE_STRING_INTERNAL("htons") TLI_DEFINE_SIG_INTERNAL(Int16, Int16) -/// double hypot(double x, double y); -TLI_DEFINE_ENUM_INTERNAL(hypot) -TLI_DEFINE_STRING_INTERNAL("hypot") -TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl, Dbl) - -/// float hypotf(float x, float y); -TLI_DEFINE_ENUM_INTERNAL(hypotf) -TLI_DEFINE_STRING_INTERNAL("hypotf") 
-TLI_DEFINE_SIG_INTERNAL(Flt, Flt, Flt) - -/// long double hypotl(long double x, long double y); -TLI_DEFINE_ENUM_INTERNAL(hypotl) -TLI_DEFINE_STRING_INTERNAL("hypotl") -TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl, LDbl) - /// int iprintf(const char *format, ...); TLI_DEFINE_ENUM_INTERNAL(iprintf) TLI_DEFINE_STRING_INTERNAL("iprintf") diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 7f0b98ab3c151..0ee83d217a500 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -300,7 +300,6 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_expf); TLI.setUnavailable(LibFunc_floorf); TLI.setUnavailable(LibFunc_fmodf); - TLI.setUnavailable(LibFunc_hypotf); TLI.setUnavailable(LibFunc_log10f); TLI.setUnavailable(LibFunc_logf); TLI.setUnavailable(LibFunc_modff); @@ -332,7 +331,6 @@ static void initializeLibCalls(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_floorl); TLI.setUnavailable(LibFunc_fmodl); TLI.setUnavailable(LibFunc_frexpl); - TLI.setUnavailable(LibFunc_hypotl); TLI.setUnavailable(LibFunc_ldexpl); TLI.setUnavailable(LibFunc_log10l); TLI.setUnavailable(LibFunc_logl); diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index e039457f313b2..5fd4fd78c28a9 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1215,9 +1215,6 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_fmod: case LibFunc_fmodf: case LibFunc_fmodl: - case LibFunc_hypot: - case LibFunc_hypotf: - case LibFunc_hypotl: case LibFunc_isascii: case LibFunc_isdigit: case LibFunc_labs: diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 452d90aa98d88..d8266f4c6703d 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ 
b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -589,15 +589,6 @@ declare ptr @gets(ptr) ; CHECK: declare noundef i32 @gettimeofday(ptr nocapture noundef, ptr nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @gettimeofday(ptr, ptr) -; CHECK: declare double @hypot(double, double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] -declare double @hypot(double, double) - -; CHECK: declare float @hypotf(float, float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] -declare float @hypotf(float, float) - -; CHECK: declare x86_fp80 @hypotl(x86_fp80, x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] -declare x86_fp80 @hypotl(x86_fp80, x86_fp80) - ; CHECK: declare i32 @isascii(i32) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare i32 @isascii(i32) diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index d52f3c751b066..408b9c3993428 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -602,18 +602,6 @@ DynamicSymbols: Type: STT_FUNC Section: .text Binding: STB_GLOBAL - - Name: hypot - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - - Name: hypotf - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - - Name: hypotl - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - Name: isdigit Type: STT_FUNC Section: .text diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 982d00c5d3359..98f8989d4e6e9 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -249,9 +249,6 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare %struct* @getpwnam(i8*)\n" "declare i8* @gets(i8*)\n" "declare i32 @gettimeofday(%struct*, i8*)\n" - "declare double @hypot(double, double)\n" - "declare float @hypotf(float, float)\n" - "declare x86_fp80 @hypotl(x86_fp80, x86_fp80)\n" "declare i32 @_Z7isasciii(i32)\n" "declare i32 
@_Z7isdigiti(i32)\n" "declare i64 @labs(i64)\n" From 1cecc58c3f15e3d0fe97b7f764d498e4005557e0 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 30 Oct 2024 15:13:06 -0700 Subject: [PATCH 65/69] [NVPTX] instcombine known pointer AS checks. (#112964) The change improves the code in general and, as a side effect, avoids crashing on impossible address space casts guarded by `__isGlobal/__isShared`, which partially fixes https://github.com/llvm/llvm-project/issues/112760 It's still possible to trigger the issue by using explicit AS casts w/o AS checks, but LLVM should no longer crash on valid code. --- llvm/include/llvm/Support/NVPTXAddrSpace.h | 33 +++ .../Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 12 +- .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 63 +++- .../Transforms/InstCombine/NVPTX/isspacep.ll | 277 ++++++++++++++++++ 4 files changed, 372 insertions(+), 13 deletions(-) create mode 100644 llvm/include/llvm/Support/NVPTXAddrSpace.h create mode 100644 llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h new file mode 100644 index 0000000000000..93eae39e3d230 --- /dev/null +++ b/llvm/include/llvm/Support/NVPTXAddrSpace.h @@ -0,0 +1,33 @@ +//===---------------- NVPTXAddrSpace.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// NVPTX address space definition +/// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_NVPTXADDRSPACE_H +#define LLVM_SUPPORT_NVPTXADDRSPACE_H + +namespace llvm { +namespace NVPTXAS { +enum AddressSpace : unsigned { + ADDRESS_SPACE_GENERIC = 0, + ADDRESS_SPACE_GLOBAL = 1, + ADDRESS_SPACE_SHARED = 3, + ADDRESS_SPACE_CONST = 4, + ADDRESS_SPACE_LOCAL = 5, + + ADDRESS_SPACE_PARAM = 101, +}; +} // end namespace NVPTXAS + +} // end namespace llvm + +#endif // LLVM_SUPPORT_NVPTXADDRSPACE_H diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index 815b600fe93a9..d06e2c00ec3f9 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -16,18 +16,10 @@ #ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H #define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H +#include "llvm/Support/NVPTXAddrSpace.h" namespace llvm { -enum AddressSpace { - ADDRESS_SPACE_GENERIC = 0, - ADDRESS_SPACE_GLOBAL = 1, - ADDRESS_SPACE_SHARED = 3, - ADDRESS_SPACE_CONST = 4, - ADDRESS_SPACE_LOCAL = 5, - - // NVVM Internal - ADDRESS_SPACE_PARAM = 101 -}; +using namespace NVPTXAS; namespace NVPTXII { enum { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index e35ba25b47880..31087a0054e9f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -15,10 +15,12 @@ #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Value.h" #include
"llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include using namespace llvm; @@ -117,7 +119,8 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { } // Convert NVVM intrinsics to target-generic LLVM code where possible. -static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { +static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC, + IntrinsicInst *II) { // Each NVVM intrinsic we can simplify can be replaced with one of: // // * an LLVM intrinsic, @@ -413,11 +416,65 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { llvm_unreachable("All SpecialCase enumerators should be handled in switch."); } +// Returns an instruction pointer (may be nullptr if we do not know the answer). +// Returns nullopt if `II` is not one of the `isspacep` intrinsics. +static std::optional +handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) { + Value *Op0 = II.getArgOperand(0); + // Returns true/false when we know the answer, nullopt otherwise. + auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional { + if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC || + AS == NVPTXAS::ADDRESS_SPACE_PARAM) + return std::nullopt; // Got to check at run-time. + switch (IID) { + case Intrinsic::nvvm_isspacep_global: + return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL; + case Intrinsic::nvvm_isspacep_local: + return AS == NVPTXAS::ADDRESS_SPACE_LOCAL; + case Intrinsic::nvvm_isspacep_shared: + return AS == NVPTXAS::ADDRESS_SPACE_SHARED; + case Intrinsic::nvvm_isspacep_shared_cluster: + // We can't tell shared from shared_cluster at compile time from AS alone, + // but it can't be either if AS is not shared. + return AS == NVPTXAS::ADDRESS_SPACE_SHARED ?
std::nullopt + : std::optional{false}; + case Intrinsic::nvvm_isspacep_const: + return AS == NVPTXAS::ADDRESS_SPACE_CONST; + default: + llvm_unreachable("Unexpected intrinsic"); + } + }; + + switch (auto IID = II.getIntrinsicID()) { + case Intrinsic::nvvm_isspacep_global: + case Intrinsic::nvvm_isspacep_local: + case Intrinsic::nvvm_isspacep_shared: + case Intrinsic::nvvm_isspacep_shared_cluster: + case Intrinsic::nvvm_isspacep_const: { + auto *Ty = II.getType(); + unsigned AS = Op0->getType()->getPointerAddressSpace(); + // Peek through ASC to generic AS. + // TODO: we could dig deeper through both ASCs and GEPs. + if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) + if (auto *ASCO = dyn_cast(Op0)) + AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace(); + + if (std::optional Answer = CheckASMatch(IID, AS)) + return IC.replaceInstUsesWith(II, ConstantInt::get(Ty, *Answer)); + return nullptr; // Don't know the answer, got to check at run time. + } + default: + return std::nullopt; + } +} + std::optional NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { - if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) { + if (std::optional I = handleSpaceCheckIntrinsics(IC, II)) + return *I; + if (Instruction *I = convertNvvmIntrinsicToLlvm(IC, &II)) return I; - } + return std::nullopt; } diff --git a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll new file mode 100644 index 0000000000000..dedd85e1a8cda --- /dev/null +++ b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll @@ -0,0 +1,277 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s +target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +; Source data in different AS. 
+@shared_data = dso_local addrspace(3) global i32 undef, align 4 +@global_data = dso_local addrspace(1) externally_initialized global i32 0, align 4 +@const_data = dso_local addrspace(4) externally_initialized constant i32 3, align 4 + +; Results get stored here. +@gen = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@g1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@g2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@s1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@s2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@c1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@c2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 +@l = dso_local addrspace(1) externally_initialized global i8 0, align 1 + +declare i1 @llvm.nvvm.isspacep.global(ptr nocapture) +declare i1 @llvm.nvvm.isspacep.shared(ptr nocapture) +declare i1 @llvm.nvvm.isspacep.const(ptr nocapture) +declare i1 @llvm.nvvm.isspacep.local(ptr nocapture) + +define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, +; CHECK-LABEL: define dso_local void @check_global( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENP]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; 
CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; + ptr addrspace(1) %gp, + ptr addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { +entry: + ; No constant folding for generic pointers of unknown origin. + %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %genp) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %gp_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %sp_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %cp_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr 
addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 + + ; Local data can't have a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. + %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %lp_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + +define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, +; CHECK-LABEL: define dso_local void @check_shared( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENP]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; + ptr addrspace(1) %gp, + ptr addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { +entry: + ; No constant folding for generic pointers of unknown origin.
+ %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %genp) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %gp_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %sp_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %cp_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 + + ; Local data can't have a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. 
+ %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %lp_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + +define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, +; CHECK-LABEL: define dso_local void @check_const( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENP]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; + ptr addrspace(1) %gp, + ptr addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { +entry: + ; No constant folding for generic pointers of unknown origin. 
+ %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %genp) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %gp_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %sp_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %cp_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 + + ; Local data can't have a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. 
+ %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %lp_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + +define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, +; CHECK-LABEL: define dso_local void @check_local( +; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENP]]) +; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 +; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 +; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 +; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 +; CHECK-NEXT: ret void +; + ptr addrspace(1) %gp, + ptr addrspace(3) %sp, + ptr addrspace(4) %cp, + ptr addrspace(5) %lp) local_unnamed_addr { +entry: + ; No constant folding for generic pointers of unknown origin. 
+ %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %genp) + %storedv = zext i1 %gen0 to i8 + store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 + + %isg1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) + %isg18 = zext i1 %isg1 to i8 + store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 + + %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr + %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %gp_asc) + %isg28 = zext i1 %isg2 to i8 + store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 + + %iss1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) + %iss18 = zext i1 %iss1 to i8 + store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 + + %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr + %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %sp_asc) + %iss28 = zext i1 %iss2 to i8 + store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 + + %isc1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) + %isc18 = zext i1 %isc1 to i8 + store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 + + %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr + %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %cp_asc) + %isc28 = zext i1 %isc2 to i8 + store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 + + ; Local data can't have a constant address, so we can't have a constant ASC expression + ; We can only use an ASC instruction. 
+ %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr + %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %lp_asc) + %isl8 = zext i1 %isl to i8 + store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 + + ret void +} + From d043670d66ce7958aec7837ee572f3dc8948f11a Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 31 Oct 2024 07:26:12 +0900 Subject: [PATCH 66/69] [mlir][func] Replace `ValueDecomposer` with target materialization (#114192) The `ValueDecomposer` in `DecomposeCallGraphTypes` was a workaround around missing 1:N support in the dialect conversion. Since #113032, the dialect conversion infrastructure supports 1:N type conversions and 1:N target materializations. The `ValueDecomposer` class is no longer needed. (However, target materializations must still be inserted manually, until we fully merge the 1:1 and 1:N drivers.) Note for LLVM integration: Register 1:N target materializations on the type converter instead of "decompose value conversions" on the `ValueDecomposer`. --- .../Func/Transforms/DecomposeCallGraphTypes.h | 62 +--------- .../Transforms/DecomposeCallGraphTypes.cpp | 111 +++++++++--------- .../Func/TestDecomposeCallGraphTypes.cpp | 60 ++++++---- 3 files changed, 93 insertions(+), 140 deletions(-) diff --git a/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h b/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h index 1d311b37b37a4..1be406bf3adf9 100644 --- a/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h +++ b/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h @@ -23,70 +23,10 @@ namespace mlir { -/// This class provides a hook that expands one Value into multiple Value's, -/// with a TypeConverter-inspired callback registration mechanism. -/// -/// For folks that are familiar with the dialect conversion framework / -/// TypeConverter, this is effectively the inverse of a source/argument -/// materialization. 
A target materialization is not what we want here because -/// it always produces a single Value, but in this case the whole point is to -/// decompose a Value into multiple Value's. -/// -/// The reason we need this inverse is easily understood by looking at what we -/// need to do for decomposing types for a return op. When converting a return -/// op, the dialect conversion framework will give the list of converted -/// operands, and will ensure that each converted operand, even if it expanded -/// into multiple types, is materialized as a single result. We then need to -/// undo that materialization to a single result, which we do with the -/// decomposeValue hooks registered on this object. -/// -/// TODO: Eventually, the type conversion infra should have this hook built-in. -/// See -/// https://llvm.discourse.group/t/extending-type-conversion-infrastructure/779/2 -class ValueDecomposer { -public: - /// This method tries to decompose a value of a certain type using provided - /// decompose callback functions. If it is unable to do so, the original value - /// is returned. - void decomposeValue(OpBuilder &, Location, Type, Value, - SmallVectorImpl &); - - /// This method registers a callback function that will be called to decompose - /// a value of a certain type into 0, 1, or multiple values. - template >::template arg_t<2>> - void addDecomposeValueConversion(FnT &&callback) { - decomposeValueConversions.emplace_back( - wrapDecomposeValueConversionCallback(std::forward(callback))); - } - -private: - using DecomposeValueConversionCallFn = - std::function( - OpBuilder &, Location, Type, Value, SmallVectorImpl &)>; - - /// Generate a wrapper for the given decompose value conversion callback. 
- template - DecomposeValueConversionCallFn - wrapDecomposeValueConversionCallback(FnT &&callback) { - return - [callback = std::forward(callback)]( - OpBuilder &builder, Location loc, Type type, Value value, - SmallVectorImpl &newValues) -> std::optional { - if (T derivedType = dyn_cast(type)) - return callback(builder, loc, derivedType, value, newValues); - return std::nullopt; - }; - } - - SmallVector decomposeValueConversions; -}; - /// Populates the patterns needed to drive the conversion process for -/// decomposing call graph types with the given `ValueDecomposer`. +/// decomposing call graph types with the given `TypeConverter`. void populateDecomposeCallGraphTypesPatterns(MLIRContext *context, const TypeConverter &typeConverter, - ValueDecomposer &decomposer, RewritePatternSet &patterns); } // namespace mlir diff --git a/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp b/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp index 357f993710a26..de4aba2ed327d 100644 --- a/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp +++ b/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp @@ -14,52 +14,48 @@ using namespace mlir; using namespace mlir::func; //===----------------------------------------------------------------------===// -// ValueDecomposer +// Helper functions //===----------------------------------------------------------------------===// -void ValueDecomposer::decomposeValue(OpBuilder &builder, Location loc, - Type type, Value value, - SmallVectorImpl &results) { - for (auto &conversion : decomposeValueConversions) - if (conversion(builder, loc, type, value, results)) - return; - results.push_back(value); +/// If the given value can be decomposed with the type converter, decompose it. +/// Otherwise, return the given value. +// TODO: Value decomposition should happen automatically through a 1:N adaptor. +// This function will disappear when the 1:1 and 1:N drivers are merged. 
+static SmallVector decomposeValue(OpBuilder &builder, Location loc, + Value value, + const TypeConverter *converter) { + // Try to convert the given value's type. If that fails, just return the + // given value. + SmallVector convertedTypes; + if (failed(converter->convertType(value.getType(), convertedTypes))) + return {value}; + if (convertedTypes.empty()) + return {}; + + // If the given value's type is already legal, just return the given value. + TypeRange convertedTypeRange(convertedTypes); + if (convertedTypeRange == TypeRange(value.getType())) + return {value}; + + // Try to materialize a target conversion. If the materialization did not + // produce values of the requested type, the materialization failed. Just + // return the given value in that case. + SmallVector result = converter->materializeTargetConversion( + builder, loc, convertedTypeRange, value); + if (result.empty()) + return {value}; + return result; } -//===----------------------------------------------------------------------===// -// DecomposeCallGraphTypesOpConversionPattern -//===----------------------------------------------------------------------===// - -namespace { -/// Base OpConversionPattern class to make a ValueDecomposer available to -/// inherited patterns. -template -class DecomposeCallGraphTypesOpConversionPattern - : public OpConversionPattern { -public: - DecomposeCallGraphTypesOpConversionPattern(const TypeConverter &typeConverter, - MLIRContext *context, - ValueDecomposer &decomposer, - PatternBenefit benefit = 1) - : OpConversionPattern(typeConverter, context, benefit), - decomposer(decomposer) {} - -protected: - ValueDecomposer &decomposer; -}; -} // namespace - //===----------------------------------------------------------------------===// // DecomposeCallGraphTypesForFuncArgs //===----------------------------------------------------------------------===// namespace { -/// Expand function arguments according to the provided TypeConverter and -/// ValueDecomposer. 
+/// Expand function arguments according to the provided TypeConverter. struct DecomposeCallGraphTypesForFuncArgs - : public DecomposeCallGraphTypesOpConversionPattern { - using DecomposeCallGraphTypesOpConversionPattern:: - DecomposeCallGraphTypesOpConversionPattern; + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(func::FuncOp op, OpAdaptor adaptor, @@ -100,19 +96,22 @@ struct DecomposeCallGraphTypesForFuncArgs //===----------------------------------------------------------------------===// namespace { -/// Expand return operands according to the provided TypeConverter and -/// ValueDecomposer. +/// Expand return operands according to the provided TypeConverter. struct DecomposeCallGraphTypesForReturnOp - : public DecomposeCallGraphTypesOpConversionPattern { - using DecomposeCallGraphTypesOpConversionPattern:: - DecomposeCallGraphTypesOpConversionPattern; + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult matchAndRewrite(ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { SmallVector newOperands; - for (Value operand : adaptor.getOperands()) - decomposer.decomposeValue(rewriter, op.getLoc(), operand.getType(), - operand, newOperands); + for (Value operand : adaptor.getOperands()) { + // TODO: We can directly take the values from the adaptor once this is a + // 1:N conversion pattern. + llvm::append_range(newOperands, + decomposeValue(rewriter, operand.getLoc(), operand, + getTypeConverter())); + } rewriter.replaceOpWithNewOp(op, newOperands); return success(); } @@ -124,12 +123,9 @@ struct DecomposeCallGraphTypesForReturnOp //===----------------------------------------------------------------------===// namespace { -/// Expand call op operands and results according to the provided TypeConverter -/// and ValueDecomposer. 
-struct DecomposeCallGraphTypesForCallOp - : public DecomposeCallGraphTypesOpConversionPattern { - using DecomposeCallGraphTypesOpConversionPattern:: - DecomposeCallGraphTypesOpConversionPattern; +/// Expand call op operands and results according to the provided TypeConverter. +struct DecomposeCallGraphTypesForCallOp : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(CallOp op, OpAdaptor adaptor, @@ -137,9 +133,13 @@ struct DecomposeCallGraphTypesForCallOp // Create the operands list of the new `CallOp`. SmallVector newOperands; - for (Value operand : adaptor.getOperands()) - decomposer.decomposeValue(rewriter, op.getLoc(), operand.getType(), - operand, newOperands); + for (Value operand : adaptor.getOperands()) { + // TODO: We can directly take the values from the adaptor once this is a + // 1:N conversion pattern. + llvm::append_range(newOperands, + decomposeValue(rewriter, operand.getLoc(), operand, + getTypeConverter())); + } // Create the new result types for the new `CallOp` and track the indices in // the new call op's results that correspond to the old call op's results. @@ -189,9 +189,8 @@ struct DecomposeCallGraphTypesForCallOp void mlir::populateDecomposeCallGraphTypesPatterns( MLIRContext *context, const TypeConverter &typeConverter, - ValueDecomposer &decomposer, RewritePatternSet &patterns) { + RewritePatternSet &patterns) { patterns .add(typeConverter, context, - decomposer); + DecomposeCallGraphTypesForReturnOp>(typeConverter, context); } diff --git a/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp b/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp index 92216da9f201e..de511c58ae6ee 100644 --- a/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp +++ b/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp @@ -21,23 +21,40 @@ namespace { /// given tuple value. 
If some tuple elements are, in turn, tuples, the elements /// of those are extracted recursively such that the returned values have the /// same types as `resultTypes.getFlattenedTypes()`. -static LogicalResult buildDecomposeTuple(OpBuilder &builder, Location loc, - TupleType resultType, Value value, - SmallVectorImpl &values) { - for (unsigned i = 0, e = resultType.size(); i < e; ++i) { - Type elementType = resultType.getType(i); - Value element = builder.create( - loc, elementType, value, builder.getI32IntegerAttr(i)); - if (auto nestedTupleType = dyn_cast(elementType)) { - // Recurse if the current element is also a tuple. - if (failed(buildDecomposeTuple(builder, loc, nestedTupleType, element, - values))) - return failure(); - } else { - values.push_back(element); +static SmallVector buildDecomposeTuple(OpBuilder &builder, + TypeRange resultTypes, + ValueRange inputs, Location loc) { + // Skip materialization if the single input value is not a tuple. + if (inputs.size() != 1) + return {}; + Value tuple = inputs.front(); + auto tupleType = dyn_cast(tuple.getType()); + if (!tupleType) + return {}; + // Skip materialization if the flattened types do not match the requested + // result types. + SmallVector flattenedTypes; + tupleType.getFlattenedTypes(flattenedTypes); + if (TypeRange(resultTypes) != TypeRange(flattenedTypes)) + return {}; + // Recursively decompose the tuple. + SmallVector result; + std::function decompose = [&](Value tuple) { + auto tupleType = dyn_cast(tuple.getType()); + if (!tupleType) { + // This is not a tuple. 
+ result.push_back(tuple); + return; } - } - return success(); + for (unsigned i = 0, e = tupleType.size(); i < e; ++i) { + Type elementType = tupleType.getType(i); + Value element = builder.create( + loc, elementType, tuple, builder.getI32IntegerAttr(i)); + decompose(element); + } + }; + decompose(tuple); + return result; } /// Creates a `test.make_tuple` op out of the given inputs building a tuple of @@ -82,8 +99,8 @@ static Value buildMakeTupleOp(OpBuilder &builder, TupleType resultType, /// A pass for testing call graph type decomposition. /// -/// This instantiates the patterns with a TypeConverter and ValueDecomposer -/// that splits tuple types into their respective element types. +/// This instantiates the patterns with a TypeConverter that splits tuple types +/// into their respective element types. /// For example, `tuple --> T1, T2, T3`. struct TestDecomposeCallGraphTypes : public PassWrapper> { @@ -123,12 +140,9 @@ struct TestDecomposeCallGraphTypes return success(); }); typeConverter.addArgumentMaterialization(buildMakeTupleOp); + typeConverter.addTargetMaterialization(buildDecomposeTuple); - ValueDecomposer decomposer; - decomposer.addDecomposeValueConversion(buildDecomposeTuple); - - populateDecomposeCallGraphTypesPatterns(context, typeConverter, decomposer, - patterns); + populateDecomposeCallGraphTypesPatterns(context, typeConverter, patterns); if (failed(applyPartialConversion(module, target, std::move(patterns)))) return signalPassFailure(); From 04e876e6c6eee5332f5fff30c8778abe82ebf52f Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 30 Oct 2024 15:34:08 -0700 Subject: [PATCH 67/69] Revert "[NVPTX] instcombine known pointer AS checks." 
(#114319) Reverts llvm/llvm-project#112964 Crashes MLIR: https://lab.llvm.org/buildbot/#/builders/138/builds/5665 --- llvm/include/llvm/Support/NVPTXAddrSpace.h | 33 --- .../Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 12 +- .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 63 +--- .../Transforms/InstCombine/NVPTX/isspacep.ll | 277 ------------------ 4 files changed, 13 insertions(+), 372 deletions(-) delete mode 100644 llvm/include/llvm/Support/NVPTXAddrSpace.h delete mode 100644 llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll diff --git a/llvm/include/llvm/Support/NVPTXAddrSpace.h b/llvm/include/llvm/Support/NVPTXAddrSpace.h deleted file mode 100644 index 93eae39e3d230..0000000000000 --- a/llvm/include/llvm/Support/NVPTXAddrSpace.h +++ /dev/null @@ -1,33 +0,0 @@ -//===---------------- NVPTXAddrSpace.h -------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// NVPTX address space definition -/// -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_NVPTXADDRSPACE_H -#define LLVM_SUPPORT_NVPTXADDRSPACE_H - -namespace llvm { -namespace NVPTXAS { -enum AddressSpace : unsigned { - ADDRESS_SPACE_GENERIC = 0, - ADDRESS_SPACE_GLOBAL = 1, - ADDRESS_SPACE_SHARED = 3, - ADDRESS_SPACE_CONST = 4, - ADDRESS_SPACE_LOCAL = 5, - - ADDRESS_SPACE_PARAM = 101, -}; -} // end namespace NVPTXAS - -} // end namespace llvm - -#endif // LLVM_SUPPORT_NVPTXADDRSPACE_H diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index d06e2c00ec3f9..815b600fe93a9 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -16,10 +16,18 @@ #ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H #define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXBASEINFO_H -#include "llvm/Support/NVPTXAddrSpace.h" namespace llvm { -using namespace NVPTXAS; +enum AddressSpace { + ADDRESS_SPACE_GENERIC = 0, + ADDRESS_SPACE_GLOBAL = 1, + ADDRESS_SPACE_SHARED = 3, + ADDRESS_SPACE_CONST = 4, + ADDRESS_SPACE_LOCAL = 5, + + // NVVM Internal + ADDRESS_SPACE_PARAM = 101 +}; namespace NVPTXII { enum { diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index 31087a0054e9f..e35ba25b47880 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -15,12 +15,10 @@ #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Value.h" #include 
"llvm/Support/Casting.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include using namespace llvm; @@ -119,8 +117,7 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { } // Convert NVVM intrinsics to target-generic LLVM code where possible. -static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC, - IntrinsicInst *II) { +static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // Each NVVM intrinsic we can simplify can be replaced with one of: // // * an LLVM intrinsic, @@ -416,65 +413,11 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC, llvm_unreachable("All SpecialCase enumerators should be handled in switch."); } -// Returns an instruction pointer (may be nullptr if we do not know the answer). -// Returns nullopt if `II` is not one of the `isspacep` intrinsics. -static std::optional -handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) { - Value *Op0 = II.getArgOperand(0); - // Returns true/false when we know the answer, nullopt otherwise. - auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional { - if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC || - AS == NVPTXAS::ADDRESS_SPACE_PARAM) - return std::nullopt; // Got to check at run-time. - switch (IID) { - case Intrinsic::nvvm_isspacep_global: - return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL; - case Intrinsic::nvvm_isspacep_local: - return AS == NVPTXAS::ADDRESS_SPACE_LOCAL; - case Intrinsic::nvvm_isspacep_shared: - return AS == NVPTXAS::ADDRESS_SPACE_SHARED; - case Intrinsic::nvvm_isspacep_shared_cluster: - // We can't tell shared from shared_cluster at compile time from AS alone, - // but it can't be either is AS is not shared. - return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? 
std::nullopt - : std::optional{false}; - case Intrinsic::nvvm_isspacep_const: - return AS == NVPTXAS::ADDRESS_SPACE_CONST; - default: - llvm_unreachable("Unexpected intrinsic"); - } - }; - - switch (auto IID = II.getIntrinsicID()) { - case Intrinsic::nvvm_isspacep_global: - case Intrinsic::nvvm_isspacep_local: - case Intrinsic::nvvm_isspacep_shared: - case Intrinsic::nvvm_isspacep_shared_cluster: - case Intrinsic::nvvm_isspacep_const: { - auto *Ty = II.getType(); - unsigned AS = Op0->getType()->getPointerAddressSpace(); - // Peek through ASC to generic AS. - // TODO: we could dig deeper through both ASCs and GEPs. - if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC) - if (auto *ASCO = dyn_cast(Op0)) - AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace(); - - if (std::optional Answer = CheckASMatch(IID, AS)) - return IC.replaceInstUsesWith(II, ConstantInt::get(Ty, *Answer)); - return nullptr; // Don't know the answer, got to check at run time. - } - default: - return std::nullopt; - } -} - std::optional NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { - if (std::optional I = handleSpaceCheckIntrinsics(IC, II)) - return *I; - if (Instruction *I = convertNvvmIntrinsicToLlvm(IC, &II)) + if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) { return I; - + } return std::nullopt; } diff --git a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll b/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll deleted file mode 100644 index dedd85e1a8cda..0000000000000 --- a/llvm/test/Transforms/InstCombine/NVPTX/isspacep.ll +++ /dev/null @@ -1,277 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -; Source data in different AS. 
-@shared_data = dso_local addrspace(3) global i32 undef, align 4 -@global_data = dso_local addrspace(1) externally_initialized global i32 0, align 4 -@const_data = dso_local addrspace(4) externally_initialized constant i32 3, align 4 - -; Results get stored here. -@gen = dso_local addrspace(1) externally_initialized global i8 0, align 1 -@g1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 -@g2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 -@s1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 -@s2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 -@c1 = dso_local addrspace(1) externally_initialized global i8 0, align 1 -@c2 = dso_local addrspace(1) externally_initialized global i8 0, align 1 -@l = dso_local addrspace(1) externally_initialized global i8 0, align 1 - -declare i1 @llvm.nvvm.isspacep.global(ptr nocapture) -declare i1 @llvm.nvvm.isspacep.shared(ptr nocapture) -declare i1 @llvm.nvvm.isspacep.const(ptr nocapture) -declare i1 @llvm.nvvm.isspacep.local(ptr nocapture) - -define dso_local void @check_global(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, -; CHECK-LABEL: define dso_local void @check_global( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[GENP]]) -; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 -; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 -; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 -; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 -; 
CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 -; CHECK-NEXT: ret void -; - ptr addrspace(1) %gp, - ptr addrspace(3) %sp, - ptr addrspace(4) %cp, - ptr addrspace(5) %lp) local_unnamed_addr { -entry: - ; No constant folding for generic pointers of unknown origin. - %gen0 = tail call i1 @llvm.nvvm.isspacep.global(ptr %genp) - %storedv = zext i1 %gen0 to i8 - store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 - - %isg1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) - %isg18 = zext i1 %isg1 to i8 - store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - - %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %gp_asc) - %isg28 = zext i1 %isg2 to i8 - store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 - - %iss1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) - %iss18 = zext i1 %iss1 to i8 - store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - - %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %sp_asc) - %iss28 = zext i1 %iss2 to i8 - store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 - - %isc1 = tail call i1 @llvm.nvvm.isspacep.global(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) - %isc18 = zext i1 %isc1 to i8 - store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - - %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.global(ptr %cp_asc) - %isc28 = zext i1 %isc2 to i8 - store i8 %isc28, ptr 
addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 - - ; Local data can't ihave a constant address, so we can't have a constant ASC expression - ; We can only use an ASC instruction. - %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr - %isl = call i1 @llvm.nvvm.isspacep.global(ptr nonnull %lp_asc) - %isl8 = zext i1 %isl to i8 - store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 - - ret void -} - -define dso_local void @check_shared(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, -; CHECK-LABEL: define dso_local void @check_shared( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[GENP]]) -; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 -; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 -; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 -; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 -; CHECK-NEXT: ret void -; - ptr addrspace(1) %gp, - ptr addrspace(3) %sp, - ptr addrspace(4) %cp, - ptr addrspace(5) %lp) local_unnamed_addr { -entry: - ; No constant folding for generic pointers of unknown origin. 
- %gen0 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %genp) - %storedv = zext i1 %gen0 to i8 - store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 - - %isg1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) - %isg18 = zext i1 %isg1 to i8 - store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - - %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %gp_asc) - %isg28 = zext i1 %isg2 to i8 - store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 - - %iss1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) - %iss18 = zext i1 %iss1 to i8 - store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - - %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %sp_asc) - %iss28 = zext i1 %iss2 to i8 - store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 - - %isc1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) - %isc18 = zext i1 %isc1 to i8 - store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - - %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %cp_asc) - %isc28 = zext i1 %isc2 to i8 - store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 - - ; Local data can't have a constant address, so we can't have a constant ASC expression - ; We can only use an ASC instruction. 
- %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr - %isl = call i1 @llvm.nvvm.isspacep.shared(ptr nonnull %lp_asc) - %isl8 = zext i1 %isl to i8 - store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 - - ret void -} - -define dso_local void @check_const(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, -; CHECK-LABEL: define dso_local void @check_const( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.const(ptr [[GENP]]) -; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 -; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 -; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 -; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 -; CHECK-NEXT: ret void -; - ptr addrspace(1) %gp, - ptr addrspace(3) %sp, - ptr addrspace(4) %cp, - ptr addrspace(5) %lp) local_unnamed_addr { -entry: - ; No constant folding for generic pointers of unknown origin. 
- %gen0 = tail call i1 @llvm.nvvm.isspacep.const(ptr %genp) - %storedv = zext i1 %gen0 to i8 - store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 - - %isg1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) - %isg18 = zext i1 %isg1 to i8 - store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - - %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %gp_asc) - %isg28 = zext i1 %isg2 to i8 - store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 - - %iss1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) - %iss18 = zext i1 %iss1 to i8 - store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - - %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %sp_asc) - %iss28 = zext i1 %iss2 to i8 - store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 - - %isc1 = tail call i1 @llvm.nvvm.isspacep.const(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) - %isc18 = zext i1 %isc1 to i8 - store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - - %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.const(ptr %cp_asc) - %isc28 = zext i1 %isc2 to i8 - store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 - - ; Local data can't have a constant address, so we can't have a constant ASC expression - ; We can only use an ASC instruction. 
- %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr - %isl = call i1 @llvm.nvvm.isspacep.const(ptr nonnull %lp_asc) - %isl8 = zext i1 %isl to i8 - store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 - - ret void -} - -define dso_local void @check_local(ptr nocapture noundef readnone %out, ptr nocapture noundef readnone %genp, -; CHECK-LABEL: define dso_local void @check_local( -; CHECK-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr nocapture noundef readnone [[GENP:%.*]], ptr addrspace(1) [[GP:%.*]], ptr addrspace(3) [[SP:%.*]], ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[LP:%.*]]) local_unnamed_addr { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[GEN0:%.*]] = tail call i1 @llvm.nvvm.isspacep.local(ptr [[GENP]]) -; CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[GEN0]] to i8 -; CHECK-NEXT: store i8 [[STOREDV]], ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 -; CHECK-NEXT: store i8 0, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 -; CHECK-NEXT: store i8 1, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 -; CHECK-NEXT: ret void -; - ptr addrspace(1) %gp, - ptr addrspace(3) %sp, - ptr addrspace(4) %cp, - ptr addrspace(5) %lp) local_unnamed_addr { -entry: - ; No constant folding for generic pointers of unknown origin. 
- %gen0 = tail call i1 @llvm.nvvm.isspacep.local(ptr %genp) - %storedv = zext i1 %gen0 to i8 - store i8 %storedv, ptr addrspacecast (ptr addrspace(1) @gen to ptr), align 1 - - %isg1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(1) @global_data to ptr)) - %isg18 = zext i1 %isg1 to i8 - store i8 %isg18, ptr addrspacecast (ptr addrspace(1) @g1 to ptr), align 1 - - %gp_asc = addrspacecast ptr addrspace(1) %gp to ptr - %isg2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %gp_asc) - %isg28 = zext i1 %isg2 to i8 - store i8 %isg28, ptr addrspacecast (ptr addrspace(1) @g2 to ptr), align 1 - - %iss1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(3) @shared_data to ptr)) - %iss18 = zext i1 %iss1 to i8 - store i8 %iss18, ptr addrspacecast (ptr addrspace(1) @s1 to ptr), align 1 - - %sp_asc = addrspacecast ptr addrspace(3) %sp to ptr - %iss2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %sp_asc) - %iss28 = zext i1 %iss2 to i8 - store i8 %iss28, ptr addrspacecast (ptr addrspace(1) @s2 to ptr), align 1 - - %isc1 = tail call i1 @llvm.nvvm.isspacep.local(ptr addrspacecast (ptr addrspace(4) @const_data to ptr)) - %isc18 = zext i1 %isc1 to i8 - store i8 %isc18, ptr addrspacecast (ptr addrspace(1) @c1 to ptr), align 1 - - %cp_asc = addrspacecast ptr addrspace(4) %cp to ptr - %isc2 = tail call i1 @llvm.nvvm.isspacep.local(ptr %cp_asc) - %isc28 = zext i1 %isc2 to i8 - store i8 %isc28, ptr addrspacecast (ptr addrspace(1) @c2 to ptr), align 1 - - ; Local data can't have a constant address, so we can't have a constant ASC expression - ; We can only use an ASC instruction. 
- %lp_asc = addrspacecast ptr addrspace(5) %lp to ptr - %isl = call i1 @llvm.nvvm.isspacep.local(ptr nonnull %lp_asc) - %isl8 = zext i1 %isl to i8 - store i8 %isl8, ptr addrspacecast (ptr addrspace(1) @l to ptr), align 1 - - ret void -} - From 67c485798a16c4c656ff7a8a38cc98fe46d25154 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Wed, 30 Oct 2024 15:39:32 -0700 Subject: [PATCH 68/69] [mlir][spirv] Ignore extra comma for category_args in gen_spirv_dialect.py (#111776) In the code being parsed, the comma separates following traits from the category args. If there's no category args, it is still present. --- mlir/utils/spirv/gen_spirv_dialect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/utils/spirv/gen_spirv_dialect.py b/mlir/utils/spirv/gen_spirv_dialect.py index 2fb540ef10325..70c3d9db16889 100755 --- a/mlir/utils/spirv/gen_spirv_dialect.py +++ b/mlir/utils/spirv/gen_spirv_dialect.py @@ -989,6 +989,7 @@ def extract_td_op_info(op_def): op_tmpl_params, _ = get_string_between_nested(op_def, "<", ">") opstringname, rest = get_string_between(op_tmpl_params, '"', '"') category_args = rest.split("[", 1)[0] + category_args = category_args.rsplit(",", 1)[0] # Get traits traits, _ = get_string_between_nested(rest, "[", "]") From 6e75eec866133620dcba956bc7d6dbc554642249 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Wed, 30 Oct 2024 15:40:08 -0700 Subject: [PATCH 69/69] [mlir][spirv] Remove code for de-duplicating symbols in SPIR-V grammar (#111778) SPIR-V grammar was updated in upstream to have an "aliases" field instead of duplicating symbols with same values. See https://github.com/KhronosGroup/SPIRV-Headers/pull/447 for details. 
--- mlir/utils/spirv/gen_spirv_dialect.py | 101 +++----------------------- 1 file changed, 10 insertions(+), 91 deletions(-) diff --git a/mlir/utils/spirv/gen_spirv_dialect.py b/mlir/utils/spirv/gen_spirv_dialect.py index 70c3d9db16889..99ed3489b4cbd 100755 --- a/mlir/utils/spirv/gen_spirv_dialect.py +++ b/mlir/utils/spirv/gen_spirv_dialect.py @@ -127,44 +127,6 @@ def split_list_into_sublists(items): return chuncks -def uniquify_enum_cases(lst): - """Prunes duplicate enum cases from the list. - - Arguments: - - lst: List whose elements are to be uniqued. Assumes each element is a - (symbol, value) pair and elements already sorted according to value. - - Returns: - - A list with all duplicates removed. The elements are sorted according to - value and, for each value, uniqued according to symbol. - original list, - - A map from deduplicated cases to the uniqued case. - """ - cases = lst - uniqued_cases = [] - duplicated_cases = {} - - # First sort according to the value - cases.sort(key=lambda x: x[1]) - - # Then group them according to the value - for _, groups in itertools.groupby(cases, key=lambda x: x[1]): - # For each value, sort according to the enumerant symbol. - sorted_group = sorted(groups, key=lambda x: x[0]) - # Keep the "smallest" case, which is typically the symbol without extension - # suffix. But we have special cases that we want to fix. - case = sorted_group[0] - for i in range(1, len(sorted_group)): - duplicated_cases[sorted_group[i][0]] = case[0] - if case[0] == "HlslSemanticGOOGLE": - assert len(sorted_group) == 2, "unexpected new variant for HlslSemantic" - case = sorted_group[1] - duplicated_cases[sorted_group[0][0]] = case[0] - uniqued_cases.append(case) - - return uniqued_cases, duplicated_cases - - def toposort(dag, sort_fn): """Topologically sorts the given dag. 
@@ -197,14 +159,12 @@ def get_next_batch(dag): return sorted_nodes -def toposort_capabilities(all_cases, capability_mapping): +def toposort_capabilities(all_cases): """Returns topologically sorted capability (symbol, value) pairs. Arguments: - all_cases: all capability cases (containing symbol, value, and implied capabilities). - - capability_mapping: mapping from duplicated capability symbols to the - canonicalized symbol chosen for SPIRVBase.td. Returns: A list containing topologically sorted capability (symbol, value) pairs. @@ -215,13 +175,10 @@ def toposort_capabilities(all_cases, capability_mapping): # Get the current capability. cur = case["enumerant"] name_to_value[cur] = case["value"] - # Ignore duplicated symbols. - if cur in capability_mapping: - continue # Get capabilities implied by the current capability. prev = case.get("capabilities", []) - uniqued_prev = set([capability_mapping.get(c, c) for c in prev]) + uniqued_prev = set(prev) dag[cur] = uniqued_prev sorted_caps = toposort(dag, lambda x: name_to_value[x]) @@ -229,36 +186,12 @@ def toposort_capabilities(all_cases, capability_mapping): return [(c, name_to_value[c]) for c in sorted_caps] -def get_capability_mapping(operand_kinds): - """Returns the capability mapping from duplicated cases to canonicalized ones. - - Arguments: - - operand_kinds: all operand kinds' grammar spec - - Returns: - - A map mapping from duplicated capability symbols to the canonicalized - symbol chosen for SPIRVBase.td. 
- """ - # Find the operand kind for capability - cap_kind = {} - for kind in operand_kinds: - if kind["kind"] == "Capability": - cap_kind = kind - - kind_cases = [(case["enumerant"], case["value"]) for case in cap_kind["enumerants"]] - _, capability_mapping = uniquify_enum_cases(kind_cases) - - return capability_mapping - - -def get_availability_spec(enum_case, capability_mapping, for_op, for_cap): +def get_availability_spec(enum_case, for_op, for_cap): """Returns the availability specification string for the given enum case. Arguments: - enum_case: the enum case to generate availability spec for. It may contain 'version', 'lastVersion', 'extensions', or 'capabilities'. - - capability_mapping: mapping from duplicated capability symbols to the - canonicalized symbol chosen for SPIRVBase.td. - for_op: bool value indicating whether this is the availability spec for an op itself. - for_cap: bool value indicating whether this is the availability spec for @@ -313,10 +246,7 @@ def get_availability_spec(enum_case, capability_mapping, for_op, for_cap): if caps: canonicalized_caps = [] for c in caps: - if c in capability_mapping: - canonicalized_caps.append(capability_mapping[c]) - else: - canonicalized_caps.append(c) + canonicalized_caps.append(c) prefixed_caps = [ "SPIRV_C_{}".format(c) for c in sorted(set(canonicalized_caps)) ] @@ -357,7 +287,7 @@ def get_availability_spec(enum_case, capability_mapping, for_op, for_cap): return "{}{}{}".format(implies, "\n " if implies and avail else "", avail) -def gen_operand_kind_enum_attr(operand_kind, capability_mapping): +def gen_operand_kind_enum_attr(operand_kind): """Generates the TableGen EnumAttr definition for the given operand kind. Returns: @@ -388,13 +318,12 @@ def get_case_symbol(kind_name, case_name): # Special treatment for capability cases: we need to sort them topologically # because a capability can refer to another via the 'implies' field. 
kind_cases = toposort_capabilities( - operand_kind["enumerants"], capability_mapping + operand_kind["enumerants"] ) else: kind_cases = [ (case["enumerant"], case["value"]) for case in operand_kind["enumerants"] ] - kind_cases, _ = uniquify_enum_cases(kind_cases) max_len = max([len(symbol) for (symbol, _) in kind_cases]) # Generate the definition for each enum case @@ -412,7 +341,6 @@ def get_case_symbol(kind_name, case_name): value = int(case_pair[1]) avail = get_availability_spec( name_to_case_dict[name], - capability_mapping, False, kind_name == "Capability", ) @@ -648,11 +576,9 @@ def update_td_enum_attrs(path, operand_kinds, filter_list): ] filter_list.extend(existing_kinds) - capability_mapping = get_capability_mapping(operand_kinds) - # Generate definitions for all enums in filter list defs = [ - gen_operand_kind_enum_attr(kind, capability_mapping) + gen_operand_kind_enum_attr(kind) for kind in operand_kinds if kind["kind"] in filter_list ] @@ -762,7 +688,7 @@ def get_description(text, appendix): def get_op_definition( - instruction, opname, doc, existing_info, capability_mapping, settings + instruction, opname, doc, existing_info, settings ): """Generates the TableGen op definition for the given SPIR-V instruction. 
@@ -771,8 +697,6 @@ def get_op_definition( - doc: the instruction's SPIR-V HTML doc - existing_info: a dict containing potential manually specified sections for this instruction - - capability_mapping: mapping from duplicated capability symbols to the - canonicalized symbol chosen for SPIRVBase.td Returns: - A string containing the TableGen op definition @@ -840,7 +764,7 @@ def get_op_definition( operands = instruction.get("operands", []) # Op availability - avail = get_availability_spec(instruction, capability_mapping, True, False) + avail = get_availability_spec(instruction, True, False) if avail: avail = "\n\n {0}".format(avail) @@ -1021,7 +945,7 @@ def extract_td_op_info(op_def): def update_td_op_definitions( - path, instructions, docs, filter_list, inst_category, capability_mapping, settings + path, instructions, docs, filter_list, inst_category, settings ): """Updates SPIRVOps.td with newly generated op definition. @@ -1030,8 +954,6 @@ def update_td_op_definitions( - instructions: SPIR-V JSON grammar for all instructions - docs: SPIR-V HTML doc for all instructions - filter_list: a list containing new opnames to include - - capability_mapping: mapping from duplicated capability symbols to the - canonicalized symbol chosen for SPIRVBase.td. Returns: - A string containing all the TableGen op definitions @@ -1079,7 +1001,6 @@ def update_td_op_definitions( opname, docs[fixed_opname], op_info_dict.get(opname, {"inst_category": inst_category}), - capability_mapping, settings, ) ) @@ -1186,14 +1107,12 @@ def update_td_op_definitions( if args.new_inst is not None: assert args.op_td_path is not None docs = get_spirv_doc_from_html_spec(ext_html_url, args) - capability_mapping = get_capability_mapping(operand_kinds) update_td_op_definitions( args.op_td_path, instructions, docs, args.new_inst, args.inst_category, - capability_mapping, args, ) print("Done. Note that this script just generates a template; ", end="")