Skip to content

Commit

Permalink
[AArch64][SVE] Add ISel patterns for floating point compare with zero instructions
Browse files Browse the repository at this point in the history

Additionally, lower the floating point compare SVE intrinsics to
SETCC_MERGE_ZERO ISD nodes to avoid duplicating ISel patterns.

Differential Revision: https://reviews.llvm.org/D105486
  • Loading branch information
brads55 committed Jul 8, 2021
1 parent 767eb9f commit 026bb84
Show file tree
Hide file tree
Showing 7 changed files with 319 additions and 145 deletions.
37 changes: 21 additions & 16 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14401,29 +14401,34 @@ static SDValue performIntrinsicCombine(SDNode *N,
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
break;
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_cmpge:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
break;
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_cmpgt:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
break;
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_cmpeq:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
break;
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_cmpne:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;
case Intrinsic::aarch64_sve_fcmpuo:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUO));
break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
Expand Down
22 changes: 11 additions & 11 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1255,20 +1255,20 @@ let Predicates = [HasSVE] in {
defm CMPLO_PPzZI : sve_int_ucmp_vi<0b10, "cmplo", SETULT, SETUGT>;
defm CMPLS_PPzZI : sve_int_ucmp_vi<0b11, "cmpls", SETULE, SETUGE>;

defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", int_aarch64_sve_fcmpge, SETOGE, SETGE, SETOLE, SETLE>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", int_aarch64_sve_fcmpgt, SETOGT, SETGT, SETOLT, SETLT>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", int_aarch64_sve_fcmpeq, SETOEQ, SETEQ, SETOEQ, SETEQ>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", int_aarch64_sve_fcmpne, SETONE, SETNE, SETONE, SETNE>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", int_aarch64_sve_fcmpuo, SETUO, SETUO, SETUO, SETUO>;
defm FCMGE_PPzZZ : sve_fp_3op_p_pd_cc<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>;
defm FCMGT_PPzZZ : sve_fp_3op_p_pd_cc<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>;
defm FCMEQ_PPzZZ : sve_fp_3op_p_pd_cc<0b010, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
defm FCMNE_PPzZZ : sve_fp_3op_p_pd_cc<0b011, "fcmne", SETONE, SETNE, SETONE, SETNE>;
defm FCMUO_PPzZZ : sve_fp_3op_p_pd_cc<0b100, "fcmuo", SETUO, SETUO, SETUO, SETUO>;
defm FACGE_PPzZZ : sve_fp_3op_p_pd<0b101, "facge", int_aarch64_sve_facge>;
defm FACGT_PPzZZ : sve_fp_3op_p_pd<0b111, "facgt", int_aarch64_sve_facgt>;

defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge">;
defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt">;
defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt">;
defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle">;
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq">;
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne">;
defm FCMGE_PPzZ0 : sve_fp_2op_p_pd<0b000, "fcmge", SETOGE, SETGE, SETOLE, SETLE>;
defm FCMGT_PPzZ0 : sve_fp_2op_p_pd<0b001, "fcmgt", SETOGT, SETGT, SETOLT, SETLT>;
defm FCMLT_PPzZ0 : sve_fp_2op_p_pd<0b010, "fcmlt", SETOLT, SETLT, SETOGT, SETGT>;
defm FCMLE_PPzZ0 : sve_fp_2op_p_pd<0b011, "fcmle", SETOLE, SETLE, SETOGE, SETGE>;
defm FCMEQ_PPzZ0 : sve_fp_2op_p_pd<0b100, "fcmeq", SETOEQ, SETEQ, SETOEQ, SETEQ>;
defm FCMNE_PPzZ0 : sve_fp_2op_p_pd<0b110, "fcmne", SETONE, SETNE, SETONE, SETNE>;

defm WHILELT_PWW : sve_int_while4_rr<0b010, "whilelt", int_aarch64_sve_whilelt>;
defm WHILELE_PWW : sve_int_while4_rr<0b011, "whilele", int_aarch64_sve_whilele>;
Expand Down
35 changes: 31 additions & 4 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -4394,6 +4394,14 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
(cmp $Op1, $Op3, $Op2)>;
}

// Selection patterns for a predicated AArch64setcc_z compare in which one of
// the compared operands is the all-zero vector (SVEDup0). Both forms select
// the two-operand instruction `cmp`, passing only the predicate and the
// non-zero vector.
//   cc     - condition code matched when zero is the second compared operand.
//   invcc  - condition code matched when zero is the first compared operand.
//   predvt - type of the governing predicate and of the result.
//   intvt  - type of the non-zero vector operand. Despite the name, the users
//            in sve_fp_2op_p_pd pass floating-point vector types.
//   cmp    - instruction emitted as (cmp $Op1, $Op2).
multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt,
ValueType intvt, Instruction cmp> {
// Zero on the right-hand side: matched with `cc`.
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, (SVEDup0), cc)),
(cmp $Op1, $Op2)>;
// Zero on the left-hand side: matched with `invcc`, same instruction emitted.
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
(cmp $Op1, $Op2)>;
}

multiclass sve_int_cmp_0<bits<3> opc, string asm, CondCode cc, CondCode invcc> {
def _B : sve_int_cmp<0b0, 0b00, opc, asm, PPR8, ZPR8, ZPR8>;
def _H : sve_int_cmp<0b0, 0b01, opc, asm, PPR16, ZPR16, ZPR16>;
Expand Down Expand Up @@ -4754,10 +4762,13 @@ multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
def : SVE_3_Op_Pat<nxv2i1, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}

multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm, SDPatternOperator op,
multiclass sve_fp_3op_p_pd_cc<bits<3> opc, string asm,
CondCode cc1, CondCode cc2,
CondCode invcc1, CondCode invcc2>
: sve_fp_3op_p_pd<opc, asm, op> {
CondCode invcc1, CondCode invcc2> {
def _H : sve_fp_3op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
def _S : sve_fp_3op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
def _D : sve_fp_3op_p_pd<0b11, opc, asm, PPR64, ZPR64>;

defm : SVE_SETCC_Pat<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
Expand Down Expand Up @@ -4797,10 +4808,26 @@ class sve_fp_2op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{3-0} = Pd;
}

multiclass sve_fp_2op_p_pd<bits<3> opc, string asm> {
// Defines the _H, _S and _D variants of a two-operand floating-point compare
// instruction and attaches compare-with-zero selection patterns to each.
//   opc            - 3-bit opcode forwarded to the instruction class.
//   asm            - assembly mnemonic.
//   cc1 / invcc1   - first condition-code pair handed to
//                    SVE_SETCC_Pat_With_Zero.
//   cc2 / invcc2   - second condition-code pair handed to
//                    SVE_SETCC_Pat_With_Zero.
multiclass sve_fp_2op_p_pd<bits<3> opc, string asm,
CondCode cc1, CondCode cc2,
CondCode invcc1, CondCode invcc2> {
def _H : sve_fp_2op_p_pd<0b01, opc, asm, PPR16, ZPR16>;
def _S : sve_fp_2op_p_pd<0b10, opc, asm, PPR32, ZPR32>;
def _D : sve_fp_2op_p_pd<0b11, opc, asm, PPR64, ZPR64>;

// Patterns for the first condition-code pair. Every f16 vector type selects
// _H, every f32 vector type selects _S, and nxv2f64 selects _D.
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat_With_Zero<cc1, invcc1, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;

// Same set of types again for the second condition-code pair.
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
defm : SVE_SETCC_Pat_With_Zero<cc2, invcc2, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}


Expand Down
114 changes: 114 additions & 0 deletions llvm/test/CodeGen/AArch64/sve-fcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,117 @@ define <vscale x 4 x i1> @ne_fast(<vscale x 4 x float> %x, <vscale x 4 x float>
%y = fcmp fast one <vscale x 4 x float> %x, %x2
ret <vscale x 4 x i1> %y
}
; fcmp oeq against zeroinitializer: the CHECK lines expect a single fcmeq
; using the #0.0 immediate form.
define <vscale x 4 x i1> @oeq_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: oeq_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp oeq <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp ogt against zeroinitializer: the CHECK lines expect a single fcmgt
; using the #0.0 immediate form.
define <vscale x 4 x i1> @ogt_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: ogt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp ogt <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp oge against zeroinitializer: the CHECK lines expect a single fcmge
; using the #0.0 immediate form.
define <vscale x 4 x i1> @oge_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: oge_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp oge <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp olt against zeroinitializer: the CHECK lines expect a single fcmlt
; using the #0.0 immediate form.
define <vscale x 4 x i1> @olt_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: olt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmlt p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp olt <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp ole against zeroinitializer: the CHECK lines expect a single fcmle
; using the #0.0 immediate form.
define <vscale x 4 x i1> @ole_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: ole_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp ole <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp one against zeroinitializer: the CHECK lines expect a single fcmne
; using the #0.0 immediate form.
define <vscale x 4 x i1> @one_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: one_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ret
%y = fcmp one <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp ueq against zeroinitializer: the CHECK lines expect fcmne with the
; #0.0 immediate followed by a predicate `not` of its result.
define <vscale x 4 x i1> @ueq_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: ueq_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ueq <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp ugt against zeroinitializer: the CHECK lines expect fcmle with the
; #0.0 immediate followed by a predicate `not` of its result.
define <vscale x 4 x i1> @ugt_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: ugt_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmle p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ugt <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp uge against zeroinitializer: the CHECK lines expect fcmlt with the
; #0.0 immediate followed by a predicate `not` of its result.
define <vscale x 4 x i1> @uge_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: uge_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmlt p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp uge <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp ult against zeroinitializer: the CHECK lines expect fcmge with the
; #0.0 immediate followed by a predicate `not` of its result.
define <vscale x 4 x i1> @ult_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: ult_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ult <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp ule against zeroinitializer: the CHECK lines expect fcmgt with the
; #0.0 immediate followed by a predicate `not` of its result.
define <vscale x 4 x i1> @ule_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: ule_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp ule <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
; fcmp une against zeroinitializer: the CHECK lines expect fcmeq with the
; #0.0 immediate followed by a predicate `not` of its result.
define <vscale x 4 x i1> @une_zero(<vscale x 4 x float> %x) {
; CHECK-LABEL: une_zero:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: not p0.b, p0/z, p1.b
; CHECK-NEXT: ret
%y = fcmp une <vscale x 4 x float> %x, zeroinitializer
ret <vscale x 4 x i1> %y
}
Loading

0 comments on commit 026bb84

Please sign in to comment.