diff --git a/clang/docs/AddressSanitizer.rst b/clang/docs/AddressSanitizer.rst index 76fdf55995059..d937cbfdf583c 100644 --- a/clang/docs/AddressSanitizer.rst +++ b/clang/docs/AddressSanitizer.rst @@ -26,7 +26,7 @@ Typical slowdown introduced by AddressSanitizer is **2x**. How to build ============ -Build LLVM/Clang with `CMake ` and enable +Build LLVM/Clang with `CMake `_ and enable the ``compiler-rt`` runtime. An example CMake configuration that will allow for the use/testing of AddressSanitizer: diff --git a/clang/docs/RealtimeSanitizer.rst b/clang/docs/RealtimeSanitizer.rst index b09162cd99f45..41b8bbb33baf1 100644 --- a/clang/docs/RealtimeSanitizer.rst +++ b/clang/docs/RealtimeSanitizer.rst @@ -21,7 +21,7 @@ The runtime slowdown introduced by RealtimeSanitizer is negligible. How to build ============ -Build LLVM/Clang with `CMake ` and enable the +Build LLVM/Clang with `CMake `_ and enable the ``compiler-rt`` runtime. An example CMake configuration that will allow for the use/testing of RealtimeSanitizer: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index aef637c65e1b0..fcf00d5ac0e8d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -349,6 +349,11 @@ Modified Compiler Flags to utilize these vector libraries. The behavior for all other vector function libraries remains unchanged. +- The ``-Wnontrivial-memaccess`` warning has been updated to also warn about + passing non-trivially-copyable destination parameter to ``memcpy``, + ``memset`` and similar functions for which it is a documented undefined + behavior. 
+ Removed Compiler Flags ------------------------- diff --git a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h index 9d81cacb50735..713494178b97b 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h +++ b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h @@ -37,6 +37,14 @@ struct UncheckedOptionalAccessModelOptions { /// can't identify when their results are used safely (across calls), /// resulting in false positives in all such cases. Note: this option does not /// cover access through `operator[]`. + /// FIXME: we currently cache and equate the result of const accessors + /// returning pointers, so cover the case of operator-> followed by + /// operator->, which covers the common case of smart pointers. We also cover + /// some limited cases of returning references (if return type is an optional + /// type), so cover some cases of operator* followed by operator*. We don't + /// cover mixing operator-> and operator*. Once we are confident in this const + /// accessor caching, we shouldn't need the IgnoreSmartPointerDereference + /// option anymore. bool IgnoreSmartPointerDereference = false; }; diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 90475a361bb8f..9bd67e0cefebc 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4871,6 +4871,12 @@ def HLSLRadians : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_splitdouble"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + // Builtins for XRay. 
def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9b9bdd7c800e3..34ff49d7238a7 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -795,6 +795,10 @@ def warn_cstruct_memaccess : Warning< "%1 call is a pointer to record %2 that is not trivial to " "%select{primitive-default-initialize|primitive-copy}3">, InGroup; +def warn_cxxstruct_memaccess : Warning< + "first argument in call to " + "%0 is a pointer to non-trivially copyable type %1">, + InGroup; def note_nontrivial_field : Note< "field is non-trivial to %select{copy|default-initialize}0">; def err_non_trivial_c_union_in_invalid_context : Error< diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 1ab3b5e5f8156..dd384c1d76c5f 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -227,7 +227,7 @@ class alignas(8) Module { /// A mapping from the submodule name to the index into the /// \c SubModules vector at which that submodule resides. - llvm::StringMap SubModuleIndex; + mutable llvm::StringMap SubModuleIndex; /// The AST file if this is a top-level module which has a /// corresponding serialized AST file, or null otherwise. 
@@ -612,7 +612,6 @@ class alignas(8) Module { void setParent(Module *M) { assert(!Parent); Parent = M; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 75b567a347cb6..53e9e0ec83ddb 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -546,6 +546,17 @@ class ModuleMap { std::pair findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, bool IsExplicit); + /// Call \c ModuleMap::findOrCreateModule and throw away the information + /// whether the module was found or created. + Module *findOrCreateModuleFirst(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { + return findOrCreateModule(Name, Parent, IsFramework, IsExplicit).first; + } + /// Create new submodule, assuming it does not exist. This function can only + /// be called when it is guaranteed that this submodule does not exist yet. + /// The parameters have same semantics as \c ModuleMap::findOrCreateModule. + Module *createModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit); /// Create a global module fragment for a C++ module unit. 
/// diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 31ae2b94f5b61..da5dda063344f 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -338,6 +338,11 @@ auto isZeroParamConstMemberCall() { callee(cxxMethodDecl(parameterCountIs(0), isConst()))); } +auto isZeroParamConstMemberOperatorCall() { + return cxxOperatorCallExpr( + callee(cxxMethodDecl(parameterCountIs(0), isConst()))); +} + auto isNonConstMemberCall() { return cxxMemberCallExpr(callee(cxxMethodDecl(unless(isConst())))); } @@ -572,9 +577,10 @@ void handleConstMemberCall(const CallExpr *CE, return; } - // Cache if the const method returns a boolean type. + // Cache if the const method returns a boolean or pointer type. // We may decide to cache other return types in the future. - if (RecordLoc != nullptr && CE->getType()->isBooleanType()) { + if (RecordLoc != nullptr && + (CE->getType()->isBooleanType() || CE->getType()->isPointerType())) { Value *Val = State.Lattice.getOrCreateConstMethodReturnValue(*RecordLoc, CE, State.Env); if (Val == nullptr) @@ -597,6 +603,14 @@ void transferValue_ConstMemberCall(const CXXMemberCallExpr *MCE, MCE, dataflow::getImplicitObjectLocation(*MCE, State.Env), Result, State); } +void transferValue_ConstMemberOperatorCall( + const CXXOperatorCallExpr *OCE, const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + auto *RecordLoc = cast_or_null( + State.Env.getStorageLocation(*OCE->getArg(0))); + handleConstMemberCall(OCE, RecordLoc, Result, State); +} + void handleNonConstMemberCall(const CallExpr *CE, dataflow::RecordStorageLocation *RecordLoc, const MatchFinder::MatchResult &Result, @@ -1020,6 +1034,8 @@ auto buildTransferMatchSwitch() { // const accessor calls .CaseOfCFGStmt(isZeroParamConstMemberCall(), 
transferValue_ConstMemberCall) + .CaseOfCFGStmt(isZeroParamConstMemberOperatorCall(), + transferValue_ConstMemberOperatorCall) // non-const member calls that may modify the state of an object. .CaseOfCFGStmt(isNonConstMemberCall(), transferValue_NonConstMemberCall) diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index a7a3f6b37efef..330108d5b3e47 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name, NoUndeclaredIncludes = Parent->NoUndeclaredIncludes; ModuleMapIsPrivate = Parent->ModuleMapIsPrivate; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } } @@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) { } Module *Module::findSubmodule(StringRef Name) const { - llvm::StringMap::const_iterator Pos = SubModuleIndex.find(Name); - if (Pos == SubModuleIndex.end()) - return nullptr; + // Add new submodules into the index. + for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I) + SubModuleIndex[SubModules[I]->Name] = I; - return SubModules[Pos->getValue()]; + if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end()) + return SubModules[It->second]; + + return nullptr; } Module *Module::getGlobalModuleFragment() const { diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index d067ec218b527..700c2f9a5dbd1 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1465,7 +1465,7 @@ bool X86TargetInfo::validateAsmConstraint( } case 'f': // Any x87 floating point stack register. // Constraint 'f' cannot be used for output operands. 
- if (Info.ConstraintStr[0] == '=') + if (Info.ConstraintStr[0] == '=' || Info.ConstraintStr[0] == '+') return false; Info.setAllowsRegister(); return true; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index df1f4017606f7..82e6326a1c42a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17,6 +17,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" +#include "CGValue.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -25,8 +26,10 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/OSLog.h" #include "clang/AST/OperationKinds.h" +#include "clang/AST/Type.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" @@ -67,6 +70,7 @@ #include "llvm/TargetParser/X86TargetParser.h" #include #include +#include using namespace clang; using namespace CodeGen; @@ -95,6 +99,76 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, I->addAnnotationMetadata("auto-init"); } +static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { + Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); + const auto *OutArg1 = dyn_cast(E->getArg(1)); + const auto *OutArg2 = dyn_cast(E->getArg(2)); + + CallArgList Args; + LValue Op1TmpLValue = + CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType()); + LValue Op2TmpLValue = + CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType()); + + if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) + Args.reverseWritebacks(); + + Value *LowBits = nullptr; + Value *HighBits = nullptr; + + if (CGF->CGM.getTarget().getTriple().isDXIL()) { + + llvm::Type *RetElementTy = CGF->Int32Ty; + if (auto *Op0VecTy = E->getArg(0)->getType()->getAs()) + RetElementTy = llvm::VectorType::get( + CGF->Int32Ty, 
ElementCount::getFixed(Op0VecTy->getNumElements())); + auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy); + + CallInst *CI = CGF->Builder.CreateIntrinsic( + RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble"); + + LowBits = CGF->Builder.CreateExtractValue(CI, 0); + HighBits = CGF->Builder.CreateExtractValue(CI, 1); + + } else { + // For Non DXIL targets we generate the instructions. + + if (!Op0->getType()->isVectorTy()) { + FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2); + Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy); + + LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0); + HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1); + } else { + int NumElements = 1; + if (const auto *VecTy = + E->getArg(0)->getType()->getAs()) + NumElements = VecTy->getNumElements(); + + FixedVectorType *Uint32VecTy = + FixedVectorType::get(CGF->Int32Ty, NumElements * 2); + Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy); + if (NumElements == 1) { + LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0); + HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1); + } else { + SmallVector EvenMask, OddMask; + for (int I = 0, E = NumElements; I != E; ++I) { + EvenMask.push_back(I * 2); + OddMask.push_back(I * 2 + 1); + } + LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask); + HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask); + } + } + } + CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress()); + auto *LastInst = + CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress()); + CGF->EmitWritebacks(Args); + return LastInst; +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". 
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -18956,6 +19030,14 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.radians"); } + case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { + + assert((E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() && + E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) && + "asuint operands types mismatch"); + return handleHlslSplitdouble(E, this); + } } return nullptr; } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 72fd357be0f7d..fe581b01a365c 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include "llvm/Support/Path.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace clang; @@ -4257,12 +4258,6 @@ static void emitWriteback(CodeGenFunction &CGF, CGF.EmitBlock(contBB); } -static void emitWritebacks(CodeGenFunction &CGF, - const CallArgList &args) { - for (const auto &I : args.writebacks()) - emitWriteback(CGF, I); -} - static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF, const CallArgList &CallArgs) { ArrayRef Cleanups = @@ -4731,6 +4726,11 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { IsUsed = true; } +void CodeGenFunction::EmitWritebacks(const CallArgList &args) { + for (const auto &I : args.writebacks()) + emitWriteback(*this, I); +} + void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, QualType type) { DisableDebugLocationUpdates Dis(*this, E); @@ -5954,7 +5954,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Emit any call-associated writebacks immediately. Arguably this // should happen after any return-value munging. 
if (CallArgs.hasWritebacks()) - emitWritebacks(*this, CallArgs); + EmitWritebacks(CallArgs); // The stack cleanup for inalloca arguments has to run out of the normal // lexical order, so deactivate it and run it manually here. diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 28d185d1a6b6f..d31763ca496be 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5460,9 +5460,8 @@ LValue CodeGenFunction::EmitOpaqueValueLValue(const OpaqueValueExpr *e) { return getOrCreateOpaqueLValueMapping(e); } -void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, - CallArgList &Args, QualType Ty) { - +std::pair +CodeGenFunction::EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, QualType Ty) { // Emitting the casted temporary through an opaque value. LValue BaseLV = EmitLValue(E->getArgLValue()); OpaqueValueMappingData::bind(*this, E->getOpaqueArgLValue(), BaseLV); @@ -5476,6 +5475,13 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, TempLV); OpaqueValueMappingData::bind(*this, E->getCastedTemporary(), TempLV); + return std::make_pair(BaseLV, TempLV); +} + +LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, + CallArgList &Args, QualType Ty) { + + auto [BaseLV, TempLV] = EmitHLSLOutArgLValues(E, Ty); llvm::Value *Addr = TempLV.getAddress().getBasePointer(); llvm::Type *ElTy = ConvertTypeForMem(TempLV.getType()); @@ -5488,6 +5494,7 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast(), LifetimeSize); Args.add(RValue::get(TmpAddr, *this), Ty); + return TempLV; } LValue diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e06efe0b7e8dc..192f488d97ec2 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4379,8 +4379,11 @@ class CodeGenFunction : public CodeGenTypeCache { LValue EmitMaterializeTemporaryExpr(const 
MaterializeTemporaryExpr *E); LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e); LValue EmitHLSLArrayAssignLValue(const BinaryOperator *E); - void EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, - QualType Ty); + + std::pair EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, + QualType Ty); + LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, + QualType Ty); Address EmitExtVectorElementLValue(LValue V); @@ -5240,6 +5243,9 @@ class CodeGenFunction : public CodeGenTypeCache { SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum); + /// EmitWritebacks - Emit the writebacks recorded in \p Args. + void EmitWritebacks(const CallArgList &Args); + /// EmitCallArg - Emit a single call argument. void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType); diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 30dce60b3ff70..8ade4b27f360f 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -438,6 +438,24 @@ template constexpr uint asuint(T F) { return __detail::bit_cast(F); } +//===----------------------------------------------------------------------===// +// asuint splitdouble builtins +//===----------------------------------------------------------------------===// + +/// \fn void asuint(double D, out uint lowbits, out uint highbits) +/// \brief Splits and interprets the lowbits and highbits of double D into uints. +/// \param D The input double. +/// \param lowbits The output lowbits of D. +/// \param highbits The output highbits of D. 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double, out uint, out uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double2, out uint2, out uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double3, out uint3, out uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double4, out uint4, out uint4); + //===----------------------------------------------------------------------===// // atan builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 201ab91cf68ca..dc9d2bfd5629c 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(SkippedDir.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false, + Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); // Associate the module and the directory. @@ -672,8 +672,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(File.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false, + Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); Result->addTopHeader(File); @@ -866,6 +866,15 @@ std::pair ModuleMap::findOrCreateModule(StringRef Name, return std::make_pair(Sub, false); // Create a new module with this name. 
+ Module *M = createModule(Name, Parent, IsFramework, IsExplicit); + return std::make_pair(M, true); +} + +Module *ModuleMap::createModule(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { + assert(lookupModuleQualified(Name, Parent) == nullptr && + "Creating duplicate submodule"); + Module *Result = new (ModulesAlloc.Allocate()) Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent, IsFramework, IsExplicit, NumCreatedModules++); @@ -875,7 +884,7 @@ std::pair ModuleMap::findOrCreateModule(StringRef Name, Modules[Name] = Result; ModuleScopeIDs[Result] = CurrentModuleScopeID; } - return std::make_pair(Result, true); + return Result; } Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc, @@ -2123,9 +2132,8 @@ void ModuleMapParser::parseModuleDecl() { ActiveModule = Map.createShadowedModule(ModuleName, Framework, ShadowingModule); } else { - ActiveModule = - Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit) - .first; + ActiveModule = Map.findOrCreateModuleFirst(ModuleName, ActiveModule, + Framework, Explicit); } ActiveModule->DefinitionLoc = ModuleNameLoc; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 27b274d74ce71..d027e4c6dfdb4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -8899,18 +8899,36 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << ArgIdx << FnName << PointeeTy << Call->getCallee()->getSourceRange()); else if (const auto *RT = PointeeTy->getAs()) { + + bool IsTriviallyCopyableCXXRecord = + RT->desugar().isTriviallyCopyableType(Context); + if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) && RT->getDecl()->isNonTrivialToPrimitiveDefaultInitialize()) { DiagRuntimeBehavior(Dest->getExprLoc(), Dest, PDiag(diag::warn_cstruct_memaccess) << ArgIdx << FnName << PointeeTy << 0); SearchNonTrivialToInitializeField::diag(PointeeTy, Dest, *this); + } else if ((BId == Builtin::BImemset || BId == 
Builtin::BIbzero) && + !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + // FIXME: Limiting this warning to dest argument until we decide + // whether it's valid for source argument too. + DiagRuntimeBehavior(Dest->getExprLoc(), Dest, + PDiag(diag::warn_cxxstruct_memaccess) + << FnName << PointeeTy); } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) && RT->getDecl()->isNonTrivialToPrimitiveCopy()) { DiagRuntimeBehavior(Dest->getExprLoc(), Dest, PDiag(diag::warn_cstruct_memaccess) << ArgIdx << FnName << PointeeTy << 1); SearchNonTrivialToCopyField::diag(PointeeTy, Dest, *this); + } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) && + !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + // FIXME: Limiting this warning to dest argument until we decide + // whether it's valid for source argument too. + DiagRuntimeBehavior(Dest->getExprLoc(), Dest, + PDiag(diag::warn_cxxstruct_memaccess) + << FnName << PointeeTy); } else { continue; } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 1f6c5b8d4561b..a472538236e2d 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1698,18 +1698,27 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { return true; } -static bool CheckArgsTypesAreCorrect( +bool CheckArgTypeIsCorrect( + Sema *S, Expr *Arg, QualType ExpectedType, + llvm::function_ref Check) { + QualType PassedType = Arg->getType(); + if (Check(PassedType)) { + if (auto *VecTyA = PassedType->getAs()) + ExpectedType = S->Context.getVectorType( + ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind()); + S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible) + << PassedType << ExpectedType << 1 << 0 << 0; + return true; + } + return false; +} + +bool CheckAllArgTypesAreCorrect( Sema *S, CallExpr *TheCall, QualType ExpectedType, llvm::function_ref Check) { for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) { - QualType PassedType = 
TheCall->getArg(i)->getType(); - if (Check(PassedType)) { - if (auto *VecTyA = PassedType->getAs()) - ExpectedType = S->Context.getVectorType( - ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind()); - S->Diag(TheCall->getArg(0)->getBeginLoc(), - diag::err_typecheck_convert_incompatible) - << PassedType << ExpectedType << 1 << 0 << 0; + Expr *Arg = TheCall->getArg(i); + if (CheckArgTypeIsCorrect(S, Arg, ExpectedType, Check)) { return true; } } @@ -1720,8 +1729,8 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasFloatingRepresentation(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy, - checkAllFloatTypes); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy, + checkAllFloatTypes); } static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) { @@ -1732,8 +1741,19 @@ static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) { : PassedType; return !BaseType->isHalfType() && !BaseType->isFloat32Type(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy, - checkFloatorHalf); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy, + checkFloatorHalf); +} + +static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall, + unsigned ArgIndex) { + auto *Arg = TheCall->getArg(ArgIndex); + SourceLocation OrigLoc = Arg->getExprLoc(); + if (Arg->IgnoreCasts()->isModifiableLvalue(S->Context, &OrigLoc) == + Expr::MLV_Valid) + return false; + S->Diag(OrigLoc, diag::error_hlsl_inout_lvalue) << Arg << 0; + return true; } static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) { @@ -1742,24 +1762,24 @@ static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) { return VecTy->getElementType()->isDoubleType(); return false; }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy, - checkDoubleVector); + return CheckAllArgTypesAreCorrect(S, TheCall, 
S->Context.FloatTy, + checkDoubleVector); } static bool CheckFloatingOrIntRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasIntegerRepresentation() && !PassedType->hasFloatingRepresentation(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.IntTy, - checkAllSignedTypes); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.IntTy, + checkAllSignedTypes); } static bool CheckUnsignedIntRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllUnsignedTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasUnsignedIntegerRepresentation(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy, - checkAllUnsignedTypes); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy, + checkAllUnsignedTypes); } static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall, @@ -2074,6 +2094,22 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { + if (SemaRef.checkArgCount(TheCall, 3)) + return true; + + if (CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.DoubleTy, 0) || + CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.UnsignedIntTy, + 1) || + CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.UnsignedIntTy, + 2)) + return true; + + if (CheckModifiableLValue(&SemaRef, TheCall, 1) || + CheckModifiableLValue(&SemaRef, TheCall, 2)) + return true; + break; + } case Builtin::BI__builtin_elementwise_acos: case Builtin::BI__builtin_elementwise_asin: case Builtin::BI__builtin_elementwise_atan: diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2419ed84e68ac..8d8f9378cfeab 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5756,6 +5756,14 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, return Err; 
ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); + bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr; + // If we don't know the top-level module, there's no point in doing qualified + // lookup of its submodules; it won't find anything anywhere within this tree. + // Let's skip that and avoid some string lookups. + auto CreateModule = !KnowsTopLevelModule + ? &ModuleMap::createModule + : &ModuleMap::findOrCreateModuleFirst; + bool First = true; Module *CurrentModule = nullptr; RecordData Record; @@ -5829,11 +5837,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, if (Parent) ParentModule = getSubmodule(Parent); - // Retrieve this (sub)module from the module map, creating it if - // necessary. - CurrentModule = - ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit) - .first; + CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule, + IsFramework, IsExplicit); SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; if (GlobalIndex >= SubmodulesLoaded.size() || diff --git a/clang/test/ClangScanDeps/print-timing.c b/clang/test/ClangScanDeps/print-timing.c index f27df1ebf732a..fa2a433b95537 100644 --- a/clang/test/ClangScanDeps/print-timing.c +++ b/clang/test/ClangScanDeps/print-timing.c @@ -3,7 +3,8 @@ // RUN: clang-scan-deps -compilation-database %t/cdb.json -print-timing > %t/result.json 2>%t/errs // RUN: cat %t/errs | FileCheck %s -// CHECK: clang-scan-deps timing: {{[0-9]+}}.{{[0-9][0-9]}}s wall, {{[0-9]+}}.{{[0-9][0-9]}}s process +// CHECK: wall time [s] process time [s] instruction count +// CHECK-NEXT: {{[0-9]+}}.{{([0-9]{4})}} {{[0-9]+}}.{{([0-9]{4})}} {{[0-9]+}} //--- cdb.json [] diff --git a/clang/test/CodeGen/X86/avx-cmp-builtins.c b/clang/test/CodeGen/X86/avx-cmp-builtins.c index c4e3c7ccd5498..2e4a383a6b3fc 100644 --- a/clang/test/CodeGen/X86/avx-cmp-builtins.c +++ b/clang/test/CodeGen/X86/avx-cmp-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none 
-ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s -// FIXME: The shufflevector instructions in test_cmpgt_sd are relying on O3 here. +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s #include @@ -9,62 +9,124 @@ // Test LLVM IR codegen of cmpXY instructions // +// CHECK-LABEL: define dso_local <2 x double> @test_cmp_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A]], <2 x double> [[B]], i8 13) +// CHECK-NEXT: ret <2 x double> [[TMP0]] +// __m128d test_cmp_sd(__m128d a, __m128d b) { // Expects that the third argument in LLVM IR is immediate expression - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 13) return _mm_cmp_sd(a, b, _CMP_GE_OS); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmp_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A]], <4 x float> [[B]], i8 13) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// __m128 test_cmp_ss(__m128 a, __m128 b) { // Expects that the third argument in LLVM IR is immediate expression - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13) return _mm_cmp_ss(a, b, _CMP_GE_OS); } +// CHECK-LABEL: define 
dso_local <4 x float> @test_cmpgt_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 1) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpgt_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpgt_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpge_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 2) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpge_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpge_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpngt_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 5) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpngt_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpngt_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpnge_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 6) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpnge_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpnge_ss(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpgt_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 1) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpgt_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpgt_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpge_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 2) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x 
double> [[VECINIT2_I]] +// __m128d test_cmpge_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpge_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpngt_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 5) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpngt_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpngt_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpnge_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 6) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpnge_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpnge_sd(a, b); } diff --git 
a/clang/test/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CodeGen/X86/avx-shuffle-builtins.c index d184d28f3e07a..1c05fa436983e 100644 --- a/clang/test/CodeGen/X86/avx-shuffle-builtins.c +++ b/clang/test/CodeGen/X86/avx-shuffle-builtins.c @@ -1,7 +1,7 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86 -// FIXME: This is testing optimized generation of shuffle instructions and should be fixed. +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s #include @@ -10,201 +10,341 @@ // Test LLVM IR codegen of shuffle instructions, checking if the masks are correct // +// CHECK-LABEL: define dso_local <8 x float> @x( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFP:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFP]] +// __m256 x(__m256 a, __m256 b) { - // CHECK-LABEL: x - // CHECK: shufflevector{{.*}} return _mm256_shuffle_ps(a, b, 203); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm_permute_pd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[PERMIL]] +// __m128d test_mm_permute_pd(__m128d a) { - // 
CHECK-LABEL: test_mm_permute_pd - // CHECK: shufflevector{{.*}} return _mm_permute_pd(a, 1); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute_pd( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +// CHECK-NEXT: ret <4 x double> [[PERMIL]] +// __m256d test_mm256_permute_pd(__m256d a) { - // CHECK-LABEL: test_mm256_permute_pd - // CHECK: shufflevector{{.*}} return _mm256_permute_pd(a, 5); } +// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps( +// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[PERMIL]] +// __m128 test_mm_permute_ps(__m128 a) { - // CHECK-LABEL: test_mm_permute_ps - // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0x1b); } -// Test case for PR12401 +// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps2( +// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[PERMIL]] +// __m128 test_mm_permute_ps2(__m128 a) { - // CHECK-LABEL: test_mm_permute_ps2 - // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0xe6); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute_ps( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +// CHECK-NEXT: ret <8 x float> [[PERMIL]] +// __m256 test_mm256_permute_ps(__m256 a) { - // CHECK-LABEL: test_mm256_permute_ps - // CHECK: shufflevector{{.*}} return _mm256_permute_ps(a, 0x1b); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute2f128_pd( +// 
CHECK-SAME: <4 x double> noundef [[A:%.*]], <4 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[VPERM]] +// __m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) { - // CHECK-LABEL: test_mm256_permute2f128_pd - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_pd(a, b, 0x31); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute2f128_ps( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[VPERM]] +// __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_permute2f128_ps - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_ps(a, b, 0x13); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_permute2f128_si256( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <4 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[VPERM]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) { - // CHECK-LABEL: test_mm256_permute2f128_si256 - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_si256(a, b, 0x20); } -__m128 -test_mm_broadcast_ss(float const *__a) { - // CHECK-LABEL: test_mm_broadcast_ss - // CHECK: insertelement <4 x float> {{.*}}, i64 0 - // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <4 x float> @test_mm_broadcast_ss( +// 
CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 +// CHECK-NEXT: ret <4 x float> [[VECINIT4_I]] +// +__m128 test_mm_broadcast_ss(float const *__a) { return _mm_broadcast_ss(__a); } -__m256d -test_mm256_broadcast_sd(double const *__a) { - // CHECK-LABEL: test_mm256_broadcast_sd - // CHECK: insertelement <4 x double> {{.*}}, i64 0 - // CHECK: shufflevector <4 x double> {{.*}}, <4 x double> poison, <4 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_broadcast_sd( +// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x double> [[VECINIT_I]], double [[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x double> [[VECINIT2_I]], double [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x double> [[VECINIT3_I]], double [[TMP0]], i32 3 +// CHECK-NEXT: ret <4 x double> [[VECINIT4_I]] +// +__m256d test_mm256_broadcast_sd(double const *__a) { return _mm256_broadcast_sd(__a); } -__m256 -test_mm256_broadcast_ss(float const *__a) { - // CHECK-LABEL: test_mm256_broadcast_ss - // CHECK: insertelement <8 x float> {{.*}}, i64 0 - // CHECK: shufflevector <8 x float> {{.*}}, <8 x float> poison, <8 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_broadcast_ss( +// 
CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> poison, float [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP0]], i32 4 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP0]], i32 5 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP0]], i32 6 +// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <8 x float> [[VECINIT7_I]], float [[TMP0]], i32 7 +// CHECK-NEXT: ret <8 x float> [[VECINIT8_I]] +// +__m256 test_mm256_broadcast_ss(float const *__a) { return _mm256_broadcast_ss(__a); } // Make sure we have the correct mask for each insertf128 case. 
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_0( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[INSERT]] +// __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) { - // CHECK-LABEL: test_mm256_insertf128_ps_0 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 0); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_0( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[INSERT]] +// __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) { - // CHECK-LABEL: test_mm256_insertf128_pd_0 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 0); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_0( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) { - // CHECK-LABEL: test_mm256_insertf128_si256_0 - // X64: 
shufflevector{{.*}} - // X86: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 0); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_1( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[INSERT]] +// __m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) { - // CHECK-LABEL: test_mm256_insertf128_ps_1 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 1); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_1( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[INSERT]] +// __m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) { - // CHECK-LABEL: test_mm256_insertf128_pd_1 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 1); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_1( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i 
test_mm256_insertf128_si256_1(__m256i a, __m128i b) { - // CHECK-LABEL: test_mm256_insertf128_si256_1 - // X64: shufflevector{{.*}} - // X86: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 1); } // Make sure we have the correct mask for each extractf128 case. +// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_0( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[EXTRACT]] +// __m128 test_mm256_extractf128_ps_0(__m256 a) { - // X64-LABEL: test_mm256_extractf128_ps_0 - // X64: shufflevector{{.*}} - // - // X86-LABEL: test_mm256_extractf128_ps_0 - // X86: shufflevector{{.*}} return _mm256_extractf128_ps(a, 0); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_0( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[EXTRACT]] +// __m128d test_mm256_extractf128_pd_0(__m256d a) { - // CHECK-LABEL: test_mm256_extractf128_pd_0 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_0( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP1]] +// __m128i test_mm256_extractf128_si256_0(__m256i a) { - // CHECK-LABEL: test_mm256_extractf128_si256_0 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 0); } +// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_1( +// 
CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[EXTRACT]] +// __m128 test_mm256_extractf128_ps_1(__m256 a) { - // X64-LABEL: test_mm256_extractf128_ps_1 - // X64: shufflevector{{.*}} - // - // X86-LABEL: test_mm256_extractf128_ps_1 - // X86: shufflevector{{.*}} return _mm256_extractf128_ps(a, 1); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_1( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[EXTRACT]] +// __m128d test_mm256_extractf128_pd_1(__m256d a) { - // CHECK-LABEL: test_mm256_extractf128_pd_1 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 1); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_1( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP1]] +// __m128i test_mm256_extractf128_si256_1(__m256i a) { - // CHECK-LABEL: test_mm256_extractf128_si256_1 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 1); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_set_m128( +// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFFLE_I]] +// __m256 test_mm256_set_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: test_mm256_set_m128 
- // CHECK: shufflevector{{.*}} return _mm256_set_m128(hi, lo); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_set_m128d( +// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[SHUFFLE_I]] +// __m256d test_mm256_set_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: test_mm256_set_m128d - // CHECK: shufflevector{{.*}} return _mm256_set_m128d(hi, lo); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_set_m128i( +// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I]] +// __m256i test_mm256_set_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: test_mm256_set_m128i - // CHECK: shufflevector{{.*}} return _mm256_set_m128i(hi, lo); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_setr_m128( +// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFFLE_I_I]] +// __m256 test_mm256_setr_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: test_mm256_setr_m128 - // CHECK: shufflevector{{.*}} return _mm256_setr_m128(lo, hi); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_setr_m128d( +// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[SHUFFLE_I_I]] +// __m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: 
test_mm256_setr_m128d - // CHECK: shufflevector{{.*}} return _mm256_setr_m128d(lo, hi); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_setr_m128i( +// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I_I]] +// __m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: test_mm256_setr_m128i - // CHECK: shufflevector{{.*}} return _mm256_setr_m128i(lo, hi); } diff --git a/clang/test/CodeGen/X86/sse.c b/clang/test/CodeGen/X86/sse.c index a75b8dc77e86e..017bdd7846fa3 100644 --- a/clang/test/CodeGen/X86/sse.c +++ b/clang/test/CodeGen/X86/sse.c @@ -1,42 +1,72 @@ -// RUN: %clang_cc1 -ffreestanding -O3 -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s -// FIXME: This test currently depends on optimization - it should be rewritten to avoid it. 
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -ffreestanding -triple x86_64-- -target-feature +sse4.1 -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s #include // Byte-shifts look reversed due to xmm register layout +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_slli_si128(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128 - // CHECK: shufflevector <16 x i8> <{{.*}}, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> {{.*}}, <16 x i32> return _mm_slli_si128(a, 5); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_0( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_slli_si128_0(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128_0 - // CHECK-NOT: shufflevector return _mm_slli_si128(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_16( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <2 x i64> zeroinitializer +// __m128i test_mm_slli_si128_16(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128_16 - // CHECK-NOT: shufflevector return _mm_slli_si128(a, 16); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128( +// 
CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_srli_si128(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128 - // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> , <16 x i32> return _mm_srli_si128(a, 5); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_0( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_srli_si128_0(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128_0 - // CHECK-NOT: shufflevector return _mm_srli_si128(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_16( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <2 x i64> zeroinitializer +// __m128i test_mm_srli_si128_16(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128_16 - // CHECK-NOT: shufflevector return _mm_srli_si128(a, 16); } diff --git a/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl b/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl new file mode 100644 index 0000000000000..a883c9d5cc355 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-library %s -fnative-half-type 
-emit-llvm -O0 -o - | FileCheck %s --check-prefix=SPIRV + + + +// CHECK: define {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[VALD]]) +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load double, ptr [[VALD]].addr, align 8 +// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[LOAD]] to <2 x i32> +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0 +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1 +uint test_scalar(double D) { + uint A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <1 x i32> {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[TRUNC:%.*]] = extractelement <1 x double> %D, i64 0 +// CHECK-NEXT: [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[TRUNC]]) +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <1 x i32> {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <1 x double>, ptr [[VALD]].addr, align 8 +// SPIRV-NEXT: [[TRUNC:%.*]] = extractelement <1 x double> [[LOAD]], i64 0 +// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[TRUNC]] to <2 x i32> +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0 +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1 +uint1 test_double1(double1 D) { + uint A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> [[VALD]]) +// CHECK-NEXT: extractvalue { <2 x 
i32>, <2 x i32> } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { <2 x i32>, <2 x i32> } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <2 x double>, ptr [[VALD]].addr, align 16 +// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <2 x double> [[LOAD]] to <4 x i32> +// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> +// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> +uint2 test_vector2(double2 D) { + uint2 A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> [[VALD]]) +// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <3 x double>, ptr [[VALD]].addr, align 32 +// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <3 x double> [[LOAD]] to <6 x i32> +// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <6 x i32> [[CAST1]], <6 x i32> poison, <3 x i32> +// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <6 x i32> [[CAST1]], <6 x i32> poison, <3 x i32> +uint3 test_vector3(double3 D) { + uint3 A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <4 x i32> {{.*}}test_vector4{{.*}}(<4 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} call { <4 x i32>, <4 x i32> } @llvm.dx.splitdouble.v4i32(<4 x double> [[VALD]]) +// CHECK-NEXT: extractvalue { <4 x i32>, <4 x i32> } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { <4 x i32>, <4 x i32> } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <4 x i32> 
{{.*}}test_vector4{{.*}}(<4 x double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <4 x double>, ptr [[VALD]].addr, align 32 +// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <4 x double> [[LOAD]] to <8 x i32> +// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <8 x i32> [[CAST1]], <8 x i32> poison, <4 x i32> +// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <8 x i32> [[CAST1]], <8 x i32> poison, <4 x i32> +uint4 test_vector4(double4 D) { + uint4 A, B; + asuint(D, A, B); + return A + B; +} diff --git a/clang/test/Sema/asm.c b/clang/test/Sema/asm.c index 6cd95c71604d4..28ef3ec6ce09c 100644 --- a/clang/test/Sema/asm.c +++ b/clang/test/Sema/asm.c @@ -204,6 +204,12 @@ double f_output_constraint(void) { return result; } +double f_output_constraint_2(void) { + double result; + __asm("foo1": "+f" (result)); // expected-error {{invalid output constraint '+f' in asm}} + return result; +} + void fn1(void) { int l; __asm__("" diff --git a/clang/test/SemaCXX/constexpr-string.cpp b/clang/test/SemaCXX/constexpr-string.cpp index c456740ef7551..5448365489a51 100644 --- a/clang/test/SemaCXX/constexpr-string.cpp +++ b/clang/test/SemaCXX/constexpr-string.cpp @@ -670,6 +670,8 @@ namespace MemcpyEtc { constexpr bool test_address_of_incomplete_struct_type() { // expected-error {{never produces a constant}} struct Incomplete; extern Incomplete x, y; + // expected-warning@+2 {{first argument in call to '__builtin_memcpy' is a pointer to non-trivially copyable type 'Incomplete'}} + // expected-note@+1 {{explicitly cast the pointer to silence this warning}} __builtin_memcpy(&x, &x, 4); // expected-note@-1 2{{cannot constant evaluate 'memcpy' between objects of incomplete type 'Incomplete'}} return true; diff --git a/clang/test/SemaCXX/warn-memaccess.cpp b/clang/test/SemaCXX/warn-memaccess.cpp new file mode 100644 index 0000000000000..b4b7f6a6905b2 --- /dev/null +++ b/clang/test/SemaCXX/warn-memaccess.cpp @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -fsyntax-only -verify 
-std=c++11 -Wnontrivial-memaccess %s + +extern "C" void *bzero(void *, unsigned); +extern "C" void *memset(void *, int, unsigned); +extern "C" void *memmove(void *s1, const void *s2, unsigned n); +extern "C" void *memcpy(void *s1, const void *s2, unsigned n); + +class TriviallyCopyable {}; +class NonTriviallyCopyable { NonTriviallyCopyable(const NonTriviallyCopyable&);}; + +void test_bzero(TriviallyCopyable* tc, + NonTriviallyCopyable *ntc) { + // OK + bzero(tc, sizeof(*tc)); + + // expected-warning@+2{{first argument in call to 'bzero' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + bzero(ntc, sizeof(*ntc)); + + // OK + bzero((void*)ntc, sizeof(*ntc)); +} + +void test_memset(TriviallyCopyable* tc, + NonTriviallyCopyable *ntc) { + // OK + memset(tc, 0, sizeof(*tc)); + + // expected-warning@+2{{first argument in call to 'memset' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + memset(ntc, 0, sizeof(*ntc)); + + // OK + memset((void*)ntc, 0, sizeof(*ntc)); +} + + +void test_memcpy(TriviallyCopyable* tc0, TriviallyCopyable* tc1, + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + // OK + memcpy(tc0, tc1, sizeof(*tc0)); + + // expected-warning@+2{{first argument in call to 'memcpy' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + memcpy(ntc0, ntc1, sizeof(*ntc0)); + + // ~ OK + memcpy((void*)ntc0, ntc1, sizeof(*ntc0)); + + // OK + memcpy((void*)ntc0, (void*)ntc1, sizeof(*ntc0)); +} + +void test_memmove(TriviallyCopyable* tc0, TriviallyCopyable* tc1, + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + // OK + memmove(tc0, tc1, sizeof(*tc0)); + + // expected-warning@+2{{first argument in call to 'memmove' is a pointer to non-trivially copyable type 
'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + memmove(ntc0, ntc1, sizeof(*ntc0)); + + // ~ OK + memmove((void*)ntc0, ntc1, sizeof(*ntc0)); + + // OK + memmove((void*)ntc0, (void*)ntc1, sizeof(*ntc0)); +} diff --git a/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl index 8c56fdddb1c24..4adb0555c35be 100644 --- a/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl @@ -6,6 +6,10 @@ uint4 test_asuint_too_many_arg(float p0, float p1) { // expected-error@-1 {{no matching function for call to 'asuint'}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'V', but 2 arguments were provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'F', but 2 arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} } uint test_asuint_double(double p1) { @@ -23,3 +27,29 @@ uint test_asuint_half(half p1) { // expected-note@hlsl/hlsl_detail.h:* {{candidate template ignored: could not match 'vector' against 'half'}} // expected-note@hlsl/hlsl_detail.h:* {{candidate template ignored: substitution failure [with U = uint, T = half]: no type named 'Type'}} } + +void test_asuint_first_arg_const(double D) { + const uint A = 0; + uint B; + asuint(D, A, B); + // expected-error@hlsl/hlsl_intrinsics.h:* {{read-only variable is not assignable}} +} + +void 
test_asuint_second_arg_const(double D) { + const uint A = 0; + uint B; + asuint(D, B, A); + // expected-error@hlsl/hlsl_intrinsics.h:* {{read-only variable is not assignable}} +} + +void test_asuint_imidiate_value(double D) { + uint B; + asuint(D, B, 1); + // expected-error@-1 {{cannot bind non-lvalue argument 1 to out paramemter}} +} + +void test_asuint_expr(double D) { + uint B; + asuint(D, B, B + 1); + // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl new file mode 100644 index 0000000000000..18d2b692b335b --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify + +void test_no_second_arg(double D) { + __builtin_hlsl_elementwise_splitdouble(D); + // expected-error@-1 {{too few arguments to function call, expected 3, have 1}} +} + +void test_no_third_arg(double D) { + uint A; + __builtin_hlsl_elementwise_splitdouble(D, A); + // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} +} + +void test_too_many_arg(double D) { + uint A, B, C; + __builtin_hlsl_elementwise_splitdouble(D, A, B, C); + // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} +} + +void test_first_arg_type_mismatch(bool3 D) { + uint3 A, B; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{invalid operand of type 'bool3' (aka 'vector') where 'double' or a vector of such type is required}} +} + +void test_second_arg_type_mismatch(double D) { + bool A; + uint B; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{invalid operand of type 'bool' where 'unsigned int' or a vector of such type is required}} +} + +void test_third_arg_type_mismatch(double D) { + bool A; + uint B; + 
__builtin_hlsl_elementwise_splitdouble(D, B, A); + // expected-error@-1 {{invalid operand of type 'bool' where 'unsigned int' or a vector of such type is required}} +} + +void test_const_second_arg(double D) { + const uint A = 1; + uint B; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{cannot bind non-lvalue argument A to out paramemter}} +} + +void test_const_third_arg(double D) { + uint A; + const uint B = 1; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{cannot bind non-lvalue argument B to out paramemter}} +} + +void test_number_second_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, (uint)1, B); + // expected-error@-1 {{cannot bind non-lvalue argument (uint)1 to out paramemter}} +} + +void test_number_third_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, B, (uint)1); + // expected-error@-1 {{cannot bind non-lvalue argument (uint)1 to out paramemter}} +} + +void test_expr_second_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, B+1, B); + // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} +} + +void test_expr_third_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, B, B+1); + // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} +} diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 7d36cee7a22b3..f474b1346b1be 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -1080,10 +1080,15 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { << NumExistsCalls << " exists() calls\n" << NumIsLocalCalls << " isLocal() calls\n"; - if (PrintTiming) - llvm::errs() << llvm::format( - "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n", - T.getTotalTime().getWallTime(), T.getTotalTime().getProcessTime()); + if (PrintTiming) { + 
llvm::errs() << "wall time [s]\t" + << "process time [s]\t" + << "instruction count\n"; + const llvm::TimeRecord &R = T.getTotalTime(); + llvm::errs() << llvm::format("%0.4f", R.getWallTime()) << "\t" + << llvm::format("%0.4f", R.getProcessTime()) << "\t" + << llvm::format("%llu", R.getInstructionsExecuted()) << "\n"; + } if (RoundTripArgs) if (FD && FD->roundTripCommands(llvm::errs())) diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp index 5b64eaca0e10d..de16f6be8eedb 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp @@ -1282,28 +1282,35 @@ static raw_ostream &operator<<(raw_ostream &OS, class UncheckedOptionalAccessTest : public ::testing::TestWithParam { protected: - void ExpectDiagnosticsFor(std::string SourceCode) { - ExpectDiagnosticsFor(SourceCode, ast_matchers::hasName("target")); + void ExpectDiagnosticsFor(std::string SourceCode, + bool IgnoreSmartPointerDereference = true) { + ExpectDiagnosticsFor(SourceCode, ast_matchers::hasName("target"), + IgnoreSmartPointerDereference); } - void ExpectDiagnosticsForLambda(std::string SourceCode) { + void ExpectDiagnosticsForLambda(std::string SourceCode, + bool IgnoreSmartPointerDereference = true) { ExpectDiagnosticsFor( - SourceCode, ast_matchers::hasDeclContext( - ast_matchers::cxxRecordDecl(ast_matchers::isLambda()))); + SourceCode, + ast_matchers::hasDeclContext( + ast_matchers::cxxRecordDecl(ast_matchers::isLambda())), + IgnoreSmartPointerDereference); } template - void ExpectDiagnosticsFor(std::string SourceCode, - FuncDeclMatcher FuncMatcher) { + void ExpectDiagnosticsFor(std::string SourceCode, FuncDeclMatcher FuncMatcher, + bool IgnoreSmartPointerDereference = true) { // Run in C++17 and C++20 mode to cover differences in the AST between modes // (e.g. 
C++20 can contain `CXXRewrittenBinaryOperator`). for (const char *CxxMode : {"-std=c++17", "-std=c++20"}) - ExpectDiagnosticsFor(SourceCode, FuncMatcher, CxxMode); + ExpectDiagnosticsFor(SourceCode, FuncMatcher, CxxMode, + IgnoreSmartPointerDereference); } template void ExpectDiagnosticsFor(std::string SourceCode, FuncDeclMatcher FuncMatcher, - const char *CxxMode) { + const char *CxxMode, + bool IgnoreSmartPointerDereference) { ReplaceAllOccurrences(SourceCode, "$ns", GetParam().NamespaceName); ReplaceAllOccurrences(SourceCode, "$optional", GetParam().TypeName); @@ -1328,8 +1335,7 @@ class UncheckedOptionalAccessTest template T Make(); )"); - UncheckedOptionalAccessModelOptions Options{ - /*IgnoreSmartPointerDereference=*/true}; + UncheckedOptionalAccessModelOptions Options{IgnoreSmartPointerDereference}; std::vector Diagnostics; llvm::Error Error = checkDataflow( AnalysisInputs( @@ -3721,6 +3727,50 @@ TEST_P(UncheckedOptionalAccessTest, ConstByValueAccessorWithModInBetween) { )cc"); } +TEST_P(UncheckedOptionalAccessTest, ConstPointerAccessor) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + $ns::$optional x; + }; + + struct MyUniquePtr { + A* operator->() const; + }; + + void target(MyUniquePtr p) { + if (p->x) { + *p->x; + } + } + )cc", + /*IgnoreSmartPointerDereference=*/false); +} + +TEST_P(UncheckedOptionalAccessTest, ConstPointerAccessorWithModInBetween) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + $ns::$optional x; + }; + + struct MyUniquePtr { + A* operator->() const; + void reset(A*); + }; + + void target(MyUniquePtr p) { + if (p->x) { + p.reset(nullptr); + *p->x; // [[unsafe]] + } + } + )cc", + /*IgnoreSmartPointerDereference=*/false); +} + TEST_P(UncheckedOptionalAccessTest, ConstBoolAccessor) { ExpectDiagnosticsFor(R"cc( #include "unchecked_optional_access_test.h" diff --git a/compiler-rt/include/fuzzer/FuzzedDataProvider.h 
b/compiler-rt/include/fuzzer/FuzzedDataProvider.h index 5903ed837917c..e57b95b6304a9 100644 --- a/compiler-rt/include/fuzzer/FuzzedDataProvider.h +++ b/compiler-rt/include/fuzzer/FuzzedDataProvider.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index 9fce67e61ed30..62f8d39a8abaa 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -473,8 +473,8 @@ struct NodeVisitor { READ_FEATURE(OmpDependClause::InOut) READ_FEATURE(OmpDependClause::Sink) READ_FEATURE(OmpDependClause::Source) - READ_FEATURE(OmpDependenceType) - READ_FEATURE(OmpDependenceType::Type) + READ_FEATURE(OmpTaskDependenceType) + READ_FEATURE(OmpTaskDependenceType::Type) READ_FEATURE(OmpDependSinkVec) READ_FEATURE(OmpDependSinkVecLength) READ_FEATURE(OmpEndAllocators) diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp index 5d3c5cd72eef0..d28ed0534d600 100644 --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp @@ -222,9 +222,9 @@ void OpenMPCounterVisitor::Post(const OmpLinearModifier::Type &c) { clauseDetails += "modifier=" + std::string{OmpLinearModifier::EnumToString(c)} + ";"; } -void OpenMPCounterVisitor::Post(const OmpDependenceType::Type &c) { +void OpenMPCounterVisitor::Post(const OmpTaskDependenceType::Type &c) { clauseDetails += - "type=" + std::string{OmpDependenceType::EnumToString(c)} + ";"; + "type=" + std::string{OmpTaskDependenceType::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpMapClause::Type &c) { clauseDetails += "type=" + std::string{OmpMapClause::EnumToString(c)} + ";"; diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h index 380534ebbfd70..68c52db46e2f0 
100644 --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h @@ -73,7 +73,7 @@ struct OpenMPCounterVisitor { void Post(const OmpDeviceTypeClause::Type &c); void Post(const OmpScheduleModifierType::ModType &c); void Post(const OmpLinearModifier::Type &c); - void Post(const OmpDependenceType::Type &c); + void Post(const OmpTaskDependenceType::Type &c); void Post(const OmpMapClause::Type &c); void Post(const OmpScheduleClause::ScheduleType &c); void Post(const OmpIfClause::DirectiveNameModifier &c); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index ccdfe980f6f38..31ad1b7c6ce5b 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -513,8 +513,8 @@ class ParseTreeDumper { NODE(OmpDependClause, InOut) NODE(OmpDependClause, Sink) NODE(OmpDependClause, Source) - NODE(parser, OmpDependenceType) - NODE_ENUM(OmpDependenceType, Type) + NODE(parser, OmpTaskDependenceType) + NODE_ENUM(OmpTaskDependenceType, Type) NODE(parser, OmpDependSinkVec) NODE(parser, OmpDependSinkVecLength) NODE(parser, OmpEndAllocators) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 2a312e29a3a44..506a470c5557b 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3439,6 +3439,18 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +// Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] +// +// task-dependence-type -> // "dependence-type" in 5.1 and before +// IN | OUT | INOUT | // since 4.5 +// SOURCE | SINK | // since 4.5, until 5.1 +// MUTEXINOUTSET | DEPOBJ | // since 5.0 +// INOUTSET // since 5.2 +struct OmpTaskDependenceType { + ENUM_CLASS(Type, In, Out, Inout, Source, Sink) + WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Type); +}; + // [5.0] 2.1.6 iterator-specifier -> type-declaration-stmt = 
subscript-triple // iterator-modifier -> iterator-specifier-list struct OmpIteratorSpecifier { @@ -3534,27 +3546,27 @@ struct OmpDependSinkVecLength { std::tuple t; }; -// 2.13.9 depend-vec -> iterator [+/- depend-vec-length],...,iterator[...] +// 2.13.9 depend-vec -> induction-variable [depend-vec-length], ... struct OmpDependSinkVec { TUPLE_CLASS_BOILERPLATE(OmpDependSinkVec); std::tuple> t; }; -// 2.13.9 depend-type -> IN | OUT | INOUT | SOURCE | SINK -struct OmpDependenceType { - ENUM_CLASS(Type, In, Out, Inout, Source, Sink) - WRAPPER_CLASS_BOILERPLATE(OmpDependenceType, Type); -}; - -// 2.13.9 depend-clause -> DEPEND (((IN | OUT | INOUT) : variable-name-list) | -// SOURCE | SINK : depend-vec) +// Ref: [4.5:169-170], [5.0:255-256], [5.1:288-289], [5.2:323-324] +// +// depend-clause -> +// DEPEND(SOURCE) | // since 4.5, until 5.1 +// DEPEND(SINK: depend-vec) | // since 4.5, until 5.1 +// DEPEND([depend-modifier,]dependence-type: locator-list) // since 4.5 +// +// depend-modifier -> iterator-modifier // since 5.0 struct OmpDependClause { UNION_CLASS_BOILERPLATE(OmpDependClause); EMPTY_CLASS(Source); WRAPPER_CLASS(Sink, std::list); struct InOut { TUPLE_CLASS_BOILERPLATE(InOut); - std::tuple> t; + std::tuple t; }; std::variant u; }; diff --git a/flang/include/flang/Runtime/CUDA/registration.h b/flang/include/flang/Runtime/CUDA/registration.h index 009715613e29f..5237069a4c739 100644 --- a/flang/include/flang/Runtime/CUDA/registration.h +++ b/flang/include/flang/Runtime/CUDA/registration.h @@ -11,6 +11,7 @@ #include "flang/Runtime/entry-names.h" #include +#include namespace Fortran::runtime::cuda { @@ -23,6 +24,10 @@ void *RTDECL(CUFRegisterModule)(void *data); void RTDECL(CUFRegisterFunction)( void **module, const char *fctSym, char *fctName); +/// Register a device variable. 
+void RTDECL(CUFRegisterVariable)( + void **module, char *varSym, const char *varName, int64_t size); + } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index fbc031f3a93d7..8fb0dd4a1ec3a 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -798,11 +798,11 @@ bool ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const { return findRepeatableClause( [&](const omp::clause::Depend &clause, const parser::CharBlock &) { using Depend = omp::clause::Depend; - assert(std::holds_alternative(clause.u) && - "Only the modern form is handled at the moment"); - auto &modern = std::get(clause.u); - auto kind = std::get(modern.t); - auto &objects = std::get(modern.t); + assert(std::holds_alternative(clause.u) && + "Only the form with dependence type is handled at the moment"); + auto &depType = std::get(clause.u); + auto kind = std::get(depType.t); + auto &objects = std::get(depType.t); mlir::omp::ClauseTaskDependAttr dependTypeOperand = genDependKindAttr(firOpBuilder, kind); diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 3bd89b5432886..b1fa52751fbd7 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -555,7 +555,7 @@ Depend make(const parser::OmpClause::Depend &inp, using Iteration = Doacross::Vector::value_type; // LoopIterationT CLAUSET_ENUM_CONVERT( // - convert1, parser::OmpDependenceType::Type, Depend::TaskDependenceType, + convert1, parser::OmpTaskDependenceType::Type, Depend::TaskDependenceType, // clang-format off MS(In, In) MS(Out, Out) @@ -593,17 +593,13 @@ Depend make(const parser::OmpClause::Depend &inp, return Doacross{{/*DependenceType=*/Doacross::DependenceType::Sink, /*Vector=*/makeList(s.v, convert2)}}; }, - // Depend::WithLocators + // Depend::DepType [&](const wrapped::InOut &s) -> Variant { - auto &t0 
= std::get(s.t); - auto &t1 = std::get>(s.t); - auto convert4 = [&](const parser::Designator &t) { - return makeObject(t, semaCtx); - }; - return Depend::WithLocators{ - {/*TaskDependenceType=*/convert1(t0.v), - /*Iterator=*/std::nullopt, - /*LocatorList=*/makeList(t1, convert4)}}; + auto &t0 = std::get(s.t); + auto &t1 = std::get(s.t); + return Depend::DepType{{/*TaskDependenceType=*/convert1(t0.v), + /*Iterator=*/std::nullopt, + /*LocatorList=*/makeObjects(t1, semaCtx)}}; }, }, inp.v.u)}; diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp index a4761f24f16d7..dc39be8574f84 100644 --- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp +++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp @@ -11,6 +11,7 @@ #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/allocatable.h" #include "mlir/IR/SymbolTable.h" @@ -58,6 +59,32 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase { prepareImplicitDeviceGlobals(funcOp, symTable); return mlir::WalkResult::advance(); }); + + // Copying the device global variable into the gpu module + mlir::SymbolTable parentSymTable(mod); + auto gpuMod = + parentSymTable.lookup(cudaDeviceModuleName); + if (gpuMod) { + mlir::SymbolTable gpuSymTable(gpuMod); + for (auto globalOp : mod.getOps()) { + auto attr = globalOp.getDataAttrAttr(); + if (!attr) + continue; + switch (attr.getValue()) { + case cuf::DataAttribute::Device: + case cuf::DataAttribute::Constant: + case cuf::DataAttribute::Managed: { + auto globalName{globalOp.getSymbol().getValue()}; + if (gpuSymTable.lookup(globalName)) { + break; + } + gpuSymTable.insert(globalOp->clone()); + } break; + default: + break; + } + } + } } }; } // namespace diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp index ae0c351fed56d..3ca4e93a6c9b9 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -365,10 +365,10 @@ TYPE_PARSER(construct( TYPE_PARSER( construct(name, maybe(Parser{}))) -TYPE_PARSER( - construct("IN"_id >> pure(OmpDependenceType::Type::In) || - "INOUT" >> pure(OmpDependenceType::Type::Inout) || - "OUT" >> pure(OmpDependenceType::Type::Out))) +TYPE_PARSER(construct( + "IN"_id >> pure(OmpTaskDependenceType::Type::In) || + "INOUT" >> pure(OmpTaskDependenceType::Type::Inout) || + "OUT" >> pure(OmpTaskDependenceType::Type::Out))) TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US, construct(construct( @@ -376,7 +376,7 @@ TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US, construct( construct("SOURCE"_tok)) || construct(construct( - Parser{}, ":" >> nonemptyList(designator)))) + Parser{}, ":" >> Parser{}))) // 2.15.3.7 LINEAR (linear-list: linear-step) // linear-list -> list | modifier(list) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ba4155469073e..39fcb61609e33 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2216,9 +2216,9 @@ class UnparseVisitor { } void Unparse(const OmpDependClause::InOut &x) { Put("("); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put(":"); - Walk(std::get>(x.t), ","); + Walk(std::get(x.t)); Put(")"); } bool Pre(const OmpDependClause &x) { @@ -2829,7 +2829,7 @@ class UnparseVisitor { OmpLastprivateClause, LastprivateModifier) // OMP lastprivate-modifier WALK_NESTED_ENUM(OmpScheduleModifierType, ModType) // OMP schedule-modifier WALK_NESTED_ENUM(OmpLinearModifier, Type) // OMP linear-modifier - WALK_NESTED_ENUM(OmpDependenceType, Type) // OMP dependence-type + WALK_NESTED_ENUM(OmpTaskDependenceType, Type) // OMP task-dependence-type WALK_NESTED_ENUM(OmpScheduleClause, ScheduleType) // OMP schedule-type WALK_NESTED_ENUM(OmpDeviceClause, DeviceModifier) // OMP device modifier 
WALK_NESTED_ENUM(OmpDeviceTypeClause, Type) // OMP DEVICE_TYPE diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 455322d610d6c..599cc61a83bf0 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3288,15 +3288,21 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Depend &x) { parser::ToUpperCaseLetters(getDirectiveName(GetContext().directive))); } if (const auto *inOut{std::get_if(&x.v.u)}) { - const auto &designators{std::get>(inOut->t)}; - for (const auto &ele : designators) { - if (const auto *dataRef{std::get_if(&ele.u)}) { - CheckDependList(*dataRef); - if (const auto *arr{ - std::get_if>( - &dataRef->u)}) { - CheckArraySection(arr->value(), GetLastName(*dataRef), - llvm::omp::Clause::OMPC_depend); + for (const auto &object : std::get(inOut->t).v) { + if (const auto *name{std::get_if(&object.u)}) { + context_.Say(GetContext().clauseSource, + "Common block name ('%s') cannot appear in a DEPEND " + "clause"_err_en_US, + name->ToString()); + } else if (auto *designator{std::get_if(&object.u)}) { + if (auto *dataRef{std::get_if(&designator->u)}) { + CheckDependList(*dataRef); + if (const auto *arr{ + std::get_if>( + &dataRef->u)}) { + CheckArraySection(arr->value(), GetLastName(*dataRef), + llvm::omp::Clause::OMPC_depend); + } } } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 979570a7d4103..014b7987a658b 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -435,6 +435,20 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { bool Pre(const parser::OpenMPAllocatorsConstruct &); void Post(const parser::OpenMPAllocatorsConstruct &); + void Post(const parser::OmpObjectList &x) { + // The objects from OMP clauses should have already been resolved, + // except common blocks (the ResolveNamesVisitor does not visit + // 
parser::Name, those are dealt with as members of other structures). + // Iterate over elements of x, and resolve any common blocks that + // are still unresolved. + for (const parser::OmpObject &obj : x.v) { + auto *name{std::get_if(&obj.u)}; + if (name && !name->symbol) { + Resolve(*name, currScope().MakeCommonBlock(name->source)); + } + } + } + // 2.15.3 Data-Sharing Attribute Clauses void Post(const parser::OmpDefaultClause &); bool Pre(const parser::OmpClause::Shared &x) { @@ -531,16 +545,9 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { return false; } - bool Pre(const parser::OmpDependClause &x) { - if (const auto *dependSink{ - std::get_if(&x.u)}) { - const auto &dependSinkVec{dependSink->v}; - for (const auto &dependSinkElement : dependSinkVec) { - const auto &name{std::get(dependSinkElement.t)}; - ResolveName(&name); - } - } - return false; + void Post(const parser::OmpDependSinkVec &x) { + const auto &name{std::get(x.t)}; + ResolveName(&name); } bool Pre(const parser::OmpClause::UseDevicePtr &x) { diff --git a/flang/runtime/CUDA/registration.cpp b/flang/runtime/CUDA/registration.cpp index 20d274c4d8d1c..b7b6ef389bffb 100644 --- a/flang/runtime/CUDA/registration.cpp +++ b/flang/runtime/CUDA/registration.cpp @@ -21,6 +21,9 @@ extern void __cudaRegisterFatBinaryEnd(void *); extern void __cudaRegisterFunction(void **fatCubinHandle, const char *hostFun, char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize); +extern void __cudaRegisterVar(void **fatCubinHandle, char *hostVar, + const char *deviceAddress, const char *deviceName, int ext, size_t size, + int constant, int global); void *RTDECL(CUFRegisterModule)(void *data) { void **fatHandle{__cudaRegisterFatBinary(data)}; @@ -34,6 +37,11 @@ void RTDEF(CUFRegisterFunction)( (uint3 *)0, (dim3 *)0, (dim3 *)0, (int *)0); } +void RTDEF(CUFRegisterVariable)( + void **module, char *varSym, const char *varName, int64_t size) { + 
__cudaRegisterVar(module, varSym, varName, varName, 0, size, 0, 0); +} + } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90 new file mode 100644 index 0000000000000..c83a938d5af21 --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-device-global.f90 @@ -0,0 +1,13 @@ + +// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s + + +module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} { + fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda} : !fir.array<5xi32> + + gpu.module @cuda_device_mod [#nvvm.target] { + } +} + +// CHECK: gpu.module @cuda_device_mod [#nvvm.target] +// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda} : !fir.array<5xi32> diff --git a/flang/test/Semantics/OpenMP/depend04.f90 b/flang/test/Semantics/OpenMP/depend04.f90 new file mode 100644 index 0000000000000..8bdddb017d2c9 --- /dev/null +++ b/flang/test/Semantics/OpenMP/depend04.f90 @@ -0,0 +1,10 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50 + +subroutine f00 + integer :: x + common /cc/ x +!ERROR: Common block name ('cc') cannot appear in a DEPEND clause + !$omp task depend(in: /cc/) + x = 0 + !$omp end task +end diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 13dc892978bb8..80545ee4b359f 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ -51,10 +51,13 @@ add_proxy_header_library( libc.include.llvm-libc-macros.generic_error_number_macros ) +add_header_library(fcntl_overlay HDRS fcntl_overlay.h) add_proxy_header_library( fcntl_macros HDRS fcntl_macros.h + DEPENDS + .fcntl_overlay FULL_BUILD_DEPENDS libc.include.llvm-libc-macros.fcntl_macros libc.include.fcntl diff --git a/libc/hdr/fcntl_macros.h b/libc/hdr/fcntl_macros.h index 828cb984c0cb1..3a1ddeb0a2da1 100644 --- a/libc/hdr/fcntl_macros.h 
+++ b/libc/hdr/fcntl_macros.h @@ -15,7 +15,7 @@ #else // Overlay mode -#include +#include "hdr/fcntl_overlay.h" #endif // LLVM_LIBC_FULL_BUILD diff --git a/libc/hdr/fcntl_overlay.h b/libc/hdr/fcntl_overlay.h new file mode 100644 index 0000000000000..c1cc98b0ebb2c --- /dev/null +++ b/libc/hdr/fcntl_overlay.h @@ -0,0 +1,37 @@ +//===-- Including fcntl.h in overlay mode ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_FCNTL_OVERLAY_H +#define LLVM_LIBC_HDR_FCNTL_OVERLAY_H + +#ifdef LIBC_FULL_BUILD +#error "This header should only be included in overlay mode" +#endif + +// Overlay mode + +// glibc header might provide extern inline definitions for few +// functions, causing external alias errors. They are guarded by +// `__USE_FORTIFY_LEVEL`, which will be temporarily disabled +// with `_FORTIFY_SOURCE`. 
+ +#ifdef __USE_FORTIFY_LEVEL +#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL +#undef __USE_FORTIFY_LEVEL +#define __USE_FORTIFY_LEVEL 0 +#endif + +#include + +#ifdef LIBC_OLD_USE_FORTIFY_LEVEL +#undef __USE_FORTIFY_LEVEL +#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL +#undef LIBC_OLD_USE_FORTIFY_LEVEL +#endif + +#endif // LLVM_LIBC_HDR_FCNTL_OVERLAY_H diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt index fab5245816bbe..e45979857d795 100644 --- a/libc/hdr/types/CMakeLists.txt +++ b/libc/hdr/types/CMakeLists.txt @@ -46,6 +46,17 @@ add_proxy_header_library( libc.include.llvm-libc-types.struct_timespec ) +add_proxy_header_library( + mode_t + HDRS + mode_t.h + DEPENDS + ../fcntl_overlay + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.mode_t + libc.include.fcntl +) + add_proxy_header_library( fenv_t HDRS diff --git a/libc/hdr/types/mode_t.h b/libc/hdr/types/mode_t.h new file mode 100644 index 0000000000000..abbbdb0a09d7b --- /dev/null +++ b/libc/hdr/types/mode_t.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from mode_t.h --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_MODE_T_H +#define LLVM_LIBC_HDR_MODE_T_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/mode_t.h" + +#else // Overlay mode + +#include "hdr/fcntl_overlay.h" + +#endif // LLVM_LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_MODE_T_H diff --git a/libc/src/__support/File/linux/CMakeLists.txt b/libc/src/__support/File/linux/CMakeLists.txt index 5abbf11b3671c..84e3d5608361e 100644 --- a/libc/src/__support/File/linux/CMakeLists.txt +++ b/libc/src/__support/File/linux/CMakeLists.txt @@ -7,7 +7,7 @@ add_object_library( file.h lseekImpl.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_syscall libc.include.sys_stat libc.src.__support.CPP.new @@ -55,7 +55,7 @@ add_object_library( SRCS dir.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_syscall libc.src.__support.OSUtil.osutil libc.src.__support.error_or diff --git a/libc/src/__support/File/linux/dir.cpp b/libc/src/__support/File/linux/dir.cpp index fc90ff097e460..5fe44fa8297b6 100644 --- a/libc/src/__support/File/linux/dir.cpp +++ b/libc/src/__support/File/linux/dir.cpp @@ -12,7 +12,7 @@ #include "src/__support/error_or.h" #include "src/__support/macros/config.h" -#include // For open flags +#include "hdr/fcntl_macros.h" // For open flags #include // For syscall numbers namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/__support/File/linux/file.cpp b/libc/src/__support/File/linux/file.cpp index 22292336f300e..824c1f200e8c5 100644 --- a/libc/src/__support/File/linux/file.cpp +++ b/libc/src/__support/File/linux/file.cpp @@ -18,7 +18,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" // For error macros -#include // For mode_t and other flags to the open syscall +#include "hdr/fcntl_macros.h" // For mode_t and other flags to the open syscall #include // For S_IS*, 
S_IF*, and S_IR* flags. #include // For syscall numbers diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index b6796f40adce7..fa11458f99b6c 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -79,7 +79,7 @@ add_object_library( .futex_utils libc.config.app_h libc.include.sys_syscall - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.errno.errno libc.src.__support.CPP.atomic libc.src.__support.CPP.stringstream diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp index ee3f63fa3cde3..c531d74c53355 100644 --- a/libc/src/__support/threads/linux/thread.cpp +++ b/libc/src/__support/threads/linux/thread.cpp @@ -22,7 +22,7 @@ #include #endif -#include +#include "hdr/fcntl_macros.h" #include // For EXEC_PAGESIZE. #include // For PR_SET_NAME #include // For CLONE_* flags. diff --git a/libc/src/fcntl/creat.h b/libc/src/fcntl/creat.h index e180e17c25788..3e00427638a36 100644 --- a/libc/src/fcntl/creat.h +++ b/libc/src/fcntl/creat.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_FCNTL_CREAT_H #define LLVM_LIBC_SRC_FCNTL_CREAT_H +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/fcntl/linux/CMakeLists.txt b/libc/src/fcntl/linux/CMakeLists.txt index ee8ae63b8cf06..580db16cd4132 100644 --- a/libc/src/fcntl/linux/CMakeLists.txt +++ b/libc/src/fcntl/linux/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS ../creat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -17,7 +17,7 @@ add_entrypoint_object( HDRS ../fcntl.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.__support.OSUtil.osutil ) @@ -28,7 +28,8 @@ add_entrypoint_object( HDRS ../open.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t + libc.hdr.fcntl_macros 
libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -40,7 +41,7 @@ add_entrypoint_object( HDRS ../openat.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/fcntl/linux/creat.cpp b/libc/src/fcntl/linux/creat.cpp index 2c5b5d736a3be..23abae243aed9 100644 --- a/libc/src/fcntl/linux/creat.cpp +++ b/libc/src/fcntl/linux/creat.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/fcntl/linux/open.cpp b/libc/src/fcntl/linux/open.cpp index 79b7b2b32c887..8b699ecdd2043 100644 --- a/libc/src/fcntl/linux/open.cpp +++ b/libc/src/fcntl/linux/open.cpp @@ -13,7 +13,8 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" +#include "hdr/types/mode_t.h" #include #include // For syscall numbers. diff --git a/libc/src/fcntl/linux/openat.cpp b/libc/src/fcntl/linux/openat.cpp index 0862082c22ebf..6063d9c00ad6c 100644 --- a/libc/src/fcntl/linux/openat.cpp +++ b/libc/src/fcntl/linux/openat.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/types/mode_t.h" #include #include // For syscall numbers. 
diff --git a/libc/src/fcntl/open.h b/libc/src/fcntl/open.h index 19bb53c2e3203..11f0ae5379531 100644 --- a/libc/src/fcntl/open.h +++ b/libc/src/fcntl/open.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_FCNTL_OPEN_H #define LLVM_LIBC_SRC_FCNTL_OPEN_H +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/fcntl/openat.h b/libc/src/fcntl/openat.h index d09791a84f735..051c8a2304dcb 100644 --- a/libc/src/fcntl/openat.h +++ b/libc/src/fcntl/openat.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_FCNTL_OPENAT_H #define LLVM_LIBC_SRC_FCNTL_OPENAT_H +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/math/cbrt.h b/libc/src/math/cbrt.h index a7d5fe80e57b3..8cf7d9b221df3 100644 --- a/libc/src/math/cbrt.h +++ b/libc/src/math/cbrt.h @@ -9,10 +9,12 @@ #ifndef LLVM_LIBC_SRC_MATH_CBRT_H #define LLVM_LIBC_SRC_MATH_CBRT_H -namespace LIBC_NAMESPACE { +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { double cbrt(double x); -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_CBRT_H diff --git a/libc/src/spawn/linux/CMakeLists.txt b/libc/src/spawn/linux/CMakeLists.txt index 9ef3a9d18b0c6..26148fe1c76db 100644 --- a/libc/src/spawn/linux/CMakeLists.txt +++ b/libc/src/spawn/linux/CMakeLists.txt @@ -5,7 +5,8 @@ add_entrypoint_object( HDRS ../posix_spawn.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t + libc.hdr.fcntl_macros libc.include.spawn libc.include.sys_syscall libc.include.signal diff --git a/libc/src/spawn/linux/posix_spawn.cpp b/libc/src/spawn/linux/posix_spawn.cpp index 4c0469b3ce384..fe82ba260148a 100644 --- a/libc/src/spawn/linux/posix_spawn.cpp +++ b/libc/src/spawn/linux/posix_spawn.cpp @@ -14,7 +14,8 @@ #include "src/__support/macros/config.h" #include "src/spawn/file_actions.h" -#include +#include "hdr/fcntl_macros.h" +#include "hdr/types/mode_t.h" 
#include // For SIGCHLD #include #include // For syscall numbers. diff --git a/libc/src/stdio/gpu/fprintf.cpp b/libc/src/stdio/gpu/fprintf.cpp index 6222589cc4bab..46196d7d2b10f 100644 --- a/libc/src/stdio/gpu/fprintf.cpp +++ b/libc/src/stdio/gpu/fprintf.cpp @@ -16,7 +16,7 @@ #include -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, fprintf, (::FILE *__restrict stream, const char *__restrict format, @@ -29,4 +29,4 @@ LLVM_LIBC_FUNCTION(int, fprintf, return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/printf.cpp b/libc/src/stdio/gpu/printf.cpp index d9903193ef165..be1885fd6801d 100644 --- a/libc/src/stdio/gpu/printf.cpp +++ b/libc/src/stdio/gpu/printf.cpp @@ -15,7 +15,7 @@ #include -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { va_list vlist; @@ -26,4 +26,4 @@ LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/vfprintf.cpp b/libc/src/stdio/gpu/vfprintf.cpp index 961cfa48579e0..c92685f48c728 100644 --- a/libc/src/stdio/gpu/vfprintf.cpp +++ b/libc/src/stdio/gpu/vfprintf.cpp @@ -14,7 +14,7 @@ #include "src/errno/libc_errno.h" #include "src/stdio/gpu/vfprintf_utils.h" -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, vfprintf, (::FILE *__restrict stream, const char *__restrict format, @@ -24,4 +24,4 @@ LLVM_LIBC_FUNCTION(int, vfprintf, return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/vfprintf_utils.h b/libc/src/stdio/gpu/vfprintf_utils.h index 93ce1649869fc..5010ee16d9607 100644 --- a/libc/src/stdio/gpu/vfprintf_utils.h +++ b/libc/src/stdio/gpu/vfprintf_utils.h @@ -9,10 +9,11 @@ #include "hdr/types/FILE.h" #include "src/__support/RPC/rpc_client.h" 
#include "src/__support/arg_list.h" +#include "src/__support/macros/config.h" #include "src/stdio/gpu/file.h" #include "src/string/string_utils.h" -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { template LIBC_INLINE int vfprintf_impl(::FILE *__restrict file, @@ -82,4 +83,4 @@ LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, #endif } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/vprintf.cpp b/libc/src/stdio/gpu/vprintf.cpp index 2bb74d7f017b5..54012f3071844 100644 --- a/libc/src/stdio/gpu/vprintf.cpp +++ b/libc/src/stdio/gpu/vprintf.cpp @@ -13,7 +13,7 @@ #include "src/errno/libc_errno.h" #include "src/stdio/gpu/vfprintf_utils.h" -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, vprintf, (const char *__restrict format, va_list vlist)) { @@ -22,4 +22,4 @@ LLVM_LIBC_FUNCTION(int, vprintf, return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/linux/CMakeLists.txt b/libc/src/stdio/linux/CMakeLists.txt index d6241e1ca0439..1b2fcb33ce54d 100644 --- a/libc/src/stdio/linux/CMakeLists.txt +++ b/libc/src/stdio/linux/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS ../remove.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -22,6 +22,7 @@ add_entrypoint_object( libc.include.sys_syscall libc.src.__support.OSUtil.osutil libc.src.errno.errno + libc.hdr.fcntl_macros ) add_entrypoint_object( diff --git a/libc/src/stdio/linux/remove.cpp b/libc/src/stdio/linux/remove.cpp index 9e299aaf43e45..dbb4491d0e6cc 100644 --- a/libc/src/stdio/linux/remove.cpp +++ b/libc/src/stdio/linux/remove.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" // For AT_* macros. 
#include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include // For AT_* macros. #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/stdio/linux/rename.cpp b/libc/src/stdio/linux/rename.cpp index 69fd22720ed19..fbcb29be48f4e 100644 --- a/libc/src/stdio/linux/rename.cpp +++ b/libc/src/stdio/linux/rename.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/stdio/rename.h" -#include "include/llvm-libc-macros/linux/fcntl-macros.h" +#include "hdr/fcntl_macros.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/__support/macros/config.h" diff --git a/libc/src/stdio/vsscanf.h b/libc/src/stdio/vsscanf.h index 992c44d3d95b9..c57b1743e477e 100644 --- a/libc/src/stdio/vsscanf.h +++ b/libc/src/stdio/vsscanf.h @@ -9,12 +9,14 @@ #ifndef LLVM_LIBC_SRC_STDIO_VSSCANF_H #define LLVM_LIBC_SRC_STDIO_VSSCANF_H +#include "src/__support/macros/config.h" + #include -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { int vsscanf(const char *s, const char *format, va_list vlist); -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_STDIO_VSSCANF_H diff --git a/libc/src/sys/mman/linux/CMakeLists.txt b/libc/src/sys/mman/linux/CMakeLists.txt index 11188254cfbd4..47c16f79bc8d5 100644 --- a/libc/src/sys/mman/linux/CMakeLists.txt +++ b/libc/src/sys/mman/linux/CMakeLists.txt @@ -187,8 +187,7 @@ add_entrypoint_object( ../shm_open.h DEPENDS libc.src.fcntl.open - libc.include.llvm-libc-macros.fcntl_macros - libc.include.llvm-libc-types.mode_t + libc.hdr.types.mode_t .shm_common ) diff --git a/libc/src/sys/mman/linux/shm_open.cpp b/libc/src/sys/mman/linux/shm_open.cpp index d235e57aefdeb..11de482272d00 100644 --- a/libc/src/sys/mman/linux/shm_open.cpp +++ b/libc/src/sys/mman/linux/shm_open.cpp @@ -7,7 +7,7 @@ 
//===----------------------------------------------------------------------===// #include "src/sys/mman/shm_open.h" -#include "llvm-libc-macros/fcntl-macros.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/fcntl/open.h" #include "src/sys/mman/linux/shm_common.h" diff --git a/libc/src/sys/mman/shm_open.h b/libc/src/sys/mman/shm_open.h index c890304aa4acf..1872dd30cb6f5 100644 --- a/libc/src/sys/mman/shm_open.h +++ b/libc/src/sys/mman/shm_open.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_SYS_MMAN_SHM_OPEN_H #define LLVM_LIBC_SRC_SYS_MMAN_SHM_OPEN_H +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sys/stat/linux/CMakeLists.txt b/libc/src/sys/stat/linux/CMakeLists.txt index 415d2fa5c8771..9aeb14636c2c1 100644 --- a/libc/src/sys/stat/linux/CMakeLists.txt +++ b/libc/src/sys/stat/linux/CMakeLists.txt @@ -5,7 +5,8 @@ add_entrypoint_object( HDRS ../chmod.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t + libc.hdr.fcntl_macros libc.include.sys_stat libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -19,6 +20,7 @@ add_entrypoint_object( HDRS ../fchmod.h DEPENDS + libc.hdr.types.mode_t libc.include.sys_stat libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -45,7 +47,8 @@ add_entrypoint_object( HDRS ../mkdir.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t + libc.hdr.fcntl_macros libc.include.sys_stat libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -84,7 +87,7 @@ add_entrypoint_object( ../stat.h DEPENDS .kernel_statx - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno ) @@ -97,7 +100,7 @@ add_entrypoint_object( ../lstat.h DEPENDS .kernel_statx - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno ) @@ -110,7 +113,7 @@ add_entrypoint_object( ../fstat.h DEPENDS .kernel_statx - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat 
libc.src.errno.errno ) diff --git a/libc/src/sys/stat/linux/chmod.cpp b/libc/src/sys/stat/linux/chmod.cpp index c91cabb514a8c..57d5bae6b8191 100644 --- a/libc/src/sys/stat/linux/chmod.cpp +++ b/libc/src/sys/stat/linux/chmod.cpp @@ -11,9 +11,10 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include #include // For syscall numbers. diff --git a/libc/src/sys/stat/linux/fchmod.cpp b/libc/src/sys/stat/linux/fchmod.cpp index 7b6c7b7091a82..0d6fd359169aa 100644 --- a/libc/src/sys/stat/linux/fchmod.cpp +++ b/libc/src/sys/stat/linux/fchmod.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include #include // For syscall numbers. diff --git a/libc/src/sys/stat/linux/fstat.cpp b/libc/src/sys/stat/linux/fstat.cpp index 411aa47bcda2a..35cf8f08f782d 100644 --- a/libc/src/sys/stat/linux/fstat.cpp +++ b/libc/src/sys/stat/linux/fstat.cpp @@ -13,7 +13,7 @@ #include "src/__support/common.h" -#include +#include "hdr/fcntl_macros.h" #include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sys/stat/linux/lstat.cpp b/libc/src/sys/stat/linux/lstat.cpp index 5a6eff068d1dd..354c5b6e029a4 100644 --- a/libc/src/sys/stat/linux/lstat.cpp +++ b/libc/src/sys/stat/linux/lstat.cpp @@ -14,7 +14,7 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "src/__support/common.h" -#include +#include "hdr/fcntl_macros.h" #include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sys/stat/linux/mkdir.cpp b/libc/src/sys/stat/linux/mkdir.cpp index 527c3d2058d2b..b319b5c8393de 100644 --- a/libc/src/sys/stat/linux/mkdir.cpp +++ b/libc/src/sys/stat/linux/mkdir.cpp @@ -11,9 +11,10 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include #include // For syscall numbers. diff --git a/libc/src/sys/stat/linux/stat.cpp b/libc/src/sys/stat/linux/stat.cpp index c5149e6e3c883..de9cdb197d687 100644 --- a/libc/src/sys/stat/linux/stat.cpp +++ b/libc/src/sys/stat/linux/stat.cpp @@ -13,7 +13,7 @@ #include "src/__support/common.h" -#include +#include "hdr/fcntl_macros.h" #include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index 9b0d752cefbd8..472438ca72e49 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -5,6 +5,7 @@ add_entrypoint_object( HDRS ../access.h DEPENDS + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -57,7 +58,7 @@ add_entrypoint_object( HDRS ../dup2.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -254,7 +255,7 @@ add_entrypoint_object( HDRS ../link.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -268,7 +269,7 @@ add_entrypoint_object( HDRS ../linkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -377,7 +378,7 @@ add_entrypoint_object( HDRS ../rmdir.h DEPENDS - 
libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -391,7 +392,7 @@ add_entrypoint_object( HDRS ../readlink.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -405,7 +406,7 @@ add_entrypoint_object( HDRS ../readlinkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -419,7 +420,7 @@ add_entrypoint_object( HDRS ../symlink.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -433,7 +434,7 @@ add_entrypoint_object( HDRS ../symlinkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -485,7 +486,7 @@ add_entrypoint_object( HDRS ../unlink.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -499,7 +500,7 @@ add_entrypoint_object( HDRS ../unlinkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil diff --git a/libc/src/unistd/linux/access.cpp b/libc/src/unistd/linux/access.cpp index e9ad74989b056..2f7ebbcdf9e81 100644 --- a/libc/src/unistd/linux/access.cpp +++ b/libc/src/unistd/linux/access.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/dup2.cpp b/libc/src/unistd/linux/dup2.cpp index 51a19a71a7d85..c7c7c1a8ca786 100644 --- a/libc/src/unistd/linux/dup2.cpp +++ b/libc/src/unistd/linux/dup2.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/link.cpp b/libc/src/unistd/linux/link.cpp index 37ca58eab1096..477806a70df74 100644 --- a/libc/src/unistd/linux/link.cpp +++ b/libc/src/unistd/linux/link.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/linkat.cpp b/libc/src/unistd/linux/linkat.cpp index fcd6a5f75a196..40f68cc90c480 100644 --- a/libc/src/unistd/linux/linkat.cpp +++ b/libc/src/unistd/linux/linkat.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/readlink.cpp b/libc/src/unistd/linux/readlink.cpp index 7b15245004405..2055e6b3400f2 100644 --- a/libc/src/unistd/linux/readlink.cpp +++ b/libc/src/unistd/linux/readlink.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/readlinkat.cpp b/libc/src/unistd/linux/readlinkat.cpp index 19a9ff9fbeb72..e5e4d0d39bc9c 100644 --- a/libc/src/unistd/linux/readlinkat.cpp +++ b/libc/src/unistd/linux/readlinkat.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/rmdir.cpp b/libc/src/unistd/linux/rmdir.cpp index 8974468ebcf16..075af12af64c5 100644 --- a/libc/src/unistd/linux/rmdir.cpp +++ b/libc/src/unistd/linux/rmdir.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/symlink.cpp b/libc/src/unistd/linux/symlink.cpp index 5efd4df85edab..9e1b2886ea0f5 100644 --- a/libc/src/unistd/linux/symlink.cpp +++ b/libc/src/unistd/linux/symlink.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/symlinkat.cpp b/libc/src/unistd/linux/symlinkat.cpp index 63d2e6d1507a5..bcf2d0f8cc055 100644 --- a/libc/src/unistd/linux/symlinkat.cpp +++ b/libc/src/unistd/linux/symlinkat.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/unlink.cpp b/libc/src/unistd/linux/unlink.cpp index de7cae8b826eb..72d8e2398e3d7 100644 --- a/libc/src/unistd/linux/unlink.cpp +++ b/libc/src/unistd/linux/unlink.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/unlinkat.cpp b/libc/src/unistd/linux/unlinkat.cpp index e794f242b9459..4ed20f542f170 100644 --- a/libc/src/unistd/linux/unlinkat.cpp +++ b/libc/src/unistd/linux/unlinkat.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/test/src/fcntl/CMakeLists.txt b/libc/test/src/fcntl/CMakeLists.txt index 48048b7fe8866..b522fef7439df 100644 --- a/libc/test/src/fcntl/CMakeLists.txt +++ b/libc/test/src/fcntl/CMakeLists.txt @@ -42,7 +42,7 @@ add_libc_unittest( SRCS openat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.errno.errno libc.src.fcntl.open libc.src.fcntl.openat diff --git a/libc/test/src/fcntl/openat_test.cpp b/libc/test/src/fcntl/openat_test.cpp index 9dafd125224a4..547359eb9f7a9 100644 --- a/libc/test/src/fcntl/openat_test.cpp +++ b/libc/test/src/fcntl/openat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcUniStd, OpenAndReadTest) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; diff --git a/libc/test/src/sys/sendfile/CMakeLists.txt b/libc/test/src/sys/sendfile/CMakeLists.txt index 82efaa147bd89..ceaa4accdd06e 100644 --- a/libc/test/src/sys/sendfile/CMakeLists.txt +++ 
b/libc/test/src/sys/sendfile/CMakeLists.txt @@ -9,7 +9,7 @@ add_libc_unittest( SRCS sendfile_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open diff --git a/libc/test/src/sys/sendfile/sendfile_test.cpp b/libc/test/src/sys/sendfile/sendfile_test.cpp index 59025438a2467..a658212ddb72c 100644 --- a/libc/test/src/sys/sendfile/sendfile_test.cpp +++ b/libc/test/src/sys/sendfile/sendfile_test.cpp @@ -17,7 +17,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include namespace cpp = LIBC_NAMESPACE::cpp; diff --git a/libc/test/src/sys/stat/CMakeLists.txt b/libc/test/src/sys/stat/CMakeLists.txt index 877a129b627dd..dd3d0932755b7 100644 --- a/libc/test/src/sys/stat/CMakeLists.txt +++ b/libc/test/src/sys/stat/CMakeLists.txt @@ -9,7 +9,7 @@ add_libc_unittest( SRCS chmod_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open @@ -25,7 +25,7 @@ add_libc_unittest( SRCS fchmodat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open @@ -41,7 +41,7 @@ add_libc_unittest( SRCS fchmod_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open @@ -57,7 +57,7 @@ add_libc_unittest( SRCS mkdirat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.mkdirat @@ -71,7 +71,7 @@ add_libc_unittest( SRCS stat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.stat @@ -87,7 +87,7 @@ add_libc_unittest( SRCS lstat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.lstat @@ -103,7 +103,7 @@ add_libc_unittest( SRCS fstat_test.cpp DEPENDS - 
libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.fstat diff --git a/libc/test/src/sys/stat/chmod_test.cpp b/libc/test/src/sys/stat/chmod_test.cpp index c688996615cee..83ab0f45b6f08 100644 --- a/libc/test/src/sys/stat/chmod_test.cpp +++ b/libc/test/src/sys/stat/chmod_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcChmodTest, ChangeAndOpen) { diff --git a/libc/test/src/sys/stat/fchmod_test.cpp b/libc/test/src/sys/stat/fchmod_test.cpp index 91c0f68b8708c..03eb79d95ddd6 100644 --- a/libc/test/src/sys/stat/fchmod_test.cpp +++ b/libc/test/src/sys/stat/fchmod_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcChmodTest, ChangeAndOpen) { diff --git a/libc/test/src/sys/stat/fchmodat_test.cpp b/libc/test/src/sys/stat/fchmodat_test.cpp index c43ef8ae13315..09970b6e0fb16 100644 --- a/libc/test/src/sys/stat/fchmodat_test.cpp +++ b/libc/test/src/sys/stat/fchmodat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcFchmodatTest, ChangeAndOpen) { diff --git a/libc/test/src/sys/stat/fstat_test.cpp b/libc/test/src/sys/stat/fstat_test.cpp index 1379eae26a47a..34c675d1a4e29 100644 --- a/libc/test/src/sys/stat/fstat_test.cpp +++ b/libc/test/src/sys/stat/fstat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcFStatTest, CreatAndReadMode) { diff --git a/libc/test/src/sys/stat/lstat_test.cpp b/libc/test/src/sys/stat/lstat_test.cpp index b44b3d1a59ce7..a723d5ae2e297 100644 --- a/libc/test/src/sys/stat/lstat_test.cpp +++ b/libc/test/src/sys/stat/lstat_test.cpp @@ -14,7 +14,7 
@@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcLStatTest, CreatAndReadMode) { diff --git a/libc/test/src/sys/stat/mkdirat_test.cpp b/libc/test/src/sys/stat/mkdirat_test.cpp index cbacc16b402d7..85e013de234e7 100644 --- a/libc/test/src/sys/stat/mkdirat_test.cpp +++ b/libc/test/src/sys/stat/mkdirat_test.cpp @@ -11,7 +11,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcMkdiratTest, CreateAndRemove) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; diff --git a/libc/test/src/sys/stat/stat_test.cpp b/libc/test/src/sys/stat/stat_test.cpp index baf363382022a..0ddd8baaec1c9 100644 --- a/libc/test/src/sys/stat/stat_test.cpp +++ b/libc/test/src/sys/stat/stat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcStatTest, CreatAndReadMode) { diff --git a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp index 8cb5f867453e4..2f3e0b96ff095 100644 --- a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp +++ b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp @@ -1,4 +1,4 @@ -#include "llvm-libc-macros/linux/fcntl-macros.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/fcntl/open.h" #include "src/sys/statvfs/fstatvfs.h" diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt index e03e56b3cf8ad..ce936cebad426 100644 --- a/libc/test/src/unistd/CMakeLists.txt +++ b/libc/test/src/unistd/CMakeLists.txt @@ -24,11 +24,12 @@ add_libc_unittest( SRCS chdir_test.cpp DEPENDS + libc.hdr.fcntl_macros libc.include.unistd libc.src.errno.errno - libc.src.fcntl.open libc.src.unistd.chdir libc.src.unistd.close + libc.src.fcntl.open libc.test.UnitTest.ErrnoSetterMatcher ) 
@@ -223,7 +224,7 @@ add_libc_unittest( SRCS rmdir_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.errno.errno libc.src.sys.stat.mkdir libc.src.unistd.rmdir @@ -262,7 +263,7 @@ add_libc_unittest( SRCS readlinkat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.src.errno.errno libc.src.unistd.readlinkat @@ -410,7 +411,7 @@ add_libc_unittest( syscall_test.cpp DEPENDS libc.include.unistd - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_syscall libc.src.errno.errno libc.src.unistd.__llvm_libc_syscall diff --git a/libc/test/src/unistd/chdir_test.cpp b/libc/test/src/unistd/chdir_test.cpp index 51dc7bb15d3ee..e1bdcd77119f7 100644 --- a/libc/test/src/unistd/chdir_test.cpp +++ b/libc/test/src/unistd/chdir_test.cpp @@ -13,7 +13,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcChdirTest, ChangeAndOpen) { // The idea of this test is that we will first open an existing test file diff --git a/libc/test/src/unistd/fchdir_test.cpp b/libc/test/src/unistd/fchdir_test.cpp index ae88e1f22ed6b..0e39fde17c67b 100644 --- a/libc/test/src/unistd/fchdir_test.cpp +++ b/libc/test/src/unistd/fchdir_test.cpp @@ -13,7 +13,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcChdirTest, ChangeAndOpen) { // The idea of this test is that we will first open an existing test file diff --git a/libc/test/src/unistd/readlinkat_test.cpp b/libc/test/src/unistd/readlinkat_test.cpp index 1fa683b02b5b5..9e4bb9af02e76 100644 --- a/libc/test/src/unistd/readlinkat_test.cpp +++ b/libc/test/src/unistd/readlinkat_test.cpp @@ -15,7 +15,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" namespace cpp = LIBC_NAMESPACE::cpp; diff --git a/libc/test/src/unistd/rmdir_test.cpp 
b/libc/test/src/unistd/rmdir_test.cpp index 93cb0f3f53c1b..4f4cd94c5cf0b 100644 --- a/libc/test/src/unistd/rmdir_test.cpp +++ b/libc/test/src/unistd/rmdir_test.cpp @@ -12,7 +12,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcRmdirTest, CreateAndRemove) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; diff --git a/libc/test/src/unistd/syscall_test.cpp b/libc/test/src/unistd/syscall_test.cpp index cee29bd9afa30..f6cc3eab9aabe 100644 --- a/libc/test/src/unistd/syscall_test.cpp +++ b/libc/test/src/unistd/syscall_test.cpp @@ -11,7 +11,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include // For S_* flags. #include // For syscall numbers. #include diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h index 54af1fa1a1cc5..3fa948ecc43cf 100644 --- a/libcxx/include/__memory/uninitialized_algorithms.h +++ b/libcxx/include/__memory/uninitialized_algorithms.h @@ -638,7 +638,8 @@ __uninitialized_allocator_relocate(_Alloc& __alloc, _Tp* __first, _Tp* __last, _ __guard.__complete(); std::__allocator_destroy(__alloc, __first, __last); } else { - __builtin_memcpy(__result, __first, sizeof(_Tp) * (__last - __first)); + // Casting to void* to suppress clang complaining that this is technically UB. 
+ __builtin_memcpy(static_cast(__result), __first, sizeof(_Tp) * (__last - __first)); } } diff --git a/libcxx/test/std/utilities/expected/types.h b/libcxx/test/std/utilities/expected/types.h index 2b6983fb399c6..df73ebdfe495e 100644 --- a/libcxx/test/std/utilities/expected/types.h +++ b/libcxx/test/std/utilities/expected/types.h @@ -162,7 +162,7 @@ template struct TailClobberer { constexpr TailClobberer() noexcept { if (!std::is_constant_evaluated()) { - std::memset(this, Constant, sizeof(*this)); + std::memset(static_cast(this), Constant, sizeof(*this)); } // Always set `b` itself to `false` so that the comparison works. b = false; @@ -245,7 +245,7 @@ struct BoolWithPadding { constexpr explicit BoolWithPadding() noexcept : BoolWithPadding(false) {} constexpr BoolWithPadding(bool val) noexcept { if (!std::is_constant_evaluated()) { - std::memset(this, 0, sizeof(*this)); + std::memset(static_cast(this), 0, sizeof(*this)); } val_ = val; } @@ -268,7 +268,7 @@ struct IntWithoutPadding { constexpr explicit IntWithoutPadding() noexcept : IntWithoutPadding(0) {} constexpr IntWithoutPadding(int val) noexcept { if (!std::is_constant_evaluated()) { - std::memset(this, 0, sizeof(*this)); + std::memset(static_cast(this), 0, sizeof(*this)); } val_ = val; } diff --git a/libcxx/test/support/min_allocator.h b/libcxx/test/support/min_allocator.h index 13ee98289c36b..18f51f8072640 100644 --- a/libcxx/test/support/min_allocator.h +++ b/libcxx/test/support/min_allocator.h @@ -465,14 +465,14 @@ class safe_allocator { TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { T* memory = std::allocator().allocate(n); if (!TEST_IS_CONSTANT_EVALUATED) - std::memset(memory, 0, sizeof(T) * n); + std::memset(static_cast(memory), 0, sizeof(T) * n); return memory; } TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) { if (!TEST_IS_CONSTANT_EVALUATED) - DoNotOptimize(std::memset(p, 0, sizeof(T) * n)); + DoNotOptimize(std::memset(static_cast(p), 0, sizeof(T) * n)); std::allocator().deallocate(p, 
n); } diff --git a/lldb/include/lldb/Breakpoint/BreakpointLocation.h b/lldb/include/lldb/Breakpoint/BreakpointLocation.h index 3592291bb2d06..cca00335bc3c6 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointLocation.h +++ b/lldb/include/lldb/Breakpoint/BreakpointLocation.h @@ -11,12 +11,10 @@ #include #include -#include #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Breakpoint/StoppointHitCounter.h" #include "lldb/Core/Address.h" -#include "lldb/Symbol/LineEntry.h" #include "lldb/Utility/UserID.h" #include "lldb/lldb-private.h" @@ -284,25 +282,6 @@ class BreakpointLocation /// Returns the breakpoint location ID. lldb::break_id_t GetID() const { return m_loc_id; } - /// Set the line entry that should be shown to users for this location. - /// It is up to the caller to verify that this is a valid entry to show. - /// The current use of this is to distinguish among line entries from a - /// virtual inlined call stack that all share the same address. - /// The line entry must have the same start address as the address for this - /// location. - bool SetPreferredLineEntry(const LineEntry &line_entry) { - if (m_address == line_entry.range.GetBaseAddress()) { - m_preferred_line_entry = line_entry; - return true; - } - assert(0 && "Tried to set a preferred line entry with a different address"); - return false; - } - - const std::optional GetPreferredLineEntry() { - return m_preferred_line_entry; - } - protected: friend class BreakpointSite; friend class BreakpointLocationList; @@ -327,16 +306,6 @@ class BreakpointLocation /// If it returns false we should continue, otherwise stop. bool IgnoreCountShouldStop(); - /// If this location knows that the virtual stack frame it represents is - /// not frame 0, return the suggested stack frame instead. This will happen - /// when the location's address contains a "virtual inlined call stack" and - /// the breakpoint was set on a file & line that are not at the bottom of that - /// stack. 
For now we key off the "preferred line entry" - looking for that - /// in the blocks that start with the stop PC. - /// This version of the API doesn't take an "inlined" parameter because it - /// only changes frames in the inline stack. - std::optional GetSuggestedStackFrameIndex(); - private: void SwapLocation(lldb::BreakpointLocationSP swap_from); @@ -400,11 +369,6 @@ class BreakpointLocation lldb::break_id_t m_loc_id; ///< Breakpoint location ID. StoppointHitCounter m_hit_counter; ///< Number of times this breakpoint /// location has been hit. - /// If this exists, use it to print the stop description rather than the - /// LineEntry m_address resolves to directly. Use this for instance when the - /// location was given somewhere in the virtual inlined call stack since the - /// Address always resolves to the lowest entry in the stack. - std::optional m_preferred_line_entry; void SetShouldResolveIndirectFunctions(bool do_resolve) { m_should_resolve_indirect_functions = do_resolve; diff --git a/lldb/include/lldb/Breakpoint/BreakpointSite.h b/lldb/include/lldb/Breakpoint/BreakpointSite.h index 7b3f7be23639f..17b76d51c1ae5 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointSite.h +++ b/lldb/include/lldb/Breakpoint/BreakpointSite.h @@ -170,11 +170,6 @@ class BreakpointSite : public std::enable_shared_from_this, /// \see lldb::DescriptionLevel void GetDescription(Stream *s, lldb::DescriptionLevel level); - // This runs through all the breakpoint locations owning this site and returns - // the greatest of their suggested stack frame indexes. This only handles - // inlined stack changes. - std::optional GetSuggestedStackFrameIndex(); - /// Tell whether a breakpoint has a location at this site. 
/// /// \param[in] bp_id diff --git a/lldb/include/lldb/Core/Declaration.h b/lldb/include/lldb/Core/Declaration.h index c864b88c6b32a..4a0e9047b5469 100644 --- a/lldb/include/lldb/Core/Declaration.h +++ b/lldb/include/lldb/Core/Declaration.h @@ -84,14 +84,10 @@ class Declaration { /// \param[in] declaration /// The const Declaration object to compare with. /// - /// \param[in] full - /// Same meaning as Full in FileSpec::Equal. True means an empty - /// directory is not equal to a specified one, false means it is equal. - /// /// \return /// Returns \b true if \b declaration is at the same file and /// line, \b false otherwise. - bool FileAndLineEqual(const Declaration &declaration, bool full) const; + bool FileAndLineEqual(const Declaration &declaration) const; /// Dump a description of this object to a Stream. /// diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index 45beac129e86f..fae90364deaf0 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -77,18 +77,6 @@ class StopInfo : public std::enable_shared_from_this { m_description.clear(); } - /// This gives the StopInfo a chance to suggest a stack frame to select. - /// Passing true for inlined_stack will request changes to the inlined - /// call stack. Passing false will request changes to the real stack - /// frame. The inlined stack gets adjusted before we call into the thread - /// plans so they can reason based on the correct values. The real stack - /// adjustment is handled after the frame recognizers get a chance to adjust - /// the frame. 
- virtual std::optional - GetSuggestedStackFrameIndex(bool inlined_stack) { - return {}; - } - virtual bool IsValidForOperatingSystemThread(Thread &thread) { return true; } /// A Continue operation can result in a false stop event diff --git a/lldb/include/lldb/Target/ThreadPlanStepInRange.h b/lldb/include/lldb/Target/ThreadPlanStepInRange.h index 9da8370ef1c92..f9ef87942a7c0 100644 --- a/lldb/include/lldb/Target/ThreadPlanStepInRange.h +++ b/lldb/include/lldb/Target/ThreadPlanStepInRange.h @@ -80,8 +80,8 @@ class ThreadPlanStepInRange : public ThreadPlanStepRange, bool m_step_past_prologue; // FIXME: For now hard-coded to true, we could put // a switch in for this if there's // demand for that. - LazyBool m_virtual_step; // true if we've just done a "virtual step", i.e. - // just moved the inline stack depth. + bool m_virtual_step; // true if we've just done a "virtual step", i.e. just + // moved the inline stack depth. ConstString m_step_into_target; ThreadPlanStepInRange(const ThreadPlanStepInRange &) = delete; const ThreadPlanStepInRange & diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index c7ea50407ae1c..ad9057c8141e9 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -508,20 +508,8 @@ void BreakpointLocation::GetDescription(Stream *s, s->PutCString("re-exported target = "); else s->PutCString("where = "); - - // If there's a preferred line entry for printing, use that. - bool show_function_info = true; - if (auto preferred = GetPreferredLineEntry()) { - sc.line_entry = *preferred; - // FIXME: We're going to get the function name wrong when the preferred - // line entry is not the lowest one. For now, just leave the function - // out in this case, but we really should also figure out how to easily - // fake the function name here. 
- show_function_info = false; - } sc.DumpStopContext(s, m_owner.GetTarget().GetProcessSP().get(), m_address, - false, true, false, show_function_info, - show_function_info, show_function_info); + false, true, false, true, true, true); } else { if (sc.module_sp) { s->EOL(); @@ -549,10 +537,7 @@ void BreakpointLocation::GetDescription(Stream *s, if (sc.line_entry.line > 0) { s->EOL(); s->Indent("location = "); - if (auto preferred = GetPreferredLineEntry()) - preferred->DumpStopContext(s, true); - else - sc.line_entry.DumpStopContext(s, true); + sc.line_entry.DumpStopContext(s, true); } } else { @@ -671,50 +656,6 @@ void BreakpointLocation::SendBreakpointLocationChangedEvent( } } -std::optional BreakpointLocation::GetSuggestedStackFrameIndex() { - auto preferred_opt = GetPreferredLineEntry(); - if (!preferred_opt) - return {}; - LineEntry preferred = *preferred_opt; - SymbolContext sc; - if (!m_address.CalculateSymbolContext(&sc)) - return {}; - // Don't return anything special if frame 0 is the preferred line entry. - // We not really telling the stack frame list to do anything special in that - // case. - if (!LineEntry::Compare(sc.line_entry, preferred)) - return {}; - - if (!sc.block) - return {}; - - // Blocks have their line info in Declaration form, so make one here: - Declaration preferred_decl(preferred.GetFile(), preferred.line, - preferred.column); - - uint32_t depth = 0; - Block *inlined_block = sc.block->GetContainingInlinedBlock(); - while (inlined_block) { - // If we've moved to a block that this isn't the start of, that's not - // our inlining info or call site, so we can stop here. 
- Address start_address; - if (!inlined_block->GetStartAddress(start_address) || - start_address != m_address) - return {}; - - const InlineFunctionInfo *info = inlined_block->GetInlinedFunctionInfo(); - if (info) { - if (preferred_decl == info->GetDeclaration()) - return depth; - if (preferred_decl == info->GetCallSite()) - return depth + 1; - } - inlined_block = inlined_block->GetInlinedParent(); - depth++; - } - return {}; -} - void BreakpointLocation::SwapLocation(BreakpointLocationSP swap_from) { m_address = swap_from->m_address; m_should_resolve_indirect_functions = diff --git a/lldb/source/Breakpoint/BreakpointResolver.cpp b/lldb/source/Breakpoint/BreakpointResolver.cpp index 9643602d78c75..8307689c7640c 100644 --- a/lldb/source/Breakpoint/BreakpointResolver.cpp +++ b/lldb/source/Breakpoint/BreakpointResolver.cpp @@ -340,21 +340,6 @@ void BreakpointResolver::AddLocation(SearchFilter &filter, } BreakpointLocationSP bp_loc_sp(AddLocation(line_start)); - // If the address that we resolved the location to returns a different - // LineEntry from the one in the incoming SC, we're probably dealing with an - // inlined call site, so set that as the preferred LineEntry: - LineEntry resolved_entry; - if (!skipped_prologue && bp_loc_sp && - line_start.CalculateSymbolContextLineEntry(resolved_entry) && - LineEntry::Compare(resolved_entry, sc.line_entry)) { - // FIXME: The function name will also be wrong here. Do we need to record - // that as well, or can we figure that out again when we report this - // breakpoint location. 
- if (!bp_loc_sp->SetPreferredLineEntry(sc.line_entry)) { - LLDB_LOG(log, "Tried to add a preferred line entry that didn't have the " - "same address as this location's address."); - } - } if (log && bp_loc_sp && !GetBreakpoint()->IsInternal()) { StreamString s; bp_loc_sp->GetDescription(&s, lldb::eDescriptionLevelVerbose); diff --git a/lldb/source/Breakpoint/BreakpointSite.cpp b/lldb/source/Breakpoint/BreakpointSite.cpp index 9700a57d3346e..3ca93f908e30b 100644 --- a/lldb/source/Breakpoint/BreakpointSite.cpp +++ b/lldb/source/Breakpoint/BreakpointSite.cpp @@ -87,23 +87,6 @@ void BreakpointSite::GetDescription(Stream *s, lldb::DescriptionLevel level) { m_constituents.GetDescription(s, level); } -std::optional BreakpointSite::GetSuggestedStackFrameIndex() { - - std::optional result; - std::lock_guard guard(m_constituents_mutex); - for (BreakpointLocationSP loc_sp : m_constituents.BreakpointLocations()) { - std::optional loc_frame_index = - loc_sp->GetSuggestedStackFrameIndex(); - if (loc_frame_index) { - if (result) - result = std::max(*loc_frame_index, *result); - else - result = loc_frame_index; - } - } - return result; -} - bool BreakpointSite::IsInternal() const { return m_constituents.IsInternal(); } uint8_t *BreakpointSite::GetTrapOpcodeBytes() { return &m_trap_opcode[0]; } diff --git a/lldb/source/Core/Declaration.cpp b/lldb/source/Core/Declaration.cpp index a485c4b9ba48a..579a3999d14ea 100644 --- a/lldb/source/Core/Declaration.cpp +++ b/lldb/source/Core/Declaration.cpp @@ -70,9 +70,8 @@ int Declaration::Compare(const Declaration &a, const Declaration &b) { return 0; } -bool Declaration::FileAndLineEqual(const Declaration &declaration, - bool full) const { - int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, full); +bool Declaration::FileAndLineEqual(const Declaration &declaration) const { + int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, true); return file_compare == 0 && this->m_line == declaration.m_line; } diff 
--git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index 5c7772a6db780..f7d9c0d2d3306 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -230,7 +230,7 @@ Block *Block::GetContainingInlinedBlockWithCallSite( const auto *function_info = inlined_block->GetInlinedFunctionInfo(); if (function_info && - function_info->GetCallSite().FileAndLineEqual(find_call_site, true)) + function_info->GetCallSite().FileAndLineEqual(find_call_site)) return inlined_block; inlined_block = inlined_block->GetInlinedParent(); } diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index f0f7e40ae70d8..db8f8ce6bcbc9 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -251,10 +251,7 @@ void CompileUnit::ResolveSymbolContext( SymbolContextItem resolve_scope, SymbolContextList &sc_list, RealpathPrefixes *realpath_prefixes) { const FileSpec file_spec = src_location_spec.GetFileSpec(); - const uint32_t line = - src_location_spec.GetLine().value_or(LLDB_INVALID_LINE_NUMBER); - const uint32_t column_num = - src_location_spec.GetColumn().value_or(LLDB_INVALID_COLUMN_NUMBER); + const uint32_t line = src_location_spec.GetLine().value_or(0); const bool check_inlines = src_location_spec.GetCheckInlines(); // First find all of the file indexes that match our "file_spec". If @@ -315,112 +312,6 @@ void CompileUnit::ResolveSymbolContext( 0, file_indexes, src_location_spec, &line_entry); } - // If we didn't manage to find a breakpoint that matched the line number - // requested, that might be because it is only an inline call site, and - // doesn't have a line entry in the line table. Scan for that here. - // - // We are making the assumption that if there was an inlined function it will - // contribute at least 1 non-call-site entry to the line table. 
That's handy - // because we don't move line breakpoints over function boundaries, so if we - // found a hit, and there were also a call site entry, it would have to be in - // the function containing the PC of the line table match. That way we can - // limit the call site search to that function. - // We will miss functions that ONLY exist as a call site entry. - - if (line_entry.IsValid() && - (line_entry.line != line || line_entry.column != column_num) && - resolve_scope & eSymbolContextLineEntry && check_inlines) { - // We don't move lines over function boundaries, so the address in the - // line entry will be the in function that contained the line that might - // be a CallSite, and we can just iterate over that function to find any - // inline records, and dig up their call sites. - Address start_addr = line_entry.range.GetBaseAddress(); - Function *function = start_addr.CalculateSymbolContextFunction(); - - Declaration sought_decl(file_spec, line, column_num); - // We use this recursive function to descend the block structure looking - // for a block that has this Declaration as in it's CallSite info. - // This function recursively scans the sibling blocks of the incoming - // block parameter. - std::function examine_block = - [&sought_decl, &sc_list, &src_location_spec, resolve_scope, - &examine_block](Block &block) -> void { - // Iterate over the sibling child blocks of the incoming block. - Block *sibling_block = block.GetFirstChild(); - while (sibling_block) { - // We only have to descend through the regular blocks, looking for - // immediate inlines, since those are the only ones that will have this - // callsite. - const InlineFunctionInfo *inline_info = - sibling_block->GetInlinedFunctionInfo(); - if (inline_info) { - // If this is the call-site we are looking for, record that: - // We need to be careful because the call site from the debug info - // will generally have a column, but the user might not have specified - // it. 
- Declaration found_decl = inline_info->GetCallSite(); - uint32_t sought_column = sought_decl.GetColumn(); - if (found_decl.FileAndLineEqual(sought_decl, false) && - (sought_column == LLDB_INVALID_COLUMN_NUMBER || - sought_column == found_decl.GetColumn())) { - // If we found a call site, it belongs not in this inlined block, - // but in the parent block that inlined it. - Address parent_start_addr; - if (sibling_block->GetParent()->GetStartAddress( - parent_start_addr)) { - SymbolContext sc; - parent_start_addr.CalculateSymbolContext(&sc, resolve_scope); - // Now swap out the line entry for the one we found. - LineEntry call_site_line = sc.line_entry; - call_site_line.line = found_decl.GetLine(); - call_site_line.column = found_decl.GetColumn(); - bool matches_spec = true; - // If the user asked for an exact match, we need to make sure the - // call site we found actually matches the location. - if (src_location_spec.GetExactMatch()) { - matches_spec = false; - if ((src_location_spec.GetFileSpec() == - sc.line_entry.GetFile()) && - (src_location_spec.GetLine() && - *src_location_spec.GetLine() == call_site_line.line) && - (src_location_spec.GetColumn() && - *src_location_spec.GetColumn() == call_site_line.column)) - matches_spec = true; - } - if (matches_spec && - sibling_block->GetRangeAtIndex(0, call_site_line.range)) { - SymbolContext call_site_sc(sc.target_sp, sc.module_sp, - sc.comp_unit, sc.function, sc.block, - &call_site_line, sc.symbol); - sc_list.Append(call_site_sc); - } - } - } - } - - // Descend into the child blocks: - examine_block(*sibling_block); - // Now go to the next sibling: - sibling_block = sibling_block->GetSibling(); - } - }; - - if (function) { - // We don't need to examine the function block, it can't be inlined. - Block &func_block = function->GetBlock(true); - examine_block(func_block); - } - // If we found entries here, we are done. 
We only get here because we - // didn't find an exact line entry for this line & column, but if we found - // an exact match from the call site info that's strictly better than - // continuing to look for matches further on in the file. - // FIXME: Should I also do this for "call site line exists between the - // given line number and the later line we found in the line table"? That's - // a closer approximation to our general sliding algorithm. - if (sc_list.GetSize()) - return; - } - // If "exact == true", then "found_line" will be the same as "line". If // "exact == false", the "found_line" will be the closest line entry // with a line number greater than "line" and we will use this for our diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 94a381edd5e20..3849ec5ed178d 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -85,32 +85,121 @@ void StackFrameList::ResetCurrentInlinedDepth() { return; std::lock_guard guard(m_mutex); + + GetFramesUpTo(0, DoNotAllowInterruption); + if (m_frames.empty()) + return; + if (!m_frames[0]->IsInlined()) { + m_current_inlined_depth = UINT32_MAX; + m_current_inlined_pc = LLDB_INVALID_ADDRESS; + Log *log = GetLog(LLDBLog::Step); + if (log && log->GetVerbose()) + LLDB_LOGF( + log, + "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); + return; + } - m_current_inlined_pc = LLDB_INVALID_ADDRESS; - m_current_inlined_depth = UINT32_MAX; + // We only need to do something special about inlined blocks when we are + // at the beginning of an inlined function: + // FIXME: We probably also have to do something special if the PC is at + // the END of an inlined function, which coincides with the end of either + // its containing function or another inlined function. 
+ + Block *block_ptr = m_frames[0]->GetFrameBlock(); + if (!block_ptr) + return; + Address pc_as_address; + lldb::addr_t curr_pc = m_thread.GetRegisterContext()->GetPC(); + pc_as_address.SetLoadAddress(curr_pc, &(m_thread.GetProcess()->GetTarget())); + AddressRange containing_range; + if (!block_ptr->GetRangeContainingAddress(pc_as_address, containing_range) || + pc_as_address != containing_range.GetBaseAddress()) + return; + + // If we got here because of a breakpoint hit, then set the inlined depth + // depending on where the breakpoint was set. If we got here because of a + // crash, then set the inlined depth to the deepest most block. Otherwise, + // we stopped here naturally as the result of a step, so set ourselves in the + // containing frame of the whole set of nested inlines, so the user can then + // "virtually" step into the frames one by one, or next over the whole mess. + // Note: We don't have to handle being somewhere in the middle of the stack + // here, since ResetCurrentInlinedDepth doesn't get called if there is a + // valid inlined depth set. StopInfoSP stop_info_sp = m_thread.GetStopInfo(); if (!stop_info_sp) return; + switch (stop_info_sp->GetStopReason()) { + case eStopReasonWatchpoint: + case eStopReasonException: + case eStopReasonExec: + case eStopReasonFork: + case eStopReasonVFork: + case eStopReasonVForkDone: + case eStopReasonSignal: + // In all these cases we want to stop in the deepest frame. + m_current_inlined_pc = curr_pc; + m_current_inlined_depth = 0; + break; + case eStopReasonBreakpoint: { + // FIXME: Figure out what this break point is doing, and set the inline + // depth appropriately. Be careful to take into account breakpoints that + // implement step over prologue, since that should do the default + // calculation. For now, if the breakpoints corresponding to this hit are + // all internal, I set the stop location to the top of the inlined stack, + // since that will make things like stepping over prologues work right. 
+ // But if there are any non-internal breakpoints I do to the bottom of the + // stack, since that was the old behavior. + uint32_t bp_site_id = stop_info_sp->GetValue(); + BreakpointSiteSP bp_site_sp( + m_thread.GetProcess()->GetBreakpointSiteList().FindByID(bp_site_id)); + bool all_internal = true; + if (bp_site_sp) { + uint32_t num_owners = bp_site_sp->GetNumberOfConstituents(); + for (uint32_t i = 0; i < num_owners; i++) { + Breakpoint &bp_ref = + bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint(); + if (!bp_ref.IsInternal()) { + all_internal = false; + } + } + } + if (!all_internal) { + m_current_inlined_pc = curr_pc; + m_current_inlined_depth = 0; + break; + } + } + [[fallthrough]]; + default: { + // Otherwise, we should set ourselves at the container of the inlining, so + // that the user can descend into them. So first we check whether we have + // more than one inlined block sharing this PC: + int num_inlined_functions = 0; + + for (Block *container_ptr = block_ptr->GetInlinedParent(); + container_ptr != nullptr; + container_ptr = container_ptr->GetInlinedParent()) { + if (!container_ptr->GetRangeContainingAddress(pc_as_address, + containing_range)) + break; + if (pc_as_address != containing_range.GetBaseAddress()) + break; - bool inlined = true; - auto inline_depth = stop_info_sp->GetSuggestedStackFrameIndex(inlined); - // We're only adjusting the inlined stack here. 
- Log *log = GetLog(LLDBLog::Step); - if (inline_depth) { - m_current_inlined_depth = *inline_depth; - m_current_inlined_pc = m_thread.GetRegisterContext()->GetPC(); - + num_inlined_functions++; + } + m_current_inlined_pc = curr_pc; + m_current_inlined_depth = num_inlined_functions + 1; + Log *log = GetLog(LLDBLog::Step); if (log && log->GetVerbose()) LLDB_LOGF(log, "ResetCurrentInlinedDepth: setting inlined " "depth: %d 0x%" PRIx64 ".\n", - m_current_inlined_depth, m_current_inlined_pc); - } else { - if (log && log->GetVerbose()) - LLDB_LOGF( - log, - "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); + m_current_inlined_depth, curr_pc); + + break; + } } } @@ -727,48 +816,19 @@ void StackFrameList::SelectMostRelevantFrame() { RecognizedStackFrameSP recognized_frame_sp = frame_sp->GetRecognizedFrame(); - if (recognized_frame_sp) { - if (StackFrameSP most_relevant_frame_sp = - recognized_frame_sp->GetMostRelevantFrame()) { - LLDB_LOG(log, "Found most relevant frame at index {0}", - most_relevant_frame_sp->GetFrameIndex()); - SetSelectedFrame(most_relevant_frame_sp.get()); - return; - } - } - LLDB_LOG(log, "Frame #0 not recognized"); - - // If this thread has a non-trivial StopInof, then let it suggest - // a most relevant frame: - StopInfoSP stop_info_sp = m_thread.GetStopInfo(); - uint32_t stack_idx = 0; - bool found_relevant = false; - if (stop_info_sp) { - // Here we're only asking the stop info if it wants to adjust the real stack - // index. We have to ask about the m_inlined_stack_depth in - // Thread::ShouldStop since the plans need to reason with that info. 
- bool inlined = false; - std::optional stack_opt = - stop_info_sp->GetSuggestedStackFrameIndex(inlined); - if (stack_opt) { - stack_idx = *stack_opt; - found_relevant = true; - } + if (!recognized_frame_sp) { + LLDB_LOG(log, "Frame #0 not recognized"); + return; } - frame_sp = GetFrameAtIndex(stack_idx); - if (!frame_sp) - LLDB_LOG(log, "Stop info suggested relevant frame {0} but it didn't exist", - stack_idx); - else if (found_relevant) - LLDB_LOG(log, "Setting selected frame from stop info to {0}", stack_idx); - // Note, we don't have to worry about "inlined" frames here, because we've - // already calculated the inlined frame in Thread::ShouldStop, and - // SetSelectedFrame will take care of that adjustment for us. - SetSelectedFrame(frame_sp.get()); - - if (!found_relevant) + if (StackFrameSP most_relevant_frame_sp = + recognized_frame_sp->GetMostRelevantFrame()) { + LLDB_LOG(log, "Found most relevant frame at index {0}", + most_relevant_frame_sp->GetFrameIndex()); + SetSelectedFrame(most_relevant_frame_sp.get()); + } else { LLDB_LOG(log, "No relevant frame!"); + } } uint32_t StackFrameList::GetSelectedFrameIndex( @@ -781,7 +841,6 @@ uint32_t StackFrameList::GetSelectedFrameIndex( // isn't set, then don't force a selection here, just return 0. 
if (!select_most_relevant) return 0; - // If the inlined stack frame is set, then use that: m_selected_frame_idx = 0; } return *m_selected_frame_idx; diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index f6387d47504e6..60aa65ed38c74 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -15,7 +15,6 @@ #include "lldb/Breakpoint/WatchpointResource.h" #include "lldb/Core/Debugger.h" #include "lldb/Expression/UserExpression.h" -#include "lldb/Symbol/Block.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -247,22 +246,6 @@ class StopInfoBreakpoint : public StopInfo { return m_description.c_str(); } - std::optional - GetSuggestedStackFrameIndex(bool inlined_stack) override { - if (!inlined_stack) - return {}; - - ThreadSP thread_sp(m_thread_wp.lock()); - if (!thread_sp) - return {}; - BreakpointSiteSP bp_site_sp( - thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); - if (!bp_site_sp) - return {}; - - return bp_site_sp->GetSuggestedStackFrameIndex(); - } - protected: bool ShouldStop(Event *event_ptr) override { // This just reports the work done by PerformAction or the synchronous @@ -1181,44 +1164,6 @@ class StopInfoTrace : public StopInfo { else return m_description.c_str(); } - - std::optional - GetSuggestedStackFrameIndex(bool inlined_stack) override { - // Trace only knows how to adjust inlined stacks: - if (!inlined_stack) - return {}; - - ThreadSP thread_sp = GetThread(); - StackFrameSP frame_0_sp = thread_sp->GetStackFrameAtIndex(0); - if (!frame_0_sp) - return {}; - if (!frame_0_sp->IsInlined()) - return {}; - Block *block_ptr = frame_0_sp->GetFrameBlock(); - if (!block_ptr) - return {}; - Address pc_address = frame_0_sp->GetFrameCodeAddress(); - AddressRange containing_range; - if (!block_ptr->GetRangeContainingAddress(pc_address, containing_range) || - pc_address != containing_range.GetBaseAddress()) - return {}; - - int 
num_inlined_functions = 0; - - for (Block *container_ptr = block_ptr->GetInlinedParent(); - container_ptr != nullptr; - container_ptr = container_ptr->GetInlinedParent()) { - if (!container_ptr->GetRangeContainingAddress(pc_address, - containing_range)) - break; - if (pc_address != containing_range.GetBaseAddress()) - break; - - num_inlined_functions++; - } - inlined_stack = true; - return num_inlined_functions + 1; - } }; // StopInfoException diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 735295e6f2593..8373cdc36268f 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -619,14 +619,6 @@ void Thread::WillStop() { void Thread::SetupForResume() { if (GetResumeState() != eStateSuspended) { - // First check whether this thread is going to "actually" resume at all. - // For instance, if we're stepping from one level to the next of an - // virtual inlined call stack, we just change the inlined call stack index - // without actually running this thread. In that case, for this thread we - // shouldn't push a step over breakpoint plan or do that work. - if (GetCurrentPlan()->IsVirtualStep()) - return; - // If we're at a breakpoint push the step-over breakpoint plan. Do this // before telling the current plan it will resume, since we might change // what the current plan is. 
diff --git a/lldb/source/Target/ThreadPlanStepInRange.cpp b/lldb/source/Target/ThreadPlanStepInRange.cpp index 325a70619908b..567dcc26d0d37 100644 --- a/lldb/source/Target/ThreadPlanStepInRange.cpp +++ b/lldb/source/Target/ThreadPlanStepInRange.cpp @@ -41,7 +41,7 @@ ThreadPlanStepInRange::ThreadPlanStepInRange( "Step Range stepping in", thread, range, addr_context, stop_others), ThreadPlanShouldStopHere(this), m_step_past_prologue(true), - m_virtual_step(eLazyBoolCalculate), m_step_into_target(step_into_target) { + m_virtual_step(false), m_step_into_target(step_into_target) { SetCallbacks(); SetFlagsToDefault(); SetupAvoidNoDebug(step_in_avoids_code_without_debug_info, @@ -149,7 +149,7 @@ bool ThreadPlanStepInRange::ShouldStop(Event *event_ptr) { m_sub_plan_sp.reset(); } - if (m_virtual_step == eLazyBoolYes) { + if (m_virtual_step) { // If we've just completed a virtual step, all we need to do is check for a // ShouldStopHere plan, and otherwise we're done. // FIXME - This can be both a step in and a step out. Probably should @@ -431,7 +431,7 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool return_value = false; - if (m_virtual_step == eLazyBoolYes) { + if (m_virtual_step) { return_value = true; } else { StopInfoSP stop_info_sp = GetPrivateStopInfo(); @@ -460,13 +460,10 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, bool current_plan) { - m_virtual_step = eLazyBoolCalculate; + m_virtual_step = false; if (resume_state == eStateStepping && current_plan) { Thread &thread = GetThread(); // See if we are about to step over a virtual inlined call. - // But if we already know we're virtual stepping, don't decrement the - // inlined depth again... 
- bool step_without_resume = thread.DecrementCurrentInlinedDepth(); if (step_without_resume) { Log *log = GetLog(LLDBLog::Step); @@ -479,20 +476,11 @@ bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, // FIXME: Maybe it would be better to create a InlineStep stop reason, but // then // the whole rest of the world would have to handle that stop reason. - m_virtual_step = eLazyBoolYes; + m_virtual_step = true; } return !step_without_resume; } return true; } -bool ThreadPlanStepInRange::IsVirtualStep() { - if (m_virtual_step == eLazyBoolCalculate) { - Thread &thread = GetThread(); - if (thread.GetCurrentInlinedDepth() == UINT32_MAX) - m_virtual_step = eLazyBoolNo; - else - m_virtual_step = eLazyBoolYes; - } - return m_virtual_step == eLazyBoolYes; -} +bool ThreadPlanStepInRange::IsVirtualStep() { return m_virtual_step; } diff --git a/lldb/source/Target/ThreadPlanStepOverRange.cpp b/lldb/source/Target/ThreadPlanStepOverRange.cpp index 643ee827c865c..ef5b4b5c434d1 100644 --- a/lldb/source/Target/ThreadPlanStepOverRange.cpp +++ b/lldb/source/Target/ThreadPlanStepOverRange.cpp @@ -402,7 +402,7 @@ bool ThreadPlanStepOverRange::DoWillResume(lldb::StateType resume_state, if (in_inlined_stack) { Log *log = GetLog(LLDBLog::Step); LLDB_LOGF(log, - "ThreadPlanStepOverRange::DoWillResume: adjusting range to " + "ThreadPlanStepInRange::DoWillResume: adjusting range to " "the frame at inlined depth %d.", thread.GetCurrentInlinedDepth()); StackFrameSP stack_sp = thread.GetStackFrameAtIndex(0); diff --git a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py index f52e0f0fd5bcf..752c3a9cbd286 100644 --- a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py +++ b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py @@ -32,12 +32,6 @@ def test_step_in_template_with_python_api(self): self.build() self.step_in_template() - 
@add_test_categories(["pyapi"]) - def test_virtual_inline_stepping(self): - """Test stepping through a virtual inlined call stack""" - self.build() - self.virtual_inline_stepping() - def setUp(self): # Call super's setUp(). TestBase.setUp(self) @@ -363,60 +357,3 @@ def step_in_template(self): step_sequence = [["// In max_value specialized", "into"]] self.run_step_sequence(step_sequence) - - def run_to_call_site_and_step(self, source_regex, func_name, start_pos): - main_spec = lldb.SBFileSpec("calling.cpp") - # Set the breakpoint by file and line, not sourced regex because - # we want to make sure we can set breakpoints on call sites: - call_site_line_num = line_number(self.main_source, source_regex) - target, process, thread, bkpt = lldbutil.run_to_line_breakpoint( - self, main_spec, call_site_line_num - ) - - # Make sure that the location is at the call site (run_to_line_breakpoint already asserted - # that there's one location.): - bkpt_loc = bkpt.location[0] - strm = lldb.SBStream() - result = bkpt_loc.GetDescription(strm, lldb.eDescriptionLevelFull) - - self.assertTrue(result, "Got a location description") - desc = strm.GetData() - self.assertIn(f"calling.cpp:{call_site_line_num}", desc, "Right line listed") - # We don't get the function name right yet - so we omit it in printing. - # Turn on this test when that is working. - # self.assertIn(func_name, desc, "Right function listed") - - pc = thread.frame[0].pc - for i in range(start_pos, 3): - thread.StepInto() - frame_0 = thread.frame[0] - - trivial_line_num = line_number( - self.main_source, f"In caller_trivial_inline_{i}." 
- ) - self.assertEqual( - frame_0.line_entry.line, - trivial_line_num, - f"Stepped into the caller_trivial_inline_{i}", - ) - if pc != frame_0.pc: - # If we get here, we stepped to the expected line number, but - # the compiler on this system has decided to insert an instruction - # between the call site of an inlined function with no arguments, - # returning void, and its immediate call to another void inlined function - # with no arguments. We aren't going to be testing virtual inline - # stepping for this function... - break - - process.Kill() - target.Clear() - - def virtual_inline_stepping(self): - """Use the Python API's to step through a virtual inlined stack""" - self.run_to_call_site_and_step("At caller_trivial_inline_1", "main", 1) - self.run_to_call_site_and_step( - "In caller_trivial_inline_1", "caller_trivial_inline_1", 2 - ) - self.run_to_call_site_and_step( - "In caller_trivial_inline_2", "caller_trivial_inline_2", 3 - ) diff --git a/lldb/test/API/functionalities/inline-stepping/calling.cpp b/lldb/test/API/functionalities/inline-stepping/calling.cpp index d7ee56b3c0790..49179ce7c9788 100644 --- a/lldb/test/API/functionalities/inline-stepping/calling.cpp +++ b/lldb/test/API/functionalities/inline-stepping/calling.cpp @@ -13,12 +13,6 @@ int called_by_inline_ref (int &value); inline void inline_trivial_1 () __attribute__((always_inline)); inline void inline_trivial_2 () __attribute__((always_inline)); -// These three should share the same initial pc so we can test -// virtual inline stepping. -inline void caller_trivial_inline_1() __attribute__((always_inline)); -inline void caller_trivial_inline_2() __attribute__((always_inline)); -inline void caller_trivial_inline_3() __attribute__((always_inline)); - void caller_trivial_1 (); void caller_trivial_2 (); @@ -85,23 +79,6 @@ caller_trivial_2 () inline_value += 1; // At increment in caller_trivial_2. 
} -// When you call caller_trivial_inline_1, the inlined call-site -// should share a PC with all three of the following inlined -// functions, so we can exercise "virtual inline stepping". -void caller_trivial_inline_1() { - caller_trivial_inline_2(); // In caller_trivial_inline_1. - inline_value += 1; -} - -void caller_trivial_inline_2() { - caller_trivial_inline_3(); // In caller_trivial_inline_2. - inline_value += 1; -} - -void caller_trivial_inline_3() { - inline_value += 1; // In caller_trivial_inline_3. -} - void called_by_inline_trivial () { @@ -155,7 +132,5 @@ main (int argc, char **argv) max_value(123, 456); // Call max_value template max_value(std::string("abc"), std::string("0022")); // Call max_value specialized - caller_trivial_inline_1(); // At caller_trivial_inline_1. - return 0; // About to return from main. } diff --git a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp index 60d4c3bc293a3..97908b4acaf28 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp +++ b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp @@ -208,7 +208,8 @@ std::vector MachVMRegion::GetMemoryTypes() const { m_data.user_tag == VM_MEMORY_MALLOC_LARGE_REUSABLE || m_data.user_tag == VM_MEMORY_MALLOC_HUGE || m_data.user_tag == VM_MEMORY_REALLOC || - m_data.user_tag == VM_MEMORY_SBRK) { + m_data.user_tag == VM_MEMORY_SBRK || + m_data.user_tag == VM_MEMORY_SANITIZER) { types.push_back("heap"); if (m_data.user_tag == VM_MEMORY_MALLOC_TINY) { types.push_back("malloc-tiny"); diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index ac34ddafc5e72..2a890905dc632 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -503,7 +503,7 @@ struct DependT { using LocatorList = ObjectListT; using TaskDependenceType = tomp::type::TaskDependenceType; - struct WithLocators { // Modern form + struct DepType { // The 
form with task dependence type. using TupleTrait = std::true_type; // Empty LocatorList means "omp_all_memory". std::tuple t; @@ -511,7 +511,7 @@ struct DependT { using Doacross = DoacrossT; using UnionTrait = std::true_type; - std::variant u; // Doacross form is legacy + std::variant u; // Doacross form is legacy }; // V5.2: [3.5] `destroy` clause diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index 902d1305c818a..d12bc260f5cf4 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -135,10 +135,14 @@ template <> struct MappingTraits { } }; -struct FunctionSummaryYaml { +struct GlobalValueSummaryYaml { + // Commonly used fields unsigned Linkage, Visibility; bool NotEligibleToImport, Live, IsLocal, CanAutoHide; unsigned ImportType; + // Fields for AliasSummary + std::optional Aliasee; + // Fields for FunctionSummary std::vector Refs; std::vector TypeTests; std::vector TypeTestAssumeVCalls, @@ -176,8 +180,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummary::ConstVCall) namespace llvm { namespace yaml { -template <> struct MappingTraits { - static void mapping(IO &io, FunctionSummaryYaml& summary) { +template <> struct MappingTraits { + static void mapping(IO &io, GlobalValueSummaryYaml &summary) { io.mapOptional("Linkage", summary.Linkage); io.mapOptional("Visibility", summary.Visibility); io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); @@ -185,6 +189,7 @@ template <> struct MappingTraits { io.mapOptional("Local", summary.IsLocal); io.mapOptional("CanAutoHide", summary.CanAutoHide); io.mapOptional("ImportType", summary.ImportType); + io.mapOptional("Aliasee", summary.Aliasee); io.mapOptional("Refs", summary.Refs); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); @@ -199,7 +204,7 @@ template <> struct MappingTraits { } // End yaml namespace } // End llvm namespace 
-LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml) +LLVM_YAML_IS_SEQUENCE_VECTOR(GlobalValueSummaryYaml) namespace llvm { namespace yaml { @@ -207,61 +212,99 @@ namespace yaml { // FIXME: Add YAML mappings for the rest of the module summary. template <> struct CustomMappingTraits { static void inputOne(IO &io, StringRef Key, GlobalValueSummaryMapTy &V) { - std::vector FSums; - io.mapRequired(Key.str().c_str(), FSums); + std::vector GVSums; + io.mapRequired(Key.str().c_str(), GVSums); uint64_t KeyInt; if (Key.getAsInteger(0, KeyInt)) { io.setError("key not an integer"); return; } auto &Elem = V.try_emplace(KeyInt, /*IsAnalysis=*/false).first->second; - for (auto &FSum : FSums) { + for (auto &GVSum : GVSums) { + GlobalValueSummary::GVFlags GVFlags( + static_cast(GVSum.Linkage), + static_cast(GVSum.Visibility), + GVSum.NotEligibleToImport, GVSum.Live, GVSum.IsLocal, + GVSum.CanAutoHide, + static_cast(GVSum.ImportType)); + if (GVSum.Aliasee) { + auto ASum = std::make_unique(GVFlags); + if (!V.count(*GVSum.Aliasee)) + V.emplace(*GVSum.Aliasee, /*IsAnalysis=*/false); + ValueInfo AliaseeVI(/*IsAnalysis=*/false, &*V.find(*GVSum.Aliasee)); + // Note: Aliasee cannot be filled until all summaries are loaded. + // This is done in fixAliaseeLinks() which is called in + // MappingTraits::mapping(). 
+ ASum->setAliasee(AliaseeVI, /*Aliasee=*/nullptr); + Elem.SummaryList.push_back(std::move(ASum)); + continue; + } SmallVector Refs; - Refs.reserve(FSum.Refs.size()); - for (auto &RefGUID : FSum.Refs) { + Refs.reserve(GVSum.Refs.size()); + for (auto &RefGUID : GVSum.Refs) { auto It = V.try_emplace(RefGUID, /*IsAnalysis=*/false).first; Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*It)); } Elem.SummaryList.push_back(std::make_unique( - GlobalValueSummary::GVFlags( - static_cast(FSum.Linkage), - static_cast(FSum.Visibility), - FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, - FSum.CanAutoHide, - static_cast(FSum.ImportType)), - /*NumInsts=*/0, FunctionSummary::FFlags{}, std::move(Refs), - SmallVector{}, std::move(FSum.TypeTests), - std::move(FSum.TypeTestAssumeVCalls), - std::move(FSum.TypeCheckedLoadVCalls), - std::move(FSum.TypeTestAssumeConstVCalls), - std::move(FSum.TypeCheckedLoadConstVCalls), + GVFlags, /*NumInsts=*/0, FunctionSummary::FFlags{}, std::move(Refs), + SmallVector{}, std::move(GVSum.TypeTests), + std::move(GVSum.TypeTestAssumeVCalls), + std::move(GVSum.TypeCheckedLoadVCalls), + std::move(GVSum.TypeTestAssumeConstVCalls), + std::move(GVSum.TypeCheckedLoadConstVCalls), ArrayRef{}, ArrayRef{}, ArrayRef{})); } } static void output(IO &io, GlobalValueSummaryMapTy &V) { for (auto &P : V) { - std::vector FSums; + std::vector GVSums; for (auto &Sum : P.second.SummaryList) { if (auto *FSum = dyn_cast(Sum.get())) { std::vector Refs; Refs.reserve(FSum->refs().size()); for (auto &VI : FSum->refs()) Refs.push_back(VI.getGUID()); - FSums.push_back(FunctionSummaryYaml{ + GVSums.push_back(GlobalValueSummaryYaml{ FSum->flags().Linkage, FSum->flags().Visibility, static_cast(FSum->flags().NotEligibleToImport), static_cast(FSum->flags().Live), static_cast(FSum->flags().DSOLocal), static_cast(FSum->flags().CanAutoHide), - FSum->flags().ImportType, Refs, FSum->type_tests(), - FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), + 
FSum->flags().ImportType, /*Aliasee=*/std::nullopt, Refs, + FSum->type_tests(), FSum->type_test_assume_vcalls(), + FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); - } + } else if (auto *ASum = dyn_cast(Sum.get()); + ASum && ASum->hasAliasee()) { + GVSums.push_back(GlobalValueSummaryYaml{ + ASum->flags().Linkage, ASum->flags().Visibility, + static_cast(ASum->flags().NotEligibleToImport), + static_cast(ASum->flags().Live), + static_cast(ASum->flags().DSOLocal), + static_cast(ASum->flags().CanAutoHide), + ASum->flags().ImportType, + /*Aliasee=*/ASum->getAliaseeGUID()}); + } + } + if (!GVSums.empty()) + io.mapRequired(llvm::utostr(P.first).c_str(), GVSums); + } + } + static void fixAliaseeLinks(GlobalValueSummaryMapTy &V) { + for (auto &P : V) { + for (auto &Sum : P.second.SummaryList) { + if (auto *Alias = dyn_cast(Sum.get())) { + ValueInfo AliaseeVI = Alias->getAliaseeVI(); + auto AliaseeSL = AliaseeVI.getSummaryList(); + if (AliaseeSL.empty()) { + ValueInfo EmptyVI; + Alias->setAliasee(EmptyVI, nullptr); + } else + Alias->setAliasee(AliaseeVI, AliaseeSL[0].get()); + } } - if (!FSums.empty()) - io.mapRequired(llvm::utostr(P.first).c_str(), FSums); } } }; @@ -281,6 +324,9 @@ template <> struct CustomMappingTraits { template <> struct MappingTraits { static void mapping(IO &io, ModuleSummaryIndex& index) { io.mapOptional("GlobalValueMap", index.GlobalValueMap); + if (!io.outputting()) + CustomMappingTraits::fixAliaseeLinks( + index.GlobalValueMap); io.mapOptional("TypeIdMap", index.TypeIdMap); io.mapOptional("WithGlobalValueDeadStripping", index.WithGlobalValueDeadStripping); diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index d4b0b54375b02..49dcec26dbc55 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ 
b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -31,6 +31,7 @@ enum class ResultReason { NotInstructions, DiffOpcodes, DiffTypes, + DiffMathFlags, }; #ifndef NDEBUG @@ -53,6 +54,8 @@ struct ToStr { return "DiffOpcodes"; case ResultReason::DiffTypes: return "DiffTypes"; + case ResultReason::DiffMathFlags: + return "DiffMathFlags"; } llvm_unreachable("Unknown ResultReason enum"); } diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index 64f57edb38484..9577e8ef7b37c 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -12,7 +12,11 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H #define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H -class Utils { +#include "llvm/SandboxIR/Type.h" + +namespace llvm::sandboxir { + +class VecUtils { public: /// \Returns the number of elements in \p Ty. That is the number of lanes if a /// fixed vector or 1 if scalar. ScalableVectors have unknown size and @@ -25,6 +29,8 @@ class Utils { static Type *getElementType(Type *Ty) { return Ty->isVectorTy() ? 
cast(Ty)->getElementType() : Ty; } -} +}; + +} // namespace llvm::sandboxir -#endif LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index cbf38f2c57a35..6c874fcabcc30 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -19,6 +19,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dfaa36f7f512d..9af6429c5caee 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -212,6 +212,7 @@ #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 22d632ed1e5f8..9d7051239ef99 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19,6 +19,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -10083,9 +10084,9 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, // Thus, it's only used for ptrauth references to extern_weak to avoid null // checks. 
-SDValue AArch64TargetLowering::LowerPtrAuthGlobalAddressStatically( +static SDValue LowerPtrAuthGlobalAddressStatically( SDValue TGA, SDLoc DL, EVT VT, AArch64PACKey::ID KeyC, - SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG) const { + SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG) { const auto *TGN = cast(TGA.getNode()); assert(TGN->getGlobal()->hasExternalWeakLinkage()); @@ -27575,6 +27576,22 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { return TargetLowering::getSafeStackPointerLocation(IRB); } +/// If a physical register, this returns the register that receives the +/// exception address on entry to an EH pad. +Register AArch64TargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + // FIXME: This is a guess. Has this been defined yet? + return AArch64::X0; +} + +/// If a physical register, this returns the register that receives the +/// exception typeid on entry to a landing pad. +Register AArch64TargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // FIXME: This is a guess. Has this been defined yet? 
+ return AArch64::X1; +} + bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial( const Instruction &AndI) const { // Only sink 'and' mask to cmp use block if it is masking a single bit, since diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 160cd18ca53b3..d696355bb062a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -14,8 +14,6 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H -#include "AArch64.h" -#include "Utils/AArch64SMEAttributes.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -830,18 +828,12 @@ class AArch64TargetLowering : public TargetLowering { /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. Register - getExceptionPointerRegister(const Constant *PersonalityFn) const override { - // FIXME: This is a guess. Has this been defined yet? - return AArch64::X0; - } + getExceptionPointerRegister(const Constant *PersonalityFn) const override; /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. Register - getExceptionSelectorRegister(const Constant *PersonalityFn) const override { - // FIXME: This is a guess. Has this been defined yet? 
- return AArch64::X1; - } + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; @@ -1132,11 +1124,6 @@ class AArch64TargetLowering : public TargetLowering { SelectionDAG &DAG) const; SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT, - AArch64PACKey::ID Key, - SDValue Discriminator, - SDValue AddrDiscriminator, - SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 19ef6f4fb32e7..525538db8036c 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "AArch64TargetMachine.h" +#include "Utils/AArch64SMEAttributes.h" + using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ff3c69f7e10c6..71f9bbbbc3504 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -10,6 +10,7 @@ #include "AArch64ExpandImm.h" #include "AArch64PerfectShuffle.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 6cbfb018b3183..065858c428944 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -18,6 +18,7 @@ #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ObjCARCUtil.h" diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index dd65dbe594a63..6024027afaf6c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1536,6 +1536,14 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { + auto LowerBinOp = [&MI](unsigned Opcode) { + MachineIRBuilder MIB(MI); + MIB.buildInstr(Opcode, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3)}); + MI.eraseFromParent(); + return true; + }; + Intrinsic::ID IntrinsicID = cast(MI).getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::vacopy: { @@ -1675,37 +1683,25 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return true; } case Intrinsic::aarch64_neon_smax: + return LowerBinOp(TargetOpcode::G_SMAX); case Intrinsic::aarch64_neon_smin: + return LowerBinOp(TargetOpcode::G_SMIN); case Intrinsic::aarch64_neon_umax: + return LowerBinOp(TargetOpcode::G_UMAX); case Intrinsic::aarch64_neon_umin: + return LowerBinOp(TargetOpcode::G_UMIN); case Intrinsic::aarch64_neon_fmax: + return LowerBinOp(TargetOpcode::G_FMAXIMUM); case Intrinsic::aarch64_neon_fmin: + return LowerBinOp(TargetOpcode::G_FMINIMUM); case Intrinsic::aarch64_neon_fmaxnm: - case Intrinsic::aarch64_neon_fminnm: { - MachineIRBuilder MIB(MI); - if (IntrinsicID == Intrinsic::aarch64_neon_smax) - 
MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_smin) - MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_umax) - MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_umin) - MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_fmax) - MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - else if (IntrinsicID == Intrinsic::aarch64_neon_fmin) - MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm) - MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm) - MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - MI.eraseFromParent(); - return true; - } + return LowerBinOp(TargetOpcode::G_FMAXNUM); + case Intrinsic::aarch64_neon_fminnm: + return LowerBinOp(TargetOpcode::G_FMINNUM); + case Intrinsic::aarch64_neon_smull: + return LowerBinOp(AArch64::G_UMULL); + case Intrinsic::aarch64_neon_umull: + return LowerBinOp(AArch64::G_SMULL); case Intrinsic::vector_reverse: // TODO: Add support for vector_reverse return false; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 147b32b1ca990..68ae5de06423c 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -47,6 +47,7 @@ def ResRetInt32Ty : DXILOpParamType; def HandleTy : DXILOpParamType; def ResBindTy : DXILOpParamType; def ResPropsTy : DXILOpParamType; +def SplitDoubleTy : DXILOpParamType; class DXILOpClass; @@ -779,6 +780,15 @@ def FlattenedThreadIdInGroup : DXILOp<96, 
flattenedThreadIdInGroup> { let attributes = [Attributes]; } +def SplitDouble : DXILOp<102, splitDouble> { + let Doc = "Splits a double into 2 uints"; + let arguments = [OverloadTy]; + let result = SplitDoubleTy; + let overloads = [Overloads]; + let stages = [Stages]; + let attributes = [Attributes]; +} + def AnnotateHandle : DXILOp<217, annotateHandle> { let Doc = "annotate handle with resource properties"; let arguments = [HandleTy, ResPropsTy]; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 7719d6b107911..5d5bb3eacace2 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -229,6 +229,13 @@ static StructType *getResPropsType(LLVMContext &Context) { return StructType::create({Int32Ty, Int32Ty}, "dx.types.ResourceProperties"); } +static StructType *getSplitDoubleType(LLVMContext &Context) { + if (auto *ST = StructType::getTypeByName(Context, "dx.types.splitdouble")) + return ST; + Type *Int32Ty = Type::getInt32Ty(Context); + return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble"); +} + static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, Type *OverloadTy) { switch (Kind) { @@ -266,6 +273,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getResBindType(Ctx); case OpParamType::ResPropsTy: return getResPropsType(Ctx); + case OpParamType::SplitDoubleTy: + return getSplitDoubleType(Ctx); } llvm_unreachable("Invalid parameter kind"); return nullptr; @@ -467,6 +476,10 @@ StructType *DXILOpBuilder::getResRetType(Type *ElementTy) { return ::getResRetType(ElementTy); } +StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) { + return ::getSplitDoubleType(Context); +} + StructType *DXILOpBuilder::getHandleType() { return ::getHandleType(IRB.getContext()); } diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h index 037ae3822cfb9..df5a0240870f4 
100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.h +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h @@ -49,6 +49,10 @@ class DXILOpBuilder { /// Get a `%dx.types.ResRet` type with the given element type. StructType *getResRetType(Type *ElementTy); + + /// Get the `%dx.types.splitdouble` type. + StructType *getSplitDoubleType(LLVMContext &Context); + /// Get the `%dx.types.Handle` type. StructType *getHandleType(); diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index c62ba8c21d679..f7722d7707476 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/Module.h" @@ -128,6 +129,30 @@ class OpLowerer { }); } + [[nodiscard]] bool replaceFunctionWithNamedStructOp( + Function &F, dxil::OpCode DXILOp, Type *NewRetTy, + llvm::function_ref ReplaceUses) { + bool IsVectorArgExpansion = isVectorArgExpansion(F); + return replaceFunction(F, [&](CallInst *CI) -> Error { + SmallVector Args; + OpBuilder.getIRB().SetInsertPoint(CI); + if (IsVectorArgExpansion) { + SmallVector NewArgs = argVectorFlatten(CI, OpBuilder.getIRB()); + Args.append(NewArgs.begin(), NewArgs.end()); + } else + Args.append(CI->arg_begin(), CI->arg_end()); + + Expected OpCall = + OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), NewRetTy); + if (Error E = OpCall.takeError()) + return E; + if (Error E = ReplaceUses(CI, *OpCall)) + return E; + + return Error::success(); + }); + } + /// Create a cast between a `target("dx")` type and `dx.types.Handle`, which /// is intended to be removed by the end of lowering. 
This is used to allow /// lowering of ops which need to change their return or argument types in a @@ -263,6 +288,26 @@ class OpLowerer { return lowerToBindAndAnnotateHandle(F); } + Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) { + for (Use &U : make_early_inc_range(Intrin->uses())) { + if (auto *EVI = dyn_cast(U.getUser())) { + + if (EVI->getNumIndices() != 1) + return createStringError(std::errc::invalid_argument, + "Splitdouble has only 2 elements"); + EVI->setOperand(0, Op); + } else { + return make_error( + "Splitdouble use is not ExtractValueInst", + inconvertibleErrorCode()); + } + } + + Intrin->eraseFromParent(); + + return Error::success(); + } + /// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op. /// Since we expect to be post-scalarization, make an effort to avoid vectors. Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) { @@ -488,6 +533,16 @@ class OpLowerer { case Intrinsic::dx_typedBufferStore: HasErrors |= lowerTypedBufferStore(F); break; + // TODO: this can be removed when + // https://github.com/llvm/llvm-project/issues/113192 is fixed + case Intrinsic::dx_splitdouble: + HasErrors |= replaceFunctionWithNamedStructOp( + F, OpCode::SplitDouble, + OpBuilder.getSplitDoubleType(M.getContext()), + [&](CallInst *CI, CallInst *Op) { + return replaceSplitDoubleCallUsages(CI, Op); + }); + break; } Updated = true; } diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 3fcfc6a876776..6ba371069bb23 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -2083,8 +2083,12 @@ bool LowerTypeTestsModule::lower() { for (auto &I : *ExportSummary) for (auto &GVS : I.second.SummaryList) if (GVS->isLive()) - for (const auto &Ref : GVS->refs()) + for (const auto &Ref : GVS->refs()) { AddressTaken.insert(Ref.getGUID()); + for (auto &RefGVS : Ref.getSummaryList()) + if (auto Alias = 
dyn_cast(RefGVS.get())) + AddressTaken.insert(Alias->getAliaseeGUID()); + } NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); if (CfiFunctionsMD) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 88086f24dfdce..778d928252e05 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7562,67 +7562,62 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) { } } -// Check if \p RedResult is a ComputeReductionResult instruction, and if it is -// create a merge phi node for it. -static void createAndCollectMergePhiForReduction( - VPInstruction *RedResult, - VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock, - bool VectorizingEpilogue) { - if (!RedResult || - RedResult->getOpcode() != VPInstruction::ComputeReductionResult) +// If \p R is a ComputeReductionResult when vectorizing the epilog loop, +// fix the reduction's scalar PHI node by adding the incoming value from the +// main vector loop. 
+static void fixReductionScalarResumeWhenVectorizingEpilog( + VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) { + auto *EpiRedResult = dyn_cast(R); + if (!EpiRedResult || + EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult) return; - auto *PhiR = cast(RedResult->getOperand(0)); - const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - - Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane())); - auto *ResumePhi = - dyn_cast(PhiR->getStartValue()->getUnderlyingValue()); - if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Cmp = cast(PhiR->getStartValue()->getUnderlyingValue()); - assert(Cmp->getPredicate() == CmpInst::ICMP_NE); - assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue()); - ResumePhi = cast(Cmp->getOperand(0)); - } - assert((!VectorizingEpilogue || ResumePhi) && - "when vectorizing the epilogue loop, we need a resume phi from main " - "vector loop"); - - // TODO: bc.merge.rdx should not be created here, instead it should be - // modeled in VPlan. - BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader(); - // Create a phi node that merges control-flow from the backedge-taken check - // block and the middle block. - auto *BCBlockPhi = - PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx", - LoopScalarPreHeader->getTerminator()->getIterator()); - - // If we are fixing reductions in the epilogue loop then we should already - // have created a bc.merge.rdx Phi after the main vector body. Ensure that - // we carry over the incoming values correctly. 
+ auto *EpiRedHeaderPhi = + cast(EpiRedResult->getOperand(0)); + const RecurrenceDescriptor &RdxDesc = + EpiRedHeaderPhi->getRecurrenceDescriptor(); + Value *MainResumeValue = + EpiRedHeaderPhi->getStartValue()->getUnderlyingValue(); + if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + auto *Cmp = cast(MainResumeValue); + assert(Cmp->getPredicate() == CmpInst::ICMP_NE && + "AnyOf expected to start with ICMP_NE"); + assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() && + "AnyOf expected to start by comparing main resume value to original " + "start value"); + MainResumeValue = Cmp->getOperand(0); + } + PHINode *MainResumePhi = cast(MainResumeValue); + + // When fixing reductions in the epilogue loop we should already have + // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry + // over the incoming values correctly. + using namespace VPlanPatternMatch; + auto IsResumePhi = [](VPUser *U) { + return match( + U, m_VPInstruction(m_VPValue(), m_VPValue())); + }; + assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 && + "ResumePhi must have a single user"); + auto *EpiResumePhiVPI = + cast(*find_if(EpiRedResult->users(), IsResumePhi)); + auto *EpiResumePhi = cast(State.get(EpiResumePhiVPI, true)); + BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent(); + bool Updated = false; for (auto *Incoming : predecessors(LoopScalarPreHeader)) { - if (Incoming == LoopMiddleBlock) - BCBlockPhi->addIncoming(FinalValue, Incoming); - else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming)) - BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming), - Incoming); - else - BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming); + if (is_contained(MainResumePhi->blocks(), Incoming)) { + assert(EpiResumePhi->getIncomingValueForBlock(Incoming) == + RdxDesc.getRecurrenceStartValue() && + "Trying to reset unexpected value"); + assert(!Updated && "Should update at most 1 
incoming value"); + EpiResumePhi->setIncomingValueForBlock( + Incoming, MainResumePhi->getIncomingValueForBlock(Incoming)); + Updated = true; + } } - - auto *OrigPhi = cast(PhiR->getUnderlyingValue()); - // TODO: This fixup should instead be modeled in VPlan. - // Fix the scalar loop reduction variable with the incoming reduction sum - // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = - OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch()); - assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); - // Pick the other block. - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); - OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); - Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); - OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); + assert(Updated && "Must update EpiResumePhi."); + (void)Updated; } DenseMap LoopVectorizationPlanner::executePlan( @@ -7713,11 +7708,11 @@ DenseMap LoopVectorizationPlanner::executePlan( // 2.5 Collect reduction resume values. auto *ExitVPBB = cast(BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); - for (VPRecipeBase &R : *ExitVPBB) { - createAndCollectMergePhiForReduction( - dyn_cast(&R), State, OrigLoop, - State.CFG.VPBB2IRBB[ExitVPBB], VectorizingEpilogue); - } + if (VectorizingEpilogue) + for (VPRecipeBase &R : *ExitVPBB) { + fixReductionScalarResumeWhenVectorizingEpilog( + &R, State, State.CFG.VPBB2IRBB[ExitVPBB]); + } // 2.6. Maintain Loop Hints // Keep all loop hints from the original loop on the vector loop (we'll @@ -9518,6 +9513,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( }); FinalReductionResult->insertBefore(*MiddleVPBB, IP); + // Order is strict: if there are multiple successors, the first is the exit + // block, second is the scalar preheader. 
+ VPBasicBlock *ScalarPHVPBB = + cast(MiddleVPBB->getSuccessors().back()); + VPBuilder ScalarPHBuilder(ScalarPHVPBB); + auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( + VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()}, + {}, "bc.merge.rdx"); + auto *RedPhi = cast(PhiR->getUnderlyingInstr()); + Plan->addLiveOut(RedPhi, ResumePhiRecipe); + // Adjust AnyOf reductions; replace the reduction phi for the selected value // with a boolean reduction phi node to check if the condition is true in // any iteration. The final value is selected by the final diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index e4546c2f98113..346d8a90589f5 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -8,9 +8,11 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" #include "llvm/SandboxIR/Instruction.h" +#include "llvm/SandboxIR/Operator.h" #include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" namespace llvm::sandboxir { @@ -26,7 +28,35 @@ void LegalityResult::dump() const { std::optional LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( ArrayRef Bndl) { - // TODO: Unimplemented. + auto *I0 = cast(Bndl[0]); + auto Opcode = I0->getOpcode(); + // If they have different opcodes, then we cannot form a vector (for now). + if (any_of(drop_begin(Bndl), [Opcode](Value *V) { + return cast(V)->getOpcode() != Opcode; + })) + return ResultReason::DiffOpcodes; + + // If not the same scalar type, Pack. This will accept scalars and vectors as + // long as the element type is the same. 
+ Type *ElmTy0 = VecUtils::getElementType(Utils::getExpectedType(I0)); + if (any_of(drop_begin(Bndl), [ElmTy0](Value *V) { + return VecUtils::getElementType(Utils::getExpectedType(V)) != ElmTy0; + })) + return ResultReason::DiffTypes; + + // TODO: Allow vectorization of instrs with different flags as long as we + // change them to the least common one. + // For now pack if different FastMathFlags. + if (isa(I0)) { + FastMathFlags FMF0 = cast(Bndl[0])->getFastMathFlags(); + if (any_of(drop_begin(Bndl), [FMF0](auto *V) { + return cast(V)->getFastMathFlags() != FMF0; + })) + return ResultReason::DiffMathFlags; + } + + // TODO: Missing checks + return std::nullopt; } diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index d677526bab000..11397703b4442 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -4,16 +4,7 @@ ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v4i32_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v4i32_uzp1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl_smlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl_umlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl_smlsl2_v4i32_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl_umlsl2_v4i32_uzp1 -;
CHECK-GI-NEXT: warning: Instruction selection used fallback path for do_stuff define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull_v8i8_v8i16: @@ -2025,13 +2016,30 @@ define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { } define void @smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: smlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: smlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: smlsl2 v1.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: smlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: umlsl v1.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -2043,13 +2051,30 @@ define void @smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { } define void @umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: umlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: umlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: 
uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: smlsl v1.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -2061,13 +2086,30 @@ define void @umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { } define void @smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { -; CHECK-LABEL: smlsl2_v4i32_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.8h, v0.8h, v2.8h -; CHECK-NEXT: smlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-NEON-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-SVE-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: umlsl v1.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <4 x 
i32>, ptr %5, align 4 %7 = trunc <4 x i32> %6 to <4 x i16> @@ -2079,13 +2121,30 @@ define void @smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { } define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { -; CHECK-LABEL: umlsl2_v4i32_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.8h, v0.8h, v2.8h -; CHECK-NEXT: umlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-NEON-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-SVE-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: smlsl v1.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <4 x i32>, ptr %5, align 4 %7 = trunc <4 x i32> %6 to <4 x i16> @@ -2124,14 +2183,35 @@ entry: } define void @smlsl_smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: smlsl_smlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: smlsl v1.8h, v0.8b, v2.8b -; CHECK-NEXT: smlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl_smlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: smlsl v1.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: smlsl2 v1.8h, v0.16b, 
v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl_smlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: smlsl v1.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: smlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl_smlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v4.8b, v4.8h +; CHECK-GI-NEXT: umull v2.8h, v3.8b, v2.8b +; CHECK-GI-NEXT: umlal v2.8h, v0.8b, v4.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> @@ -2149,14 +2229,35 @@ entry: } define void @umlsl_umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: umlsl_umlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: umlsl v1.8h, v0.8b, v2.8b -; CHECK-NEXT: umlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl_umlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: umlsl v1.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl_umlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: umlsl v1.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_umlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, 
q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v4.8b, v4.8h +; CHECK-GI-NEXT: smull v2.8h, v3.8b, v2.8b +; CHECK-GI-NEXT: smlal v2.8h, v0.8b, v4.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> @@ -2174,14 +2275,35 @@ entry: } define void @smlsl_smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: smlsl_smlsl2_v4i32_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-NEXT: smlsl v1.4s, v0.4h, v2.4h -; CHECK-NEXT: smlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl_smlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEON-NEXT: smlsl v1.4s, v0.4h, v2.4h +; CHECK-NEON-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl_smlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-SVE-NEXT: smlsl v1.4s, v0.4h, v2.4h +; CHECK-SVE-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl_smlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: xtn v4.4h, v4.4s +; CHECK-GI-NEXT: umull v2.4s, v3.4h, v2.4h +; CHECK-GI-NEXT: umlal v2.4s, v0.4h, v4.4h +; CHECK-GI-NEXT: sub v0.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <4 x i32>, ptr %3, align 4 %6 = trunc <4 x i32> %5 to <4 x i16> @@ -2199,14 +2321,35 @@ entry: } define void @umlsl_umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3, i32 %4) 
{ -; CHECK-LABEL: umlsl_umlsl2_v4i32_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-NEXT: umlsl v1.4s, v0.4h, v2.4h -; CHECK-NEXT: umlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl_umlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEON-NEXT: umlsl v1.4s, v0.4h, v2.4h +; CHECK-NEON-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl_umlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-SVE-NEXT: umlsl v1.4s, v0.4h, v2.4h +; CHECK-SVE-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_umlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: xtn v4.4h, v4.4s +; CHECK-GI-NEXT: smull v2.4s, v3.4h, v2.4h +; CHECK-GI-NEXT: smlal v2.4s, v0.4h, v4.4h +; CHECK-GI-NEXT: sub v0.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <4 x i32>, ptr %3, align 4 %6 = trunc <4 x i32> %5 to <4 x i16> @@ -2224,13 +2367,31 @@ entry: } define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) { -; CHECK-LABEL: do_stuff: -; CHECK: // %bb.0: -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: smull2 v0.2d, v1.4s, v0.4s -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: do_stuff: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEON-NEXT: smull2 v0.2d, v1.4s, v0.4s +; CHECK-NEON-NEXT: xtn v0.2s, v0.2d +; CHECK-NEON-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: do_stuff: +; CHECK-SVE: // %bb.0: +; 
CHECK-SVE-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-SVE-NEXT: smull2 v0.2d, v1.4s, v0.4s +; CHECK-SVE-NEXT: xtn v0.2s, v0.2d +; CHECK-SVE-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: do_stuff: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: xtn v0.2s, v0.2d +; CHECK-GI-NEXT: ext v2.16b, v1.16b, v2.16b, #8 +; CHECK-GI-NEXT: umull v0.2d, v2.2s, v0.2s +; CHECK-GI-NEXT: xtn v0.2s, v0.2d +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %bc.1 = bitcast <2 x i64> %1 to <4 x i32> %trunc.0 = trunc <2 x i64> %0 to <2 x i32> %shuff.hi = shufflevector <4 x i32> %bc.1, <4 x i32> zeroinitializer, <2 x i32> diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll deleted file mode 100644 index 759590fa56279..0000000000000 --- a/llvm/test/CodeGen/DirectX/split-double.ll +++ /dev/null @@ -1,45 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s - -define void @test_vector_double_split_void(<2 x double> noundef %d) { -; CHECK-LABEL: define void @test_vector_double_split_void( -; CHECK-SAME: <2 x double> noundef [[D:%.*]]) { -; CHECK-NEXT: [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0 -; CHECK-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) -; CHECK-NEXT: [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1 -; CHECK-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) -; CHECK-NEXT: ret void -; - %hlsl.asuint = call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> %d) - ret void -} - -define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) { -; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split( -; CHECK-SAME: <3 x double> noundef [[D:%.*]]) { -; CHECK-NEXT: 
[[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0 -; CHECK-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) -; CHECK-NEXT: [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1 -; CHECK-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) -; CHECK-NEXT: [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2 -; CHECK-NEXT: [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]]) -; CHECK-NEXT: [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0 -; CHECK-NEXT: [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0 -; CHECK-NEXT: [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0 -; CHECK-NEXT: [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1 -; CHECK-NEXT: [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1 -; CHECK-NEXT: [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1 -; CHECK-NEXT: [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]] -; CHECK-NEXT: [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]] -; CHECK-NEXT: [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]] -; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, i32 [[DOTI0]], i64 0 -; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2 -; CHECK-NEXT: ret <3 x i32> [[TMP1]] -; - %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) - %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 - %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 - %3 = add <3 x i32> %1, %2 - ret <3 x i32> %3 -} diff --git a/llvm/test/CodeGen/DirectX/splitdouble.ll b/llvm/test/CodeGen/DirectX/splitdouble.ll new file mode 100644 index 0000000000000..1443ba6269255 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/splitdouble.ll @@ -0,0 
+1,76 @@ +; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,NOLOWER +; RUN: opt -passes='function(scalarizer),module(dxil-op-lower)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,WITHLOWER + +define i32 @test_scalar(double noundef %D) { +; CHECK-LABEL: define i32 @test_scalar( +; CHECK-SAME: double noundef [[D:%.*]]) { +; NOLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D]]) +; NOLOWER-NEXT: [[EV1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0 +; NOLOWER-NEXT: [[EV2:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1 +; WITHLOWER-NEXT: [[EV1:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0 +; WITHLOWER-NEXT: [[EV2:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[EV1]], [[EV2]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %hlsl.splitdouble = call { i32, i32 } @llvm.dx.splitdouble.i32(double %D) + %1 = extractvalue { i32, i32 } %hlsl.splitdouble, 0 + %2 = extractvalue { i32, i32 } %hlsl.splitdouble, 1 + %add = add i32 %1, %2 + ret i32 %add +} + + +define void @test_vector_double_split_void(<2 x double> noundef %d) { +; CHECK-LABEL: define void @test_vector_double_split_void( +; CHECK-SAME: <2 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0 +; NOLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1 +; NOLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = 
call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) +; CHECK-NEXT: ret void +; + %hlsl.asuint = call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> %d) + ret void +} + +define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) { +; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split( +; CHECK-SAME: <3 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0 +; NOLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1 +; NOLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) +; CHECK-NEXT: [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2 +; NOLOWER-NEXT: [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I2:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I2]]) +; NOLOWER-NEXT: [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0 +; WITHLOWER-NEXT: [[DOTELEM0:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0 +; NOLOWER-NEXT: [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0 +; WITHLOWER-NEXT: [[DOTELEM01:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I1]], 0 +; NOLOWER-NEXT: [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0 +; WITHLOWER-NEXT: [[DOTELEM02:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I2]], 0 +; NOLOWER-NEXT: [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1 +; WITHLOWER-NEXT: [[DOTELEM1:%.*]] = extractvalue %dx.types.splitdouble 
[[HLSL_ASUINT_I0]], 1 +; NOLOWER-NEXT: [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1 +; WITHLOWER-NEXT: [[DOTELEM13:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I1]], 1 +; NOLOWER-NEXT: [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1 +; WITHLOWER-NEXT: [[DOTELEM14:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I2]], 1 +; CHECK-NEXT: [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]] +; CHECK-NEXT: [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]] +; CHECK-NEXT: [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]] +; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, i32 [[DOTI0]], i64 0 +; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2 +; CHECK-NEXT: ret <3 x i32> [[TMP1]] +; + %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) + %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 + %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 + %3 = add <3 x i32> %1, %2 + ret <3 x i32> %3 +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll new file mode 100644 index 0000000000000..d18b16b843c37 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll @@ -0,0 +1,40 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure lowering is correctly generating spirv code. 
+ +; CHECK-DAG: %[[#double:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#vec_2_double:]] = OpTypeVector %[[#double]] 2 +; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#vec_2_int_32:]] = OpTypeVector %[[#int_32]] 2 +; CHECK-DAG: %[[#vec_4_int_32:]] = OpTypeVector %[[#int_32]] 4 + + +define spir_func noundef i32 @test_scalar(double noundef %D) local_unnamed_addr { +entry: + ; CHECK-LABEL: ; -- Begin function test_scalar + ; CHECK: %[[#param:]] = OpFunctionParameter %[[#double]] + ; CHECK: %[[#bitcast:]] = OpBitcast %[[#vec_2_int_32]] %[[#param]] + %0 = bitcast double %D to <2 x i32> + ; CHECK: %[[#]] = OpCompositeExtract %[[#int_32]] %[[#bitcast]] 0 + %1 = extractelement <2 x i32> %0, i64 0 + ; CHECK: %[[#]] = OpCompositeExtract %[[#int_32]] %[[#bitcast]] 1 + %2 = extractelement <2 x i32> %0, i64 1 + %add = add i32 %1, %2 + ret i32 %add +} + + +define spir_func noundef <2 x i32> @test_vector(<2 x double> noundef %D) local_unnamed_addr { +entry: + ; CHECK-LABEL: ; -- Begin function test_vector + ; CHECK: %[[#param:]] = OpFunctionParameter %[[#vec_2_double]] + ; CHECK: %[[#CAST1:]] = OpBitcast %[[#vec_4_int_32]] %[[#param]] + ; CHECK: %[[#SHUFF2:]] = OpVectorShuffle %[[#vec_2_int_32]] %[[#CAST1]] %[[#]] 0 2 + ; CHECK: %[[#SHUFF3:]] = OpVectorShuffle %[[#vec_2_int_32]] %[[#CAST1]] %[[#]] 1 3 + %0 = bitcast <2 x double> %D to <4 x i32> + %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2> + %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3> + %add = add <2 x i32> %1, %2 + ret <2 x i32> %add +} diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll index 0746a07d2cdf2..5ae5caf3e88b2 100644 --- a/llvm/test/CodeGen/X86/scmp.ll +++ b/llvm/test/CodeGen/X86/scmp.ll @@ -848,7 +848,7 @@ define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k1 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k2 ; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 
-; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 ; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll index cd643cb8d6375..6a52acfe2fb30 100644 --- a/llvm/test/CodeGen/X86/ucmp.ll +++ b/llvm/test/CodeGen/X86/ucmp.ll @@ -819,7 +819,7 @@ define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512-NEXT: vpcmpltub %xmm1, %xmm0, %k1 ; AVX512-NEXT: vpcmpnleub %xmm1, %xmm0, %k2 ; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 ; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512-NEXT: retq ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 1326751a847d7..59db6c197ef8c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: +; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; IF-EVL-INLOOP-NEXT: No successors +; IF-EVL-INLOOP-EMPTY: +; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> ; IF-EVL-INLOOP-NEXT: } ; @@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; NO-VP-OUTLOOP-NEXT: No successors +; NO-VP-OUTLOOP-EMPTY: +; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> ; NO-VP-OUTLOOP-NEXT: } ; @@ -143,7 +149,10 @@ define i32 
@reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: +; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; NO-VP-INLOOP-NEXT: No successors +; NO-VP-INLOOP-EMPTY: +; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> ; NO-VP-INLOOP-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 8e56614a2e3d5..b05980bef1b38 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> +; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 0dde507d08be7..2247295295663 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]> ; CHECK-NEXT: } ; entry: @@ -221,7 +224,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; 
CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]> ; CHECK-NEXT: } ; entry: @@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]> ; CHECK-NEXT:} entry: diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll b/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll new file mode 100644 index 0000000000000..0c5324ee96c93 --- /dev/null +++ b/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll @@ -0,0 +1,54 @@ +;; Check that if the address of a weak function is only taken through an alias, +;; it is still added to a list of exported functions and @llvm.type.test() is +;; lowered to an actual check against the generated CFI jumptable. + +RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir +RUN: opt test.ll --thinlto-bc --thinlto-split-lto-unit -o test.bc +RUN: llvm-modextract test.bc -n 0 -o test0.bc +RUN: llvm-modextract test.bc -n 1 -o test1.bc + +;; Check that a CFI jumptable is generated. +RUN: opt test1.bc -passes=lowertypetests -lowertypetests-read-summary=in.yaml \ +RUN: -lowertypetests-summary-action=export -lowertypetests-write-summary=exported.yaml \ +RUN: -S -o - | FileCheck %s --check-prefix=REGULAR +REGULAR: @__typeid__ZTSFvvE_global_addr = hidden alias i8, ptr @.cfi.jumptable +REGULAR: @f = alias void (), ptr @.cfi.jumptable +REGULAR: define private void @.cfi.jumptable() + +;; Check that @llvm.type.test() is lowered to an actual check. 
+RUN: opt test0.bc -passes=lowertypetests -lowertypetests-read-summary=exported.yaml \ +RUN: -lowertypetests-summary-action=import -S -o - | FileCheck %s --check-prefix=THIN +THIN: define i1 @test() { +THIN-NEXT: %1 = icmp eq i64 ptrtoint (ptr @alias to i64), ptrtoint (ptr @__typeid__ZTSFvvE_global_addr to i64) +THIN-NEXT: ret i1 %1 +THIN-NEXT: } + +;--- test.ll +target triple = "x86_64-pc-linux-gnu" + +@alias = alias void(), ptr @f + +define weak void @f() !type !0 { + ret void +} + +define i1 @test() { + %1 = call i1 @llvm.type.test(ptr nonnull @alias, metadata !"_ZTSFvvE") + ret i1 %1 +} + +declare i1 @llvm.type.test(ptr, metadata) + +!0 = !{i64 0, !"_ZTSFvvE"} +;--- in.yaml +--- +GlobalValueMap: + 8346051122425466633: # guid("test") + - Live: true + Refs: [5833419078793185394] # guid("alias") + TypeTests: [9080559750644022485] # guid("_ZTSFvvE") + 5833419078793185394: # guid("alias") + - Aliasee: 14740650423002898831 # guid("f") + 14740650423002898831: # guid("f") + - +... diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index 24512cb0225e8..df689767b7724 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -13,4 +13,5 @@ add_llvm_unittest(SandboxVectorizerTests LegalityTest.cpp SchedulerTest.cpp SeedCollectorTest.cpp + VecUtilsTest.cpp ) diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 56c6bf5f1ef1f..aaa8e96de6d17 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -29,13 +29,19 @@ struct LegalityTest : public testing::Test { TEST_F(LegalityTest, Legality) { parseIR(C, R"IR( -define void @foo(ptr %ptr) { +define void @foo(ptr %ptr, 
<2 x float> %vec2, <3 x float> %vec3, i8 %arg, float %farg0, float %farg1) { %gep0 = getelementptr float, ptr %ptr, i32 0 %gep1 = getelementptr float, ptr %ptr, i32 1 + %gep3 = getelementptr float, ptr %ptr, i32 3 %ld0 = load float, ptr %gep0 %ld1 = load float, ptr %gep0 store float %ld0, ptr %gep0 store float %ld1, ptr %gep1 + store <2 x float> %vec2, ptr %gep1 + store <3 x float> %vec3, ptr %gep3 + store i8 %arg, ptr %gep1 + %fadd0 = fadd float %farg0, %farg0 + %fadd1 = fadd fast float %farg1, %farg1 ret void } )IR"); @@ -46,10 +52,16 @@ define void @foo(ptr %ptr) { auto It = BB->begin(); [[maybe_unused]] auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++); [[maybe_unused]] auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++); + [[maybe_unused]] auto *Gep3 = cast<sandboxir::GetElementPtrInst>(&*It++); [[maybe_unused]] auto *Ld0 = cast<sandboxir::LoadInst>(&*It++); [[maybe_unused]] auto *Ld1 = cast<sandboxir::LoadInst>(&*It++); auto *St0 = cast<sandboxir::StoreInst>(&*It++); auto *St1 = cast<sandboxir::StoreInst>(&*It++); + auto *StVec2 = cast<sandboxir::StoreInst>(&*It++); + auto *StVec3 = cast<sandboxir::StoreInst>(&*It++); + auto *StI8 = cast<sandboxir::StoreInst>(&*It++); + auto *FAdd0 = cast<sandboxir::BinaryOperator>(&*It++); + auto *FAdd1 = cast<sandboxir::BinaryOperator>(&*It++); sandboxir::LegalityAnalysis Legality; const auto &Result = Legality.canVectorize({St0, St1}); @@ -62,6 +74,30 @@ define void @foo(ptr %ptr) { EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(), sandboxir::ResultReason::NotInstructions); } + { + // Check DiffOpcodes + const auto &Result = Legality.canVectorize({St0, Ld0}); + EXPECT_TRUE(isa<sandboxir::Pack>(Result)); + EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(), + sandboxir::ResultReason::DiffOpcodes); + } + { + // Check DiffTypes + EXPECT_TRUE(isa<sandboxir::Widen>(Legality.canVectorize({St0, StVec2}))); + EXPECT_TRUE(isa<sandboxir::Widen>(Legality.canVectorize({StVec2, StVec3}))); + + const auto &Result = Legality.canVectorize({St0, StI8}); + EXPECT_TRUE(isa<sandboxir::Pack>(Result)); + EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(), + sandboxir::ResultReason::DiffTypes); + } + { + // Check DiffMathFlags + const auto &Result = Legality.canVectorize({FAdd0, FAdd1}); + EXPECT_TRUE(isa<sandboxir::Pack>(Result)); + EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(), + sandboxir::ResultReason::DiffMathFlags); + } } #ifndef NDEBUG @@ -85,5 +121,8 @@ 
TEST_F(LegalityTest, LegalityResultDump) { EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffTypes), "Pack Reason: DiffTypes")); + EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::DiffMathFlags), + "Pack Reason: DiffMathFlags")); } #endif // NDEBUG diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp new file mode 100644 index 0000000000000..e0b0828496439 --- /dev/null +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp @@ -0,0 +1,37 @@ +//===- VecUtilsTest.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/SandboxIR/Context.h" +#include "llvm/SandboxIR/Type.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct VecUtilsTest : public testing::Test { + LLVMContext C; +}; + +TEST_F(VecUtilsTest, GetNumElements) { + sandboxir::Context Ctx(C); + auto *ElemTy = sandboxir::Type::getInt32Ty(Ctx); + EXPECT_EQ(sandboxir::VecUtils::getNumElements(ElemTy), 1); + auto *VTy = sandboxir::FixedVectorType::get(ElemTy, 2); + EXPECT_EQ(sandboxir::VecUtils::getNumElements(VTy), 2); + auto *VTy1 = sandboxir::FixedVectorType::get(ElemTy, 1); + EXPECT_EQ(sandboxir::VecUtils::getNumElements(VTy1), 1); +} + +TEST_F(VecUtilsTest, GetElementType) { + sandboxir::Context Ctx(C); + auto *ElemTy = sandboxir::Type::getInt32Ty(Ctx); + EXPECT_EQ(sandboxir::VecUtils::getElementType(ElemTy), ElemTy); + auto *VTy = sandboxir::FixedVectorType::get(ElemTy, 2); + 
EXPECT_EQ(sandboxir::VecUtils::getElementType(VTy), ElemTy); +} diff --git a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt index cbe85789b29a3..7fc4af5403185 100644 --- a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt @@ -15,6 +15,7 @@ add_mlir_conversion_library(MLIRLinalgToStandard MLIRIR MLIRLinalgDialect MLIRLinalgTransforms + MLIRLLVMDialect MLIRMemRefDialect MLIRPass MLIRSCFDialect diff --git a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt index 61c46e9bfe250..0a4eb97474f3a 100644 --- a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt +++ b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt @@ -14,6 +14,7 @@ add_mlir_conversion_library(MLIRMathToLibm MLIRArithDialect MLIRDialectUtils MLIRFuncDialect + MLIRLLVMDialect MLIRMathDialect MLIRPass MLIRTransformUtils