Skip to content

Commit

Permalink
Add option to compact event L3 flush packet
Browse files Browse the repository at this point in the history
Related-To: NEO-7434

Signed-off-by: Zbigniew Zdanowicz <[email protected]>
  • Loading branch information
zzdanowicz authored and Compute-Runtime-Automation committed Nov 3, 2022
1 parent 709e322 commit 6a6ab80
Show file tree
Hide file tree
Showing 13 changed files with 1,063 additions and 30 deletions.
8 changes: 8 additions & 0 deletions level_zero/core/source/cmdlist/cmdlist_hw.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,18 @@ struct CommandListCoreFamily : CommandListImp {
size_t dstSize,
CmdListFillKernelArguments &outArguments,
Kernel *kernel);
// Tells whether the event should be signaled using the compact L3 flush
// packet path. That is the case only when the caller reports that a DC flush
// is required (dcFlush) and the compactL3FlushEventPacket option is enabled
// on this command list.
bool compactL3FlushEvent(bool dcFlush) const {
    if (!dcFlush) {
        // Without a DC flush there is nothing to compact.
        return false;
    }
    return this->compactL3FlushEventPacket;
}
// Decides whether the event is signaled through the pipe control path.
// This holds in either of two cases:
//  - the operation is split into multiple kernels (splitKernel) and
//    pipeControlMultiKernelEventSync is enabled, or
//  - compactL3FlushEvent(dcFlush) reports that the compact L3 flush path applies.
bool eventSignalPipeControl(bool splitKernel, bool dcFlush) const {
    if (splitKernel && this->pipeControlMultiKernelEventSync) {
        return true;
    }
    return compactL3FlushEvent(dcFlush);
}

size_t cmdListCurrentStartOffset = 0;
bool containsAnyKernel = false;
bool pipeControlMultiKernelEventSync = false;
bool compactL3FlushEventPacket = false;
};

template <PRODUCT_FAMILY gfxProductFamily>
Expand Down
10 changes: 7 additions & 3 deletions level_zero/core/source/cmdlist/cmdlist_hw.inl
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->frontEndStateTracking = L0HwHelper::enableFrontEndStateTracking(hwInfo);
this->pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking(hwInfo);
this->pipeControlMultiKernelEventSync = L0HwHelper::usePipeControlMultiKernelEventSync(hwInfo);
this->compactL3FlushEventPacket = L0HwHelper::useCompactL3FlushEventPacket(hwInfo);

if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
Expand Down Expand Up @@ -1187,19 +1188,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}

CmdListKernelLaunchParams launchParams = {};

bool dcFlush = false;
Event *signalEvent = nullptr;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
}

uint32_t kernelCounter = leftSize > 0 ? 1 : 0;
kernelCounter += middleSizeBytes > 0 ? 1 : 0;
kernelCounter += rightSize > 0 ? 1 : 0;

launchParams.isKernelSplitOperation = kernelCounter > 1;
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);

appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);

Expand Down Expand Up @@ -1551,9 +1553,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
CmdListKernelLaunchParams launchParams = {};

Event *signalEvent = nullptr;
bool dcFlush = false;
if (hSignalEvent) {
signalEvent = Event::fromHandle(hSignalEvent);
launchParams.isHostSignalScopeEvent = !!(signalEvent->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
dcFlush = getDcFlushRequired(!!signalEvent->signalScope);
}

if (isCopyOnly()) {
Expand Down Expand Up @@ -1610,7 +1614,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
setupFillKernelArguments(dstAllocation.offset, patternSize, size, fillArguments, builtinKernel);

launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
bool singlePipeControlPacket = this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation;
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);

appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);

Expand Down
32 changes: 22 additions & 10 deletions level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl
Original file line number Diff line number Diff line change
Expand Up @@ -163,20 +163,26 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
threadGroupDimensions->groupCountY,
threadGroupDimensions->groupCountZ);
}
NEO::GraphicsAllocation *eventAlloc = nullptr;

uint64_t eventAddress = 0;
bool isTimestampEvent = false;
bool l3FlushEnable = false;
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
Event *compactEvent = nullptr;
if (event) {
eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
bool flushRequired = !!event->signalScope &&
!launchParams.isKernelSplitOperation;
l3FlushEnable = getDcFlushRequired(flushRequired);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
isHostSignalScopeEvent = !!(event->signalScope & ZE_EVENT_SCOPE_FLAG_HOST);
if (compactL3FlushEvent(getDcFlushRequired(!!event->signalScope))) {
compactEvent = event;
event = nullptr;
} else {
NEO::GraphicsAllocation *eventAlloc = &event->getAllocation(this->device);
commandContainer.addToResidencyContainer(eventAlloc);
bool flushRequired = !!event->signalScope &&
!launchParams.isKernelSplitOperation;
l3FlushEnable = getDcFlushRequired(flushRequired);
isTimestampEvent = event->isUsingContextEndOffset();
eventAddress = event->getPacketAddress(this->device);
}
}

bool isKernelUsingSystemAllocation = false;
Expand Down Expand Up @@ -249,6 +255,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K

std::list<void *> additionalCommands;

if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, true, true);
}

NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, // eventAddress
neoDevice, // device
Expand All @@ -273,7 +283,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;

if (event) {
if (compactEvent) {
appendEventForProfilingAllWalkers(compactEvent, false, true);
} else if (event) {
if (partitionCount > 1) {
event->setPacketsInUse(partitionCount);
}
Expand Down Expand Up @@ -404,7 +416,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
Event *event,
const CmdListKernelLaunchParams &launchParams) {
if (event) {
if (this->pipeControlMultiKernelEventSync && launchParams.isKernelSplitOperation) {
if (eventSignalPipeControl(launchParams.isKernelSplitOperation, getDcFlushRequired(!!event->signalScope))) {
event = nullptr;
} else {
event->increaseKernelCount();
Expand Down
7 changes: 7 additions & 0 deletions level_zero/core/source/hw_helpers/l0_hw_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,11 @@ bool L0HwHelper::usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwI
return false;
}

// Reports whether the compact L3 flush event packet should be used.
// The decision currently comes solely from the CompactL3FlushEventPacket debug
// flag: a value of -1 is treated as "no override" and yields the default
// (false); any other value is interpreted as a boolean. hwInfo is not
// consulted by this implementation.
bool L0HwHelper::useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo) {
    const auto overrideValue = NEO::DebugManager.flags.CompactL3FlushEventPacket.get();
    if (overrideValue == -1) {
        return false;
    }
    return overrideValue != 0;
}

} // namespace L0
1 change: 1 addition & 0 deletions level_zero/core/source/hw_helpers/l0_hw_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class L0HwHelper {
static bool enableStateComputeModeTracking(const NEO::HardwareInfo &hwInfo);
static bool enableImmediateCmdListHeapSharing(const NEO::HardwareInfo &hwInfo, bool cmdlistSupport);
static bool usePipeControlMultiKernelEventSync(const NEO::HardwareInfo &hwInfo);
static bool useCompactL3FlushEventPacket(const NEO::HardwareInfo &hwInfo);
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;

Expand Down
2 changes: 2 additions & 0 deletions level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,9 @@ struct TestExpectedValues {
uint32_t expectedKernelCount = 0;
uint32_t expectedWalkerPostSyncOp = 0;
uint32_t expectedPostSyncPipeControls = 0;
uint32_t expectDcFlush = 0;
bool postSyncAddressZero = false;
bool workloadPartition = false;
};

} // namespace ult
Expand Down
3 changes: 3 additions & 0 deletions level_zero/core/test/unit_tests/mocks/mock_cmdlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::commandListPerThreadScratchSize;
using BaseClass::commandListPreemptionMode;
using BaseClass::commandsToPatch;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::containsAnyKernel;
using BaseClass::containsCooperativeKernelsFlag;
using BaseClass::csr;
Expand Down Expand Up @@ -123,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::clearCommandsToPatch;
using BaseClass::cmdQImmediate;
using BaseClass::commandsToPatch;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::csr;
using BaseClass::finalStreamState;
using BaseClass::frontEndStateTracking;
Expand All @@ -142,6 +144,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
template <GFXCORE_FAMILY gfxCoreFamily>
struct MockCommandListImmediate : public CommandListCoreFamilyImmediate<gfxCoreFamily> {
using BaseClass = CommandListCoreFamilyImmediate<gfxCoreFamily>;
using BaseClass::compactL3FlushEventPacket;
using BaseClass::containsAnyKernel;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::indirectAllocationsAllowed;
Expand Down
Loading

0 comments on commit 6a6ab80

Please sign in to comment.