Loading...
Searching...
No Matches
pullBasedExecutorEngine.h
Go to the documentation of this file.
1//
2// Copyright 2025 Pixar
3//
4// Licensed under the terms set forth in the LICENSE.txt file available at
5// https://openusd.org/license.
6//
7#ifndef PXR_EXEC_VDF_PULL_BASED_EXECUTOR_ENGINE_H
8#define PXR_EXEC_VDF_PULL_BASED_EXECUTOR_ENGINE_H
9
11
12#include "pxr/pxr.h"
13
21#include "pxr/exec/vdf/mask.h"
23#include "pxr/exec/vdf/node.h"
24#include "pxr/exec/vdf/output.h"
29#include "pxr/exec/vdf/vector.h"
30
31#include "pxr/base/tf/bits.h"
34
35PXR_NAMESPACE_OPEN_SCOPE
36
37// The TRACE_SCOPE (and FUNCTION) invocations in this file can be pretty
38// expensive. We turn off most of them. However, they are still useful to
39// track down performance issues, which is why we have a quick way of enabling
40// them, by setting _VDF_PBEE_PROFILING_ON to 1.
41#define _VDF_PBEE_PROFILING_ON 0
42#if _VDF_PBEE_PROFILING_ON
43#define VDF_PBEE_TRACE_FUNCTION TRACE_FUNCTION
44#define VDF_PBEE_TRACE_SCOPE TRACE_SCOPE
45#else
46#define VDF_PBEE_TRACE_FUNCTION()
47#define VDF_PBEE_TRACE_SCOPE(name)
48#endif
49
52
53// Forward declare the speculation executor engine with equivalent traits to
54// this executor engine.
55template <typename> class VdfSpeculationExecutorEngine;
56
65
66template <typename DataManagerType>
68{
69public:
70
75 typedef
78
82 const VdfExecutorInterface &executor,
83 DataManagerType *dataManager) :
84 _executor(executor),
85 _dataManager(dataManager)
86 {}
87
92 const VdfSchedule &schedule,
93 const VdfRequest &computeRequest,
94 VdfExecutorErrorLogger *errorLogger) {
96 schedule, computeRequest, errorLogger,
97 [](const VdfMaskedOutput &, size_t){});
98 }
99
105 template < typename F >
106 void RunSchedule(
107 const VdfSchedule &schedule,
108 const VdfRequest &computeRequest,
109 VdfExecutorErrorLogger *errorLogger,
110 F &&callback);
111
112protected:
113
116 typedef typename DataManagerType::DataHandle _DataHandle;
117
121
122 ExecutionStageStart, // Nodes start in this stage.
123
124 ExecutionStagePreRequisitesDone, // After prerequisites have been
125 // computed but before rest of
126 // inputs have been computed.
127
128 ExecutionStageReadsDone, // After the reads have finished --
129 // only needed for speculation engine.
130
131 ExecutionStageCompute, // Final stage before node computation.
132
133 };
134
135
140 void _ComputeNode(
141 const VdfEvaluationState &state,
142 const VdfNode &node,
143 bool absorbLockedCache = false);
144
157 bool _PassThroughNode(
158 const VdfSchedule &schedule,
159 const VdfNode &node,
160 bool absorbLockedCache = false);
161
162
167 const VdfOutput &toOutput,
168 VdfExecutorBufferData *toBuffer,
169 const VdfOutput &fromOutput,
170 const VdfMask &fromMask) const;
171
174 const VdfExecutorInterface &_GetExecutor() { return _executor; }
175
178 DataManagerType *_GetDataManager() { return _dataManager; }
179
186 const _DataHandle dataHandle,
187 const VdfOutput &output,
188 const VdfOutput &source,
189 const VdfMask &inputMask,
190 const VdfSchedule &schedule);
191
192
198 const _DataHandle dataHandle,
199 const VdfOutput &output,
200 const VdfOutput &source,
201 const VdfMask &inputMask,
202 const VdfSchedule &schedule) const;
203
210 const _DataHandle dataHandle,
211 const VdfInput &input,
212 const VdfMask &mask,
213 const VdfSchedule &schedule);
214
// Returns true if \p output has an associated input and the \p schedule
// reports no pass-to output for \p outputId. Returns false for outputs
// without an associated input.
219 inline static bool _IsNotPassing(
220 const VdfOutput& output,
221 const VdfSchedule::OutputId& outputId,
222 const VdfSchedule& schedule)
223 {
224 return output.GetAssociatedInput() &&
225 !schedule.GetPassToOutput(outputId);
226 }
227
228private:
229
230 // This struct contains the necessary state to compute an output. One
 // instance is stored per entry of the std::vector used as the execution
 // stack.
231 //
232 struct _OutputToExecute {
233
234 // Constructor that takes the schedule identifier of the output, the
 // initial locked cache mask and the initial affective flag. The entry
 // always starts out in ExecutionStageStart with absorbLockedCache set
 // to false.
235 _OutputToExecute(
236 const VdfSchedule::OutputId &outputId,
237 const VdfMask &lockedCacheMask,
238 bool affective) :
239 outputId(outputId),
240 stage(ExecutionStageStart),
241 lockedCacheMask(lockedCacheMask),
242 affective(affective),
243 absorbLockedCache(false)
244 { }
245
246 // The schedule identifier for the output to execute.
247 VdfSchedule::OutputId outputId;
248
249 // The current phase of this output in the execution stack.
250 _ExecutionStage stage;
251
252 // Current state of the locked cache
253 VdfMask lockedCacheMask;
254
255 // Determines the affective-ness of the output
256 bool affective;
257
258 // Absorb the locked cache for SMBL
259 bool absorbLockedCache;
260
261 };
262
263 // Helper method to _ExecuteOutput.
264 //
265 // This method adds \p output to the \p outputs vector.
266 // Returns \c true if it added a new output and \c false otherwise.
267 //
268 bool _PushBackOutput(
269 std::vector< _OutputToExecute > *outputs,
270 const VdfMask &lockedCacheMask,
271 const VdfOutput &output,
272 const VdfSchedule &schedule);
273
274 // Executes the given \p output.
275 //
276 void _ExecuteOutput(
277 const VdfEvaluationState &state,
278 const VdfOutput &output,
279 TfBits *executedNodes);
280
281 // Finalize the output buffer after computing or passing through. This
282 // sets the computed output mask as well as merges in any data that has
283 // been temporarily held on to.
284 //
285 void _FinalizeComputedOutput(
286 const _DataHandle dataHandle,
287 const VdfMask &requestMask,
288 const bool hasBeenInterrupted,
289 const bool extendRequestMask);
290
291 // Update the output stack entry for SMBL. This refreshes the affectiveness
292 // flag, the lockedCacheMask and the flag that determines whether the
293 // locked cache needs to be absorbed into the executor cache. This method
294 // returns true if any of the relevant flags on the stackEntry object have
295 // been modified.
296 //
297 bool _UpdateOutputForSMBL(
298 const VdfOutput &output,
299 _OutputToExecute *stackEntry,
300 const VdfSchedule &schedule);
301
302 // The executor that uses this engine.
303 //
304 const VdfExecutorInterface &_executor;
305
306 // The data manager for this engine.
307 //
308 DataManagerType *_dataManager;
309
310 // Acceleration structure used for caching output handles, which may be
311 // repeatedly looked up in the same order.
312 //
313 std::vector<_DataHandle> _dataHandleCache;
314
315};
316
318
// Evaluates the outputs in \p computeRequest using \p schedule.
//
// The data manager is resized for the schedule's network first. Each
// requested output that already has a value for its mask is not executed,
// but \p callback is still invoked for it. Every other output is executed;
// \p callback(maskedOutput, index) is invoked afterwards unless the executor
// reports an interruption, in which case the loop stops immediately.
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 322); presumably this is the
// out-of-line definition of RunSchedule -- confirm against the real header.
319template<typename DataManagerType>
320template<typename F>
321void
323 const VdfSchedule &schedule,
324 const VdfRequest &computeRequest,
325 VdfExecutorErrorLogger *errorLogger,
326 F &&callback)
327{
328 TRACE_FUNCTION();
329
330 // Make sure the data manager is appropriately sized.
331 _dataManager->Resize(*schedule.GetNetwork());
332
333 // Indicates which nodes have been executed. Shared across all requested
 // outputs of this call.
334 TfBits executedNodes(schedule.GetScheduleNodeVector().size());
335
336 // The persistent evaluation state
337 VdfEvaluationState state(_GetExecutor(), schedule, errorLogger);
338
339 // Now execute the uncached, requested outputs.
340 VdfRequest::IndexedView requestView(computeRequest);
341 for (size_t i = 0; i < requestView.GetSize(); ++i) {
342 // Skip outputs not included in the request.
343 const VdfMaskedOutput *maskedOutput = requestView.Get(i);
344 if (!maskedOutput) {
345 continue;
346 }
347
348 // Skip outputs that have already been cached. However, we must invoke
349 // the callback to notify the client side that evaluation of the
350 // requested output has completed.
351 const VdfOutput &output = *maskedOutput->GetOutput();
352 const VdfMask &mask = maskedOutput->GetMask();
353 if (_GetExecutor().GetOutputValue(output, mask)) {
354 callback(*maskedOutput, i);
355 continue;
356 }
357
358 VDF_PBEE_TRACE_SCOPE(
359 "VdfPullBasedExecutorEngine<T>::RunSchedule (executing output)");
360 _ExecuteOutput(state, output, &executedNodes);
361
362 // If we've been interrupted, bail out.
363 if (_GetExecutor().HasBeenInterrupted()) {
364 break;
365 }
366
367 // Invoke the callback once the output has been evaluated, but only
368 // if the executor has not been interrupted.
369 else {
370 callback(*maskedOutput, i);
371 }
372 }
373}
374
// Creates a new output cache for \p toOutput in \p toBuffer and copies into
// it the elements selected by \p fromMask from the value the executor holds
// for \p fromOutput. Returns the newly created vector.
//
// Issues VDF_FATAL_ERROR if the executor has no value for \p fromOutput and
// \p fromMask. When execution stats are available, an ElementsCopiedEvent is
// logged on the node of \p toOutput with the number of bits set in
// \p fromMask.
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 377); the name is taken from the malloc
// tag below.
375template<typename DataManagerType>
376VdfVector *
378 const VdfOutput &toOutput,
379 VdfExecutorBufferData *toBuffer,
380 const VdfOutput &fromOutput,
381 const VdfMask &fromMask) const
382{
383 TfAutoMallocTag2 tag("Vdf", "VdfPullBasedExecutorEngine<T>::_CopyCache");
384
385 // Note that we must look up the data through the executor, instead of the
386 // data manager, because we may have initially received a cache hit by
387 // looking up the executor. The data may live in the parent executor, for
388 // example, instead of the local data manager.
389 const VdfVector *sourceVector =
390 _executor.GetOutputValue(fromOutput, fromMask);
391
392 if (!sourceVector) {
393 // CODE_COVERAGE_OFF - We should never hit this
394 VDF_FATAL_ERROR(fromOutput.GetNode(),
395 "No cache for output " + fromOutput.GetDebugName());
396 // CODE_COVERAGE_ON
397 }
398
399 VdfVector *result = _dataManager->CreateOutputCache(toOutput, toBuffer);
400 result->Copy(*sourceVector, fromMask);
401
402 if (VdfExecutionStats* stats = _executor.GetExecutionStats()) {
403 stats->LogData(
404 VdfExecutionStats::ElementsCopiedEvent,
405 toOutput.GetNode(),
406 fromMask.GetNumSet());
407 }
408
409 return result;
410}
411
412
// Touches every output on the associated-source chain between \p output and
// \p source (exclusive of both), then passes or copies the buffer of
// \p source to \p output via _PassOrCopyBufferInternal and returns its
// result.
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 415); presumably this is
// _PassOrCopySourceOutputBuffer, which is called with this argument list
// from the pass-through code -- confirm against the real header.
413template<typename DataManagerType>
414VdfVector *
416 const _DataHandle dataHandle,
417 const VdfOutput &output,
418 const VdfOutput &source,
419 const VdfMask &inputMask,
420 const VdfSchedule &schedule)
421{
422 VDF_PBEE_TRACE_FUNCTION();
423
424 // The following block of code makes sure that we touch all the outputs
425 // between the source output and the output that was passed the buffer.
426 //
427 // XXX: This loop scales with the number of nodes between the two outputs
428 // and can get quite expensive. It's also very cache unfriendly. It
429 // would be great if we could get away without ever touching these
430 // outputs.
431 //
432 const VdfOutput *betweenOutput = VdfGetAssociatedSourceOutput(output);
433 while (betweenOutput && betweenOutput != &source) {
434 _GetExecutor()._TouchOutput(*betweenOutput);
435 betweenOutput = VdfGetAssociatedSourceOutput(*betweenOutput);
436 }
437
438 return _PassOrCopyBufferInternal(
439 dataHandle, output, source, inputMask, schedule);
440}
441
// Provides \p output (identified by \p dataHandle) with the data of
// \p source, either by passing the source buffer along or by copying it.
// Returns the resulting vector.
//
// The buffer is passed only if all of the following hold; in every other
// case the data selected by \p inputMask is copied with _CopyCache:
//   o \p source has a valid data handle in the local data manager
//   o \p output is the scheduled pass-to output of \p source
//   o the source buffer has an executor cache with a non-empty mask
//   o there is no invalidation timestamp mismatch between the two handles
//   o the source executor cache mask contains \p inputMask
//   o PassBuffer returns a non-null vector
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 444); the name is taken from the trace
// scope string below.
442template<typename DataManagerType>
443VdfVector *
445 const _DataHandle dataHandle,
446 const VdfOutput &output,
447 const VdfOutput &source,
448 const VdfMask &inputMask,
449 const VdfSchedule &schedule) const
450{
451 // Here's where we have the most potential for optimization. We
452 // can re-use our inputs cache (without any copying) if our input
453 // has one and only one output (and that's us)
454 //
455 const _DataHandle sourceHandle =
456 _dataManager->GetDataHandle(source.GetId());
457 VdfSchedule::OutputId sourceId = schedule.GetOutputId(source);
458
459 VdfVector *result = NULL;
460
461 // If this is the output that 'source' is supposed to pass its buffer
462 // to, do so, otherwise copy.
463 if (_dataManager->IsValidDataHandle(sourceHandle) &&
464 &output == schedule.GetPassToOutput(sourceId)) {
465
466 // Retrieve the buffer data from the source data handle.
467 VdfExecutorBufferData *sourceBuffer =
468 _dataManager->GetBufferData(sourceHandle);
469
470 // If the source output does not contain any data, don't even
471 // bother with mung buffer locking or buffer passing.
472 if (sourceBuffer->GetExecutorCache() &&
473 sourceBuffer->GetExecutorCacheMask().IsAnySet()) {
474
475 // Decide whether mung buffer locking should be in effect.
476 // We identify this source output as a likely candidate for buffer
477 // locking (keeping its buffer around) if we observe that the
478 // current output has been recently invalidated while the source
479 // output has not. We optimistically "lock" the buffer by copying
480 // it instead of passing it, so that during the rest of the current
481 // mung (if any), the source buffer will still have its buffer
482 // intact, and we won't have to visit any of its upstream nodes.
 //
 // Note that this branch only emits a debug message: result is left
 // NULL, which is what routes execution to the copy below.
483 if (_dataManager->HasInvalidationTimestampMismatch(
484 sourceHandle, dataHandle)) {
485 TF_DEBUG(VDF_MUNG_BUFFER_LOCKING)
486 .Msg("Mung buffer locking between outputs "
487 "'%s' and '%s'.\n",
488 source.GetDebugName().c_str(),
489 output.GetDebugName().c_str());
490 }
491
492 // If mung buffers are not supposed to be locked, pass the buffer
493 // data from the source output to the destination output.
494 else {
495
496 // If the source output does not contain all the data that has
497 // been requested in the inputMask, we cannot pass buffers.
498 // Note, that the requested data being available also implies
499 // that the source output contains the data marked to keep,
500 // since the keep mask is always a subset of the request mask.
501 // This is verified at scheduling time.
502 //
503 // We end up in this particular situation if the execution
504 // engine has found the data living on a parent executor,
505 // i.e. it must be copied before it can be passed to
506 // subsequent outputs.
507 //
508 if (sourceBuffer->GetExecutorCacheMask().Contains(inputMask)) {
509 const VdfMask &keepMask = schedule.GetKeepMask(sourceId);
510 result = _dataManager->PassBuffer(
511 source, sourceBuffer,
512 output, _dataManager->GetBufferData(dataHandle),
513 keepMask);
514
 // The event is logged on the source node, with the number of
 // bits set in the keep mask.
515 if (VdfExecutionStats* stats =
516 _executor.GetExecutionStats()) {
517 stats->LogData(
518 VdfExecutionStats::ElementsCopiedEvent,
519 source.GetNode(),
520 keepMask.GetNumSet());
521 }
522 }
523
524 // Note that result can be NULL and we can still end up in
525 // _CopyCache. This can happen when something cached in the
526 // parent executor is read by a speculating executor.
527 }
528 }
529 }
530
 // No buffer has been passed: fall back to copying the requested data.
531 if (!result) {
532 VDF_PBEE_TRACE_SCOPE(
533 "VdfPullBasedExecutorEngine<T>::_PassOrCopyBufferInternal "
534 "(copying vector)");
535 result = _CopyCache(
536 output, _dataManager->GetBufferData(dataHandle), source, inputMask);
537 }
538
539 return result;
540}
541
// Prepares the buffer of the output associated with the read/write
// \p input, where \p dataHandle refers to that output's data.
//
// If \p input has exactly one connection and that connection's mask is not
// all zeros, the buffer of the connected source output is passed or copied
// (see _PassOrCopyBufferInternal) using \p mask. In every other case a fresh
// output cache is created.
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 544); presumably this is
// _PrepareReadWriteBuffer, which is called with this argument list when
// computing a node -- confirm against the real header.
542template<typename DataManagerType>
543void
545 const _DataHandle dataHandle,
546 const VdfInput &input,
547 const VdfMask &mask,
548 const VdfSchedule &schedule)
549{
550 // Get the output associated with the read/write input.
551 const VdfOutput *output = input.GetAssociatedOutput();
552 TF_DEV_AXIOM(output);
553
554 // Here's where we have the most potential for optimization. We
555 // can re-use our inputs cache (without any copying) if our input
556 // has one and only one output (and that's us)
557 const size_t numInputNodes = input.GetNumConnections();
558 if (numInputNodes == 1 && !input[0].GetMask().IsAllZeros()) {
559 _PassOrCopyBufferInternal(
560 dataHandle, *output, input[0].GetSourceOutput(), mask, schedule);
561 return;
562 }
563
564 // Otherwise (the input does not have exactly one connection, or the mask
 // on its only connection is all zeros), provide a fresh new cache.
565 _dataManager->CreateOutputCache(
566 *output, _dataManager->GetBufferData(dataHandle));
567}
568
// Appends a stack entry for \p output to \p outputs if \p output has a valid
// output id in \p schedule. The new entry is seeded with \p lockedCacheMask
// and with the affective flag the schedule reports for the output.
//
// Returns true if an entry has been appended and false if \p output is not
// scheduled.
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 571); the signature matches the
// _PushBackOutput declaration in the class.
569template<typename DataManagerType>
570bool
572 std::vector< _OutputToExecute > *outputs,
573 const VdfMask& lockedCacheMask,
574 const VdfOutput &output,
575 const VdfSchedule &schedule)
576{
577 VdfSchedule::OutputId outputId = schedule.GetOutputId(output);
578
579 if (outputId.IsValid()) {
580 // Push the output
581 outputs->push_back(
582 _OutputToExecute(
583 outputId,
584 lockedCacheMask,
585 schedule.IsAffective(outputId)));
586 return true;
587 }
588
589 // The output to push is not actually scheduled, which guarantees
590 // that its value will never be needed by any computations. So
591 // just skip it.
592 return false;
593}
594
// Refreshes the sparse mung buffer locking (SMBL) state stored in
// \p stackEntry for \p output: the lockedCacheMask, the absorbLockedCache
// flag and the affective flag.
//
// Returns false without touching \p stackEntry if the output has no valid
// data handle or has never been invalidated.
//
// NOTE(review): the return value does not cover every modification made
// here. When the locked cache mask is still empty after the extension step,
// false is returned even though absorbLockedCache may already have been set.
// Callers that only re-read the entry on a true return will not see that
// change -- confirm this is intended.
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 597); the signature matches the
// _UpdateOutputForSMBL declaration in the class.
595template<typename DataManagerType>
596bool
598 const VdfOutput &output,
599 _OutputToExecute *stackEntry,
600 const VdfSchedule &schedule)
601{
602 VDF_PBEE_TRACE_FUNCTION();
603
604 // Retrieve the output data handle.
605 const _DataHandle dataHandle = _dataManager->GetDataHandle(output.GetId());
606 if (!_dataManager->IsValidDataHandle(dataHandle)) {
607 return false;
608 }
609
610 // Get the invalidation timestamp at the output.
611 const VdfInvalidationTimestamp invalidationTs =
612 _dataManager->GetInvalidationTimestamp(dataHandle);
613
614 // If this output has never been invalidated, bail out.
615 if (!invalidationTs) {
616 return false;
617 }
618
619 // If this output was not invalidated during the last invalidation round,
620 // do not consider it for sparse mung buffer locking. The first output that
621 // is no longer part of the last invalidation round will hold the fully
622 // locked mung buffer.
623 // Note, we also have to reset the locked cache mask when crossing the
624 // timestamp edge. If we ever reach back into a pool chain that has the
625 // current invalidation timestamp, we have to start back up with an empty
626 // locked cache mask.
627 if (invalidationTs != _dataManager->GetInvalidationTimestamp()) {
628 if (!stackEntry->lockedCacheMask.IsEmpty()) {
629 stackEntry->lockedCacheMask = VdfMask();
630 return true;
631 }
632 return false;
633 }
634
635 // Output updated?
636 bool updated = false;
637
638 // Append the data sitting at this output to the locked cache mask. This
639 // section of the code is responsible for growing the lockedCacheMask as
640 // we traverse up the pool chain.
641 VdfExecutorBufferData *outputBuffer =
642 _dataManager->GetBufferData(dataHandle);
643 VdfSMBLData *smblData = _dataManager->GetOrCreateSMBLData(dataHandle);
644 const VdfSchedule::OutputId &outputId = stackEntry->outputId;
645 const VdfMask &keepMask = schedule.GetKeepMask(outputId);
646 if (outputBuffer->GetExecutorCache() &&
647 !outputBuffer->GetExecutorCacheMask().IsEmpty() &&
648 !keepMask.IsEmpty()) {
649 smblData->ExtendLockedCacheMask(
650 &stackEntry->lockedCacheMask,
651 outputBuffer->GetExecutorCacheMask());
652 stackEntry->absorbLockedCache = true;
653 updated = true;
654 }
655
656 // If the locked cache mask is still empty, then there is no work to do.
657 if (stackEntry->lockedCacheMask.IsEmpty()) {
658 return false;
659 }
660
661 // Before determining the affective-ness of the node, ensure that the data
662 // indicated by the keep mask is stored in the executor cache, and that
663 // any bits not contained in the executor cache are not contained in the
664 // locked cache mask. Otherwise, we could be skipping nodes which really
665 // need to run in order to provide valid values to be kept.
666 if (!keepMask.IsEmpty()) {
667 smblData->RemoveUncachedMask(
668 &stackEntry->lockedCacheMask,
669 outputBuffer->GetExecutorCacheMask(),
670 keepMask);
671 updated = true;
672 }
673
674 // If this node is affective in the schedule, we may be able to get away
675 // without computing it, and making it un-affective. We determine whether
676 // this is the case by looking at the lockedCacheMask to see if it contains
677 // the scheduled affects mask.
678 if (stackEntry->affective &&
679 !smblData->ComputeAffectiveness(
680 stackEntry->lockedCacheMask,
681 schedule.GetAffectsMask(outputId))) {
682 stackEntry->affective = false;
683 return true;
684 }
685
686 // Any updates to the output?
687 return updated;
688}
689
// Evaluates \p output together with everything it depends on, using an
// explicit stack of _OutputToExecute entries instead of recursion.
//
// Each stack entry advances through the execution stages handled by the
// switch below:
//   o ExecutionStageStart: pop the entry if its node has already been
//     executed or the executor already has a value; otherwise push the
//     prerequisite inputs.
//   o ExecutionStagePreRequisitesDone: push the required read inputs and the
//     read/write inputs.
//   o any other stage: mark the node as executed in \p executedNodes, compute
//     it (or pass it through if it is not affective) and pop the entry.
// A stage that pushes nothing falls through to the next stage right away.
// The loop stops early as soon as the executor reports an interruption.
//
// NOTE(review): the line carrying the qualified function name is missing
// from this listing (numbering skips 692); the name is taken from the debug
// message below and the _ExecuteOutput declaration in the class.
690template<typename DataManagerType>
691void
693 const VdfEvaluationState &state,
694 const VdfOutput &output,
695 TfBits *executedNodes)
696{
697 TF_DEBUG(VDF_PBEE_TRACE).Msg(
698 "----------------- _ExecuteOutput --------- \n");
699
700 // The current schedule
701 const VdfSchedule &schedule = state.GetSchedule();
702
703 // Is Sparse Mung Buffer Locking enabled for this round of evaluation?
704 //
705 // Note that executors that may be interrupted, do not yet support SMBL.
706 // After interruption, a buffer that has not been fully passed down the pool
707 // chain, may contain garbage data. That same buffer may then get picked up
708 // in subsequent evaluation rounds, where it is assumed to be entirely
709 // valid.
710 const bool enableSMBL =
711 schedule.HasSMBL() && !_GetExecutor().GetInterruptionFlag();
712
713 // This is the stack of the outputs currently in the process of execution.
714 std::vector< _OutputToExecute > outputsStack;
715
716 // Add the first output to the stack.
717 _PushBackOutput(&outputsStack, VdfMask(), output, schedule);
718
719 while (!outputsStack.empty()) {
720
721 // If we've been interrupted, bail out.
722 if (_GetExecutor().HasBeenInterrupted()) {
723 break;
724 }
725
726 // Stack Top State
 // These are copies of the top entry's fields, taken before anything is
 // pushed onto the stack in this iteration.
727 VdfSchedule::OutputId outputId = outputsStack.back().outputId;
728 bool affective = outputsStack.back().affective;
729 VdfMask lockedCacheMask = outputsStack.back().lockedCacheMask;
730 bool absorbLockedCache = outputsStack.back().absorbLockedCache;
731
732 // Temporary State
 // Note that the local 'output' below shadows the function parameter of
 // the same name for the remainder of the loop body.
733 const VdfMask *requestMask = NULL;
734 const VdfOutput *output = NULL;
735 const VdfNode &node = *schedule.GetNode(outputId);
736 bool added = false;
737
738 switch (outputsStack.back().stage) {
739
740 case ExecutionStageStart:
741
742 TF_DEBUG(VDF_PBEE_TRACE)
743 .Msg("{ BeginNode(\"%s\");\n", node.GetDebugName().c_str());
744
745 // We have to compute if
746 // o The node has not been executed, yet
747 // o The output is dirty
748 // o The cache is empty
749 // o The computed mask doesn't cover what is asked for in the
750 // schedule.
751 output = schedule.GetOutput(outputId);
752 requestMask = &schedule.GetRequestMask(outputId);
753 if (executedNodes->IsSet(schedule.GetScheduleNodeIndex(outputId)) ||
754 _GetExecutor().GetOutputValue(*output, *requestMask)) {
755
756 // Pop off the top of the output stack
757 outputsStack.pop_back();
758
759 TF_DEBUG(VDF_PBEE_TRACE).Msg(" EndNodeFoundCache(); }\n");
760 continue;
761 }
762
763 // Update the output for SMBL. This refreshes the affective-ness
764 // flag, the lockedCacheMask and the flag that indicates whether
765 // the locked cache should be absorbed into the executor cache.
766 if (enableSMBL && Vdf_IsPoolOutput(*output)) {
767 // Update the top of the output stack. Since no new outputs
768 // have been pushed onto the stack at this point, the top is
769 // still the output we are currently executing.
770 _OutputToExecute *stackTop = &outputsStack.back();
771 if (_UpdateOutputForSMBL(*output, stackTop, schedule)) {
772 affective = stackTop->affective;
773 lockedCacheMask = stackTop->lockedCacheMask;
774 absorbLockedCache = stackTop->absorbLockedCache;
775 }
776 }
777
778 // The first stage of computation is to execute all the
779 // prerequisites for current output. So we push them on our stack
780 // and wait for them to be computed.
781
782 // Mark that we've processed the prerequisites for this output.
783 outputsStack.back().stage = ExecutionStagePreRequisitesDone;
784
785 // Push back all the prerequisites if this output will do anything
786 if (affective) {
787 for (const VdfScheduleInput &input : schedule.GetInputs(node)) {
788 if (input.input->GetSpec().IsPrerequisite()) {
789 added |= _PushBackOutput(
790 &outputsStack, VdfMask(), *input.source, schedule);
791 }
792 }
793 }
794
795 // If we added inputs then we want to go back to the top of the
796 // loop and execute our inputs, otherwise we will fall through to
797 // the next stage.
798 if (added) {
799 break;
800 } // else fall through to the next stage.
801
802 case ExecutionStagePreRequisitesDone:
803
804 // Now that all the prerequisites are done, the second stage
805 // of computation is to use the prerequisites to determine what
806 // other inputs we need to run to satisfy the current output.
807
808 // Mark that all the inputs have now been processed for the
809 // current output.
810 outputsStack.back().stage = ExecutionStageCompute;
811
812 // Note that outputs added are executed in reverse order. So we
813 // push last the nodes that we want to run first.
814
815 // Only run the reads if the output is expected to modify
816 // anything.
817 if (affective) {
818
819 // Get the list of required inputs based on the prerequisite
820 // computations.
821 VdfRequiredInputsPredicate inputsPredicate =
822 node.GetRequiredInputsPredicate(VdfContext(state, node));
823
824 // Run the required reads last.
825 // Here we try to run the "read" inputs after the "read/write"
826 // inputs.
827 if (inputsPredicate.HasRequiredReads()) {
828 for (const VdfScheduleInput &input :
829 schedule.GetInputs(node)) {
830 if (inputsPredicate.IsRequiredRead(*input.input)) {
831 added |= _PushBackOutput(
832 &outputsStack, VdfMask(),
833 *input.source, schedule);
834 }
835 }
836 }
837 }
838
839 // Run the read/writes first, so that we can maximize the chance of
840 // being able to re-use the kept buffers for speculations.
 // Unlike the reads above, these are pushed regardless of the
 // affective flag, and they inherit the current lockedCacheMask.
841 for (const VdfScheduleInput &input : schedule.GetInputs(node)) {
842 const VdfOutput *assocOutput =
843 input.input->GetAssociatedOutput();
844 if (!assocOutput) {
845 continue;
846 }
847
848 // Does this output have a pass-through scheduled?
849 const VdfSchedule::OutputId &assocOutputId =
850 schedule.GetOutputId(*assocOutput);
851 if (assocOutputId.IsValid()) {
852 if (const VdfOutput *fromBufferOutput =
853 schedule.GetFromBufferOutput(assocOutputId)) {
854 added |= _PushBackOutput(
855 &outputsStack, lockedCacheMask,
856 *fromBufferOutput, schedule);
857 continue;
858 }
859 }
860
861 // If the associated output is not scheduled, or it does not
862 // have a pass-through scheduled, we need to consider all
863 // connected source outputs!
864 added |= _PushBackOutput(
865 &outputsStack, lockedCacheMask, *input.source, schedule);
866 }
867
868 // If we added inputs then we want to go back to the top of the
869 // loop and execute our inputs, otherwise we will fall through to
870 // the next stage.
871 if (added) {
872 break;
873 } // else fall through to the next stage.
874
 // Handles every stage without its own case label, and is also reached
 // by falling through from the stage above.
875 default:
876
877 // Set a bit indicating that this node has been executed.
878 executedNodes->Set(schedule.GetScheduleNodeIndex(outputId));
879
880 // Compute the node.
881 if (affective) {
882 _ComputeNode(state, node, absorbLockedCache);
883
884 TF_DEBUG(VDF_PBEE_TRACE).Msg(
885 "ComputedNode(\"%s\"); }\n", node.GetDebugName().c_str());
886
887 } else {
888 // The node doesn't have any outputs that need to be computed.
889 // Skip the node passing through the data for read/write
890 // outputs.
891 _PassThroughNode(schedule, node, absorbLockedCache);
892
893 TF_DEBUG(VDF_PBEE_TRACE)
894 .Msg("ComputedNodeInaffective(\"%s\"); }\n",
895 node.GetDebugName().c_str());
896 }
897
898 // Pop the output off the stack, once we are done with it
899 outputsStack.pop_back();
900 }
901 }
902}
903
// Computes \p node by invoking its Compute() callback, preparing the buffers
// of all of its scheduled outputs beforehand and finalizing them afterwards.
//
// Before the callback, for every scheduled output: the executor cache is
// retained if \p absorbLockedCache is set (or if the output still holds a
// cache and is not passing its buffer), the executor cache is reset, the
// output is touched, and read/write outputs get their buffer prepared and
// their computed output mask set to the request mask.
//
// After the callback: the output the schedule marks to be cleared is reset,
// outputs without an associated input that were left without a value are
// reported with a warning (unless interrupted), and every output is
// finalized via _FinalizeComputedOutput.
//
// NOTE(review): several lines are missing from this listing (numbering skips
// 906, 915 and 1034), i.e. the qualified function name, the declaration that
// 'compute(stats, ...)' belongs to, and the call whose arguments follow the
// warning below -- confirm against the real header.
904template<typename DataManagerType>
905void
907 const VdfEvaluationState &state,
908 const VdfNode &node,
909 bool absorbLockedCache)
910{
911 VDF_PBEE_TRACE_FUNCTION();
912
913 VdfExecutionStats *stats = _executor.GetExecutionStats();
914
916 compute(stats, node, VdfExecutionStats::NodeEvaluateEvent);
917
918 if (stats) {
919 stats->LogTimestamp(VdfExecutionStats::NodeDidComputeEvent, node);
920 }
921
922
923 // The current schedule.
924 const VdfSchedule &schedule = state.GetSchedule();
925
926 // Clear the acceleration structure for output data lookups.
927 _dataHandleCache.clear();
928
929 VDF_FOR_EACH_SCHEDULED_OUTPUT_ID(outputId, schedule, node) {
930 const VdfOutput &output = *schedule.GetOutput(outputId);
931
932 // Retrieve the data handle and cache it for accelerated lookup below.
 // The second loop over the scheduled outputs reads the handles back by
 // position, so it relies on visiting the outputs in the same order.
933 const _DataHandle dataHandle =
934 _dataManager->GetOrCreateDataHandle(output.GetId());
935 _dataHandleCache.push_back(dataHandle);
936
937 // Retrieve the buffer data associated with the handle.
938 VdfExecutorBufferData *bufferData =
939 _dataManager->GetBufferData(dataHandle);
940
941 // If this output still contains data (i.e., invalidation did not
942 // remove the cache), it may have been locked and we may want to retain
943 // the data to absorb it shortly.
944 if (absorbLockedCache ||
945 (bufferData->GetExecutorCache() &&
946 _IsNotPassing(output, outputId, schedule))) {
947 bufferData->RetainExecutorCache(
948 output.GetSpec(),
949 _dataManager->GetOrCreateSMBLData(dataHandle));
950 }
951
952 // Before we compute the output, we have to make sure that all
953 // the recipients of its cache are cleared and that the cache is
954 // reclaimed by output.
955 bufferData->ResetExecutorCache();
956
957 // Mark the output as having been touched during evaluation.
958 _dataManager->Touch(dataHandle);
959
960 // If this is a read/write output, make sure the buffer has been
961 // passed down. We also need to set the computed output mask here,
962 // because the node will read input values of read/write inputs
963 // directly at this output.
964 // Note, that on interruption this mask must be reset!
965 if (const VdfInput *ai = output.GetAssociatedInput()) {
966 const VdfMask &requestMask = schedule.GetRequestMask(outputId);
967 _PrepareReadWriteBuffer(dataHandle, *ai, requestMask, schedule);
968 _dataManager->SetComputedOutputMask(bufferData, requestMask);
969 }
970 }
971
972 // Compute the node
973 {
974 VDF_PBEE_TRACE_SCOPE(
975 "VdfPullBasedExecutorEngine<T>::_ComputeNode "
976 "(node callback)");
977
978 node.Compute(VdfContext(state, node));
979 }
980
981 // Has the node been interrupted during execution?
982 const bool hasBeenInterrupted = _GetExecutor().HasBeenInterrupted();
983
984 // Deallocate temporary buffers which the schedule knows can be deallocated
985 // now that this node has run (they will never be read again before they
986 // are deallocated due to invalidation).
987 if (const VdfOutput* ctd = schedule.GetOutputToClear(node)) {
988 // Fetch the data handle directly from _dataManager, rather than
989 // through a virtual method, because we only ever want to eagerly clear
990 // temporary buffers in our own data manager (never a parent's).
991 const _DataHandle dataHandle =
992 _dataManager->GetDataHandle(ctd->GetId());
993 if (_dataManager->IsValidDataHandle(dataHandle)) {
994 _dataManager->GetBufferData(dataHandle)->Reset();
995 }
996 }
997
998 // We now need to mark the computed parts of our vectors.
999 size_t outputIndex = 0;
1000 VDF_FOR_EACH_SCHEDULED_OUTPUT_ID(outputId, schedule, node) {
1001 const VdfOutput &output = *schedule.GetOutput(outputId);
1002 const VdfMask &requestMask = schedule.GetRequestMask(outputId);
1003
1004 // Retrieve the data handle from the cache.
1005 const _DataHandle dataHandle = _dataHandleCache[outputIndex++];
1006
1007 // Retrieve the buffer data associated with the handle.
1008 VdfExecutorBufferData *bufferData =
1009 _dataManager->GetBufferData(dataHandle);
1010
1011 // Check to see if the node did indeed produce values for this
1012 // output. We don't want to post warnings for missing output values
1013 // if the node has been interrupted.
1014 if (!hasBeenInterrupted &&
1015 !output.GetAssociatedInput() &&
1016 !bufferData->GetExecutorCache()) {
1017
1018 // This is an output without an associated input that has
1019 // no value even though it was requested. (We know it is
1020 // requested because otherwise, it wouldn't be in the schedule,
1021 // because of VdfScheduler::_RemoveTrivialNodes.)
1022 TF_WARN(
1023 "No value set for output " + output.GetDebugName() +
1024 " of type " + output.GetSpec().GetType().GetTypeName() +
1025 " named " + output.GetName().GetString());
1026
1027 //XXX: This is not 100% right when we use a single data flow
1028 // element to hold multiple values (as we do for shaped
1029 // attributes). FillVector() would need to know that this
1030 // is the case and it would need to know the # of values
1031 // to package into the output. This can happen anywhere
1032 // in the network, but for now, I only added a workaround
1033 // in the EfCopyToPoolNode.
1035 output.GetSpec().GetType(),
1036 requestMask.GetSize(),
1037 _dataManager->GetOrCreateOutputValueForWriting(
1038 output, dataHandle));
1039 }
1040
1041 // If the node has been interrupted, make sure to reset the computed
1042 // output mask: Read/writes will already have their mask set.
1043 _FinalizeComputedOutput(
1044 dataHandle,
1045 requestMask,
1046 hasBeenInterrupted,
1047 _IsNotPassing(output, outputId, schedule));
1048
1049 // Log stats
1050 if (stats) {
 // Note that this local 'node' shadows the function parameter of the
 // same name; it is obtained from the output instead.
1051 const VdfNode& node = output.GetNode();
1052
1053 stats->LogData(
1054 VdfExecutionStats::ElementsProcessedEvent,
1055 node,
1056 schedule.GetAffectsMask(outputId).GetNumSet());
1057 }
1058 }
1059}
1060
1061template<typename DataManagerType>
1062bool
1064 const VdfSchedule &schedule,
1065 const VdfNode &node,
1066 bool absorbLockedCache)
1067{
1068 VDF_PBEE_TRACE_FUNCTION();
1069
1070 bool passedThrough = false;
1071
1072 VdfExecutionStats *stats = _executor.GetExecutionStats();
1074 compute(stats, node, VdfExecutionStats::NodeEvaluateEvent);
1075
1076 VDF_FOR_EACH_SCHEDULED_OUTPUT_ID(outputId, schedule, node) {
1077 const VdfOutput &output = *schedule.GetOutput(outputId);
1078 const VdfMask &requestMask = schedule.GetRequestMask(outputId);
1079
1080 // Retrieve the data handle.
1081 const _DataHandle dataHandle =
1082 _dataManager->GetOrCreateDataHandle(output.GetId());
1083
1084 // Get the buffer data associated with the data handle.
1085 VdfExecutorBufferData *bufferData =
1086 _dataManager->GetBufferData(dataHandle);
1087
1088 // If this output still contains data (i.e., invalidation did not
1089 // remove the cache), it may have been locked and we may want to retain
1090 // the data to absorb it shortly.
1091 if (absorbLockedCache ||
1092 (bufferData->GetExecutorCache()
1093 && _IsNotPassing(output, outputId, schedule))) {
1094 bufferData->RetainExecutorCache(
1095 output.GetSpec(),
1096 _dataManager->GetOrCreateSMBLData(dataHandle));
1097 }
1098
1099 // Before we pass the output data through, we have to make sure that
1100 // all the recipients of its cache are cleared and that the cache is
1101 // reclaimed by output.
1102 bufferData->ResetExecutorCache();
1103
1104 // Marked the output as having been touched during evaluation, in order
1105 // for invalidation to consider this output.
1106 _dataManager->Touch(dataHandle);
1107
1108 if (const VdfOutput *fromBufferOutput =
1109 schedule.GetFromBufferOutput(outputId)) {
1110
1111 _PassOrCopySourceOutputBuffer(
1112 dataHandle, output, *fromBufferOutput, requestMask, schedule);
1113
1114 passedThrough = true;
1115
1116 } else if (const VdfInput *ai = output.GetAssociatedInput()) {
1117
1118 // We better have one and only one connection on this input
1119 // connector. Otherwise we can't pass anything through.
1120 TF_DEV_AXIOM(output.GetAssociatedInput()->GetNumConnections()==1);
1121
1122 // If the output has an associated input, pass the data through.
1123 _PrepareReadWriteBuffer(dataHandle, *ai, requestMask, schedule);
1124 passedThrough = true;
1125 }
1126
1127 // Finalize the computed output, by merging in any temporary data and
1128 // setting the appropriate computed output mask.
1129 _FinalizeComputedOutput(
1130 dataHandle,
1131 requestMask,
1132 false, /* hasBeenInterrupted */
1133 _IsNotPassing(output, outputId, schedule));
1134 }
1135
1136 return passedThrough;
1137}
1138
1139template<typename DataManagerType>
1140void
1142 const _DataHandle dataHandle,
1143 const VdfMask &requestMask,
1144 const bool hasBeenInterrupted,
1145 const bool extendRequestMask)
1146{
1147 // Retrieve the buffer data associated with the data handle.
1148 VdfExecutorBufferData *bufferData = _dataManager->GetBufferData(dataHandle);
1149
1150 // Merge in temporary data, if available. Note, we must release the
1151 // SMBL data despite any possible interruption!
1152 VdfMask lockedMask =
1153 bufferData->ReleaseExecutorCache(_dataManager->GetSMBLData(dataHandle));
1154
1155 // Has the executor been interrupted? Make sure to reset the computed
1156 // output mask, so that subsequent cache hits do not return garbage data.
1157 if (hasBeenInterrupted) {
1158 _dataManager->SetComputedOutputMask(bufferData, VdfMask());
1159 }
1160
1161 // Otherwise, set the computed output mask to the request mask.
1162 else {
1163 // If extendRequestMask is set and the cache's mask is non-empty,
1164 // copy the bits merge the requestMask and the cacheMask. Otherwise,
1165 // set using the standard requestMask.
1166 _dataManager->SetComputedOutputMask(
1167 bufferData,
1168 extendRequestMask && !lockedMask.IsEmpty() ?
1169 lockedMask | requestMask :
1170 requestMask);
1171 }
1172}
1173
1175
1176PXR_NAMESPACE_CLOSE_SCOPE
1177
1178#endif
Fast bit array that keeps track of the number of bits set and can find the next set in a timely manner.
Definition bits.h:49
void Set(size_t index)
Sets bit # index to one.
Definition bits.h:377
bool IsSet(size_t index) const
Returns true, if bit # index is set.
Definition bits.h:412
Scoped (i.e. local) object for creating/destroying memory tags.
Definition mallocTag.h:251
A context is the parameter bundle passed to callbacks of computations.
Definition context.h:40
This object holds state that remains persistent during one round of network evaluation.
const VdfSchedule & GetSchedule() const
The schedule used for evaluation.
Execution stats profiling event logger.
void LogData(EventType event, const VdfNode &node, EventData data)
Log event API.
void LogTimestamp(EventType event, const VdfNode &node)
Log timestamp API.
static VDF_API void FillVector(TfType type, size_t numElements, VdfVector *vector)
Fills vector with the fallback value registered for the given type.
This object is responsible for storing the executor buffer data, comprised of the executor cache vect...
void ResetExecutorCache(const VdfMask &mask)
Reset the executor cache without releasing any memory and set the executor cache mask to mask.
VDF_API void RetainExecutorCache(const VdfOutputSpec &spec, VdfSMBLData *smblData)
Takes the existing executor cache and retains it within the existing VdfSMBLData object.
const VdfMask & GetExecutorCacheMask() const
Get the available mask.
VdfVector * GetExecutorCache() const
Returns the executor cache stored at this buffer data instance.
VDF_API VdfMask ReleaseExecutorCache(VdfSMBLData *smblData)
Merges the executor cache previously retained in smblData into this cache and releases the SMBL data.
A client may instantiate an object of this class and set it in an executor, to collect errors that ma...
Abstract base class for classes that execute a VdfNetwork to compute a requested set of values.
A VdfInput is used to connect a VdfNode to one or more VdfNodes' outputs.
Definition input.h:36
size_t GetNumConnections() const
Returns the number of connections for this input.
Definition input.h:58
const VdfOutput * GetAssociatedOutput() const
Returns the output corresponding to this input.
Definition input.h:82
A VdfMask is placed on connections to specify the data flowing through them.
Definition mask.h:37
size_t GetSize() const
Returns the size of the mask.
Definition mask.h:158
bool IsAnySet() const
Returns true, if there is at least a single set entry.
Definition mask.h:216
bool IsEmpty() const
Returns true if this mask is empty, i.e. it is of size zero.
Definition mask.h:168
bool Contains(const VdfMask &mask) const
Returns true if mask is a subset-of or equal to this mask, false otherwise.
Definition mask.h:186
size_t GetNumSet() const
Returns the number of set bits in the mask.
Definition mask.h:246
Class to hold on to an externally owned output and a mask.
VdfOutput * GetOutput() const
Returns the VdfOutput.
const VdfMask & GetMask() const
Returns the VdfMask.
This is the base class for all nodes in a VdfNetwork.
Definition node.h:53
VDF_API const std::string GetDebugName() const
Returns the debug name for this node, if one is registered.
virtual VDF_API VdfRequiredInputsPredicate GetRequiredInputsPredicate(const VdfContext &context) const
Returns a predicate, determining whether a given input and its connections are required in order to f...
virtual void Compute(const VdfContext &context) const =0
This is the method called to perform computation.
A VdfOutput represents an output on a node.
Definition output.h:32
const VdfNode & GetNode() const
Returns the owning node for this output.
Definition output.h:57
VDF_API std::string GetDebugName() const
Returns the debug name for this output.
VdfId GetId() const
The unique id of this output.
Definition output.h:100
This class is a collection of common functions used by pull-based executors.
VdfVector * _PassOrCopySourceOutputBuffer(const _DataHandle dataHandle, const VdfOutput &output, const VdfOutput &source, const VdfMask &inputMask, const VdfSchedule &schedule)
Fast path for when we know ahead of time the output from which we wish to pass the buffer (or copy) a...
VdfPullBasedExecutorEngine(const VdfExecutorInterface &executor, DataManagerType *dataManager)
Constructor.
void _ComputeNode(const VdfEvaluationState &state, const VdfNode &node, bool absorbLockedCache=false)
Computes node.
static bool _IsNotPassing(const VdfOutput &output, const VdfSchedule::OutputId &outputId, const VdfSchedule &schedule)
Returns true if the output is associative but does not pass the buffer to another output.
void _PrepareReadWriteBuffer(const _DataHandle dataHandle, const VdfInput &input, const VdfMask &mask, const VdfSchedule &schedule)
Prepares a buffer for a read/write output.
void RunSchedule(const VdfSchedule &schedule, const VdfRequest &computeRequest, VdfExecutorErrorLogger *errorLogger)
Executes the given schedule with a computeRequest and an optional errorLogger.
_ExecutionStage
This enum describes the stages that a node goes through in execution.
DataManagerType * _GetDataManager()
Returns the data manager used by this engine.
VdfVector * _CopyCache(const VdfOutput &toOutput, VdfExecutorBufferData *toBuffer, const VdfOutput &fromOutput, const VdfMask &fromMask) const
Helper method to _PrepareReadWriteBuffer that copies the cache from fromOutput to toOutput.
const VdfExecutorInterface & _GetExecutor()
Returns the executor running this engine.
VdfSpeculationExecutorEngine< DataManagerType > SpeculationExecutorEngine
The equivalent speculation executor engine.
VdfVector * _PassOrCopyBufferInternal(const _DataHandle dataHandle, const VdfOutput &output, const VdfOutput &source, const VdfMask &inputMask, const VdfSchedule &schedule) const
Common method for _PrepareReadWriteBuffer and _PassOrCopySourceOutputBuffer that attempts to pass the...
bool _PassThroughNode(const VdfSchedule &schedule, const VdfNode &node, bool absorbLockedCache=false)
Causes the outputs with associated inputs in node to have their data passed through.
DataManagerType::DataHandle _DataHandle
The data handle type from the data manager implementation.
This predicate determines whether a given input value is needed to fulfill the input dependencies req...
bool IsRequiredRead(const VdfInput &input) const
Is this input a required read? Note that read/writes as well as prerequisite inputs are not required ...
bool HasRequiredReads() const
Are any inputs required?
VdfSMBLData holds per-output data that is meant to be consumed by the executor.
Definition smblData.h:31
void RemoveUncachedMask(VdfMask *lockedCacheMask, const VdfMask &cacheMask, const VdfMask &keepMask)
Make sure that all the bits in the keepMask are provided by the cacheMask.
Definition smblData.h:250
bool ComputeAffectiveness(const VdfMask &lockedCacheMask, const VdfMask &affectsMask)
Computes the affectiveness of the corresponding output given the accumulated lockedCacheMask and the ...
Definition smblData.h:87
void ExtendLockedCacheMask(VdfMask *lockedCacheMask, const VdfMask &cacheMask)
Extends the lockedCacheMask by appending the bits stored in the executor cacheMask.
Definition smblData.h:65
An OutputId is a small key object that, once obtained for a particular VdfOutput, can be used to quer...
Definition schedule.h:91
bool IsValid() const
Returns whether this OutputId can be used to make queries about an output's scheduling.
Definition schedule.h:97
Contains a specification of how to execute a particular VdfNetwork.
Definition schedule.h:41
VDF_API const VdfMask & GetAffectsMask(const OutputId &outputId) const
Returns the affects mask associated with the given OutputId.
const VdfNetwork * GetNetwork() const
Returns the network for this schedule.
Definition schedule.h:178
VDF_API OutputId GetOutputId(const VdfOutput &output) const
Returns a small, cheap OutputId, which can be passed to other Get* methods in this class to efficient...
VDF_API const VdfMask & GetRequestMask(const OutputId &outputId) const
Returns the request mask associated with the given OutputId.
VDF_API const VdfOutput * GetFromBufferOutput(const OutputId &outputId) const
Returns the "from buffer's" output associated with the given OutputId.
VDF_API InputsRange GetInputs(const VdfNode &node) const
Returns a range of inputs scheduled for the given node.
int GetScheduleNodeIndex(const OutputId &outputId) const
Returns the node index of the schedule node associated with the given outputId.
Definition schedule.h:514
VDF_API const VdfMask & GetKeepMask(const OutputId &outputId) const
Returns the keep mask associated with the given OutputId.
VDF_API const VdfOutput * GetPassToOutput(const OutputId &outputId) const
Returns the "pass to" output associated with the given OutputId.
VDF_API const VdfNode * GetNode(const OutputId &outputId) const
Returns the VdfNode that owns the VdfOutput associated with the given outputId.
VDF_API const VdfOutput * GetOutputToClear(const VdfNode &node) const
Returns the output whose temporary buffer can be immediately deallocated after node has finished exec...
ScheduleNodeVector & GetScheduleNodeVector()
Returns the vector of schedule nodes in this schedule.
Definition schedule.h:503
VDF_API bool IsAffective(const OutputId &outputId) const
Returns true if the output is expected to have an effect on its corresponding input,...
VDF_API const VdfOutput * GetOutput(const OutputId &outputId) const
Returns the scheduled VdfOutput associated with the given OutputId.
bool HasSMBL() const
Returns true if this schedule participates in sparse mung buffer locking.
Definition schedule.h:353
This class provides an executor engine to the speculation executor.
This class is used to abstract away knowledge of the cache data used for each node.
Definition vector.h:56
void Copy(const VdfVector &rhs, const VdfMask &mask)
Copies the contents of rhs into this vector.
Definition vector.h:281
unsigned int VdfInvalidationTimestamp
Type of the timestamp that identifies the most recent round of invalidation.
Definition types.h:74
#define TF_DEBUG(enumVal)
Evaluate and print debugging message msg if enumVal is enabled for debugging.
Definition debug.h:501
#define TF_DEV_AXIOM(cond)
The same as TF_AXIOM, but compiled only in dev builds.
Definition diagnostic.h:205
#define TF_WARN(...)
Issue a warning, but continue execution.
Definition diagnostic.h:132
VDF_API const VdfOutput * VdfGetAssociatedSourceOutput(const VdfOutput &output)
Returns the output that is the source of the associated input of output, if any and NULL otherwise.
bool Vdf_IsPoolOutput(const VdfOutput &output)
Returns true if output is a pool output, i.e., an output that has an associated input,...
This class contains scheduling information for an input.
Scoped event that automatically pushes and pops malloc tags for the given VdfNode.