Loading...
Searching...
No Matches
pullBasedExecutorEngine.h
Go to the documentation of this file.
1//
2// Copyright 2025 Pixar
3//
4// Licensed under the terms set forth in the LICENSE.txt file available at
5// https://openusd.org/license.
6//
7#ifndef PXR_EXEC_VDF_PULL_BASED_EXECUTOR_ENGINE_H
8#define PXR_EXEC_VDF_PULL_BASED_EXECUTOR_ENGINE_H
9
11
12#include "pxr/pxr.h"
13
21#include "pxr/exec/vdf/mask.h"
23#include "pxr/exec/vdf/node.h"
24#include "pxr/exec/vdf/output.h"
29#include "pxr/exec/vdf/vector.h"
30
31#include "pxr/base/tf/bits.h"
34
35PXR_NAMESPACE_OPEN_SCOPE
36
37// The TRACE_SCOPE (and FUNCTION) invocations in this file can be pretty
38// expensive. We turn off most of them. However, they are still useful to
39// track down performance issues, which is why we have a quick way of enabling
40// them, by setting _VDF_PBEE_PROFILING_ON to 1.
41#define _VDF_PBEE_PROFILING_ON 0
42#if _VDF_PBEE_PROFILING_ON
// Profiling enabled: the VDF_PBEE_* macros forward to the regular trace macros.
43#define VDF_PBEE_TRACE_FUNCTION TRACE_FUNCTION
44#define VDF_PBEE_TRACE_SCOPE TRACE_SCOPE
45#else
// Profiling disabled (the setting above): both macros expand to nothing, so
// the instrumented call sites compile down to empty statements.
46#define VDF_PBEE_TRACE_FUNCTION()
47#define VDF_PBEE_TRACE_SCOPE(name)
48#endif
49
52
53// Forward declare the speculation executor engine with equivalent traits to
54// this executor engine.
55template <typename> class VdfSpeculationExecutorEngine;
56
65
// Pull-based executor engine class template, parameterized on the data
// manager type that hands out the per-output data handles and buffers.
// NOTE(review): the class declarator (listing line 67) and several member
// declarator lines are absent from this extracted listing -- confirm against
// the original header before relying on the notes below.
66template <typename DataManagerType>
68{
69public:
70
75 typedef
78
// Constructor (declarator line absent from this listing): stores the executor
// reference and the data manager pointer in the members of the same name.
82 const VdfExecutorInterface &executor,
83 DataManagerType *dataManager) :
84 _executor(executor),
85 _dataManager(dataManager)
86 {}
87
// Overload without a callback (declarator and call lines absent from this
// listing): forwards its arguments together with a no-op callback lambda,
// presumably to the callback overload of RunSchedule declared below.
92 const VdfSchedule &schedule,
93 const VdfRequest &computeRequest,
94 VdfExecutorErrorLogger *errorLogger) {
96 schedule, computeRequest, errorLogger,
97 [](const VdfMaskedOutput &, size_t){});
98 }
99
// Runs the schedule for \p computeRequest and invokes \p callback with the
// masked output and its request index for every requested output that has
// been evaluated (or was already cached).
105 template < typename F >
106 void RunSchedule(
107 const VdfSchedule &schedule,
108 const VdfRequest &computeRequest,
109 VdfExecutorErrorLogger *errorLogger,
110 F &&callback);
111
112protected:
113
// Handle type the data manager uses to refer to per-output data.
116 typedef typename DataManagerType::DataHandle _DataHandle;
117
121
// Enumerators of the execution stage an output on the execution stack can be
// in (the enum declarator line is absent from this listing; the type is
// referred to as _ExecutionStage further down).
122 ExecutionStageStart, // Nodes start in this stage.
123
124 ExecutionStagePreRequisitesDone, // After prerequisites have been
125 // computed but before rest of
126 // inputs have been computed.
127
128 ExecutionStageReadsDone, // After the reads have finished --
129 // only needed for speculation engine.
130
131 ExecutionStageCompute, // Final stage before node computation.
132
133 };
134
135
// Prepares the scheduled outputs of \p node, invokes the node's compute
// callback and finalizes the output buffers. \p absorbLockedCache requests
// that existing executor caches be retained before they are reset.
140 void _ComputeNode(
141 const VdfEvaluationState &state,
142 const VdfNode &node,
143 bool absorbLockedCache = false);
144
// Passes data through \p node instead of computing it; used for nodes that
// are not affective.
157 bool _PassThroughNode(
158 const VdfSchedule &schedule,
159 const VdfNode &node,
160 bool absorbLockedCache = false);
161
162
// Parameter list of a const member whose declarator line is absent from this
// listing; it matches the out-of-line definition tagged "_CopyCache".
167 const VdfOutput &toOutput,
168 VdfExecutorBufferData *toBuffer,
169 const VdfOutput &fromOutput,
170 const VdfMask &fromMask) const;
171
// Returns the executor this engine was constructed with.
174 const VdfExecutorInterface &_GetExecutor() { return _executor; }
175
// Returns the data manager this engine was constructed with.
178 DataManagerType *_GetDataManager() { return _dataManager; }
179
// Parameter list of a non-const member whose declarator line is absent from
// this listing (presumably _PassOrCopySourceOutputBuffer -- confirm).
186 const _DataHandle dataHandle,
187 const VdfOutput &output,
188 const VdfOutput &source,
189 const VdfMask &inputMask,
190 const VdfSchedule &schedule);
191
192
// Same parameters as above, but const (presumably _PassOrCopyBufferInternal
// -- confirm; the declarator line is absent from this listing).
198 const _DataHandle dataHandle,
199 const VdfOutput &output,
200 const VdfOutput &source,
201 const VdfMask &inputMask,
202 const VdfSchedule &schedule) const;
203
// Parameter list of a member whose declarator line is absent from this
// listing (presumably _PrepareReadWriteBuffer -- confirm).
210 const _DataHandle dataHandle,
211 const VdfInput &input,
212 const VdfMask &mask,
213 const VdfSchedule &schedule);
214
// Returns true if \p output has an associated input and the schedule lists
// no pass-to output for \p outputId.
219 inline static bool _IsNotPassing(
220 const VdfOutput& output,
221 const VdfSchedule::OutputId& outputId,
222 const VdfSchedule& schedule)
223 {
224 return output.GetAssociatedInput() &&
225 !schedule.GetPassToOutput(outputId);
226 }
227
228private:
229
230 // This struct contains the necessary state to compute an output.
231 //
232 struct _OutputToExecute {
233
234 // Constructor that takes the schedule output id, the locked cache mask
 // and the affective flag. The stage always starts at
 // ExecutionStageStart and absorbLockedCache always starts out false.
235 _OutputToExecute(
236 const VdfSchedule::OutputId &outputId,
237 const VdfMask &lockedCacheMask,
238 bool affective) :
239 outputId(outputId),
240 stage(ExecutionStageStart),
241 lockedCacheMask(lockedCacheMask),
242 affective(affective),
243 absorbLockedCache(false)
244 { }
245
246 // The schedule identifier for the output to execute.
247 VdfSchedule::OutputId outputId;
248
249 // The current phase of this output in the execution stack.
250 _ExecutionStage stage;
251
252 // Current state of the locked cache
253 VdfMask lockedCacheMask;
254
255 // Determines the affective-ness of the output
256 bool affective;
257
258 // Absorb the locked cache for SMBL
259 bool absorbLockedCache;
260
261 };
262
263 // Helper method to _ExecuteOutput.
264 //
265 // This method adds \p output to the \p outputs vector.
266 // Returns \c true if it added a new output and \c false otherwise.
267 //
268 bool _PushBackOutput(
269 std::vector< _OutputToExecute > *outputs,
270 const VdfMask &lockedCacheMask,
271 const VdfOutput &output,
272 const VdfSchedule &schedule);
273
274 // Executes the given \p output.
275 //
276 void _ExecuteOutput(
277 const VdfEvaluationState &state,
278 const VdfOutput &output,
279 TfBits *executedNodes);
280
281 // Finalize the output buffer after computing or passing through. This
282 // sets the computed output mask as well as merges in any data that has
283 // been temporarily held on to.
284 //
285 void _FinalizeComputedOutput(
286 const _DataHandle dataHandle,
287 const VdfMask &requestMask,
288 const bool hasBeenInterrupted,
289 const bool extendRequestMask);
290
291 // Update the output stack entry for SMBL. This refreshes the affectiveness
292 // flag, the lockedCacheMask and the flag that determines whether the
293 // locked cache needs to be absorbed into the executor cache. This method
294 // returns true if any of the relevant flags on the stackEntry object have
295 // been modified.
296 //
297 bool _UpdateOutputForSMBL(
298 const VdfOutput &output,
299 _OutputToExecute *stackEntry,
300 const VdfSchedule &schedule);
301
302 // The executor that uses this engine.
303 //
304 const VdfExecutorInterface &_executor;
305
306 // The data manager for this engine.
307 //
308 DataManagerType *_dataManager;
309
310 // Acceleration structure used for caching output handles, which may be
311 // repeatedly looked up in the same order.
312 //
313 std::vector<_DataHandle> _dataHandleCache;
314
315};
316
318
// RunSchedule, callback overload (the declarator line is absent from this
// listing).
//
// Resizes the data manager for the schedule's network, then walks the indexed
// view of \p computeRequest. Outputs whose value is already available from
// the executor only trigger \p callback; all other outputs are evaluated via
// _ExecuteOutput first. As soon as the executor reports an interruption the
// loop stops, and \p callback is not invoked for the interrupted output.
//
// \p callback is invoked as callback(maskedOutput, requestIndex).
319template<typename DataManagerType>
320template<typename F>
321void
323 const VdfSchedule &schedule,
324 const VdfRequest &computeRequest,
325 VdfExecutorErrorLogger *errorLogger,
326 F &&callback)
327{
328 TRACE_FUNCTION();
329
330 // Make sure the data manager is appropriately sized.
331 _dataManager->Resize(*schedule.GetNetwork());
332
333 // Indicates which nodes have been executed. One bit per schedule node,
 // shared across all requested outputs of this run.
334 TfBits executedNodes(schedule.GetScheduleNodeVector().size());
335
336 // The persistent evaluation state
337 VdfEvaluationState state(_GetExecutor(), schedule, errorLogger);
338
339 // Now execute the uncached, requested outputs.
340 VdfRequest::IndexedView requestView(computeRequest);
341 for (size_t i = 0; i < requestView.GetSize(); ++i) {
342 // Skip outputs not included in the request.
343 const VdfMaskedOutput *maskedOutput = requestView.Get(i);
344 if (!maskedOutput) {
345 continue;
346 }
347
348 // Skip outputs that have already been cached. However, we must invoke
349 // the callback to notify the client side that evaluation of the
350 // requested output has completed.
351 const VdfOutput &output = *maskedOutput->GetOutput();
352 const VdfMask &mask = maskedOutput->GetMask();
353 if (_GetExecutor().GetOutputValue(output, mask)) {
354 callback(*maskedOutput, i);
355 continue;
356 }
357
358 VDF_PBEE_TRACE_SCOPE(
359 "VdfPullBasedExecutorEngine<T>::RunSchedule (executing output)");
360 _ExecuteOutput(state, output, &executedNodes);
361
362 // If we've been interrupted, bail out.
363 if (_GetExecutor().HasBeenInterrupted()) {
364 break;
365 }
366
367 // Invoke the callback once the output has been evaluated, but only
368 // if the executor has not been interrupted.
369 else {
370 callback(*maskedOutput, i);
371 }
372 }
373}
374
// _CopyCache (the declarator line is absent from this listing; the name is
// taken from the malloc tag below).
//
// Creates an output cache for \p toOutput in \p toBuffer and copies into it
// the entries selected by \p fromMask from the value of \p fromOutput, as
// looked up through the executor. Returns the newly created vector.
375template<typename DataManagerType>
376VdfVector *
378 const VdfOutput &toOutput,
379 VdfExecutorBufferData *toBuffer,
380 const VdfOutput &fromOutput,
381 const VdfMask &fromMask) const
382{
383 TfAutoMallocTag2 tag("Vdf", "VdfPullBasedExecutorEngine<T>::_CopyCache");
384
385 // Note that we must look up the data through the executor, instead of the
386 // data manager, because we may have initially received a cache hit by
387 // looking up the executor. The data may live in the parent executor, for
388 // example, instead of the local data manager.
389 const VdfVector *sourceVector =
390 _executor.GetOutputValue(fromOutput, fromMask);
391
392 if (!sourceVector) {
393 // CODE_COVERAGE_OFF - We should never hit this
394 VDF_FATAL_ERROR(fromOutput.GetNode(),
395 "No cache for output " + fromOutput.GetDebugName());
396 // CODE_COVERAGE_ON
397 }
398
 // NOTE(review): sourceVector is dereferenced unconditionally below, so
 // this relies on VDF_FATAL_ERROR not returning -- confirm.
399 VdfVector *result = _dataManager->CreateOutputCache(toOutput, toBuffer);
400 result->Copy(*sourceVector, fromMask);
401
 // Record the number of copied elements against the destination node, if
 // execution stats are being collected.
402 if (VdfExecutionStats* stats = _executor.GetExecutionStats()) {
403 stats->LogData(
404 VdfExecutionStats::ElementsCopiedEvent,
405 toOutput.GetNode(),
406 fromMask.GetNumSet());
407 }
408
409 return result;
410}
411
412
// Presumably _PassOrCopySourceOutputBuffer (the declarator line is absent
// from this listing -- confirm against the original header).
//
// Touches every associated source output between \p output and \p source,
// then delegates the actual passing or copying of the buffer to
// _PassOrCopyBufferInternal and returns its result.
413template<typename DataManagerType>
414VdfVector *
416 const _DataHandle dataHandle,
417 const VdfOutput &output,
418 const VdfOutput &source,
419 const VdfMask &inputMask,
420 const VdfSchedule &schedule)
421{
422 VDF_PBEE_TRACE_FUNCTION();
423
424 // The following block of code makes sure that we touch all the outputs
425 // between the source output and the output that was passed the buffer.
426 //
427 // XXX: This loop scales with the number of nodes between the two outputs
428 // and can get quite expensive. It's also very cache unfriendly. It
429 // would be great if we could get away without ever touching these
430 // outputs.
431 //
 // The walk stops at \p source (which is not touched here) or when the
 // chain of associated source outputs ends.
432 const VdfOutput *betweenOutput = VdfGetAssociatedSourceOutput(output);
433 while (betweenOutput && betweenOutput != &source) {
434 _GetExecutor()._TouchOutput(*betweenOutput);
435 betweenOutput = VdfGetAssociatedSourceOutput(*betweenOutput);
436 }
437
438 return _PassOrCopyBufferInternal(
439 dataHandle, output, source, inputMask, schedule);
440}
441
// _PassOrCopyBufferInternal (the declarator line is absent from this listing;
// the name is taken from the trace scope below).
//
// Provides \p output with the data of \p source, either by passing the source
// buffer along or by copying it. The buffer is passed only if all of the
// following hold:
//   o \p source has a valid data handle in the local data manager,
//   o \p output is the scheduled pass-to output of \p source,
//   o the source buffer has an executor cache with a non-empty mask,
//   o there is no invalidation timestamp mismatch between the two handles,
//   o the source executor cache mask contains \p inputMask.
// In every other case the data is copied via _CopyCache. Returns the vector
// now living at \p output.
442template<typename DataManagerType>
443VdfVector *
445 const _DataHandle dataHandle,
446 const VdfOutput &output,
447 const VdfOutput &source,
448 const VdfMask &inputMask,
449 const VdfSchedule &schedule) const
450{
451 // Here's where we have the most potential for optimization. We
452 // can re-use our inputs cache (without any copying) if our input
453 // has one and only one output (and that's us)
454 //
455 const _DataHandle sourceHandle =
456 _dataManager->GetDataHandle(source.GetId());
457 VdfSchedule::OutputId sourceId = schedule.GetOutputId(source);
458
 // Stays NULL unless the buffer is passed; a NULL result triggers the
 // copy at the bottom of this function.
459 VdfVector *result = NULL;
460
461 // If this is the output that 'source' is supposed to pass its buffer
462 // to, do so, otherwise copy.
463 if (_dataManager->IsValidDataHandle(sourceHandle) &&
464 &output == schedule.GetPassToOutput(sourceId)) {
465
466 // Retrieve the buffer data from the source data handle.
467 VdfExecutorBufferData *sourceBuffer =
468 _dataManager->GetBufferData(sourceHandle);
469
470 // If the source output does not contain any data, don't even
471 // bother with mung buffer locking or buffer passing.
472 if (sourceBuffer->GetExecutorCache() &&
473 sourceBuffer->GetExecutorCacheMask().IsAnySet()) {
474
475 // Decide whether mung buffer locking should be in effect.
476 // We identify this source output as a likely candidate for buffer
477 // locking (keeping its buffer around) if we observe that the
478 // current output has been recently invalidated while the source
479 // output has not. We optimistically "lock" the buffer by copying
480 // it instead of passing it, so that during the rest of the current
481 // mung (if any), the source buffer will still have its buffer
482 // intact, and we won't have to visit any of its upstream nodes.
483 if (_dataManager->HasInvalidationTimestampMismatch(
484 sourceHandle, dataHandle)) {
 // Only a debug message is emitted here; leaving result NULL is
 // what makes the code below copy instead of pass.
485 TF_DEBUG(VDF_MUNG_BUFFER_LOCKING)
486 .Msg("Mung buffer locking between outputs "
487 "'%s' and '%s'.\n",
488 source.GetDebugName().c_str(),
489 output.GetDebugName().c_str());
490 }
491
492 // If mung buffers are not supposed to be locked, pass the buffer
493 // data from the source output to the destination output.
494 else {
495
496 // If the source output does not contain all the data that has
497 // been requested in the inputMask, we cannot pass buffers.
498 // Note, that the requested data being available also implies
499 // that the source output contains the data marked to keep,
500 // since the keep mask is always a subset of the request mask.
501 // This is verified at scheduling time.
502 //
503 // We end up in this particular situation if the execution
504 // engine has found the data living on a parent executor,
505 // i.e. it must be copied before it can be passed to
506 // subsequent outputs.
507 //
508 if (sourceBuffer->GetExecutorCacheMask().Contains(inputMask)) {
509 const VdfMask &keepMask = schedule.GetKeepMask(sourceId);
510 result = _dataManager->PassBuffer(
511 source, sourceBuffer,
512 output, _dataManager->GetBufferData(dataHandle),
513 keepMask);
514
 // The stat logged for a pass is the size of the keep
 // mask, recorded against the source node.
515 if (VdfExecutionStats* stats =
516 _executor.GetExecutionStats()) {
517 stats->LogData(
518 VdfExecutionStats::ElementsCopiedEvent,
519 source.GetNode(),
520 keepMask.GetNumSet());
521 }
522 }
523
524 // Note that result can be NULL and we can still end up in
525 // _CopyCache. This can happen when something cached in the
526 // parent executor is read by a speculating executor.
527 }
528 }
529 }
530
 // Fallback: nothing was passed, so copy the requested data instead.
531 if (!result) {
532 VDF_PBEE_TRACE_SCOPE(
533 "VdfPullBasedExecutorEngine<T>::_PassOrCopyBufferInternal "
534 "(copying vector)");
535 result = _CopyCache(
536 output, _dataManager->GetBufferData(dataHandle), source, inputMask);
537 }
538
539 return result;
540}
541
// Presumably _PrepareReadWriteBuffer (the declarator line is absent from this
// listing -- confirm against the original header).
//
// Makes sure the output associated with the read/write \p input has a buffer
// to work on: if the input has exactly one connection whose mask is not all
// zeros, the source buffer is passed or copied via _PassOrCopyBufferInternal;
// otherwise a fresh output cache is created.
542template<typename DataManagerType>
543void
545 const _DataHandle dataHandle,
546 const VdfInput &input,
547 const VdfMask &mask,
548 const VdfSchedule &schedule)
549{
550 // Get the output associated with the read/write input.
551 const VdfOutput *output = input.GetAssociatedOutput();
552 TF_DEV_AXIOM(output);
553
554 // Here's where we have the most potential for optimization. We
555 // can re-use our inputs cache (without any copying) if our input
556 // has one and only one output (and that's us)
557 const size_t numInputNodes = input.GetNumConnections();
558 if (numInputNodes == 1 && !input[0].GetMask().IsAllZeros()) {
559 _PassOrCopyBufferInternal(
560 dataHandle, *output, input[0].GetSourceOutput(), mask, schedule);
561 return;
562 }
563
564 // If we have no inputs, provide a fresh new cache. Note that this path is
 // also taken when there is more than one connection, or when the single
 // connection has an all-zeros mask.
565 _dataManager->CreateOutputCache(
566 *output, _dataManager->GetBufferData(dataHandle));
567}
568
// _PushBackOutput (the declarator line is absent from this listing).
//
// Appends an entry for \p output to \p outputs if the output is scheduled,
// i.e. if the schedule returns a valid output id for it. The new entry
// carries \p lockedCacheMask and the affective flag from the schedule.
// Returns true if an entry was pushed, false if the output was skipped.
569template<typename DataManagerType>
570bool
572 std::vector< _OutputToExecute > *outputs,
573 const VdfMask& lockedCacheMask,
574 const VdfOutput &output,
575 const VdfSchedule &schedule)
576{
577 VdfSchedule::OutputId outputId = schedule.GetOutputId(output);
578
579 if (outputId.IsValid()) {
580 // Push the output
581 outputs->push_back(
582 _OutputToExecute(
583 outputId,
584 lockedCacheMask,
585 schedule.IsAffective(outputId)));
586 return true;
587 }
588
589 // The output to push is not actually scheduled, which guarantees
590 // that its value will never be needed by any computations. So
591 // just skip it.
592 return false;
593}
594
// _UpdateOutputForSMBL (the declarator line is absent from this listing).
//
// Refreshes the sparse mung buffer locking (SMBL) state of \p stackEntry for
// \p output: the lockedCacheMask, the absorbLockedCache flag and the
// affective flag. Returns true if the stack entry may have been modified and
// false if it was left untouched.
595template<typename DataManagerType>
596bool
598 const VdfOutput &output,
599 _OutputToExecute *stackEntry,
600 const VdfSchedule &schedule)
601{
602 VDF_PBEE_TRACE_FUNCTION();
603
604 // Retrieve the output data handle. Nothing to update if the output has no
 // valid handle in the data manager.
605 const _DataHandle dataHandle = _dataManager->GetDataHandle(output.GetId());
606 if (!_dataManager->IsValidDataHandle(dataHandle)) {
607 return false;
608 }
609
610 // Get the invalidation timestamp at the output.
611 const VdfInvalidationTimestamp invalidationTs =
612 _dataManager->GetInvalidationTimestamp(dataHandle);
613
614 // If this output has never been invalidated, bail out.
615 if (!invalidationTs) {
616 return false;
617 }
618
619 // If this output was not invalidated during the last invalidation round,
620 // do not consider it for sparse mung buffer locking. The first output that
621 // is no longer part of the last invalidation round will hold the fully
622 // locked mung buffer.
623 // Note, we also have to reset the locked cache mask when crossing the
624 // timestamp edge. If we ever reach back into a pool chain that has the
625 // current invalidation timestamp, we have to start back up with an empty
626 // locked cache mask.
627 if (invalidationTs != _dataManager->GetInvalidationTimestamp()) {
628 if (!stackEntry->lockedCacheMask.IsEmpty()) {
629 stackEntry->lockedCacheMask = VdfMask();
630 return true;
631 }
632 return false;
633 }
634
635 // Output updated?
636 bool updated = false;
637
638 // Append the data sitting at this output to the locked cache mask. This
639 // section of the code is responsible for growing the lockedCacheMask as
640 // we traverse up the pool chain.
641 VdfExecutorBufferData *outputBuffer =
642 _dataManager->GetBufferData(dataHandle);
643 VdfSMBLData *smblData = _dataManager->GetOrCreateSMBLData(dataHandle);
644 const VdfSchedule::OutputId &outputId = stackEntry->outputId;
645 const VdfMask &keepMask = schedule.GetKeepMask(outputId);
646 if (outputBuffer->GetExecutorCache() &&
647 !outputBuffer->GetExecutorCacheMask().IsEmpty() &&
648 !keepMask.IsEmpty()) {
649 smblData->ExtendLockedCacheMask(
650 &stackEntry->lockedCacheMask,
651 outputBuffer->GetExecutorCacheMask());
652 stackEntry->absorbLockedCache = true;
653 updated = true;
654 }
655
656 // If the locked cache mask is still empty, then there is no work to do.
657 if (stackEntry->lockedCacheMask.IsEmpty()) {
658 return false;
659 }
660
661 // Before determining the affective-ness of the node, ensure that the data
662 // indicated by the keep mask is stored in the executor cache, and that
663 // any bits not contained in the executor cache are not contained in the
664 // locked cache mask. Otherwise, we could be skipping nodes which really
665 // need to run in order to provide valid values to be kept.
666 if (!keepMask.IsEmpty()) {
667 smblData->RemoveUncachedMask(
668 &stackEntry->lockedCacheMask,
669 outputBuffer->GetExecutorCacheMask(),
670 keepMask);
671 updated = true;
672 }
673
674 // If this node is affective in the schedule, we may be able to get away
675 // without computing it, and making it un-affective. We determine whether
676 // this is the case by looking at the lockedCacheMask to see if it contains
677 // the scheduled affects mask.
678 if (stackEntry->affective &&
679 !smblData->ComputeAffectiveness(
680 stackEntry->lockedCacheMask,
681 schedule.GetAffectsMask(outputId))) {
682 stackEntry->affective = false;
683 return true;
684 }
685
686 // Any updates to the output?
687 return updated;
688}
689
// _ExecuteOutput (the declarator line is absent from this listing).
//
// Evaluates \p output and everything it depends on, using an explicit stack
// of _OutputToExecute entries instead of recursion. Each entry moves through
// the execution stages:
//   o ExecutionStageStart: skip if already executed or cached, otherwise
//     push the prerequisite inputs.
//   o ExecutionStagePreRequisitesDone: push the required reads and the
//     read/write sources.
//   o any other stage (the default label): compute or pass through the node
//     and pop the entry.
// When a stage pushes new entries it breaks out of the switch so that those
// entries are processed first; otherwise it deliberately falls through to the
// next stage. Nodes that get computed or passed through are recorded in
// \p executedNodes. The loop stops as soon as the executor reports an
// interruption.
690template<typename DataManagerType>
691void
693 const VdfEvaluationState &state,
694 const VdfOutput &output,
695 TfBits *executedNodes)
696{
697 TF_DEBUG(VDF_PBEE_TRACE).Msg(
698 "----------------- _ExecuteOutput --------- \n");
699
700 // The current schedule
701 const VdfSchedule &schedule = state.GetSchedule();
702
703 // Is Sparse Mung Buffer Locking enabled for this round of evaluation?
704 //
705 // Note that executors that may be interrupted, do not yet support SMBL.
706 // After interruption, a buffer that has not been fully passed down the pool
707 // chain, may contain garbage data. That same buffer may then get picked up
708 // in subsequent evaluation rounds, where it is assumed to be entirely
709 // valid.
710 const bool enableSMBL =
711 schedule.HasSMBL() && !_GetExecutor().GetInterruptionFlag();
712
713 // This is the stack of the outputs currently in the process of execution.
714 std::vector< _OutputToExecute > outputsStack;
715
716 // Add the first output to the stack. The return value is not checked: if
 // the output is not scheduled, the stack stays empty and the loop below
 // does not run.
717 _PushBackOutput(&outputsStack, VdfMask(), output, schedule);
718
719 while (!outputsStack.empty()) {
720
721 // If we've been interrupted, bail out.
722 if (_GetExecutor().HasBeenInterrupted()) {
723 break;
724 }
725
726 // Stack Top State. These are copies, because pushing onto the stack
 // further down may reallocate the vector.
727 VdfSchedule::OutputId outputId = outputsStack.back().outputId;
728 bool affective = outputsStack.back().affective;
729 VdfMask lockedCacheMask = outputsStack.back().lockedCacheMask;
730 bool absorbLockedCache = outputsStack.back().absorbLockedCache;
731
732 // Temporary State. Note that the local 'output' pointer shadows the
 // function parameter of the same name inside this loop.
733 const VdfMask *requestMask = NULL;
734 const VdfOutput *output = NULL;
735 const VdfNode &node = *schedule.GetNode(outputId);
736 bool added = false;
737
738 switch (outputsStack.back().stage) {
739
740 case ExecutionStageStart:
741
742 TF_DEBUG(VDF_PBEE_TRACE)
743 .Msg("{ BeginNode(\"%s\");\n", node.GetDebugName().c_str());
744
745 // We have to compute if
746 // o The node has not been executed, yet
747 // o The output is dirty
748 // o The cache is empty
749 // o The computed mask doesn't cover what is asked for in the
750 // schedule.
751 output = schedule.GetOutput(outputId);
752 requestMask = &schedule.GetRequestMask(outputId);
753 if (executedNodes->IsSet(schedule.GetScheduleNodeIndex(outputId)) ||
754 _GetExecutor().GetOutputValue(*output, *requestMask)) {
755
756 // Pop off the top of the output stack
757 outputsStack.pop_back();
758
759 TF_DEBUG(VDF_PBEE_TRACE).Msg(" EndNodeFoundCache(); }\n");
760 continue;
761 }
762
763 // Update the output for SMBL. This refreshes the affective-ness
764 // flag, the lockedCacheMask and the flag that indicates whether
765 // the locked cache should be absorbed into the executor cache.
766 if (enableSMBL && Vdf_IsPoolOutput(*output)) {
767 // Update the top of the output stack. Since no new outputs
768 // have been pushed onto the stack at this point, the top is
769 // still the output we are currently executing.
770 _OutputToExecute *stackTop = &outputsStack.back();
771 if (_UpdateOutputForSMBL(*output, stackTop, schedule)) {
 // Refresh the local copies taken at the top of the loop.
772 affective = stackTop->affective;
773 lockedCacheMask = stackTop->lockedCacheMask;
774 absorbLockedCache = stackTop->absorbLockedCache;
775 }
776 }
777
778 // The first stage of computation is to execute all the
779 // prerequisites for current output. So we push them on our stack
780 // and wait for them to be computed.
781
782 // Mark that we've processed the prerequisites for this output.
783 outputsStack.back().stage = ExecutionStagePreRequisitesDone;
784
785 // Push back all the prerequisites if this output will do anything
786 if (affective) {
787 for (const VdfScheduleInput &input : schedule.GetInputs(node)) {
788 if (input.input->GetSpec().IsPrerequisite()) {
789 added |= _PushBackOutput(
790 &outputsStack, VdfMask(), *input.source, schedule);
791 }
792 }
793 }
794
795 // If we added inputs then we want to go back to the top of the
796 // loop and execute our inputs, otherwise we will fall through to
797 // the next stage.
798 if (added) {
799 break;
800 } // else fall through to the next stage.
801
802 case ExecutionStagePreRequisitesDone:
803
804 // Now that all the prerequisites are done, the second stage
805 // of computation is to use the prerequisites to determine what
806 // other inputs we need to run to satisfy the current output.
807
808 // Mark that all the inputs have now been processed for the
809 // current output.
810 outputsStack.back().stage = ExecutionStageCompute;
811
812 // Note that outputs added are executed in reverse order. So we
813 // push last the nodes that we want to run first.
814
815 // Only run the reads if the output is expected to modify
816 // anything.
817 if (affective) {
818
819 // Get the list of required inputs based on the prerequisite
820 // computations.
821 VdfRequiredInputsPredicate inputsPredicate =
822 node.GetRequiredInputsPredicate(VdfContext(state, node));
823
824 // Run the required reads last.
825 // Here we try to run the "read" inputs after the "read/write"
826 // inputs.
827 if (inputsPredicate.HasRequiredReads()) {
828 for (const VdfScheduleInput &input :
829 schedule.GetInputs(node)) {
830 if (inputsPredicate.IsRequiredRead(*input.input)) {
831 added |= _PushBackOutput(
832 &outputsStack, VdfMask(),
833 *input.source, schedule);
834 }
835 }
836 }
837 }
838
839 // Run the read/writes first, so that we can maximize the chance of
840 // being able to re-use the kept buffers for speculations. Unlike
 // the reads above, these are pushed regardless of the affective
 // flag, and they inherit the current lockedCacheMask.
841 for (const VdfScheduleInput &input : schedule.GetInputs(node)) {
842 const VdfOutput *assocOutput =
843 input.input->GetAssociatedOutput();
844 if (!assocOutput) {
845 continue;
846 }
847
848 // Does this output have a pass-through scheduled?
849 const VdfSchedule::OutputId &assocOutputId =
850 schedule.GetOutputId(*assocOutput);
851 if (assocOutputId.IsValid()) {
852 if (const VdfOutput *fromBufferOutput =
853 schedule.GetFromBufferOutput(assocOutputId)) {
854 added |= _PushBackOutput(
855 &outputsStack, lockedCacheMask,
856 *fromBufferOutput, schedule);
857 continue;
858 }
859 }
860
861 // If the associated output is not scheduled, or it does not
862 // have a pass-through scheduled, we need to consider all
863 // connected source outputs!
864 added |= _PushBackOutput(
865 &outputsStack, lockedCacheMask, *input.source, schedule);
866 }
867
868 // If we added inputs then we want to go back to the top of the
869 // loop and execute our inputs, otherwise we will fall through to
870 // the next stage.
871 if (added) {
872 break;
873 } // else fall through to the next stage.
874
 // Handles ExecutionStageCompute as well as any other stage value.
875 default:
876
877 // Set a bit indicating that this node has been executed.
878 executedNodes->Set(schedule.GetScheduleNodeIndex(outputId));
879
880 // Compute the node.
881 if (affective) {
882 _ComputeNode(state, node, absorbLockedCache);
883
884 TF_DEBUG(VDF_PBEE_TRACE).Msg(
885 "ComputedNode(\"%s\"); }\n", node.GetDebugName().c_str());
886
887 } else {
888 // The node doesn't have any outputs that need to be computed.
889 // Skip the node passing through the data for read/write
890 // outputs.
891 _PassThroughNode(schedule, node, absorbLockedCache);
892
893 TF_DEBUG(VDF_PBEE_TRACE)
894 .Msg("ComputedNodeInaffective(\"%s\"); }\n",
895 node.GetDebugName().c_str());
896 }
897
898 // Pop the output off the stack, once we are done with it
899 outputsStack.pop_back();
900 }
901 }
902}
903
// _ComputeNode (the declarator line is absent from this listing; the name is
// taken from the trace scope below).
//
// Computes \p node in three steps:
//   1. For every scheduled output of the node: retain the existing executor
//      cache where requested, reset the cache, touch the output, and prepare
//      the buffer of read/write outputs.
//   2. Invoke the node's compute callback.
//   3. For every scheduled output: warn about outputs without a value and
//      finalize the computed output via _FinalizeComputedOutput.
// \p absorbLockedCache forces the existing executor caches to be retained in
// step 1.
904template<typename DataManagerType>
905void
907 const VdfEvaluationState &state,
908 const VdfNode &node,
909 bool absorbLockedCache)
910{
911 VDF_PBEE_TRACE_FUNCTION();
912
913 VdfExecutionStats *stats = _executor.GetExecutionStats();
914
 // NOTE(review): listing line 915 is absent from this extraction, so the
 // statement below is incomplete as shown.
916 compute(stats, node, VdfExecutionStats::NodeEvaluateEvent);
917
918 if (stats) {
919 stats->LogTimestamp(VdfExecutionStats::NodeDidComputeEvent, node);
920 }
921
922
923 // The current schedule.
924 const VdfSchedule &schedule = state.GetSchedule();
925
926 // Clear the acceleration structure for output data lookups.
927 _dataHandleCache.clear();
928
929 VDF_FOR_EACH_SCHEDULED_OUTPUT_ID(outputId, schedule, node) {
930 const VdfOutput &output = *schedule.GetOutput(outputId);
931
932 // Retrieve the data handle and cache it for accelerated lookup below.
933 const _DataHandle dataHandle =
934 _dataManager->GetOrCreateDataHandle(output.GetId());
935 _dataHandleCache.push_back(dataHandle);
936
937 // Retrieve the buffer data associated with the handle.
938 VdfExecutorBufferData *bufferData =
939 _dataManager->GetBufferData(dataHandle);
940
941 // If this output still contains data (i.e., invalidation did not
942 // remove the cache), it may have been locked and we may want to retain
943 // the data to absorb it shortly.
944 if (absorbLockedCache ||
945 (bufferData->GetExecutorCache() &&
946 _IsNotPassing(output, outputId, schedule))) {
947 bufferData->RetainExecutorCache(
948 output.GetSpec(),
949 _dataManager->GetOrCreateSMBLData(dataHandle));
950 }
951
952 // Before we compute the output, we have to make sure that all
953 // the recipients of its cache are cleared and that the cache is
954 // reclaimed by output.
955 bufferData->ResetExecutorCache();
956
957 // Mark the output as having been touched during evaluation.
958 _dataManager->Touch(dataHandle);
959
960 // If this is a read/write output, make sure the buffer has been
961 // passed down. We also need to set the computed output mask here,
962 // because the node will read input values of read/write inputs
963 // directly at this output.
964 // Note, that on interruption this mask must be reset!
965 if (const VdfInput *ai = output.GetAssociatedInput()) {
966 const VdfMask &requestMask = schedule.GetRequestMask(outputId);
967 _PrepareReadWriteBuffer(dataHandle, *ai, requestMask, schedule);
968 _dataManager->SetComputedOutputMask(bufferData, requestMask);
969 }
970 }
971
972 // Compute the node
973 {
974 VDF_PBEE_TRACE_SCOPE(
975 "VdfPullBasedExecutorEngine<T>::_ComputeNode "
976 "(node callback)");
977
978 node.Compute(VdfContext(state, node));
979 }
980
981 // Has the node been interrupted during execution?
982 const bool hasBeenInterrupted = _GetExecutor().HasBeenInterrupted();
983
984 // Deallocate temporary buffers which the schedule knows can be deallocated
985 // now that this node has run (they will never be read again before they
986 // are deallocated due to invalidation).
987 if (const VdfOutput* ctd = schedule.GetOutputToClear(node)) {
988 // Fetch the data handle directly from _dataManager, rather than
989 // through a virtual method, because we only ever want to eagerly clear
990 // temporary buffers in our own data manager (never a parent's).
991 const _DataHandle dataHandle =
992 _dataManager->GetDataHandle(ctd->GetId());
993 if (_dataManager->IsValidDataHandle(dataHandle)) {
994 _dataManager->GetBufferData(dataHandle)->Reset();
995 }
996 }
997
998 // We now need to mark the computed parts of our vectors. The handles are
 // read back from _dataHandleCache by position, which relies on this loop
 // visiting the scheduled outputs in the same order as the loop above.
999 size_t outputIndex = 0;
1000 VDF_FOR_EACH_SCHEDULED_OUTPUT_ID(outputId, schedule, node) {
1001 const VdfOutput &output = *schedule.GetOutput(outputId);
1002 const VdfMask &requestMask = schedule.GetRequestMask(outputId);
1003
1004 // Retrieve the data handle from the cache.
1005 const _DataHandle dataHandle = _dataHandleCache[outputIndex++];
1006
1007 // Retrieve the buffer data associated with the handle.
1008 VdfExecutorBufferData *bufferData =
1009 _dataManager->GetBufferData(dataHandle);
1010
1011 // Check to see if the node did indeed produce values for this
1012 // output. We don't want to post warnings for missing output values
1013 // if the node has been interrupted.
1014 if (!hasBeenInterrupted &&
1015 !output.GetAssociatedInput() &&
1016 !bufferData->GetExecutorCache()) {
1017
1018 // This is an output without an associated input that has
1019 // no value even though it was requested. (We know it is
1020 // requested because otherwise, it wouldn't be in the schedule,
1021 // because of VdfScheduler::_RemoveTrivialNodes.)
1022 TF_WARN(
1023 "No value set for output " + output.GetDebugName() +
1024 " of type " + output.GetSpec().GetType().GetTypeName() +
1025 " named " + output.GetName().GetString());
1026
1027 //XXX: This is not 100% right when we use a single data flow
1028 // element to hold multiple values (as we do for shaped
1029 // attributes). FillVector() would need to know that this
1030 // is the case and it would need to know the # of values
1031 // to package into the output. This can happen anywhere
1032 // in the network, but for now, I only added a workaround
1033 // in the EfCopyToPoolNode.
 // NOTE(review): listing line 1034 is absent from this extraction;
 // the lines below are the arguments of the missing call.
1035 output.GetSpec().GetType(),
1036 requestMask.GetSize(),
1037 _dataManager->GetOrCreateOutputValueForWriting(
1038 output, dataHandle));
1039 }
1040
1041 // If the node has been interrupted, make sure to reset the computed
1042 // output mask: Read/writes will already have their mask set.
1043 _FinalizeComputedOutput(
1044 dataHandle,
1045 requestMask,
1046 hasBeenInterrupted,
1047 _IsNotPassing(output, outputId, schedule));
1048
1049 // Log stats
1050 if (stats) {
 // This local shadows the function parameter 'node'.
1051 const VdfNode& node = output.GetNode();
1052
1053 stats->LogData(
1054 VdfExecutionStats::ElementsProcessedEvent,
1055 node,
1056 schedule.GetAffectsMask(outputId).GetNumSet());
1057 }
1058 }
1059}
1060
// Causes the outputs of `node` that are scheduled in `schedule` to have their
// data passed through.
//
// For every scheduled output of `node` this function, in order:
//   1. gets or creates the output's data handle and looks up its buffer data,
//   2. optionally retains the current executor cache (see below),
//   3. resets the executor cache and touches the data handle,
//   4. calls _PassOrCopySourceOutputBuffer() if the schedule names a
//      "from buffer" output, otherwise calls _PrepareReadWriteBuffer() if the
//      output has an associated input,
//   5. calls _FinalizeComputedOutput() with the scheduled request mask and
//      hasBeenInterrupted == false.
//
// `absorbLockedCache`: when true, RetainExecutorCache() is called for every
// scheduled output, whether or not an executor cache is currently present.
// When false, it is called only for outputs that still hold an executor
// cache and for which _IsNotPassing() returns true.
//
// Returns true if step 4 took either branch for at least one scheduled
// output, and false otherwise.
//
// NOTE(review): this listing skips line 1063, between the return type and the
// parameter list. It presumably holds the qualified function name
// (VdfPullBasedExecutorEngine<DataManagerType>::_PassThroughNode) -- confirm
// against the original header.
1061template<typename DataManagerType>
1062bool
1064 const VdfSchedule &schedule,
1065 const VdfNode &node,
1066 bool absorbLockedCache)
1067{
1068 VDF_PBEE_TRACE_FUNCTION();
1069
1070 bool passedThrough = false;
1071
1072 VdfExecutionStats *stats = _executor.GetExecutionStats();
// NOTE(review): this listing skips line 1073. It presumably names the type of
// the object `compute` constructed on the next line from the stats pointer,
// the node and NodeEvaluateEvent -- confirm against the original header.
1074 compute(stats, node, VdfExecutionStats::NodeEvaluateEvent);
1075
1076 VDF_FOR_EACH_SCHEDULED_OUTPUT_ID(outputId, schedule, node) {
1077 const VdfOutput &output = *schedule.GetOutput(outputId);
1078 const VdfMask &requestMask = schedule.GetRequestMask(outputId);
1079
1080 // Retrieve the data handle.
1081 const _DataHandle dataHandle =
1082 _dataManager->GetOrCreateDataHandle(output.GetId());
1083
1084 // Get the buffer data associated with the data handle.
1085 VdfExecutorBufferData *bufferData =
1086 _dataManager->GetBufferData(dataHandle);
1087
1088 // If this output still contains data (i.e., invalidation did not
1089 // remove the cache), it may have been locked and we may want to retain
1090 // the data to absorb it shortly.
// Note that absorbLockedCache alone is sufficient to retain the cache: the
// presence check and _IsNotPassing() apply only when it is false.
1091 if (absorbLockedCache ||
1092 (bufferData->GetExecutorCache()
1093 && _IsNotPassing(output, outputId, schedule))) {
1094 bufferData->RetainExecutorCache(
1095 output.GetSpec(),
1096 _dataManager->GetOrCreateSMBLData(dataHandle));
1097 }
1098
1099 // Before we pass the output data through, we have to make sure that
1100 // all the recipients of its cache are cleared and that the cache is
1101 // reclaimed by the output.
1102 bufferData->ResetExecutorCache();
1103
1104 // Mark the output as having been touched during evaluation, in order
1105 // for invalidation to consider this output.
1106 _dataManager->Touch(dataHandle);
1107
// Prefer the "from buffer" output recorded in the schedule for this output.
// Only when there is none do we fall back to the output's associated input.
1108 if (const VdfOutput *fromBufferOutput =
1109 schedule.GetFromBufferOutput(outputId)) {
1110
1111 _PassOrCopySourceOutputBuffer(
1112 dataHandle, output, *fromBufferOutput, requestMask, schedule);
1113
1114 passedThrough = true;
1115
1116 } else if (const VdfInput *ai = output.GetAssociatedInput()) {
1117
1118 // We better have one and only one connection on this input
1119 // connector. Otherwise we can't pass anything through.
// NOTE(review): this listing skips line 1120. It presumably holds the check
// described by the comment above (exactly one connection on `ai`) -- confirm
// against the original header.
1121
1122 // If the output has an associated input, pass the data through.
1123 _PrepareReadWriteBuffer(dataHandle, *ai, requestMask, schedule);
1124 passedThrough = true;
1125 }
1126
1127 // Finalize the computed output, by merging in any temporary data and
1128 // setting the appropriate computed output mask.
// This runs for every scheduled output, including outputs for which neither
// branch above was taken.
1129 _FinalizeComputedOutput(
1130 dataHandle,
1131 requestMask,
1132 false, /* hasBeenInterrupted */
1133 _IsNotPassing(output, outputId, schedule));
1134 }
1135
1136 return passedThrough;
1137}
1138
// Finalizes the output identified by `dataHandle` after its node has been
// computed or passed through.
//
// First releases the SMBL data for the handle via ReleaseExecutorCache(),
// which merges any previously retained executor cache back into the buffer.
// This happens regardless of interruption. Then sets the computed output
// mask on the buffer:
//   - `hasBeenInterrupted` true: an empty VdfMask.
//   - otherwise, `extendRequestMask` true and the mask returned by
//     ReleaseExecutorCache() non-empty: that mask OR'ed with `requestMask`.
//   - otherwise: `requestMask`.
//
// NOTE(review): this listing skips line 1141, between the return type and the
// parameter list. It presumably holds the qualified function name
// (VdfPullBasedExecutorEngine<DataManagerType>::_FinalizeComputedOutput,
// matching the four-argument calls elsewhere in this file) -- confirm against
// the original header.
1139template<typename DataManagerType>
1140void
1142 const _DataHandle dataHandle,
1143 const VdfMask &requestMask,
1144 const bool hasBeenInterrupted,
1145 const bool extendRequestMask)
1146{
1147 // Retrieve the buffer data associated with the data handle.
1148 VdfExecutorBufferData *bufferData = _dataManager->GetBufferData(dataHandle);
1149
1150 // Merge in temporary data, if available. Note, we must release the
1151 // SMBL data despite any possible interruption!
1152 VdfMask lockedMask =
1153 bufferData->ReleaseExecutorCache(_dataManager->GetSMBLData(dataHandle));
1154
1155 // Has the executor been interrupted? Make sure to reset the computed
1156 // output mask, so that subsequent cache hits do not return garbage data.
1157 if (hasBeenInterrupted) {
1158 _dataManager->SetComputedOutputMask(bufferData, VdfMask());
1159 }
1160
1161 // Otherwise, set the computed output mask to the request mask.
1162 else {
1163 // If extendRequestMask is set and the locked mask is non-empty, set
1164 // the union of the locked mask and the request mask. Otherwise, set
1165 // just the request mask.
1166 _dataManager->SetComputedOutputMask(
1167 bufferData,
1168 extendRequestMask && !lockedMask.IsEmpty() ?
1169 lockedMask | requestMask :
1170 requestMask);
1171 }
1172}
1173
1175
1176PXR_NAMESPACE_CLOSE_SCOPE
1177
1178#endif
Fast bit array that keeps track of the number of bits set and can find the next set in a timely manne...
Definition: bits.h:49
void Set(size_t index)
Sets bit # index to one.
Definition: bits.h:377
bool IsSet(size_t index) const
Returns true, if bit # index is set.
Definition: bits.h:412
Scoped (i.e. local variable) object for creating/destroying memory tags.
Definition: mallocTag.h:249
std::string const & GetString() const
Return the string that this token represents.
Definition: token.h:190
TF_API const std::string & GetTypeName() const
Return the machine-independent name for this type.
A context is the parameter bundle passed to callbacks of computations.
Definition: context.h:40
This object holds state that remains persistent during one round of network evaluation.
const VdfSchedule & GetSchedule() const
The schedule used for evaluation.
Execution stats profiling event logger.
void LogData(EventType event, const VdfNode &node, EventData data)
Log event API.
void LogTimestamp(EventType event, const VdfNode &node)
Log timestamp API.
static VDF_API void FillVector(TfType type, size_t numElements, VdfVector *vector)
Fills vector with the fallback value registered for the given type.
This object is responsible for storing the executor buffer data, comprised of the executor cache vect...
void ResetExecutorCache(const VdfMask &mask)
Reset the executor cache without releasing any memory and set the executor cache mask to mask.
VDF_API void RetainExecutorCache(const VdfOutputSpec &spec, VdfSMBLData *smblData)
Takes the existing executor cache and retains it within the existing VdfSMBLData object.
const VdfMask & GetExecutorCacheMask() const
Get the available mask.
VdfVector * GetExecutorCache() const
Returns the executor cache stored at this buffer data instance.
VDF_API VdfMask ReleaseExecutorCache(VdfSMBLData *smblData)
Merges the executor cache previously retained in smblData into this cache and releases the SMBL data.
A client may instantiate an object of this class and set it in an executor, to collect errors that ma...
Abstract base class for classes that execute a VdfNetwork to compute a requested set of values.
A VdfInput is used to connect a VdfNode to one or more VdfNodes' outputs.
Definition: input.h:36
size_t GetNumConnections() const
Returns the number of connections for this input.
Definition: input.h:58
const VdfOutput * GetAssociatedOutput() const
Returns the output corresponding to this input.
Definition: input.h:82
A VdfMask is placed on connections to specify the data flowing through them.
Definition: mask.h:37
size_t GetSize() const
Returns the size of the mask.
Definition: mask.h:158
bool IsAnySet() const
Returns true, if there is at least a single set entry.
Definition: mask.h:216
bool IsEmpty() const
Returns true if this mask is empty, i.e. it is of size zero.
Definition: mask.h:168
bool Contains(const VdfMask &mask) const
Returns true if mask is a subset-of or equal to this mask, false otherwise.
Definition: mask.h:186
size_t GetNumSet() const
Returns the number of set bits in the mask.
Definition: mask.h:246
Class to hold on to an externally owned output and a mask.
Definition: maskedOutput.h:32
VdfOutput * GetOutput() const
Returns the VdfOutput.
Definition: maskedOutput.h:52
const VdfMask & GetMask() const
Returns the VdfMask.
Definition: maskedOutput.h:64
This is the base class for all nodes in a VdfNetwork.
Definition: node.h:53
VDF_API const std::string GetDebugName() const
Returns the debug name for this node, if one is registered.
virtual VDF_API VdfRequiredInputsPredicate GetRequiredInputsPredicate(const VdfContext &context) const
Returns a predicate, determining whether a given input and its connections are required in order to f...
virtual void Compute(const VdfContext &context) const =0
This is the method called to perform computation.
A VdfOutput represents an output on a node.
Definition: output.h:32
const VdfNode & GetNode() const
Returns the owning node for this output.
Definition: output.h:57
VDF_API std::string GetDebugName() const
Returns the debug name for this output.
VdfId GetId() const
The unique id of this output.
Definition: output.h:100
VDF_API const VdfOutputSpec & GetSpec() const
Returns the connector specification object for this output.
VDF_API const TfToken & GetName() const
Returns the name of this output.
const VdfInput * GetAssociatedInput() const
Returns the in/out connector associated with this output.
Definition: output.h:76
TfType GetType() const
Returns the type of this spec.
Definition: outputSpec.h:60
This class is a collection of common functions used by pull-based executors.
VdfVector * _PassOrCopySourceOutputBuffer(const _DataHandle dataHandle, const VdfOutput &output, const VdfOutput &source, const VdfMask &inputMask, const VdfSchedule &schedule)
Fast path for when we know ahead of time the output from which we wish to pass the buffer (or copy) a...
VdfPullBasedExecutorEngine(const VdfExecutorInterface &executor, DataManagerType *dataManager)
Constructor.
void _ComputeNode(const VdfEvaluationState &state, const VdfNode &node, bool absorbLockedCache=false)
Computes node.
static bool _IsNotPassing(const VdfOutput &output, const VdfSchedule::OutputId &outputId, const VdfSchedule &schedule)
Returns true if the output is associative but does not pass the buffer to another output.
void _PrepareReadWriteBuffer(const _DataHandle dataHandle, const VdfInput &input, const VdfMask &mask, const VdfSchedule &schedule)
Prepares a buffer for a read/write output.
void RunSchedule(const VdfSchedule &schedule, const VdfRequest &computeRequest, VdfExecutorErrorLogger *errorLogger)
Executes the given schedule with a computeRequest and an optional errorLogger.
_ExecutionStage
This enum describes the stages that a node goes through in execution.
DataManagerType * _GetDataManager()
Returns the data manager used by this engine.
VdfVector * _CopyCache(const VdfOutput &toOutput, VdfExecutorBufferData *toBuffer, const VdfOutput &fromOutput, const VdfMask &fromMask) const
Helper method to _PrepareReadWriteBuffer that copies the cache from fromOutput to toOutput.
const VdfExecutorInterface & _GetExecutor()
Returns the executor running this engine.
VdfVector * _PassOrCopyBufferInternal(const _DataHandle dataHandle, const VdfOutput &output, const VdfOutput &source, const VdfMask &inputMask, const VdfSchedule &schedule) const
Common method for _PrepareReadWriteBuffer and _PassOrCopySourceOutputBuffer that attempts to pass the...
bool _PassThroughNode(const VdfSchedule &schedule, const VdfNode &node, bool absorbLockedCache=false)
Causes the outputs with associated inputs in node to have their data passed through.
DataManagerType::DataHandle _DataHandle
The data handle type from the data manager implementation.
VdfSpeculationExecutorEngine< DataManagerType > SpeculationExecutorEngine
The equivalent speculation executor engine.
This predicate determines whether a given input value is needed to fulfill the input dependencies req...
bool IsRequiredRead(const VdfInput &input) const
Is this input a required read? Note that read/writes as well as prerequisite inputs are not required ...
bool HasRequiredReads() const
Are any inputs required?
VdfSMBLData holds per-output data that is meant to be consumed by the executor.
Definition: smblData.h:31
void RemoveUncachedMask(VdfMask *lockedCacheMask, const VdfMask &cacheMask, const VdfMask &keepMask)
Make sure that all the bits in the keepMask are provided by the cacheMask.
Definition: smblData.h:250
bool ComputeAffectiveness(const VdfMask &lockedCacheMask, const VdfMask &affectsMask)
Computes the affectiveness of the corresponding output given the accumulated lockedCacheMask and the ...
Definition: smblData.h:87
void ExtendLockedCacheMask(VdfMask *lockedCacheMask, const VdfMask &cacheMask)
Extends the lockedCacheMask by appending the bits stored in the executor cacheMask.
Definition: smblData.h:65
An OutputId is a small key object that, once obtained for a particular VdfOutput, can be used to quer...
Definition: schedule.h:91
bool IsValid() const
Returns whether this OutputId can be used to make queries about an output's scheduling.
Definition: schedule.h:97
Contains a specification of how to execute a particular VdfNetwork.
Definition: schedule.h:41
VDF_API const VdfMask & GetAffectsMask(const OutputId &outputId) const
Returns the affects mask associated with the given OutputId.
const VdfNetwork * GetNetwork() const
Returns the network for this schedule.
Definition: schedule.h:178
VDF_API OutputId GetOutputId(const VdfOutput &output) const
Returns a small, cheap OutputId, which can be passed to other Get* methods in this class to efficient...
VDF_API const VdfMask & GetRequestMask(const OutputId &outputId) const
Returns the request mask associated with the given OutputId.
VDF_API const VdfOutput * GetFromBufferOutput(const OutputId &outputId) const
Returns the "from buffer's" output associated with the given OutputId.
VDF_API InputsRange GetInputs(const VdfNode &node) const
Returns a range of inputs scheduled for the given node.
int GetScheduleNodeIndex(const OutputId &outputId) const
Returns the node index of the schedule node associated with the given outputId.
Definition: schedule.h:514
VDF_API const VdfMask & GetKeepMask(const OutputId &outputId) const
Returns the keep mask associated with the given OutputId.
VDF_API const VdfOutput * GetPassToOutput(const OutputId &outputId) const
Returns the "pass to" output associated with the given OutputId.
VDF_API const VdfNode * GetNode(const OutputId &outputId) const
Returns the VdfNode that owns the VdfOutput associated with the given outputId.
VDF_API const VdfOutput * GetOutputToClear(const VdfNode &node) const
Returns the output whose temporary buffer can be immediately deallocated after node has finished exec...
ScheduleNodeVector & GetScheduleNodeVector()
Returns the vector of schedule nodes in this schedule.
Definition: schedule.h:503
VDF_API bool IsAffective(const OutputId &outputId) const
Returns true if the output is expected to have an effect on its corresponding input,...
VDF_API const VdfOutput * GetOutput(const OutputId &outputId) const
Returns the scheduled VdfOutput associated with the given OutputId.
bool HasSMBL() const
Returns true if this schedule participates in sparse mung buffer locking.
Definition: schedule.h:353
This class provides an executor engine to the speculation executor.
This class is used to abstract away knowledge of the cache data used for each node.
Definition: vector.h:56
void Copy(const VdfVector &rhs, const VdfMask &mask)
Copies the contents of rhs into this vector.
Definition: vector.h:274
unsigned int VdfInvalidationTimestamp
Type of the timestamp that identifies the most recent round of invalidation.
Definition: types.h:74
#define TF_DEBUG(enumVal)
Evaluate and print debugging message msg if enumVal is enabled for debugging.
Definition: debug.h:484
#define TF_DEV_AXIOM(cond)
The same as TF_AXIOM, but compiled only in dev builds.
Definition: diagnostic.h:205
#define TF_WARN(...)
Issue a warning, but continue execution.
Definition: diagnostic.h:132
VDF_API const VdfOutput * VdfGetAssociatedSourceOutput(const VdfOutput &output)
Returns the output that is the source of the associated input of output, if any and NULL otherwise.
bool Vdf_IsPoolOutput(const VdfOutput &output)
Returns true if output is a pool output, i.e., an output that has an associated input,...
This class contains scheduling information for an input.
Definition: scheduleNode.h:73
Scoped event that automatically pushes and pops malloc tags for the given VdfNode.