#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

/** \class TensorEvaluator
  * \ingroup CXX11_Tensor_Module
  *
  * \brief The tensor evaluator classes.
  *
  * These classes are responsible for the evaluation of the tensor expression.
  */

// Generic evaluator for expressions that expose raw data access
// (e.g. Tensor, TensorMap, TensorFixedSize).
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef Derived XprType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  // NumDimensions is -1 for variable-dimension tensors, so clamp it to zero coordinates.
  static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                                   internal::traits<Derived>::NumDimensions : 0;
  static constexpr int Layout = Derived::Layout;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<std::remove_const_t<Scalar>>::value,
    PreferBlockAccess = false,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(const_cast<TensorPointerType>(m.data()))),
        m_dims(m.dimensions()),
        m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && dest) {
      m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;  // the caller's destination buffer already holds the data
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }
  // Return a packet starting at `index`; `umask` selects which packet lanes
  // are actually loaded, with masked-off lanes reading as zero. Templatized
  // to enable SFINAE on packet types that support masked loads.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
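  // Illustrative sketch (not part of this header): the packet and mask types
  // depend on the target SIMD width. Assuming an AVX512 build where
  // PacketReturnType is Packet16f and mask_t is uint16_t, a caller could load
  // only the first three lanes of a packet roughly like this:
  //
  //   uint16_t umask = 0x0007;  // bits 0-2 set: load lanes 0, 1, 2
  //   auto p = evaluator.template partialPacket<Packet16f>(index, umask);
  //
  // On targets without masked loads, this overload is removed by SFINAE.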
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
  coeffRef(const array<DenseIndex, NumCoords>& coords) {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }

  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(m_data != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
                                            Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(m_dims), m_data,
                                  desc.offset()),
        block.expr());
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
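// Usage sketch (illustrative only, not part of this header): evaluators are
// normally driven by the assignment machinery in TensorExecutor, but the
// lifecycle can be exercised directly. The tensor and device below are
// assumptions made for the example.
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<Eigen::Tensor<float, 2>, Eigen::DefaultDevice>
//       eval(t, device);
//   if (eval.evalSubExprsIfNeeded(nullptr)) {  // true: pull coefficients
//     float first = eval.coeff(0);             // scalar access path
//   }
//   eval.cleanup();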
namespace internal {

// Default scalar load used by the read-only evaluators below.
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}

// Use the read-only texture cache on CUDA devices whenever possible.
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif

#ifdef EIGEN_USE_SYCL
// Overload of loadConstant for SYCL range accessors.
template <cl::sycl::access::mode AcMd, typename T>
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
  return *address;
}
#endif

}  // namespace internal
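// Note on the specializations above: on CUDA devices with compute capability
// 3.5+, __ldg routes the load through the read-only (texture) cache, which
// helps when many threads repeatedly read the same immutable coefficients.
// The generic overload simply dereferences the pointer, so host code and
// other devices are unaffected.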
// Default evaluator for rvalues (read-only expressions wrapping raw data).
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef const Derived XprType;
  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
  typedef StorageMemory<const Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  // NumDimensions is -1 for variable-dimension tensors, so clamp it to zero coordinates.
  static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                                   internal::traits<Derived>::NumDimensions : 0;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = Derived::Layout;

  enum {
    IsAligned         = Derived::IsAligned,
    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess       = internal::is_arithmetic<ScalarNoConst>::value,
    PreferBlockAccess = false,
    CoordAccess       = NumCoords > 0,
    RawAccess         = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && data) {
      m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data),
                      m_dims.TotalSize() * sizeof(Scalar));
      return false;  // the caller's destination buffer already holds the data
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return internal::loadConstant(m_data + index);
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }

  // Return a packet starting at `index`; `umask` selects which packet lanes
  // are actually loaded, with masked-off lanes reading as zero. Templatized
  // to enable SFINAE on packet types that support masked loads.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor))
                            ? m_dims.IndexOfColMajor(coords)
                            : m_dims.IndexOfRowMajor(coords);
    return internal::loadConstant(m_data + index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif

 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
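// Usage sketch (illustrative only): this const specialization is what backs
// the right-hand side of an assignment. When evaluating `out = in;` the
// executor builds a TensorEvaluator<const Tensor<...>, Device> for `in`, so
// all reads go through internal::loadConstant / ploadt_ro. A hypothetical
// instantiation, assuming a DefaultDevice build:
//
//   Eigen::Tensor<float, 1> in(8);
//   in.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<const Eigen::Tensor<float, 1>, Eigen::DefaultDevice>
//       reader(in, device);
//   float x = reader.coeff(2);  // read-only coefficient access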
// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned = true,
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
#ifdef EIGEN_USE_SYCL
                   && (PacketType<CoeffReturnType, Device>::size > 1)
#endif
    ,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    done(true);
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_wrapper(m_functor, index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
};
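// Usage sketch (illustrative only): nullary expressions generate coefficients
// from a functor instead of loading them from memory, e.g. the expression
// produced by Tensor::constant(). A hypothetical evaluation that this
// evaluator would back, assuming a DefaultDevice build:
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   Eigen::DefaultDevice device;
//   auto expr = t.constant(1.5f);   // TensorCwiseNullaryOp
//   Eigen::TensorEvaluator<const decltype(expr), Eigen::DefaultDevice>
//       eval(expr, device);
//   float v = eval.coeff(0);        // == 1.5f, computed, not loaded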
// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<UnaryOp>::PacketAccess),
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef std::remove_const_t<Scalar> ScalarNoConst;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.getResourceRequirements().addCostPerCoeff(
        {0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
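// Usage sketch (illustrative only): a unary evaluator wraps the evaluator of
// its argument and applies the functor per coefficient (or per packet). For
// example, the expression behind `t.sqrt()`:
//
//   Eigen::Tensor<float, 1> t(8);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   auto expr = t.sqrt();           // TensorCwiseUnaryOp
//   Eigen::TensorEvaluator<const decltype(expr), Eigen::DefaultDevice>
//       eval(expr, device);
//   eval.evalSubExprsIfNeeded(nullptr);
//   float r = eval.coeff(0);        // == std::sqrt(t(0))
//   eval.cleanup();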
// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;

  enum {
    IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                        int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                        int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
                        int(internal::functor_traits<BinaryOp>::PacketAccess),
    BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                        int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_device(device),
      m_functor(op.functor()),
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int NumDims = internal::array_size<
      typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
      LeftTensorBlock;
  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;

  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
                                           RightTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    return m_leftImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    // Evaluate the left sub-expression first, then the right one.
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
                                            [done](bool) { done(true); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index),
                              m_rightImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return internal::TensorBlockResourceRequirements::merge(
               m_leftImpl.getResourceRequirements(),
               m_rightImpl.getResourceRequirements())
        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    desc.DropDestinationBuffer();
    return TensorBlock(m_leftImpl.block(desc, scratch),
                       m_rightImpl.block(desc, scratch), m_functor);
  }
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
#endif

 private:
  const Device EIGEN_DEVICE_REF m_device;
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
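// Usage sketch (illustrative only): a binary evaluator fans each access out to
// both argument evaluators and combines the results with the functor, e.g. for
// the expression `a + b`:
//
//   Eigen::Tensor<float, 1> a(4), b(4);
//   a.setRandom(); b.setRandom();
//   Eigen::DefaultDevice device;
//   auto expr = a + b;              // TensorCwiseBinaryOp
//   Eigen::TensorEvaluator<const decltype(expr), Eigen::DefaultDevice>
//       eval(expr, device);
//   eval.evalSubExprsIfNeeded(nullptr);
//   float s = eval.coeff(1);        // == a(1) + b(1)
//   eval.cleanup();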
// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  static constexpr int Layout = TensorEvaluator<Arg1Type, Device>::Layout;

  enum {
    IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned &
                TensorEvaluator<Arg2Type, Device>::IsAligned &
                TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
                   TensorEvaluator<Arg2Type, Device>::PacketAccess &&
                   TensorEvaluator<Arg3Type, Device>::PacketAccess &&
                   internal::functor_traits<TernaryOp>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_functor(op.functor()),
      m_arg1Impl(op.arg1Expression(), device),
      m_arg2Impl(op.arg2Expression(), device),
      m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);

    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                         typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                         typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                         typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) &&
                 dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    return m_arg1Impl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_arg1Impl.bind(cgh);
    m_arg2Impl.bind(cgh);
    m_arg3Impl.bind(cgh);
  }
#endif

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
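// Usage sketch (illustrative only): ternary expressions rarely appear directly
// in user code; they are typically produced by functions such as
// Eigen::betainc(a, b, x) from the tensor module. A hypothetical evaluation,
// assuming float tensors a, b, x of matching shape and a DefaultDevice build:
//
//   auto expr = Eigen::betainc(a, b, x);   // TensorCwiseTernaryOp
//   Eigen::TensorEvaluator<const decltype(expr), Eigen::DefaultDevice>
//       eval(expr, device);
//   eval.evalSubExprsIfNeeded(nullptr);
//   float v = eval.coeff(0);   // functor applied to the three arguments
//   eval.cleanup();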
// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  static constexpr int Layout = TensorEvaluator<IfArgType, Device>::Layout;

  enum {
    IsAligned         = TensorEvaluator<ThenArgType, Device>::IsAligned &
                        TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ThenArgType, Device>::PacketAccess &
                        TensorEvaluator<ElseArgType, Device>::PacketAccess &
                        PacketType<Scalar, Device>::HasBlend,
    BlockAccess       = TensorEvaluator<IfArgType, Device>::BlockAccess &&
                        TensorEvaluator<ThenArgType, Device>::BlockAccess &&
                        TensorEvaluator<ElseArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
    : m_condImpl(op.ifExpression(), device),
      m_thenImpl(op.thenExpression(), device),
      m_elseImpl(op.elseExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }
  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
      IfArgTensorBlock;
  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
      ThenArgTensorBlock;
  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
      ElseArgTensorBlock;
  struct TensorSelectOpBlockFactory {
    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    struct XprType {
      typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    };

    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
        const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
      return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
    }
  };

  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
                                           IfArgTensorBlock, ThenArgTensorBlock,
                                           ElseArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    return m_condImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    EIGEN_UNROLL_LOOP
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index + i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
               .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    auto then_req = m_thenImpl.getResourceRequirements();
    auto else_req = m_elseImpl.getResourceRequirements();

    auto merged_req =
        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    merged_req.cost_per_coeff =
        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);

    return internal::TensorBlockResourceRequirements::merge(
        m_condImpl.getResourceRequirements(), merged_req);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // It's unsafe to pass a destination buffer to the underlying expressions,
    // because the output might be aliased with one of the inputs.
    desc.DropDestinationBuffer();

    return TensorBlock(
        m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
        m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_condImpl.bind(cgh);
    m_thenImpl.bind(cgh);
    m_elseImpl.bind(cgh);
  }
#endif

 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};
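// Usage sketch (illustrative only): the select evaluator backs expressions of
// the form `cond.select(then, else)`. Scalar access branches per coefficient,
// while the packet path blends both sides with internal::pblend:
//
//   Eigen::Tensor<bool, 1> c(4);
//   Eigen::Tensor<float, 1> t(4), e(4);
//   c.setConstant(true); t.setConstant(1.f); e.setConstant(2.f);
//   auto expr = c.select(t, e);     // TensorSelectOp
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<const decltype(expr), Eigen::DefaultDevice>
//       eval(expr, device);
//   eval.evalSubExprsIfNeeded(nullptr);
//   float v = eval.coeff(0);        // == 1.f, taken from `t`
//   eval.cleanup();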
} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H