// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

// Generic evaluator
template<typename Derived, typename Device>
struct TensorEvaluator
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef Derived XprType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename internal::traits<Derived>::template MakePointer<Scalar>::Type TensorPointerType;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  // NumDimensions is -1 for variable dim tensors
  static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                                   internal::traits<Derived>::NumDimensions : 0;
  static constexpr int Layout = Derived::Layout;

  enum {
    IsAligned = Derived::IsAligned,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = internal::is_arithmetic<std::remove_const_t<Scalar>>::value,
    PreferBlockAccess = false,
    CoordAccess = NumCoords > 0,
    RawAccess = true
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get((const_cast<TensorPointerType>(m.data())))),
        m_dims(m.dimensions()),
        m_device(device)
  { }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

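  // Note on the evalSubExprsIfNeeded() contract as implemented below: when the
  // caller provides a destination buffer and Scalar needs no initialization,
  // the evaluator copies its data directly into `dest` and returns false
  // (there is nothing left to evaluate); otherwise it returns true and the
  // caller is expected to pull values through coeff()/packet().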
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest) {
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && dest) {
      m_device.memcpy((void*)(m_device.get(dest)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) {
    eigen_assert(m_data != NULL);
    return m_data[index];
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  // Return a packet starting at `index`, where `umask` specifies which elements
  // have to be loaded. The type and size of the mask depend on PacketReturnType;
  // e.g. for Packet16f, `umask` is of type uint16_t: if a bit is 1 the
  // corresponding float element is loaded, otherwise 0 is loaded.
  // The function is templated to enable SFINAE.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }
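  // Illustrative use of partialPacket (a sketch, assuming an AVX-512 build
  // where PacketReturnType is Packet16f and mask_t is uint16_t): load only the
  // first `n` remaining elements of a buffer and leave the rest zeroed.
  //
  //   const uint16_t umask = static_cast<uint16_t>((1u << n) - 1);
  //   Packet16f p = evaluator.template partialPacket<Packet16f>(index, umask);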

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    return internal::pstoret<Scalar, PacketReturnType, StoreMode>(m_data + index, x);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType&
  coeffRef(const array<DenseIndex, NumCoords>& coords) {
    eigen_assert(m_data != NULL);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return m_data[m_dims.IndexOfColMajor(coords)];
    } else {
      return m_data[m_dims.IndexOfRowMajor(coords)];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }

  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(m_data != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<Scalar, NumCoords, TensorBlockExpr,
                                            Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(m_dims), m_data,
                                  desc.offset()),
        block.expr());
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif
 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};
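
// Illustrative sketch (under the assumption of a default-constructible device
// such as DefaultDevice) of how an evaluator is typically driven: construct it,
// call evalSubExprsIfNeeded(), read coefficients, then clean up.
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setRandom();
//   Eigen::DefaultDevice device;
//   Eigen::TensorEvaluator<decltype(t), Eigen::DefaultDevice> eval(t, device);
//   if (eval.evalSubExprsIfNeeded(nullptr)) {
//     float sum = 0.f;
//     for (Eigen::Index i = 0; i < eval.dimensions().TotalSize(); ++i) {
//       sum += eval.coeff(i);
//     }
//   }
//   eval.cleanup();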

namespace internal {
template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T loadConstant(const T* address) {
  return *address;
}
// Use the texture cache on CUDA devices whenever possible
#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float loadConstant(const float* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
  return __ldg(address);
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
#ifdef EIGEN_USE_SYCL
// Overload of loadConstant for SYCL, based on range accessors.
template <cl::sycl::access::mode AcMd, typename T>
T &loadConstant(const Eigen::TensorSycl::internal::RangeAccess<AcMd, T> &address) {
  return *address;
}
#endif
}  // namespace internal

// Default evaluator for rvalues
template<typename Derived, typename Device>
struct TensorEvaluator<const Derived, Device>
{
  typedef typename Derived::Index Index;
  typedef typename Derived::Scalar Scalar;
  typedef typename Derived::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename Derived::Dimensions Dimensions;
  typedef const Derived XprType;
  typedef typename internal::traits<Derived>::template MakePointer<const Scalar>::Type TensorPointerType;
  typedef StorageMemory<const Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  // NumDimensions is -1 for variable dim tensors
  static constexpr int NumCoords = internal::traits<Derived>::NumDimensions > 0 ?
                                   internal::traits<Derived>::NumDimensions : 0;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = Derived::Layout;

  enum {
    IsAligned = Derived::IsAligned,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = internal::is_arithmetic<ScalarNoConst>::value,
    PreferBlockAccess = false,
    CoordAccess = NumCoords > 0,
    RawAccess = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumCoords, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumCoords,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device)
      : m_data(device.get(m.data())), m_dims(m.dimensions()), m_device(device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization && data) {
      m_device.memcpy((void*)(m_device.get(data)), m_device.get(m_data), m_dims.TotalSize() * sizeof(Scalar));
      return false;
    }
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType dest, EvalSubExprsCallback done) {
    // TODO(ezhulenev): ThreadPoolDevice memcpy is a blocking operation.
    done(evalSubExprsIfNeeded(dest));
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    eigen_assert(m_data != NULL);
    return internal::loadConstant(m_data+index);
  }

  template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    return internal::ploadt_ro<PacketReturnType, LoadMode>(m_data + index);
  }

  // Return a packet starting at `index`, where `umask` specifies which elements
  // have to be loaded. The type and size of the mask depend on PacketReturnType;
  // e.g. for Packet16f, `umask` is of type uint16_t: if a bit is 1 the
  // corresponding float element is loaded, otherwise 0 is loaded.
  // The function is templated to enable SFINAE.
  template <typename PacketReturnTypeT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  std::enable_if_t<internal::unpacket_traits<PacketReturnTypeT>::masked_load_available, PacketReturnTypeT>
  partialPacket(Index index, typename internal::unpacket_traits<PacketReturnTypeT>::mask_t umask) const
  {
    return internal::ploadu<PacketReturnTypeT>(m_data + index, umask);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array<DenseIndex, NumCoords>& coords) const {
    eigen_assert(m_data != NULL);
    const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
                                                                                 : m_dims.IndexOfRowMajor(coords);
    return internal::loadConstant(m_data+index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    assert(m_data != NULL);
    return TensorBlock::materialize(m_data, m_dims, desc, scratch);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_data; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_data.bind(cgh);
  }
#endif
 protected:
  EvaluatorPointerType m_data;
  Dimensions m_dims;
  const Device EIGEN_DEVICE_REF m_device;
};




// -------------------- CwiseNullaryOp --------------------

template<typename NullaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
{
  typedef TensorCwiseNullaryOp<NullaryOp, ArgType> XprType;

  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = true,
    PacketAccess = internal::functor_traits<NullaryOp>::PacketAccess
#ifdef EIGEN_USE_SYCL
                   && (PacketType<CoeffReturnType, Device>::size > 1)
#endif
    ,
    BlockAccess = false,
    PreferBlockAccess = false,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) { return true; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    done(true);
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() { }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_wrapper(m_functor, index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized,
                        PacketType<CoeffReturnType, Device>::size);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif

 private:
  const NullaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
  const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
};
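
// A sketch of expressions that are handled by the nullary evaluator above,
// assuming the usual TensorBase API (constant(), random()):
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   auto c = t.constant(1.5f);  // TensorCwiseNullaryOp with a constant functor
//   auto r = t.random();        // TensorCwiseNullaryOp with a random generator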



// -------------------- CwiseUnaryOp --------------------

template<typename UnaryOp, typename ArgType, typename Device>
struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
{
  typedef TensorCwiseUnaryOp<UnaryOp, ArgType> XprType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = int(TensorEvaluator<ArgType, Device>::PacketAccess) &
                   int(internal::functor_traits<UnaryOp>::PacketAccess),
    BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device),
        m_functor(op.functor()),
        m_argImpl(op.nestedExpression(), device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef std::remove_const_t<Scalar> ScalarNoConst;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef internal::TensorCwiseUnaryBlock<UnaryOp, ArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_argImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_argImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_argImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_argImpl.coeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_argImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
    return m_argImpl.getResourceRequirements().addCostPerCoeff(
        {0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    return TensorBlock(m_argImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_argImpl.bind(cgh);
  }
#endif


 private:
  const Device EIGEN_DEVICE_REF m_device;
  const UnaryOp m_functor;
  TensorEvaluator<ArgType, Device> m_argImpl;
};
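
// A sketch of expressions that are handled by the unary evaluator above,
// assuming the usual TensorBase API (sqrt(), unaryExpr()):
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   auto a = t.sqrt();                                       // built-in unary functor
//   auto b = t.unaryExpr([](float x) { return 2.f * x; });   // custom functor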


// -------------------- CwiseBinaryOp --------------------

template<typename BinaryOp, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType>, Device>
{
  typedef TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArgType> XprType;

  static constexpr int Layout = TensorEvaluator<LeftArgType, Device>::Layout;
  enum {
    IsAligned = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                   int(TensorEvaluator<RightArgType, Device>::PacketAccess) &
                   int(internal::functor_traits<BinaryOp>::PacketAccess),
    BlockAccess = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                  int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device),
        m_functor(op.functor()),
        m_leftImpl(op.lhsExpression(), device),
        m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<LeftArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int NumDims = internal::array_size<
      typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const LeftArgType, Device>::TensorBlock
      LeftTensorBlock;
  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;

  typedef internal::TensorCwiseBinaryBlock<BinaryOp, LeftTensorBlock,
                                           RightTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use right impl instead if right impl dimensions are known at compile time.
    return m_leftImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    m_rightImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    // TODO(ezhulenev): Evaluate the two expressions in parallel?
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(nullptr,
                                            [done](bool) { done(true); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_leftImpl.template packet<LoadMode>(index), m_rightImpl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return m_leftImpl.costPerCoeff(vectorized) +
           m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
    return internal::TensorBlockResourceRequirements::merge(
               m_leftImpl.getResourceRequirements(),
               m_rightImpl.getResourceRequirements())
        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    desc.DropDestinationBuffer();
    return TensorBlock(m_leftImpl.block(desc, scratch),
                       m_rightImpl.block(desc, scratch), m_functor);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
#endif
 private:
  const Device EIGEN_DEVICE_REF m_device;
  const BinaryOp m_functor;
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};
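
// A sketch of expressions that are handled by the binary evaluator above:
//
//   Eigen::Tensor<float, 2> a(3, 4), b(3, 4);
//   auto sum  = a + b;  // scalar_sum_op
//   auto prod = a * b;  // scalar_product_op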

// -------------------- CwiseTernaryOp --------------------

template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
{
  typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;

  static constexpr int Layout = TensorEvaluator<Arg1Type, Device>::Layout;
  enum {
    IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
    PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess &&
                   TensorEvaluator<Arg2Type, Device>::PacketAccess &&
                   TensorEvaluator<Arg3Type, Device>::PacketAccess &&
                   internal::functor_traits<TernaryOp>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = TensorEvaluator<Arg1Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg2Type, Device>::PreferBlockAccess ||
                        TensorEvaluator<Arg3Type, Device>::PreferBlockAccess,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
      : m_functor(op.functor()),
        m_arg1Impl(op.arg1Expression(), device),
        m_arg2Impl(op.arg2Expression(), device),
        m_arg3Impl(op.arg3Expression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);

    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg2Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
                                           typename internal::traits<Arg3Type>::StorageKind>::value),
                        STORAGE_KIND_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg2Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)
    EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
                                           typename internal::traits<Arg3Type>::Index>::value),
                        STORAGE_INDEX_MUST_MATCH)

    eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use arg2 or arg3 dimensions if they are known at compile time.
    return m_arg1Impl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_arg1Impl.evalSubExprsIfNeeded(NULL);
    m_arg2Impl.evalSubExprsIfNeeded(NULL);
    m_arg3Impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_STRONG_INLINE void cleanup() {
    m_arg1Impl.cleanup();
    m_arg2Impl.cleanup();
    m_arg3Impl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
                              m_arg2Impl.template packet<LoadMode>(index),
                              m_arg3Impl.template packet<LoadMode>(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
    return m_arg1Impl.costPerCoeff(vectorized) +
           m_arg2Impl.costPerCoeff(vectorized) +
           m_arg3Impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_arg1Impl.bind(cgh);
    m_arg2Impl.bind(cgh);
    m_arg3Impl.bind(cgh);
  }
#endif

 private:
  const TernaryOp m_functor;
  TensorEvaluator<Arg1Type, Device> m_arg1Impl;
  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
  TensorEvaluator<Arg3Type, Device> m_arg3Impl;
};
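
// A sketch of an expression that is handled by the ternary evaluator above,
// assuming the Tensor module's free function betainc() (the incomplete beta
// function), which builds a TensorCwiseTernaryOp:
//
//   Eigen::Tensor<float, 2> a(3, 4), b(3, 4), x(3, 4);
//   auto ib = Eigen::betainc(a, b, x);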


// -------------------- SelectOp --------------------

template<typename IfArgType, typename ThenArgType, typename ElseArgType, typename Device>
struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>, Device>
{
  typedef TensorSelectOp<IfArgType, ThenArgType, ElseArgType> XprType;
  typedef typename XprType::Scalar Scalar;

  static constexpr int Layout = TensorEvaluator<IfArgType, Device>::Layout;
  enum {
    IsAligned = TensorEvaluator<ThenArgType, Device>::IsAligned &
                TensorEvaluator<ElseArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ThenArgType, Device>::PacketAccess &
                   TensorEvaluator<ElseArgType, Device>::PacketAccess &
                   PacketType<Scalar, Device>::HasBlend,
    BlockAccess = TensorEvaluator<IfArgType, Device>::BlockAccess &&
                  TensorEvaluator<ThenArgType, Device>::BlockAccess &&
                  TensorEvaluator<ElseArgType, Device>::BlockAccess,
    PreferBlockAccess = TensorEvaluator<IfArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ThenArgType, Device>::PreferBlockAccess ||
                        TensorEvaluator<ElseArgType, Device>::PreferBlockAccess,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };

  TensorEvaluator(const XprType& op, const Device& device)
      : m_condImpl(op.ifExpression(), device),
        m_thenImpl(op.thenExpression(), device),
        m_elseImpl(op.elseExpression(), device)
  {
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ThenArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<IfArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<ElseArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions()));
    eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef typename TensorEvaluator<IfArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int NumDims = internal::array_size<Dimensions>::value;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const IfArgType, Device>::TensorBlock
      IfArgTensorBlock;
  typedef typename TensorEvaluator<const ThenArgType, Device>::TensorBlock
      ThenArgTensorBlock;
  typedef typename TensorEvaluator<const ElseArgType, Device>::TensorBlock
      ElseArgTensorBlock;

  struct TensorSelectOpBlockFactory {
    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    struct XprType {
      typedef TensorSelectOp<const IfArgXprType, const ThenArgXprType, const ElseArgXprType> type;
    };

    template <typename IfArgXprType, typename ThenArgXprType, typename ElseArgXprType>
    typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type expr(
        const IfArgXprType& if_expr, const ThenArgXprType& then_expr, const ElseArgXprType& else_expr) const {
      return typename XprType<IfArgXprType, ThenArgXprType, ElseArgXprType>::type(if_expr, then_expr, else_expr);
    }
  };

  typedef internal::TensorTernaryExprBlock<TensorSelectOpBlockFactory,
                                           IfArgTensorBlock, ThenArgTensorBlock,
                                           ElseArgTensorBlock>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // TODO: use then or else impl instead if they happen to be known at compile time.
    return m_condImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_condImpl.evalSubExprsIfNeeded(NULL);
    m_thenImpl.evalSubExprsIfNeeded(NULL);
    m_elseImpl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_condImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_thenImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
        m_elseImpl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
      });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_condImpl.cleanup();
    m_thenImpl.cleanup();
    m_elseImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    internal::Selector<PacketSize> select;
    EIGEN_UNROLL_LOOP
    for (Index i = 0; i < PacketSize; ++i) {
      select.select[i] = m_condImpl.coeff(index+i);
    }
    return internal::pblend(select,
                            m_thenImpl.template packet<LoadMode>(index),
                            m_elseImpl.template packet<LoadMode>(index));

  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    return m_condImpl.costPerCoeff(vectorized) +
           m_thenImpl.costPerCoeff(vectorized)
               .cwiseMax(m_elseImpl.costPerCoeff(vectorized));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    auto then_req = m_thenImpl.getResourceRequirements();
    auto else_req = m_elseImpl.getResourceRequirements();

    auto merged_req =
        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
    merged_req.cost_per_coeff =
        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);

    return internal::TensorBlockResourceRequirements::merge(
        m_condImpl.getResourceRequirements(), merged_req);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // It's unsafe to pass the destination buffer to the underlying expressions,
    // because the output might be aliased with one of the inputs.
    desc.DropDestinationBuffer();

    return TensorBlock(
        m_condImpl.block(desc, scratch), m_thenImpl.block(desc, scratch),
        m_elseImpl.block(desc, scratch), TensorSelectOpBlockFactory());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_condImpl.bind(cgh);
    m_thenImpl.bind(cgh);
    m_elseImpl.bind(cgh);
  }
#endif
 private:
  TensorEvaluator<IfArgType, Device> m_condImpl;
  TensorEvaluator<ThenArgType, Device> m_thenImpl;
  TensorEvaluator<ElseArgType, Device> m_elseImpl;
};
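
// A sketch of an expression that is handled by the select evaluator above,
// assuming the usual TensorBase select() API on a boolean condition expression:
//
//   Eigen::Tensor<float, 2> a(3, 4), b(3, 4);
//   auto mixed = (a > a.constant(0.f)).select(a, b);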


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H