#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

#include "./InternalHeaderCheck.h"

namespace Eigen {
namespace internal {

template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<NewDimensions>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType> EIGEN_DEVICE_REF type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

}  // end namespace internal
template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
    typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
    typedef std::remove_const_t<typename XprType::CoeffReturnType> CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};
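// A minimal usage sketch (tensor names and sizes below are illustrative assumptions,
// not part of this header): TensorReshapingOp is normally created through
// TensorBase::reshape(), and the total number of coefficients must match.
//
//   Eigen::Tensor<float, 2> input(6, 4);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 3> new_dims{{2, 3, 4}};
//   Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);  // 6*4 == 2*3*4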
// Evaluator for the rvalue (const) reshaping expression.
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;

  static constexpr int NumOutputDims = internal::array_size<Dimensions>::value;
  static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;

  enum ReshapingKind {
    // We do not use layout information to determine the reshaping kind.
    Runtime,
    OneByN,  // e.g. 1x5
    NByOne   // e.g. 5x1
  };

  static const ReshapingKind kind =
        (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(0, 1)) ? OneByN
      : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(1, 1)) ? NByOne
      : Runtime;
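  // Sketch of how `kind` becomes a compile-time constant (illustrative only; assumes
  // the new dimensions are expressed with Eigen::IndexList so they are static):
  //
  //   Eigen::Tensor<float, 3> t(4, 5, 6);
  //   Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<120> > one_by_n;
  //   auto row = t.reshape(one_by_n);  // NumOutputDims == 2, dim 0 == 1 -> kind == OneByN
  //
  // With a plain Eigen::array the sizes are only known at runtime and `kind` stays Runtime.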
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    // For trivial reshapes with raw access to the underlying data we provide
    // zero-overhead block access.
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess &&
                        NumInputDims > 0 && NumOutputDims > 0,
    PreferBlockAccess = false,
    RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef
      typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
                                                 Layout, Index>
          TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType data, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
  }
#endif

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::any();
  }

  struct BlockIteratorState {
    Index stride;
    Index span;
    Index size;
    Index count;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    eigen_assert(m_impl.data() != NULL);
    eigen_assert((kind == Runtime) ||
                 (kind == OneByN && desc.dimensions()[0] == 1) ||
                 (kind == NByOne && desc.dimensions()[1] == 1));

    if (kind == OneByN || kind == NByOne) {
      // We can guarantee at compile time that the block is just a contiguous
      // slice of the underlying expression memory buffer.
      return TensorBlock(internal::TensorBlockKind::kView,
                         m_impl.data() + desc.offset(), desc.dimensions());
    } else {
      // This does additional runtime checks; the result may still be a view,
      // or a block materialized in a temporary buffer.
      return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
                                      scratch);
    }
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const {
    return constCast(m_impl.data());
  }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};
// Evaluator for the lvalue (writable) reshaping expression.
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = false,
    RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index>
      TensorBlockDesc;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }

  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    typedef typename TensorBlock::XprType TensorBlockExpr;
    typedef internal::TensorBlockAssignment<
        Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(desc.dimensions(),
                                  internal::strides<Layout>(this->dimensions()),
                                  this->m_impl.data(), desc.offset()),
        block.expr());
  }
};
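// Because TensorReshapingOp derives from TensorBase<..., WriteAccessors>, a reshaped
// expression can also appear on the left-hand side of an assignment. A small sketch
// (tensor names and sizes are illustrative assumptions):
//
//   Eigen::Tensor<float, 3> dst(2, 3, 4);
//   Eigen::Tensor<float, 2> src(6, 4);
//   src.setRandom();
//   Eigen::array<Eigen::Index, 2> flat{{6, 4}};
//   dst.reshape(flat) = src;  // writes through the lvalue evaluator above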
namespace internal {

template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<StartIndices>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType> EIGEN_DEVICE_REF type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

}  // end namespace internal
template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
    typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
        : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};
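// A minimal usage sketch (tensor names and sizes are illustrative assumptions):
// TensorSlicingOp is normally created through TensorBase::slice(offsets, extents),
// and the sliced expression can be read from or assigned to.
//
//   Eigen::Tensor<float, 2> input(4, 5);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{2, 3}};
//   Eigen::Tensor<float, 2> patch = input.slice(offsets, extents);  // 2x3 view of input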
namespace internal {

template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
    const bool prefer_block_evaluation = BlockAccess && total > 32*1024;
    return !prefer_block_evaluation && contiguous > threshold_;
  }

 private:
  Index threshold_;
};
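// Worked example of the heuristic above (sizes are illustrative assumptions): slicing
// columns [10, 20) out of a 1000x1000 column-major tensor yields a single contiguous
// run of 10 * 1000 = 10000 coefficients. On a device with 8 threads the threshold is
// 2 * 8 = 16; block evaluation is not preferred (10000 <= 32*1024) and 10000 > 16, so
// evalSubExprsIfNeeded() below copies the slice with one memcpy.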
// Starting a memcpy kernel on GPU is very expensive: only use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif

// The same applies to SYCL devices.
#ifdef EIGEN_USE_SYCL
template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
};
#endif

}  // end namespace internal
// Evaluator for the rvalue (const) slicing expression.
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef StorageMemory<std::remove_const_t<CoeffReturnType>, Device> ConstCastStorage;
  typedef typename Storage::Type EvaluatorPointerType;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess &&
                        // FIXME: Temporary workaround for bug in slicing of bool tensors.
                        !internal::is_same<std::remove_const_t<Scalar>, bool>::value,
    PreferBlockAccess = true,
    RawAccess         = false
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  // Slicing does not change the block type of the underlying expression.
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    m_is_identity = true;
    for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >=
                   op.sizes()[i] + op.startIndices()[i]);
      if (m_impl.dimensions()[i] != op.sizes()[i] ||
          op.startIndices()[i] != 0) {
        m_is_identity = false;
      }
    }

    // No strides for scalars.
    if (NumDims == 0) return;

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<std::remove_const_t<Scalar>>::RequireInitialization
        && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
      // Use memcpy if it's going to be faster than the regular evaluation.
      const internal::MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
      if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
        EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
        for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    }
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    if (m_is_identity) {
      return m_impl.template packet<LoadMode>(index);
    }

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      // The packet is contiguous in the input: load it directly.
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    } else {
      // Gather the coefficients one by one and assemble the packet.
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
    TensorBlock block = m_impl.block(arg_desc, scratch);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
    return block;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }
#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif
 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }
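  // Worked example of the mapping above (column-major; values are illustrative):
  // slicing a 4x5 tensor with offsets (1, 2) and extents (2, 3) gives
  // m_inputStrides = {1, 4} and m_outputStrides = {1, 2}. For output index 3:
  //   i = 1: idx = 3 / 2 = 1, inputIndex += (1 + 2) * 4 = 12, index = 3 - 2 = 1
  //   tail:  inputIndex += 1 + 1 = 2
  // so output coefficient 3 (slice entry at row 1, column 1) reads input index 14,
  // i.e. row 2, column 3 of the original tensor.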
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  Dimensions m_dimensions;
  bool m_is_identity;
  const StartIndices m_offsets;
};
// Evaluator for the lvalue (writable) slicing expression.
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Sizes>::value;

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    RawAccess         = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    }
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    if (this->m_is_identity) {
      this->m_impl.template writePacket<StoreMode>(index, x);
      return;
    }

    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      // The packet is contiguous in the input: store it directly.
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    } else {
      // Scatter the packet coefficient by coefficient.
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }

  template<typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
    this->m_impl.writeBlock(arg_desc, block);
  }
};
namespace internal {

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = array_size<StartIndices>::value;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> EIGEN_DEVICE_REF type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

}  // end namespace internal
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
    typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base;
    typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
        const XprType& expr, const StartIndices& startIndices,
        const StopIndices& stopIndices, const Strides& strides)
        : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
          m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};
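// A minimal usage sketch (tensor names and sizes are illustrative assumptions):
// TensorStridingSlicingOp is normally created through
// TensorBase::stridedSlice(startIndices, stopIndices, strides). With a negative
// stride the range is traversed backwards, Python-style, with an exclusive stop.
//
//   Eigen::Tensor<float, 1> v(10);
//   v.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{7}}, stop{{1}}, stride{{-2}};
//   Eigen::Tensor<float, 1> picked = v.stridedSlice(start, stop, stride);
//   // picked holds v(7), v(5), v(3); the element at index 1 is excluded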
// Evaluator for the rvalue (const) strided slicing expression.
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Strides>::value;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef Strides Dimensions;

  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets, sizes and strides.
    IsAligned         = false,
    PacketAccess      = false,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    RawAccess         = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_device(device),
        m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero.
    DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
    for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if (m_strides[i] > 0) {
        startIndicesClamped[i] =
            clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] =
            clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      } else {
        // Negative stride: clamp to [-1, dim - 1] so the interval can run backwards.
        startIndicesClamped[i] =
            clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] =
            clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
    const InputDimensions& input_dims = m_impl.dimensions();

    // Compute the output tensor shape; empty or backwards intervals yield a zero-sized dimension.
    m_is_identity = true;
    for (int i = 0; i < NumDims; i++) {
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
        m_dimensions[i] = 0;
      } else {
        m_dimensions[i] =
            (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
      if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
        m_is_identity = false;
      }
    }

    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
      }
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    if (m_is_identity) {
      return m_impl.coeff(index);
    }
    return m_impl.coeff(srcCoeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    // A strided slice is in general not contiguous, so no raw data pointer is exposed.
    return NULL;
  }
#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
#ifndef SYCL_DEVICE_ONLY
    return numext::maxi(min, numext::mini(max, value));
#else
    return cl::sycl::clamp(value, min, max);
#endif
  }
  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  bool m_is_identity;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  DSizes<Index, NumDims> m_startIndices;  // clamped start indices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets;       // offsets in the flattened input shape
  const Strides m_strides;
};
// Evaluator for the lvalue (writable) strided slicing expression.
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static constexpr int NumDims = internal::array_size<Strides>::value;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned         = false,
    PacketAccess      = false,
    BlockAccess       = false,
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
    CoordAccess       = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess         = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    if (this->m_is_identity) {
      return this->m_impl.coeffRef(index);
    }
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H