#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

// A chip is a thin slice of a tensor, corresponding to a column or a row
// in a 2-d tensor.
namespace internal {
template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions - 1;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
};
template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};
template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    EIGEN_UNUSED_VARIABLE(dim);
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return DimId;
  }
};
template <>
struct DimensionId<Dynamic>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) {
    eigen_assert(dim >= 0);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return actual_dim;
  }
 private:
  const DenseIndex actual_dim;
};
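// Note: DimensionId<N> encodes the chip dimension purely at compile time,
// while the DimensionId<Dynamic> specialization stores it in a runtime
// member; both expose it uniformly through actualDim().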
}  // end namespace internal

template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
  public:
    typedef TensorBase<TensorChippingOp<DimId, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
        : m_xpr(expr), m_offset(offset), m_dim(dim) {
    }

    EIGEN_DEVICE_FUNC
    const Index offset() const { return m_offset; }
    EIGEN_DEVICE_FUNC
    const Index dim() const { return m_dim.actualDim(); }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Index m_offset;
    const internal::DimensionId<DimId> m_dim;
};
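// Usage sketch (illustrative values): chipping a 3-d tensor along dimension 1
// yields a 2-d expression over the original data:
//   Eigen::Tensor<float, 3> input(2, 3, 4);
//   input.setRandom();
//   Eigen::Tensor<float, 2> slice = input.chip(1, 1);   // slice(i, k) == input(i, 1, k)
//   Eigen::Tensor<float, 2> slice2 = input.chip<1>(1);  // dimension fixed at compile time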
// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static constexpr int NumDims = NumInputDims - 1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets.
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    // Chipping the outer-most dimension is a trivial operation, because we can
    // read and write directly from the underlying tensor using a single offset.
    IsOuterChipping   = (static_cast<int>(Layout) == ColMajor && DimId == NumInputDims - 1) ||
                        (static_cast<int>(Layout) == RowMajor && DimId == 0),
    // Chipping the inner-most dimension.
    IsInnerChipping   = (static_cast<int>(Layout) == ColMajor && DimId == 0) ||
                        (static_cast<int>(Layout) == RowMajor && DimId == NumInputDims - 1),
    // Prefer block access if the underlying expression prefers it, otherwise
    // only if chipping is not trivial.
    PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess ||
                        !IsOuterChipping,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  typedef std::remove_const_t<Scalar> ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef internal::TensorBlockDescriptor<NumInputDims, Index>
      ArgTensorBlockDesc;
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    // The chipped dimension is squeezed out of the output dimensions.
    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims - 1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }
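  // Worked example (values assumed for illustration): for a ColMajor input of
  // dimensions (d0, d1, d2) chipped along dimension 1 at offset `o`:
  //   m_stride      = d0        -- combined stride below the chipped dimension
  //   m_inputStride = d0 * d1   -- stride to step over the chipped dimension
  //   m_inputOffset = d0 * o    -- linear offset of the selected slice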
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    if (isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // The packet crosses a stride boundary: fall back to the scalar path.
        EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        PacketReturnType rslt = internal::pload<PacketReturnType>(values);
        return rslt;
      }
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      // Inner chipping: one multiply and one add per coefficient.
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      // Outer chipping: a single add per coefficient.
      cost += TensorOpCost::AddCost<Index>();
    } else {
      // General case: index arithmetic includes an integer division.
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    const Index chip_dim = m_dim.actualDim();

    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i]
          = i < chip_dim ? desc.dimension(i)
          : i > chip_dim ? desc.dimension(i - 1)
          : 1;
    }

    ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);

    // Try to reuse the destination buffer for materializing the argument block.
    if (desc.HasDestinationBuffer()) {
      DSizes<Index, NumInputDims> arg_destination_strides;
      for (int i = 0; i < NumInputDims; ++i) {
        arg_destination_strides[i]
            = i < chip_dim ? desc.destination().strides()[i]
            : i > chip_dim ? desc.destination().strides()[i - 1]
            : 0;  // for dimensions of size `1` the stride should never be used
      }

      arg_desc.template AddDestinationBuffer<Layout>(
          desc.destination().template data<ScalarNoConst>(),
          arg_destination_strides);
    }

    ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();

    if (arg_block.data() != NULL) {
      // Forward the argument block buffer if possible.
      return TensorBlock(arg_block.kind(), arg_block.data(),
                         desc.dimensions());
    } else {
      // Assign the argument block expression to a buffer.

      // Prepare storage for the materialized chipping result.
      const typename TensorBlock::Storage block_storage =
          TensorBlock::prepareStorage(desc, scratch);

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              arg_desc.dimensions(),
              internal::strides<Layout>(arg_desc.dimensions()),
              block_storage.data()),
          arg_block.expr());

      return block_storage.AsTensorMaterializedBlock();
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (isOuterChipping() && result) {
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }

#ifdef EIGEN_USE_SYCL
  // binds the placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if (isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }
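  // Continuing the (d0, d1, d2) ColMajor example with chip dimension 1 and
  // offset `o` (values assumed for illustration): an output index
  // n = i + k * d0 gives idx = k and remainder i, hence
  //   inputIndex = k * (d0 * d1) + d0 * o + i,
  // which is exactly the ColMajor linear index of input coefficient (i, o, k).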
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const {
    return IsInnerChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == 0) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const {
    return IsOuterChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == NumInputDims - 1) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == 0);
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device EIGEN_DEVICE_REF m_device;
};
// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
    : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static constexpr int NumDims = NumInputDims - 1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;

  enum {
    IsAligned    = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess  = TensorEvaluator<ArgType, Device>::RawAccess,
    Layout       = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess    = false
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device)
  { }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
  template <int StoreMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    if (this->isInnerChipping()) {
      // m_stride is equal to 1, so let's avoid the integer division.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if (this->isOuterChipping()) {
      // m_stride is always greater than index, so let's avoid the integer division.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // The packet crosses a stride boundary: fall back to the scalar path.
        EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }
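  // The lvalue evaluator makes chipping expressions writable, e.g.
  // (illustrative values):
  //   Eigen::Tensor<float, 3> t(2, 3, 4);
  //   Eigen::Tensor<float, 2> plane(2, 4);
  //   plane.setZero();
  //   t.chip(0, 1) = plane;  // overwrites the coefficients t(i, 0, k)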
  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    assert(this->m_impl.data() != NULL);

    const Index chip_dim = this->m_dim.actualDim();

    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
                          : i > chip_dim ? desc.dimension(i - 1)
                          : 1;
    }

    typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
                              const typename TensorBlock::XprType>
        TensorBlockExpr;

    typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
                                            TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(
            input_block_dims,
            internal::strides<Layout>(this->m_impl.dimensions()),
            this->m_impl.data(), this->srcCoeff(desc.offset())),
        block.expr().reshape(input_block_dims));
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H