10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
13 #include "./InternalHeaderCheck.h"
// Compile-time traits for TensorPaddingOp. Inherits traits<XprType> and
// re-exports the wrapped expression's scalar type, storage kind, index type,
// nesting type, number of dimensions, layout and pointer type.
// NOTE(review): this listing carries embedded line numbers and appears to omit
// some lines (e.g. the struct's braces); comments describe visible lines only.
25 template<
typename PaddingDimensions,
typename XprType>
26 struct traits<TensorPaddingOp<PaddingDimensions, XprType> > :
public traits<XprType>
28 typedef typename XprType::Scalar Scalar;
29 typedef traits<XprType> XprTraits;
30 typedef typename XprTraits::StorageKind StorageKind;
31 typedef typename XprTraits::Index
Index;
32 typedef typename XprType::Nested Nested;
// Nested with any reference qualifier stripped.
33 typedef std::remove_reference_t<Nested> Nested_;
// Padding keeps the rank and the layout of the wrapped expression.
34 static constexpr
int NumDimensions = XprTraits::NumDimensions;
35 static constexpr
int Layout = XprTraits::Layout;
36 typedef typename XprTraits::PointerType PointerType;
// eval specialization for dense storage: `type` is a const reference to the
// padding expression itself.
39 template<
typename PaddingDimensions,
typename XprType>
40 struct eval<TensorPaddingOp<PaddingDimensions, XprType>,
Eigen::Dense>
42 typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
// nested specialization: `type` is the padding expression by value (no
// reference), in contrast to eval<...>::type which is a const reference.
45 template<
typename PaddingDimensions,
typename XprType>
46 struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
48 typedef TensorPaddingOp<PaddingDimensions, XprType> type;
// Expression node describing the padding of a tensor expression.
//
// Template parameters:
//   PaddingDimensions - container of per-dimension padding amounts; the
//                       evaluator reads `.first` / `.second` of each entry.
//   XprType           - the expression being padded.
//
// The node is read-only (ReadOnlyAccessors) and only stores its inputs; the
// actual computation lives in the matching TensorEvaluator specialization.
// NOTE(review): access specifiers and braces are not visible in this listing.
55 template<
typename PaddingDimensions,
typename XprType>
56 class TensorPaddingOp :
public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
59 typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
61 typedef typename XprType::CoeffReturnType CoeffReturnType;
62 typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
63 typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
64 typedef typename Eigen::internal::traits<TensorPaddingOp>::Index
Index;
// Builds the node from the expression to pad, the per-dimension padding
// amounts and the value used to fill the padded area.
66 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(
const XprType& expr,
const PaddingDimensions& padding_dims,
const Scalar padding_value)
67 : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}
// Returns the stored per-dimension padding amounts.
70 const PaddingDimensions& padding()
const {
return m_padding_dims; }
// Returns the fill value (by value).
72 Scalar padding_value()
const {
return m_padding_value; }
// Returns the wrapped expression.
75 const internal::remove_all_t<typename XprType::Nested>&
76 expression()
const {
return m_xpr; }
// Wrapped expression, held as XprType::Nested.
79 typename XprType::Nested m_xpr;
// Padding amounts and fill value are copied at construction and immutable.
80 const PaddingDimensions m_padding_dims;
81 const Scalar m_padding_value;
// Evaluator for a const TensorPaddingOp on a given Device: type aliases and
// compile-time capability flags.
// NOTE(review): the enum opener for the flags and the remainder of the last
// typedef are not visible in this listing.
86 template<
typename PaddingDimensions,
typename ArgType,
typename Device>
87 struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
89 typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
90 typedef typename XprType::Index Index;
// The rank is taken from the number of padding entries.
91 static constexpr
int NumDims = internal::array_size<PaddingDimensions>::value;
92 typedef DSizes<Index, NumDims> Dimensions;
93 typedef typename XprType::Scalar Scalar;
94 typedef typename XprType::CoeffReturnType CoeffReturnType;
95 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
96 static constexpr
int PacketSize = PacketType<CoeffReturnType, Device>::size;
97 typedef StorageMemory<CoeffReturnType, Device> Storage;
98 typedef typename Storage::Type EvaluatorPointerType;
// Layout is inherited from the argument's evaluator.
100 static constexpr
int Layout = TensorEvaluator<ArgType, Device>::Layout;
// Packet access is available whenever the argument supports it.
103 PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
// Block evaluation is enabled only when the argument exposes raw access;
// block() reads the input through m_impl.data().
104 BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
105 PreferBlockAccess =
true,
110 typedef std::remove_const_t<Scalar> ScalarNoConst;
// Tensor block API types.
113 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
114 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
// NOTE(review): this typedef continues on lines not visible here; block()
// refers to the resulting alias as `TensorBlock`.
116 typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
// Constructs the evaluator: builds the argument evaluator, copies the padding
// description and fill value, then precomputes the padded dimensions and the
// input/output strides used to map output indices back to input indices.
121 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
122 : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
// Zero-dimensional tensors are rejected at compile time.
127 EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
// Output size per dimension = input size + leading padding + trailing padding.
130 m_dimensions = m_impl.dimensions();
131 for (
int i = 0; i < NumDims; ++i) {
132 m_dimensions[i] += m_padding[i].first + m_padding[i].second;
134 const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
135 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
// Column-major: dimension 0 is innermost (stride 1). m_outputStrides[i] is
// the product of the padded sizes of dimensions below i, and the extra entry
// m_outputStrides[NumDims] ends up holding the total padded size.
136 m_inputStrides[0] = 1;
137 m_outputStrides[0] = 1;
138 for (
int i = 1; i < NumDims; ++i) {
139 m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
140 m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
142 m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
// NOTE(review): the lines below are presumably the row-major `else` branch
// (the `else` itself is not visible in this listing). Here the output strides
// are shifted by one: m_outputStrides[i+1] is the stride of dimension i,
// m_outputStrides[NumDims] is 1 and m_outputStrides[0] is the total padded
// size.
144 m_inputStrides[NumDims - 1] = 1;
145 m_outputStrides[NumDims] = 1;
146 for (
int i = NumDims - 2; i >= 0; --i) {
147 m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
148 m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
150 m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
// Returns the dimensions of the padded (output) tensor.
154 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_dimensions; }
// Asks the argument evaluator to evaluate its sub-expressions. The destination
// pointer passed to this evaluator is ignored (unnamed parameter) and a null
// destination is forwarded to the argument.
// NOTE(review): the return statement is not visible in this listing. Also
// note this uses NULL while the async variant uses nullptr.
156 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType) {
157 m_impl.evalSubExprsIfNeeded(NULL);
// Asynchronous variant, compiled only when EIGEN_USE_THREADS is defined.
// Forwards to the argument evaluator with a null destination; once the
// argument is done, `done(true)` is invoked regardless of the bool the
// argument's callback receives.
161 #ifdef EIGEN_USE_THREADS
162 template <
typename EvalSubExprsCallback>
163 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
164 EvaluatorPointerType, EvalSubExprsCallback done) {
165 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
// Cleanup hook of the evaluator.
// NOTE(review): the body is not visible in this listing; presumably it
// forwards to m_impl - confirm against the full source.
169 EIGEN_STRONG_INLINE
void cleanup() {
// Returns the coefficient at linear output index `index`.
//
// The linear index is decomposed dimension by dimension, outermost first. As
// soon as one coordinate falls into the padded area the fill value is
// returned; otherwise the matching linear input index is accumulated and the
// coefficient is read from the argument evaluator.
// NOTE(review): braces and the `else` separating the two layout branches are
// not visible in this listing.
173 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const
175 eigen_assert(index < dimensions().TotalSize());
176 Index inputIndex = 0;
177 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
// Column-major: peel coordinates from dimension NumDims-1 down to 1.
179 for (
int i = NumDims - 1; i > 0; --i) {
180 const Index idx = index / m_outputStrides[i];
181 if (isPaddingAtIndexForDim(idx, i)) {
182 return m_paddingValue;
// Shift by the leading padding to get the input coordinate.
184 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
185 index -= idx * m_outputStrides[i];
// What remains of `index` is the coordinate along dimension 0 (stride 1).
187 if (isPaddingAtIndexForDim(index, 0)) {
188 return m_paddingValue;
190 inputIndex += (index - m_padding[0].first);
// Row-major: peel coordinates from dimension 0 up to NumDims-2; the stride of
// dimension i is stored at m_outputStrides[i+1].
193 for (
int i = 0; i < NumDims - 1; ++i) {
194 const Index idx = index / m_outputStrides[i+1];
195 if (isPaddingAtIndexForDim(idx, i)) {
196 return m_paddingValue;
198 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
199 index -= idx * m_outputStrides[i+1];
// What remains is the coordinate along the innermost dimension NumDims-1.
201 if (isPaddingAtIndexForDim(index, NumDims-1)) {
202 return m_paddingValue;
204 inputIndex += (index - m_padding[NumDims-1].first);
206 return m_impl.coeff(inputIndex);
// Returns the packet of PacketSize coefficients starting at linear output
// index `index`, dispatching on the layout. LoadMode is not used by the
// visible lines.
209 template<
int LoadMode>
210 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index)
const
212 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
213 return packetColMajor(index);
215 return packetRowMajor(index);
// Estimates the cost of computing one output coefficient. Starts from the
// argument's per-coefficient cost and lets updateCostPerDimension adjust it
// once per dimension, walking from the innermost dimension outwards. The
// `first` flag passed to the helper is true only for the innermost dimension
// (0 in column-major, NumDims-1 in row-major).
// NOTE(review): the `else` and the final return are not visible here.
218 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(
bool vectorized)
const {
219 TensorOpCost cost = m_impl.costPerCoeff(vectorized);
220 if (
static_cast<int>(Layout) ==
static_cast<int>(
ColMajor)) {
222 for (
int i = 0; i < NumDims; ++i)
223 updateCostPerDimension(cost, i, i == 0);
226 for (
int i = NumDims - 1; i >= 0; --i)
227 updateCostPerDimension(cost, i, i == NumDims - 1);
// Block-evaluation resource requirements: a skewed block shape sized from the
// device's last-level cache size, merged with the argument's requirements.
232 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
233 internal::TensorBlockResourceRequirements getResourceRequirements()
const {
234 const size_t target_size = m_device.lastLevelCacheSize();
235 return internal::TensorBlockResourceRequirements::merge(
236 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
237 m_impl.getResourceRequirements());
// Materializes the output block described by `desc` into storage obtained via
// TensorBlock::prepareStorage(desc, scratch).
//
// The block is produced one inner-dimension row at a time. Each row is either
// entirely padding, or split into [padding before | copied input | padding
// after]. Input is read directly through m_impl.data().
// NOTE(review): this listing omits several lines (braces, some `else`
// branches, parts of multi-line expressions); comments cover visible code.
240 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
241 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
242 bool =
false)
const {
// An empty block is returned as a view block with a null data pointer.
244 if (desc.size() == 0) {
245 return TensorBlock(internal::TensorBlockKind::kView, NULL,
249 static const bool IsColMajor = Layout ==
static_cast<int>(
ColMajor);
250 const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;
252 Index offset = desc.offset();
// Decompose the block's linear offset into output coordinates, outermost
// dimension first; the remainder is the inner-dimension coordinate.
255 DSizes<Index, NumDims> output_offsets;
256 for (
int i = NumDims - 1; i > 0; --i) {
257 const int dim = IsColMajor ? i : NumDims - i - 1;
258 const int stride_dim = IsColMajor ? dim : dim + 1;
259 output_offsets[dim] = offset / m_outputStrides[stride_dim];
260 offset -= output_offsets[dim] * m_outputStrides[stride_dim];
262 output_offsets[inner_dim_idx] = offset;
// Input coordinates = output coordinates minus the leading padding. They can
// be negative when the block starts inside the leading padding.
265 DSizes<Index, NumDims> input_offsets = output_offsets;
266 for (
int i = 0; i < NumDims; ++i) {
267 const int dim = IsColMajor ? i : NumDims - i - 1;
268 input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
// Linear offset into the input buffer for the block's first coefficient.
274 Index input_offset = 0;
275 for (
int i = 0; i < NumDims; ++i) {
276 const int dim = IsColMajor ? i : NumDims - i - 1;
277 input_offset += input_offsets[dim] * m_inputStrides[dim];
// The destination is addressed with strides derived from the block's own
// dimensions, starting at offset 0.
283 Index output_offset = 0;
284 const DSizes<Index, NumDims> output_strides =
285 internal::strides<Layout>(desc.dimensions());
// One iterator state per non-inner dimension, ordered from the dimension
// next to the inner one outwards.
295 array<BlockIteratorState, NumDims - 1> it;
296 for (
int i = 0; i < NumDims - 1; ++i) {
297 const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
299 it[i].size = desc.dimension(dim);
301 it[i].input_stride = m_inputStrides[dim];
302 it[i].input_span = it[i].input_stride * (it[i].size - 1);
304 it[i].output_stride = output_strides[dim];
305 it[i].output_span = it[i].output_stride * (it[i].size - 1);
308 const Index input_inner_dim_size =
309 static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);
// Total number of coefficients to produce.
312 const Index output_size = desc.size();
// Length of one output row along the inner dimension.
317 const Index output_inner_dim_size = desc.dimension(inner_dim_idx);
// Number of leading padding coefficients in each row: non-zero only when the
// row starts before the input (negative input offset), capped at the row
// length. NOTE(review): the false branch of this conditional is not visible.
321 const Index output_inner_pad_before_size =
322 input_offsets[inner_dim_idx] < 0
323 ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
324 output_inner_dim_size)
// Number of coefficients copied from the input in each row.
// NOTE(review): the last argument(s) of this expression are not visible.
328 const Index output_inner_copy_size = numext::mini(
330 (output_inner_dim_size - output_inner_pad_before_size),
332 numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
333 output_inner_pad_before_size),
336 eigen_assert(output_inner_copy_size >= 0);
// Whatever is left of the row is trailing padding.
340 const Index output_inner_pad_after_size =
341 (output_inner_dim_size - output_inner_copy_size -
342 output_inner_pad_before_size);
345 eigen_assert(output_inner_dim_size ==
346 (output_inner_pad_before_size + output_inner_copy_size +
347 output_inner_pad_after_size));
// Current output coordinate and, per dimension, whether that coordinate lies
// in the padded area.
350 DSizes<Index, NumDims> output_coord = output_offsets;
351 DSizes<Index, NumDims> output_padded;
352 for (
int i = 0; i < NumDims; ++i) {
353 const int dim = IsColMajor ? i : NumDims - i - 1;
354 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
357 typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;
360 const typename TensorBlock::Storage block_storage =
361 TensorBlock::prepareStorage(desc, scratch);
// "Squeezing": when the inner dimension has no padding and the block spans
// it entirely, several consecutive rows can be copied with one linear copy.
// NOTE(review): any leading condition on elided lines is not visible here.
369 const bool squeeze_writes =
372 (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
374 (input_inner_dim_size == output_inner_dim_size);
// Dimension adjacent to the inner one.
376 const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;
// Exclusive upper coordinate along squeeze_dim up to which rows can be
// squeezed: the start of the trailing padding or the end of the block,
// whichever comes first.
379 const Index squeeze_max_coord =
380 squeeze_writes ? numext::mini(
382 static_cast<Index>(m_dimensions[squeeze_dim] -
383 m_padding[squeeze_dim].second),
385 static_cast<Index>(output_offsets[squeeze_dim] +
386 desc.dimension(squeeze_dim)))
387 : static_cast<
Index>(0);
// Main loop: `size` counts coefficients written so far.
390 for (Index size = 0; size < output_size;) {
// The row is entirely padding if any non-inner coordinate is in padding.
392 bool is_padded =
false;
393 for (
int j = 1; j < NumDims; ++j) {
394 const int dim = IsColMajor ? j : NumDims - j - 1;
395 is_padded = output_padded[dim];
396 if (is_padded)
break;
// Case 1 (presumably guarded by `is_padded`; the `if` is not visible): fill
// the whole row with the padding value. A source stride of 0 re-reads
// m_paddingValue for every element.
401 size += output_inner_dim_size;
403 LinCopy::template Run<LinCopy::Kind::FillLinear>(
404 typename LinCopy::Dst(output_offset, 1, block_storage.data()),
405 typename LinCopy::Src(0, 0, &m_paddingValue),
406 output_inner_dim_size);
409 }
else if (squeeze_writes) {
// Case 2: copy `squeeze_num` full rows in one linear copy.
411 const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
412 size += output_inner_dim_size * squeeze_num;
415 LinCopy::template Run<LinCopy::Kind::Linear>(
416 typename LinCopy::Dst(output_offset, 1, block_storage.data()),
417 typename LinCopy::Src(input_offset, 1, m_impl.data()),
418 output_inner_dim_size * squeeze_num);
// Advance the first iterator by squeeze_num - 1; the generic update at the
// bottom of the loop accounts for the remaining step.
424 it[0].count += (squeeze_num - 1);
425 input_offset += it[0].input_stride * (squeeze_num - 1);
426 output_offset += it[0].output_stride * (squeeze_num - 1);
427 output_coord[squeeze_dim] += (squeeze_num - 1);
// Case 3 (presumably the final `else`, not visible): one row made of leading
// padding, copied input and trailing padding.
431 size += output_inner_dim_size;
// Leading padding.
434 const Index out = output_offset;
436 LinCopy::template Run<LinCopy::Kind::FillLinear>(
437 typename LinCopy::Dst(out, 1, block_storage.data()),
438 typename LinCopy::Src(0, 0, &m_paddingValue),
439 output_inner_pad_before_size);
// Copied input segment.
443 const Index out = output_offset + output_inner_pad_before_size;
444 const Index in = input_offset + output_inner_pad_before_size;
// A null input buffer is tolerated only when nothing is copied.
446 eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);
448 LinCopy::template Run<LinCopy::Kind::Linear>(
449 typename LinCopy::Dst(out, 1, block_storage.data()),
450 typename LinCopy::Src(in, 1, m_impl.data()),
451 output_inner_copy_size);
// Trailing padding.
455 const Index out = output_offset + output_inner_pad_before_size +
456 output_inner_copy_size;
458 LinCopy::template Run<LinCopy::Kind::FillLinear>(
459 typename LinCopy::Dst(out, 1, block_storage.data()),
460 typename LinCopy::Src(0, 0, &m_paddingValue),
461 output_inner_pad_after_size);
// Advance to the next row: increment the lowest iterator that has not reached
// its size; iterators that did reach it are rewound by their span and the
// next one is tried. The padded flag of each touched dimension is refreshed.
// NOTE(review): the loop exit after a successful increment and the counter
// reset on rewind are not visible in this listing.
465 for (
int j = 0; j < NumDims - 1; ++j) {
466 const int dim = IsColMajor ? j + 1 : NumDims - j - 2;
468 if (++it[j].count < it[j].size) {
469 input_offset += it[j].input_stride;
470 output_offset += it[j].output_stride;
471 output_coord[dim] += 1;
472 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
476 input_offset -= it[j].input_span;
477 output_offset -= it[j].output_span;
478 output_coord[dim] -= it[j].size - 1;
479 output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
483 return block_storage.AsTensorMaterializedBlock();
// Always returns NULL: this evaluator does not expose a raw data buffer.
486 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data()
const {
return NULL; }
// SYCL-only hook (compiled when EIGEN_USE_SYCL is defined) taking the SYCL
// command group handler.
// NOTE(review): the body is not visible in this listing; presumably it
// forwards the handler to m_impl - confirm against the full source.
488 #ifdef EIGEN_USE_SYCL
490 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void bind(cl::sycl::handler &cgh)
const {
// Per-dimension iteration state used by block().
// NOTE(review): the member declarations are not visible in this listing;
// block() reads and writes the fields count, size, input_stride, input_span,
// output_stride and output_span.
496 struct BlockIteratorState {
// Returns true when output coordinate `index` along dimension `dim_index`
// lies in the padded area: either before the leading padding boundary
// (index < first) or at/after the start of the trailing padding
// (index >= m_dimensions[dim] - second). Each test is skipped when the
// corresponding padding amount is statically known to be zero.
513 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool isPaddingAtIndexForDim(
514 Index index,
int dim_index)
const {
515 return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
516 index < m_padding[dim_index].first) ||
517 (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
518 index >= m_dimensions[dim_index] - m_padding[dim_index].second);
// Returns whether the leading (`first`) padding of dimension `dim_index` is
// statically known to equal 0.
521 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool isLeftPaddingCompileTimeZero(
522 int dim_index)
const {
523 return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
// Returns whether the trailing (`second`) padding of dimension `dim_index` is
// statically known to equal 0.
526 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
bool isRightPaddingCompileTimeZero(
527 int dim_index)
const {
528 return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
// Adds to `cost` the index-computation cost attributable to dimension `i`.
//
// `in` is the input size along the dimension, `out` the padded size, and
// `reduction` = in / out is the fraction of output coordinates along this
// dimension that map to real input.
// NOTE(review): several lines are missing from this listing. The branch that
// selects between the two `cost +=` formulas is not visible (presumably it
// tests `first`), and any guard against out == 0 before the division is not
// visible either - confirm against the full source.
532 void updateCostPerDimension(TensorOpCost& cost,
int i,
bool first)
const {
533 const double in =
static_cast<double>(m_impl.dimensions()[i]);
534 const double out = in + m_padding[i].first + m_padding[i].second;
537 const double reduction = in / out;
// Cheaper formula: additions only.
540 cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
541 reduction * (1 * TensorOpCost::AddCost<Index>()));
// Costlier formula: additions, multiplications and one division.
543 cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
544 2 * TensorOpCost::MulCost<Index>() +
545 reduction * (2 * TensorOpCost::MulCost<Index>() +
546 1 * TensorOpCost::DivCost<Index>()));
// Column-major packet load starting at linear output index `index`.
//
// For each dimension, outermost first, the packet's index range
// [firstIdx, lastIdx] is classified:
//   * entirely inside the leading padding  -> broadcast the padding value;
//   * entirely inside the trailing padding -> broadcast the padding value;
//   * entirely inside the non-padded range -> strip this dimension and go on;
//   * otherwise (the packet straddles a boundary) -> fall back to the
//     coefficient-wise path using the original index.
// NOTE(review): braces and the `else` in front of each fallback are not
// visible in this listing.
552 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index)
const
554 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// Kept for the fallback, since `index` is reduced while peeling dimensions.
556 const Index initialIndex = index;
557 Index inputIndex = 0;
559 for (
int i = NumDims - 1; i > 0; --i) {
560 const Index firstIdx = index;
561 const Index lastIdx = index + PacketSize - 1;
// Boundaries along dimension i expressed as linear offsets: end of leading
// padding, start of trailing padding, and end of the dimension's extent.
562 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
563 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
564 const Index lastPaddedRight = m_outputStrides[i+1];
566 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
568 return internal::pset1<PacketReturnType>(m_paddingValue);
570 else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
572 return internal::pset1<PacketReturnType>(m_paddingValue);
574 else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
// Whole packet is inside the non-padded range: peel off this dimension.
576 const Index idx = index / m_outputStrides[i];
577 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
578 index -= idx * m_outputStrides[i];
582 return packetWithPossibleZero(initialIndex);
// Innermost dimension (0): same classification with unit stride.
586 const Index lastIdx = index + PacketSize - 1;
587 const Index firstIdx = index;
588 const Index lastPaddedLeft = m_padding[0].first;
589 const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
590 const Index lastPaddedRight = m_outputStrides[1];
592 if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
594 return internal::pset1<PacketReturnType>(m_paddingValue);
596 else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
598 return internal::pset1<PacketReturnType>(m_paddingValue);
600 else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
// Contiguous in the input: load directly with an unaligned packet load.
602 inputIndex += (index - m_padding[0].first);
603 return m_impl.template packet<Unaligned>(inputIndex);
606 return packetWithPossibleZero(initialIndex);
// Row-major packet load starting at linear output index `index`.
//
// Mirrors packetColMajor: dimensions are processed from 0 (outermost) to
// NumDims-2, with the stride of dimension i stored at m_outputStrides[i+1]
// and the extent of dimension i at m_outputStrides[i]. A packet entirely in
// padding is a broadcast of the padding value, a packet entirely in the
// non-padded range is peeled/loaded from the input, and a packet straddling
// a boundary falls back to the coefficient-wise path.
// NOTE(review): braces and the `else` in front of each fallback are not
// visible in this listing.
609 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index)
const
611 eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
// Kept for the fallback, since `index` is reduced while peeling dimensions.
613 const Index initialIndex = index;
614 Index inputIndex = 0;
616 for (
int i = 0; i < NumDims - 1; ++i) {
617 const Index firstIdx = index;
618 const Index lastIdx = index + PacketSize - 1;
619 const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
620 const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
621 const Index lastPaddedRight = m_outputStrides[i];
623 if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
625 return internal::pset1<PacketReturnType>(m_paddingValue);
627 else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
629 return internal::pset1<PacketReturnType>(m_paddingValue);
631 else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
// Whole packet is inside the non-padded range: peel off this dimension.
633 const Index idx = index / m_outputStrides[i+1];
634 inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
635 index -= idx * m_outputStrides[i+1];
639 return packetWithPossibleZero(initialIndex);
// Innermost dimension (NumDims-1): same classification with unit stride.
643 const Index lastIdx = index + PacketSize - 1;
644 const Index firstIdx = index;
645 const Index lastPaddedLeft = m_padding[NumDims-1].first;
646 const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
647 const Index lastPaddedRight = m_outputStrides[NumDims-1];
649 if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
651 return internal::pset1<PacketReturnType>(m_paddingValue);
653 else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
655 return internal::pset1<PacketReturnType>(m_paddingValue);
657 else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
// Contiguous in the input: load directly with an unaligned packet load.
659 inputIndex += (index - m_padding[NumDims-1].first);
660 return m_impl.template packet<Unaligned>(inputIndex);
663 return packetWithPossibleZero(initialIndex);
// Slow-path packet construction: evaluates PacketSize consecutive
// coefficients one by one through coeff() into an aligned local buffer and
// loads that buffer as a packet. Used when a packet straddles a padding
// boundary.
// NOTE(review): the return statement is not visible in this listing.
666 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index)
const
668 EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
670 for (
int i = 0; i < PacketSize; ++i) {
671 values[i] = coeff(index+i);
673 PacketReturnType rslt = internal::pload<PacketReturnType>(values);
// Dimensions of the padded (output) tensor.
677 Dimensions m_dimensions;
// Output strides with one extra entry; the constructor fills them differently
// per layout (see the constructor for the indexing convention).
678 array<Index, NumDims+1> m_outputStrides;
// Strides of the unpadded input tensor.
679 array<Index, NumDims> m_inputStrides;
// Evaluator of the expression being padded.
680 TensorEvaluator<ArgType, Device> m_impl;
// Per-dimension padding amounts, copied from the expression.
681 PaddingDimensions m_padding;
// Value written into the padded area.
683 Scalar m_paddingValue;
// Device the evaluator was created for; queried for its last-level cache size
// in getResourceRequirements().
685 const Device EIGEN_DEVICE_REF m_device;
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index