10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
13 #include "./InternalHeaderCheck.h"
25 template<
typename TargetType,
typename XprType>
26 struct traits<TensorConversionOp<TargetType, XprType> >
29 typedef TargetType Scalar;
30 typedef typename traits<XprType>::StorageKind StorageKind;
31 typedef typename traits<XprType>::Index
Index;
32 typedef typename XprType::Nested Nested;
33 typedef std::remove_reference_t<Nested> Nested_;
34 static constexpr
int NumDimensions = traits<XprType>::NumDimensions;
35 static constexpr
int Layout = traits<XprType>::Layout;
37 typedef typename TypeConversion<Scalar, typename traits<XprType>::PointerType>::type PointerType;
40 template<
typename TargetType,
typename XprType>
41 struct eval<TensorConversionOp<TargetType, XprType>,
Eigen::Dense>
43 typedef const TensorConversionOp<TargetType, XprType>& type;
46 template<
typename TargetType,
typename XprType>
47 struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type>
49 typedef TensorConversionOp<TargetType, XprType> type;
// Primary template; declared only. Implementations are provided as partial
// specializations on the (SrcCoeffRatio, TgtCoeffRatio) pair.
template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket,
          int SrcCoeffRatio, int TgtCoeffRatio>
struct PacketConverter;
58 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket>
59 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> {
60 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
61 PacketConverter(
const TensorEvaluator& impl)
64 template<
int LoadMode,
typename Index>
65 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(
Index index)
const {
66 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
70 const TensorEvaluator& m_impl;
74 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket>
75 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
76 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
77 PacketConverter(
const TensorEvaluator& impl)
80 template<
int LoadMode,
typename Index>
81 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(
Index index)
const {
82 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
84 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
85 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
86 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
91 const TensorEvaluator& m_impl;
94 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket>
95 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
96 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
97 PacketConverter(
const TensorEvaluator& impl)
100 template<
int LoadMode,
typename Index>
101 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(
Index index)
const {
102 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
104 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
105 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
106 SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
107 SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
108 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
113 const TensorEvaluator& m_impl;
116 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket>
117 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> {
118 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
119 PacketConverter(
const TensorEvaluator& impl)
122 template<
int LoadMode,
typename Index>
123 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(
Index index)
const {
124 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
126 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
127 SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize);
128 SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
129 SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
130 SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize);
131 SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize);
132 SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize);
133 SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize);
134 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8);
139 const TensorEvaluator& m_impl;
142 template <
typename TensorEvaluator,
typename SrcPacket,
typename TgtPacket,
int TgtCoeffRatio>
143 struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
144 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
145 PacketConverter(
const TensorEvaluator& impl)
146 : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
148 template<
int LoadMode,
typename Index>
149 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(
Index index)
const {
150 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
154 if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
156 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
158 const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
159 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
160 typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
161 internal::scalar_cast_op<SrcType, TgtType> converter;
162 EIGEN_ALIGN_MAX
typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
164 for (
int i = 0; i < TgtPacketSize; ++i) {
165 values[i] = converter(m_impl.coeff(index+i));
167 TgtPacket rslt = internal::pload<TgtPacket>(values);
173 const TensorEvaluator& m_impl;
174 const typename TensorEvaluator::Index m_maxIndex;
177 template<
typename TargetType,
typename XprType>
181 typedef typename internal::traits<TensorConversionOp>::Scalar Scalar;
182 typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind;
183 typedef typename internal::traits<TensorConversionOp>::Index Index;
184 typedef typename internal::nested<TensorConversionOp>::type Nested;
185 typedef Scalar CoeffReturnType;
192 const internal::remove_all_t<typename XprType::Nested>&
193 expression()
const {
return m_xpr; }
196 typename XprType::Nested m_xpr;
199 template <
bool SameType,
typename Eval,
typename EvalPo
interType>
struct ConversionSubExprEval {
200 static EIGEN_STRONG_INLINE
bool run(Eval& impl, EvalPointerType) {
201 impl.evalSubExprsIfNeeded(NULL);
206 template <
typename Eval,
typename EvalPo
interType>
struct ConversionSubExprEval<true, Eval, EvalPointerType> {
207 static EIGEN_STRONG_INLINE
bool run(Eval& impl, EvalPointerType data) {
208 return impl.evalSubExprsIfNeeded(data);
212 #ifdef EIGEN_USE_THREADS
213 template <
bool SameType,
typename Eval,
typename EvalPointerType,
214 typename EvalSubExprsCallback>
215 struct ConversionSubExprEvalAsync {
216 static EIGEN_STRONG_INLINE
void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
217 impl.evalSubExprsIfNeededAsync(
nullptr, std::move(done));
221 template <
typename Eval,
typename EvalPointerType,
222 typename EvalSubExprsCallback>
223 struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType,
224 EvalSubExprsCallback> {
225 static EIGEN_STRONG_INLINE
void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
226 impl.evalSubExprsIfNeededAsync(data, std::move(done));
233 template <
typename SrcType,
typename TargetType,
bool IsSameT>
235 template <
typename ArgType,
typename Device>
236 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(
const TensorEvaluator<ArgType, Device>& impl,
Index index) {
237 internal::scalar_cast_op<SrcType, TargetType> converter;
238 return converter(impl.coeff(index));
242 template <
typename SrcType,
typename TargetType>
243 struct CoeffConv<SrcType, TargetType, true> {
244 template <
typename ArgType,
typename Device>
245 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(
const TensorEvaluator<ArgType, Device>& impl,
Index index) {
246 return impl.coeff(index);
250 template <
typename SrcPacket,
typename TargetPacket,
int LoadMode,
bool ActuallyVectorize,
bool IsSameT>
252 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
253 typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
255 static constexpr
int PacketSize = internal::unpacket_traits<TargetPacket>::size;
257 template <
typename ArgType,
typename Device>
258 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(
const TensorEvaluator<ArgType, Device>& impl,
Index index) {
259 internal::scalar_cast_op<SrcType, TargetType> converter;
260 EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
262 for (
int i = 0; i < PacketSize; ++i) {
263 values[i] = converter(impl.coeff(index+i));
265 TargetPacket rslt = internal::pload<TargetPacket>(values);
270 template <
typename SrcPacket,
typename TargetPacket,
int LoadMode,
bool IsSameT>
271 struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
272 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
273 typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
275 template <
typename ArgType,
typename Device>
276 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(
const TensorEvaluator<ArgType, Device>& impl,
Index index) {
277 const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
278 const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
279 PacketConverter<TensorEvaluator<ArgType, Device>, SrcPacket, TargetPacket,
280 SrcCoeffRatio, TgtCoeffRatio> converter(impl);
281 return converter.template packet<LoadMode>(index);
285 template <
typename SrcPacket,
typename TargetPacket,
int LoadMode>
286 struct PacketConv<SrcPacket, TargetPacket, LoadMode, false, true> {
287 typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
288 static constexpr
int PacketSize = internal::unpacket_traits<TargetPacket>::size;
290 template <
typename ArgType,
typename Device>
291 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(
const TensorEvaluator<ArgType, Device>& impl,
Index index) {
292 EIGEN_ALIGN_MAX std::remove_const_t<TargetType> values[PacketSize];
293 for (
int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
294 return internal::pload<TargetPacket>(values);
298 template <
typename SrcPacket,
typename TargetPacket,
int LoadMode>
299 struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, true> {
300 template <
typename ArgType,
typename Device>
301 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(
const TensorEvaluator<ArgType, Device>& impl,
Index index) {
302 return impl.template packet<LoadMode>(index);
309 template<
typename TargetType,
typename ArgType,
typename Device>
310 struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
312 typedef TensorConversionOp<TargetType, ArgType> XprType;
313 typedef typename XprType::Index Index;
314 typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
315 typedef TargetType Scalar;
316 typedef TargetType CoeffReturnType;
317 typedef internal::remove_all_t<typename internal::traits<ArgType>::Scalar> SrcType;
318 typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
319 typedef typename PacketType<SrcType, Device>::type PacketSourceType;
320 static constexpr
int PacketSize = PacketType<CoeffReturnType, Device>::size;
321 static constexpr
bool IsSameType = internal::is_same<TargetType, SrcType>::value;
322 typedef StorageMemory<CoeffReturnType, Device> Storage;
323 typedef typename Storage::Type EvaluatorPointerType;
328 #ifndef EIGEN_USE_SYCL
331 TensorEvaluator<ArgType, Device>::PacketAccess &
332 internal::type_casting_traits<SrcType, TargetType>::VectorizedCast,
334 BlockAccess = TensorEvaluator<ArgType, Device>::BlockAccess,
335 PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
339 static constexpr
int Layout = TensorEvaluator<ArgType, Device>::Layout;
340 static constexpr
int NumDims = internal::array_size<Dimensions>::value;
343 typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
344 typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
346 typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
349 struct TensorConversionOpBlockFactory {
350 template <
typename ArgXprType>
352 typedef TensorConversionOp<TargetType, const ArgXprType> type;
355 template <
typename ArgXprType>
356 typename XprType<ArgXprType>::type expr(
const ArgXprType& expr)
const {
357 return typename XprType<ArgXprType>::type(expr);
361 typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory,
366 EIGEN_STRONG_INLINE TensorEvaluator(
const XprType& op,
const Device& device)
367 : m_impl(op.expression(), device)
371 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dimensions& dimensions()
const {
return m_impl.dimensions(); }
373 EIGEN_STRONG_INLINE
bool evalSubExprsIfNeeded(EvaluatorPointerType data)
375 return ConversionSubExprEval<IsSameType, TensorEvaluator<ArgType, Device>, EvaluatorPointerType>::run(m_impl, data);
378 #ifdef EIGEN_USE_THREADS
379 template <
typename EvalSubExprsCallback>
380 EIGEN_STRONG_INLINE
void evalSubExprsIfNeededAsync(
381 EvaluatorPointerType data, EvalSubExprsCallback done) {
382 ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>,
383 EvaluatorPointerType,
384 EvalSubExprsCallback>::run(m_impl, data, std::move(done));
388 EIGEN_STRONG_INLINE
void cleanup()
393 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index)
const
395 return internal::CoeffConv<SrcType, TargetType, IsSameType>::run(m_impl,index);
398 template<
int LoadMode>
399 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
400 packet(Index index)
const {
404 const bool Vectorizable =
406 ? TensorEvaluator<ArgType, Device>::PacketAccess
407 : int(TensorEvaluator<ArgType, Device>::PacketAccess) &
408 int(internal::type_casting_traits<SrcType, TargetType>::VectorizedCast);
410 return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
411 Vectorizable, IsSameType>::run(m_impl, index);
414 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
415 costPerCoeff(
bool vectorized)
const {
416 const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
418 const double SrcCoeffRatio =
419 internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio;
420 const double TgtCoeffRatio =
421 internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio;
422 return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
423 TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
425 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
429 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
430 internal::TensorBlockResourceRequirements getResourceRequirements()
const {
431 return m_impl.getResourceRequirements();
434 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
435 block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
436 bool =
false)
const {
437 return TensorBlock(m_impl.block(desc, scratch),
438 TensorConversionOpBlockFactory());
441 EIGEN_DEVICE_FUNC EvaluatorPointerType data()
const {
return NULL; }
444 const TensorEvaluator<ArgType, Device>& impl()
const {
return m_impl; }
445 #ifdef EIGEN_USE_SYCL
447 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void bind(cl::sycl::handler &cgh)
const {
453 TensorEvaluator<ArgType, Device> m_impl;
The tensor base class.
Definition: TensorForwardDeclarations.h:58
Tensor conversion class. This class makes it possible to vectorize type casting operations when the number of scalars per packet in the source and the destination type differ.
Definition: TensorConversion.h:179
Namespace containing all symbols from the Eigen library.
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index