sycl: Use syclcompat::dp4a (#10267)
* sycl: Use syclcompat::dp4a * Using the syclcompat version allows the compiler to optimize the operation with a native function * Update news section * Update CI Windows oneAPI version to 2025.0 * Reword doc * Call syclcompat::dp4a inside dpct::dp4a This reverts commit 90cb61d692d61360b46954a1c7f780bd2e569b73.
This commit is contained in:
parent
1607a5e5b0
commit
5a54af4d4f
4 changed files with 9 additions and 27 deletions
|
@ -15,6 +15,7 @@
|
|||
|
||||
#include <sycl/sycl.hpp>
|
||||
#include <sycl/half_type.hpp>
|
||||
#include <syclcompat/math.hpp>
|
||||
#include <oneapi/mkl.hpp>
|
||||
#include <map>
|
||||
|
||||
|
@ -1830,31 +1831,10 @@ namespace dpct
|
|||
: id);
|
||||
}
|
||||
|
||||
// Splits `val` into four 8-bit lanes and widens each lane back to T.
// The value is wrapped in a 1-element sycl::vec, reinterpreted via `as<>` as a
// 4-element vector of int8_t (when T is signed) or uint8_t (when T is
// unsigned), and then each lane is converted to T with `convert<T>()`.
// NOTE(review): the reinterpretation to four 8-bit lanes presumably requires T
// to be a 32-bit integer type — confirm against callers.
// NOTE(review): judging by the hunk header (31 old lines -> 10 new lines), this
// helper appears to be on the removed side of the diff.
template <typename T>
|
||||
sycl::vec<T, 4> extract_and_sign_or_zero_extend4(T val)
|
||||
{
|
||||
return sycl::vec<T, 1>(val)
|
||||
.template as<sycl::vec<
|
||||
std::conditional_t<std::is_signed_v<T>, int8_t, uint8_t>, 4>>()
|
||||
.template convert<T>();
|
||||
}
|
||||
|
||||
// Accumulator type selected from two operand types: uint32_t only when both
// T1 and T2 are unsigned, int32_t in every other case.
// NOTE(review): judging by the hunk header (31 old lines -> 10 new lines), this
// alias appears to be on the removed side of the diff.
template <typename T1, typename T2>
|
||||
using dot_product_acc_t =
|
||||
std::conditional_t<std::is_unsigned_v<T1> && std::is_unsigned_v<T2>,
|
||||
uint32_t, int32_t>;
|
||||
|
||||
// dp4a(a, b, c): returns c plus the sum of the four lane-wise products of a and b.
// NOTE(review): this rendering has lost the diff's +/- markers, so two bodies
// appear back to back. Per the commit title and the hunk header, the manual
// accumulation ending in `return res;` is presumably the removed body, and the
// single `return syclcompat::dp4a(a, b, c);` is the new one.
template <typename T1, typename T2, typename T3>
|
||||
inline auto dp4a(T1 a, T2 b, T3 c)
|
||||
{
|
||||
// Old body: seed the accumulator with c, then add each of the four lane products.
dot_product_acc_t<T1, T2> res = c;
|
||||
auto va = extract_and_sign_or_zero_extend4(a);
|
||||
auto vb = extract_and_sign_or_zero_extend4(b);
|
||||
res += va[0] * vb[0];
|
||||
res += va[1] * vb[1];
|
||||
res += va[2] * vb[2];
|
||||
res += va[3] * vb[3];
|
||||
return res;
|
||||
// New body: forward the same three arguments to syclcompat::dp4a.
return syclcompat::dp4a(a, b, c);
|
||||
}
|
||||
|
||||
struct sub_sat
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue