CUDA: optimize and refactor MMQ (#8416)
* CUDA: optimize and refactor MMQ
* explicit q8_1 memory layouts, add documentation
This commit is contained in:
parent
a977c11544
commit
808aba3916
5 changed files with 867 additions and 687 deletions
|
|
@@ -70,6 +70,10 @@ struct mma_int_A_I16K8 {
|
|||
}
|
||||
#endif // defined(INT8_MMA_AVAILABLE)
|
||||
}
|
||||
|
||||
// Device-side helper that delegates to mma_int_A_I16K4::load.
//
// Reinterprets `x` (declared outside this excerpt) as an array of
// mma_int_A_I16K4 and calls load(xs0, stride) on its first element.
// Both arguments are forwarded unchanged; nothing is returned.
//
// NOTE(review): presumably the I16K4 view aliases the first ("low") half of
// this tile's storage, so only that half is written — confirm against the
// declaration of `x` and the size of mma_int_A_I16K4.
__device__ __forceinline__ void load_low(const int * __restrict__ xs0, const int & stride) {
|
||||
((mma_int_A_I16K4 *) x)[0].load(xs0, stride); // element 0 of the reinterpreted storage
|
||||
}
|
||||
};
|
||||
|
||||
struct mma_int_B_J8K4 {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue