Compare commits

...

2 commits

Author SHA1 Message Date
7c46a4e35a Better simd abs 2025-05-21 23:17:28 -07:00
66b211e969 Don't hardcode stride 2025-05-21 23:17:15 -07:00

View file

@@ -90,12 +90,14 @@
(declaim (inline simd-abs-f32.8 simd-abs-f64.4)) (declaim (inline simd-abs-f32.8 simd-abs-f64.4))
(defun simd-abs-f32.8 (pack) (defun simd-abs-f32.8 (pack)
(declare (f32.8 pack)) (declare (f32.8 pack)
(f32.8-sqrt (f32.8* pack pack))) (optimize . #.*optimize-qualities*))
(f32.8-if (f32.8< pack (f32.8 0.0)) (f32.8* pack (f32.8 -1.0)) pack))
(defun simd-abs-f64.4 (pack) (defun simd-abs-f64.4 (pack)
(declare (f64.4 pack)) (declare (f64.4 pack)
(f64.4-sqrt (f64.4* pack pack))) (optimize . #.*optimize-qualities*))
(f64.4-if (f64.4< pack 0) (f64.4* pack -1) pack))
(push '(simd-abs simd-abs-f32.8) *single-float-fn-generic*) (push '(simd-abs simd-abs-f32.8) *single-float-fn-generic*)
(push '(simd-abs simd-abs-f64.4) *double-float-fn-generic*) (push '(simd-abs simd-abs-f64.4) *double-float-fn-generic*)
@@ -305,7 +307,7 @@
(defblas gemm-serial ((A 2) (B 2) (C 2) (alpha 0) (beta 0)) (defblas gemm-serial ((A 2) (B 2) (C 2) (alpha 0) (beta 0))
(loop with m fixnum = (array-dimension C 0) (loop with m fixnum = (array-dimension C 0)
with n fixnum = (array-dimension C 1) with n fixnum = (array-dimension C 1)
with r-block fixnum = (* (floor (array-dimension A 1) 8) 8) with r-block fixnum = (* (floor (array-dimension A 1) stride) stride)
with r fixnum = (array-dimension A 1) with r fixnum = (array-dimension A 1)
with Bt of-type (simple-array float 2) = (transpose B) with Bt of-type (simple-array float 2) = (transpose B)
for i fixnum from 0 below m for i fixnum from 0 below m