diff --git a/src/cl-blas.lisp b/src/cl-blas.lisp index 1daae83..d003f9a 100644 --- a/src/cl-blas.lisp +++ b/src/cl-blas.lisp @@ -222,6 +222,25 @@ do (setf m (max m (abs (aref x i))))) m)) +;;;; Level 2 BLAS: matrix-vector, O(n^2) operations + +;;; gemv + +(defblas gemv ((A 2) (x 1) (y 1) (alpha 0) (beta 0)) + (let* ((n (array-dimension A 1)) + (n-block (* (floor n stride) stride)) + (alpha-vec (simd alpha))) + (declare (fixnum n n-block) + (simd alpha-vec)) + (loop for i fixnum from 0 below (array-dimension A 0) + do (setf (aref y i) + (+ (* beta (aref y i)) + (loop for acc of-type simd = (simd 0) then (simd+ acc (simd* alpha-vec (simd-aref A i j) (aref x j))) + for j fixnum from 0 below n-block by stride + finally (return (the float (simd-horizontal+ acc)))) + (loop for j fixnum from n-block below n + sum (* alpha (aref A i j) (aref x j)) of-type float)))))) + ;;;; Level 3 BLAS: matrix-matrix, O(n^3) operations (defblas transpose ((A 2))