Add a handful of initial blas functions
This commit is contained in:
commit
f4696a2251
5 changed files with 412 additions and 0 deletions
17
.gitignore
vendored
Normal file
17
.gitignore
vendored
Normal file
|
@ -0,0 +1,17 @@
|
|||
*.FASL
|
||||
*.fasl
|
||||
*.lisp-temp
|
||||
*.dfsl
|
||||
*.pfsl
|
||||
*.d64fsl
|
||||
*.p64fsl
|
||||
*.lx64fsl
|
||||
*.lx32fsl
|
||||
*.dx64fsl
|
||||
*.dx32fsl
|
||||
*.fx64fsl
|
||||
*.fx32fsl
|
||||
*.sx64fsl
|
||||
*.sx32fsl
|
||||
*.wx64fsl
|
||||
*.wx32fsl
|
14
cl-blas.asd
Normal file
14
cl-blas.asd
Normal file
|
@ -0,0 +1,14 @@
|
|||
(asdf:defsystem #:cl-blas
|
||||
:description "A Common Lisp implementation of the BLAS."
|
||||
:version "0.0.1"
|
||||
:author "Dominik Martinez"
|
||||
:license "GPLv3"
|
||||
:depends-on (#:alexandria
|
||||
#:serapeum
|
||||
#:lparallel
|
||||
#:sb-simd)
|
||||
:components ((:module "src"
|
||||
:components
|
||||
((:file "package")
|
||||
(:file "cl-blas")
|
||||
(:file "benchmarks")))))
|
17
src/benchmarks.lisp
Normal file
17
src/benchmarks.lisp
Normal file
|
@ -0,0 +1,17 @@
|
|||
(in-package :cl-blas)
|
||||
|
||||
(defmacro benchmark-runtime ((iterations) &body body)
|
||||
`(let* ((total-time (loop for i from 1 to ,iterations
|
||||
sum (let ((start-time (get-internal-real-time)))
|
||||
,@body
|
||||
(- (get-internal-real-time) start-time)))))
|
||||
(float (/ total-time ,iterations 1000000))))
|
||||
|
||||
(defun benchmark-gemm (size)
|
||||
(let* ((A (make-random-array `(,size ,size)))
|
||||
(B (make-random-array `(,size ,size)))
|
||||
(C (make-random-array `(,size ,size)))
|
||||
(alpha 1.0)
|
||||
(beta 1.0))
|
||||
(format t "running benchmark-mul~%")
|
||||
(benchmark-runtime (5) (gemm A B C alpha beta))))
|
353
src/cl-blas.lisp
Normal file
353
src/cl-blas.lisp
Normal file
|
@ -0,0 +1,353 @@
|
|||
(in-package :cl-blas)
|
||||
|
||||
(eval-when (:compile-toplevel :load-toplevel :execute)
|
||||
(defparameter *optimize-qualities* '((speed 3)
|
||||
(safety 0)
|
||||
(debug 0)))
|
||||
(defparameter +single-float-simd-generic+
|
||||
'((simd f32.8)
|
||||
(simd-aref f32.8-aref)
|
||||
(simd+ f32.8+)
|
||||
(simd* f32.8*)
|
||||
(simd-horizontal+ f32.8-horizontal+)
|
||||
(stride 8)
|
||||
(float single-float)
|
||||
(0f 0.0)))
|
||||
(defparameter +double-float-simd-generic+
|
||||
'((simd f64.4)
|
||||
(simd-aref f64.4-aref)
|
||||
(simd+ f64.4+)
|
||||
(simd* f64.4*)
|
||||
(simd-horizontal+ f64.4-horizontal+)
|
||||
(stride 4)
|
||||
(float double-float)
|
||||
(0f 0.0d0)))
|
||||
(defun parse-type-declarations (args type)
|
||||
(mapcar (lambda (arg)
|
||||
(if (= (second arg) 0)
|
||||
`(,type ,(first arg))
|
||||
`((simple-array ,type ,(second arg)) ,(first arg))))
|
||||
args))
|
||||
(defun parse-body (body mapping)
|
||||
(if (constantp body)
|
||||
body
|
||||
(mapcar
|
||||
(lambda (el)
|
||||
(typecase el
|
||||
(cons (parse-body el mapping))
|
||||
(symbol (let* ((match (second (assoc el mapping)))
|
||||
(sym (if match match el)))
|
||||
sym))
|
||||
(t el)))
|
||||
body)))
|
||||
(defun parse-cond-clauses (type-decs)
|
||||
`(and ,@(mapcar (lambda (type-dec)
|
||||
`(typep ,(second type-dec) ',(first type-dec)))
|
||||
type-decs))))
|
||||
|
||||
(defparameter +num-cpus+ (serapeum:count-cpus))
|
||||
|
||||
(defun random-range (min max)
|
||||
(+ (random (- max min)) min))
|
||||
|
||||
(defun make-random-array (dimensions &key (min -100) (max 100) (float-type 'single-float))
|
||||
(declare ((or fixnum (cons fixnum)) dimensions)
|
||||
(integer min max))
|
||||
(let ((arr (make-array dimensions :element-type float-type)))
|
||||
(loop for i from 0 below (array-total-size arr)
|
||||
do (setf (row-major-aref arr i) (coerce (random-range min max) float-type)))
|
||||
arr))
|
||||
|
||||
(defun transpose (A)
|
||||
"Transpose A of type (simple-array single-float 2)"
|
||||
(declare ((simple-array single-float 2) A)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(loop with m fixnum = (array-dimension A 0)
|
||||
with n fixnum = (array-dimension A 1)
|
||||
with B of-type (simple-array single-float 2) = (make-array `(,n ,m) :element-type 'single-float)
|
||||
for i fixnum from 0 below m
|
||||
do (loop for j fixnum from 0 below n
|
||||
do (setf (aref B j i) (aref A i j)))
|
||||
finally (return B)))
|
||||
|
||||
(defmacro defblas (name args &body body)
|
||||
;; ARGS should be of the form ((NAME RANK)+).
|
||||
;; If RANK is 0, then this NAME is a scalar
|
||||
(let* ((single-float-type-declarations (parse-type-declarations args 'single-float))
|
||||
(double-float-type-declarations (parse-type-declarations args 'double-float))
|
||||
(single-float-name (alexandria:symbolicate name '-single-float))
|
||||
(double-float-name (alexandria:symbolicate name '-double-float))
|
||||
(lambda-list (mapcar #'first args)))
|
||||
`(progn
|
||||
(defun ,single-float-name ,lambda-list
|
||||
(declare (optimize . #.*optimize-qualities*)
|
||||
,@single-float-type-declarations)
|
||||
,@(parse-body body +single-float-simd-generic+))
|
||||
(defun ,double-float-name ,lambda-list
|
||||
(declare (optimize . #.*optimize-qualities*)
|
||||
,@double-float-type-declarations)
|
||||
,@(parse-body body +double-float-simd-generic+))
|
||||
(defun ,name ,lambda-list
|
||||
(declare (optimize . #.*optimize-qualities*))
|
||||
(cond
|
||||
(,(parse-cond-clauses single-float-type-declarations)
|
||||
(,single-float-name ,@lambda-list))
|
||||
(,(parse-cond-clauses double-float-type-declarations)
|
||||
(,double-float-name ,@lambda-list)))))))
|
||||
|
||||
;;;; Level 1 BLAS: vector, O(n) operations
|
||||
|
||||
;;; axpy
|
||||
|
||||
(defblas axpy ((alpha 0) (x 1) (y 1))
|
||||
(let* ((n (array-dimension x 0))
|
||||
(n-block (* (floor n stride) stride)))
|
||||
(declare (fixnum n n-block))
|
||||
(loop with alpha-vec of-type simd = (simd alpha)
|
||||
for i fixnum from 0 below n-block by 8
|
||||
do (setf (simd-aref y i)
|
||||
(simd+ (simd-aref y i)
|
||||
(simd* alpha-vec
|
||||
(simd-aref x i)))))
|
||||
(loop for i fixnum from n-block below n
|
||||
do (setf (aref y i) (+ (aref y i) (* alpha (aref x i)))))))
|
||||
|
||||
;;; scal
|
||||
|
||||
(defblas scal ((alpha 0) (x 1))
|
||||
(let* ((n (array-dimension x 0))
|
||||
(n-block (* (floor n stride) stride)))
|
||||
(declare (fixnum n n-block))
|
||||
(loop with alpha-vec of-type simd = (simd alpha)
|
||||
for i fixnum from 0 below n-block by 8
|
||||
do (setf (simd-aref x i)
|
||||
(simd* alpha-vec
|
||||
(simd-aref x i))))
|
||||
(loop for i fixnum from n-block below n
|
||||
do (setf (aref x i) (* alpha (aref x i))))))
|
||||
|
||||
;;; copy
|
||||
|
||||
(defblas copy ((x 1) (y 1))
|
||||
(let* ((n (array-dimension x 0))
|
||||
(n-block (* (floor n stride) stride)))
|
||||
(declare (fixnum n n-block))
|
||||
(loop for i fixnum from 0 below n-block by 8
|
||||
do (setf (simd-aref y i) (simd-aref x i)))
|
||||
(loop for i fixnum from n-block below n
|
||||
do (setf (aref y i) (aref x i)))))
|
||||
|
||||
;;; swap
|
||||
|
||||
(defblas swap ((x 1) (y 1))
|
||||
(let* ((n (array-dimension x 0))
|
||||
(n-block (* (floor n stride) stride)))
|
||||
(declare (fixnum n n-block))
|
||||
(loop with tmp of-type simd = (simd 0)
|
||||
for i fixnum from 0 below n-block by stride
|
||||
do (setf tmp (simd-aref x i)
|
||||
(simd-aref x i) (simd-aref y i)
|
||||
(simd-aref y i) tmp))
|
||||
(loop with tmp of-type float
|
||||
for i fixnum from n-block below n
|
||||
do (setf tmp (aref x i)
|
||||
(aref x i) (aref y i)
|
||||
(aref y i) tmp))))
|
||||
|
||||
;;; dot
|
||||
|
||||
(defblas dot ((x 1) (y 1))11
|
||||
(let* ((n (array-dimension x 0))
|
||||
(n-block (* (floor n stride) stride))
|
||||
(sum 0f))
|
||||
(declare (fixnum n n-block)
|
||||
(float sum))
|
||||
(loop with acc of-type simd = (simd 0)
|
||||
for i fixnum from 0 below n-block by stride
|
||||
do (setf acc (simd+ acc (simd* (simd-aref x i) (simd-aref y i))))
|
||||
finally (setf sum (simd-horizontal+ acc)))
|
||||
(loop for i fixnum from n-block below n
|
||||
do (setf sum (+ sum (* (aref x i) (aref y i)))))
|
||||
sum))
|
||||
|
||||
;;; nrm2
|
||||
|
||||
(defblas nrm2 ((x 1))
|
||||
(let* ((n (array-dimension x 0))
|
||||
(n-block (* (floor n stride) stride))
|
||||
(norm 0f))
|
||||
(declare (fixnum n n-block)
|
||||
(float norm))
|
||||
(loop with acc of-type simd = (simd 0)
|
||||
for i fixnum from 0 below n-block by stride
|
||||
for sel of-type simd = (simd-aref x i)
|
||||
do (setf acc (simd+ acc (simd* sel sel)))
|
||||
finally (setf norm (simd-horizontal+ acc)))
|
||||
(loop for i fixnum from n-block below n
|
||||
do (setf norm (+ norm (expt (aref x i) 2))))
|
||||
(the float (sqrt norm))))
|
||||
|
||||
;;; asum
|
||||
|
||||
(defblas asum ((x 1))
|
||||
(let* ((n (array-dimension x 0))
|
||||
(n-block (* (floor n stride) stride))
|
||||
(norm 0f))
|
||||
(declare (fixnum n n-block)
|
||||
(float norm))
|
||||
(loop with acc of-type simd = (simd 0)
|
||||
for i fixnum from 0 below n-block by stride
|
||||
for sel of-type simd = (simd-aref x i)
|
||||
do (setf acc (simd+ acc (simd* sel sel)))
|
||||
finally (setf norm (simd-horizontal+ acc)))
|
||||
(loop for i fixnum from n-block below n
|
||||
do (setf norm (+ norm (expt (aref x i) 2))))
|
||||
norm))
|
||||
|
||||
;;;; Level 3 BLAS: matrix-matrix, O(n^3) operations
|
||||
|
||||
(defun transpose-single-float (A)
|
||||
(declare ((simple-array single-float 2) A)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(loop with m fixnum = (array-dimension A 0)
|
||||
with n fixnum = (array-dimension A 1)
|
||||
with B of-type (simple-array single-float 2) = (make-array `(,n ,m) :element-type 'single-float)
|
||||
for i fixnum from 0 below m
|
||||
do (loop for j fixnum from 0 below n
|
||||
do (setf (aref B j i) (aref A i j)))
|
||||
finally (return B)))
|
||||
|
||||
(defun transpose-double-float (A)
|
||||
(declare ((simple-array double-float 2) A)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(loop with m fixnum = (array-dimension A 0)
|
||||
with n fixnum = (array-dimension A 1)
|
||||
with B of-type (simple-array double-float 2) = (make-array `(,n ,m) :element-type 'double-float)
|
||||
for i fixnum from 0 below m
|
||||
do (loop for j fixnum from 0 below n
|
||||
do (setf (aref B j i) (aref A i j)))
|
||||
finally (return B)))
|
||||
|
||||
;;; gemm
|
||||
|
||||
(declaim (inline gemm-row-dot-single-float gemm-row-dot-double-float))
|
||||
(defun gemm-row-dot-single-float (A Bt C alpha beta i n r r-block)
|
||||
(declare ((simple-array single-float 2) A Bt C)
|
||||
(single-float alpha beta)
|
||||
(fixnum i n r r-block)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(loop with alpha-vec of-type f32.8 = (f32.8 alpha)
|
||||
for j fixnum from 0 below n
|
||||
do (setf (aref C i j)
|
||||
(+ (* beta (aref C i j))
|
||||
(loop for acc of-type f32.8 = (f32.8 0) then (f32.8+ acc (f32.8* alpha-vec (f32.8-aref A i k) (f32.8-aref Bt j k)))
|
||||
for k fixnum from 0 below r-block by 8
|
||||
finally (return (the single-float (f32.8-horizontal+ acc))))
|
||||
(loop for k fixnum from r-block below r
|
||||
sum (* alpha (aref A i k) (aref Bt j k)) of-type single-float)))))
|
||||
|
||||
(defun gemm-row-dot-double-float (A Bt C alpha beta i n r r-block)
|
||||
(declare ((simple-array double-float 2) A Bt C)
|
||||
(double-float alpha beta)
|
||||
(fixnum i n r r-block)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(loop with alpha-vec of-type f64.4 = (f64.4 alpha)
|
||||
for j fixnum from 0 below n
|
||||
do (setf (aref C i j)
|
||||
(+ (* beta (aref C i j))
|
||||
(loop for acc of-type f64.4 = (f64.4 0) then (f64.4+ acc (f64.4* alpha-vec (f64.4-aref A i k) (f64.4-aref Bt j k)))
|
||||
for k fixnum from 0 below r-block by 4
|
||||
finally (return (the double-float (f64.4-horizontal+ acc))))
|
||||
(loop for k fixnum from r-block below r
|
||||
sum (* alpha (aref A i k) (aref Bt j k)) of-type double-float)))))
|
||||
|
||||
(defun gemm-parallel-single-float (A B C alpha beta)
|
||||
(declare ((simple-array single-float 2) A B C)
|
||||
(single-float alpha beta)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(setf lparallel:*kernel* (lparallel:make-kernel +num-cpus+))
|
||||
(let ((channel (lparallel:make-channel)))
|
||||
(loop with m fixnum = (array-dimension C 0)
|
||||
with n fixnum = (array-dimension C 1)
|
||||
with r-block fixnum = (* (floor (array-dimension A 1) 8) 8)
|
||||
with r fixnum = (array-dimension A 1)
|
||||
with Bt of-type (simple-array single-float 2) = (transpose-single-float B)
|
||||
for i fixnum from 0 below m
|
||||
do (lparallel:submit-task channel #'gemm-row-dot-single-float A Bt C alpha beta i n r r-block))
|
||||
(loop for i from 1 to (array-dimension C 0)
|
||||
do (lparallel:receive-result channel)))
|
||||
(lparallel:end-kernel)
|
||||
nil)
|
||||
|
||||
(defun gemm-parallel-double-float (A B C alpha beta)
|
||||
(declare ((simple-array double-float 2) A B C)
|
||||
(double-float alpha beta)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(setf lparallel:*kernel* (lparallel:make-kernel +num-cpus+))
|
||||
(let ((channel (lparallel:make-channel)))
|
||||
(loop with m fixnum = (array-dimension C 0)
|
||||
with n fixnum = (array-dimension C 1)
|
||||
with r-block fixnum = (* (floor (array-dimension A 1) 4) 4)
|
||||
with r fixnum = (array-dimension A 1)
|
||||
with Bt of-type (simple-array double-float 2) = (transpose-double-float B)
|
||||
for i fixnum from 0 below m
|
||||
do (lparallel:submit-task channel #'gemm-row-dot-double-float A Bt C alpha beta i n r r-block))
|
||||
(loop for i from 1 to (array-dimension C 0)
|
||||
do (lparallel:receive-result channel)))
|
||||
(lparallel:end-kernel)
|
||||
nil)
|
||||
|
||||
(defun gemm-serial-single-float (A B C alpha beta)
|
||||
(declare ((simple-array single-float 2) A B C)
|
||||
(single-float alpha beta)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(loop with m fixnum = (array-dimension C 0)
|
||||
with n fixnum = (array-dimension C 1)
|
||||
with r-block fixnum = (* (floor (array-dimension A 1) 8) 8)
|
||||
with r fixnum = (array-dimension A 1)
|
||||
with Bt of-type (simple-array single-float 2) = (transpose-single-float B)
|
||||
for i fixnum from 0 below m
|
||||
do (gemm-row-dot-single-float A Bt C alpha beta i n r r-block)))
|
||||
|
||||
(defun gemm-serial-double-float (A B C alpha beta)
|
||||
(declare ((simple-array double-float 2) A B C)
|
||||
(double-float alpha beta)
|
||||
(optimize . #.*optimize-qualities*))
|
||||
(loop with m fixnum = (array-dimension C 0)
|
||||
with n fixnum = (array-dimension C 1)
|
||||
with r-block fixnum = (* (floor (array-dimension A 1) 4) 4)
|
||||
with r fixnum = (array-dimension A 1)
|
||||
with Bt of-type (simple-array double-float 2) = (transpose-double-float B)
|
||||
for i fixnum from 0 below m
|
||||
do (gemm-row-dot-double-float A Bt C alpha beta i n r r-block)))
|
||||
|
||||
(defun gemm-single-float (A B C alpha beta)
|
||||
(declare ((simple-array single-float 2) A B C)
|
||||
(single-float alpha beta)
|
||||
(optimize (speed 3)))
|
||||
(if (< (array-total-size C) 512)
|
||||
(gemm-serial-single-float A B C alpha beta)
|
||||
(gemm-parallel-single-float A B C alpha beta)))
|
||||
|
||||
(defun gemm-double-float (A B C alpha beta)
|
||||
(declare ((simple-array double-float 2) A B C)
|
||||
(double-float alpha beta)
|
||||
(optimize (speed 3)))
|
||||
(if (< (array-total-size C) 512)
|
||||
(gemm-serial-double-float A B C alpha beta)
|
||||
(gemm-parallel-double-float A B C alpha beta)))
|
||||
|
||||
(defun gemm (A B C alpha beta)
|
||||
(cond
|
||||
((and (typep A '(simple-array single-float 2))
|
||||
(typep B '(simple-array single-float 2))
|
||||
(typep C '(simple-array single-float 2))
|
||||
(typep alpha 'single-float)
|
||||
(typep beta 'single-float))
|
||||
(gemm-single-float A B C alpha beta))
|
||||
((and (typep A '(simple-array double-float 2))
|
||||
(typep B '(simple-array double-float 2))
|
||||
(typep C '(simple-array double-float 2))
|
||||
(typep alpha 'double-float)
|
||||
(typep beta 'double-float))
|
||||
(gemm-double-float A B C alpha beta))))
|
11
src/package.lisp
Normal file
11
src/package.lisp
Normal file
|
@ -0,0 +1,11 @@
|
|||
(defpackage :cl-blas
|
||||
(:use #:cl #:sb-simd-avx)
|
||||
(:export #:axpy
|
||||
#:scal
|
||||
#:copy
|
||||
#:swap
|
||||
#:dot
|
||||
#:nrm2
|
||||
#:asum
|
||||
#:gemm))
|
||||
|
Loading…
Add table
Reference in a new issue