; boyer-moore-patch.lisp ; ; Make the case-matters optional arg to compute-bm-tables work. (in-package :ccl) (eval-when (:compile-toplevel :execute) (let ((*warn-if-redefine* nil) (*warn-if-redefine-kernel* nil)) (defmacro %char-code-upcase (char-code) (let ((c (gensym))) `(the fixnum (let ((,c ,char-code)) (declare (fixnum ,c)) (if (and (<= (char-code #\a) ,c) (<= ,c (char-code #\z))) (the fixnum (+ ,c (- (char-code #\A) (char-code #\a)))) ,c))))) )) (let ((*warn-if-redefine* nil) (*warn-if-redefine-kernel* nil)) (defstruct bm-tables string ; the search string as a vector of character codes len ; the length of the search string match ; index in string -> shift for last chars match mismatch ; char -> shift for last char mismatch case-matters ; boolean, true if case matters ) (defun compute-bm-tables (string &optional case-matters) (setq string (ensure-simple-string (if case-matters string (string-upcase string)))) (let* ((len (length string)) (len-1 (1- len)) (len-2 (1- len-1)) (mismatch (make-array 256 :element-type t :initial-element len)) (match (make-array (max 0 len-1) :element-type t)) (pred (if case-matters #'char= #'char-equal))) (declare (fixnum len len-1 len-2)) ; compute mismatch table. ; mismatch[i] = how far to shift if there is a mismatch on the first ; compare (with string[len-1]) and the character in the text is (code-char i) (dotimes (i len-1) (declare (fixnum i)) (setf (aref mismatch (char-code (schar string i))) (- len-1 i))) ; Compute match table ; match[i] = how far to shift if there is a mismatch in the ith position ; of the search string (i < len-1). (dotimes (i len-1) ; i is mismatch position (declare (fixnum i)) (setf (aref match i) (block match (do ((end len-2 (1- end))) ((< end 0) len) (declare (fixnum end)) (do ((j len-1 (1- j)) (k end (1- k))) ((< k 0) (return-from match (- len-1 end))) (declare (fixnum j k)) (when (eql j i) (if (not (funcall pred (schar string j) (schar string k))) (return-from match (- len-1 end)) (return))) (unless (funcall pred (schar string j) (schar string k)) (return))))))) (make-bm-tables :string (map 'vector #'char-code string) :len len :mismatch mismatch :match match :case-matters case-matters))) ; Search array from start to end for the string in the bm-tables descriptor (defun bm-search-array (bm-tables array start end) (declare (fixnum start end) (type macptr array)) (declare (optimize (speed 3) (safety 0))) (let* ((string (bm-tables-string bm-tables)) (len (bm-tables-len bm-tables)) (len-1 (1- len)) (match (bm-tables-match bm-tables)) (mismatch (bm-tables-mismatch bm-tables)) (i (+ start len-1)) (char-code 0)) (declare (fixnum len len-1 i char-code)) (macrolet ((do-it () `(loop (when (>= i end) (return nil)) (let ((array-idx i) (string-idx len-1)) (declare (fixnum array-idx string-idx)) (if (not (eql (the fixnum (svref string string-idx)) (setq char-code (array-ref array array-idx)))) (incf i (the fixnum (svref mismatch char-code))) (loop (when (< (decf string-idx) 0) (return-from bm-search-array (the fixnum (- i len-1)))) (decf array-idx) (when (not (eql (the fixnum (svref string string-idx)) (array-ref array array-idx))) (return (the fixnum (incf i (the fixnum (svref match string-idx)))))))))))) (if (bm-tables-case-matters bm-tables) (macrolet ((array-ref (array index) `(%get-unsigned-byte ,array ,index))) (do-it)) (macrolet ((array-ref (array index) `(%char-code-upcase (%get-unsigned-byte ,array ,index)))) (do-it)))))) )