LISTING 7.2 L7-2.ASM
; Program to illustrate searching through a buffer of a specified
; length until a specified zero byte is encountered.
; A loop unrolled four times and terminated with LOOP is used.
.model small
.stack 100h
.data
; Sample string to search through.
SampleStringlabelbyte
db This is a sample string of a long enough length
db so that raw searching speed can outweigh any
db extra set-up time that may be required.,0
SAMPLE_STRING_LENGTH equ $-SampleString
; User prompt.
Prompt db Enter character to search for:$
; Result status messages.
ByteFoundMsg db 0dh,0ah
db Specified byte found.,0dh,0ah,$
ZeroByteFoundMsg db 0dh,0ah
db Zero byte encountered., 0dh, 0ah, $
NoByteFoundMsg db 0dh,0ah
db Buffer exhausted with no match., 0dh, 0ah, $
; Table of initial, possibly partial loop entry points for
; SearchMaxLength.
SearchMaxLengthEntryTable labelword
dw SearchMaxLengthEntry4
dw SearchMaxLengthEntry1
dw SearchMaxLengthEntry2
dw SearchMaxLengthEntry3
.code
Start proc near
mov ax,@data ;point to standard data segment
mov ds,ax
mov dx,offset Prompt
mov ah,9 ;DOS print string function
int 21h ;prompt the user
mov ah,1 ;DOS get key function
int 21h ;get the key to search for
mov ah,al ;put character to search for in AH
mov cx,SAMPLE_STRING_LENGTH ;# of bytes to search
mov si,offset SampleString ;point to buffer to search
call SearchMaxLength ;search the buffer
mov dx,offset ByteFoundMsg ;assume we found the byte
jc PrintStatus ;we did find the byte
;we didnt find the byte, figure out
;whether we found a zero byte or
;ran out of buffer
mov dx,offset NoByteFoundMsg
;assume we didnt find a zero byte
jcxz PrintStatus ;we didnt find a zero byte
mov dx,offset ZeroByteFoundMsg ;we found a zero byte
PrintStatus:
mov ah,9 ;DOS print string function
int 21h ;report status
mov ah,4ch ;return to DOS
int 21h
Startendp
; Function to search a buffer of a specified length until either a
; specified byte or a zero byte is encountered.
; Input:
; AH = character to search for
; CX = maximum length to be searched (must be > 0)
; DS:SI = pointer to buffer to be searched
; Output:
; CX = 0 if and only if we ran out of bytes without finding
; either the desired byte or a zero byte
; DS:SI = pointer to searched-for byte if found, otherwise byte
; after zero byte if found, otherwise byte after last
; byte checked if neither searched-for byte nor zero
; byte is found
; Carry Flag = set if searched-for byte found, reset otherwise
SearchMaxLength proc near
cld
mov bx,cx
add cx,3 ;calculate the maximum # of passes
shr cx,1 ;through the loop, which is
shr cx,1 ;unrolled 4 times
and bx,3 ;calculate the index into the entry
;point table for the first,
;possibly partial loop
shl bx,1 ;prepare for a word-sized look-up
jmp SearchMaxLengthEntryTable[bx]
;branch into the unrolled loop to do
;the first, possibly partial loop
SearchMaxLengthLoop:
SearchMaxLengthEntry4:
lodsb ;get the next byte
cmp al,ah ;is this the byte we want?
jz ByteFound ;yes, were done with success
and al,al ;is this the terminating 0 byte?
jz ByteNotFound ;yes, were done with failure
SearchMaxLengthEntry3:
lodsb ;get the next byte
cmp al,ah ;is this the byte we want?
jz ByteFound ;yes, were done with success
and al,al ;is this the terminating 0 byte?
jz ByteNotFound ;yes, were done with failure
SearchMaxLengthEntry2:
lodsb ;get the next byte
cmp al,ah ;is this the byte we want?
jz ByteFound ;yes, were done with success
and al,al ;is this the terminating 0 byte?
jz ByteNotFound ;yes, were done with failure
SearchMaxLengthEntry1:
lodsb ;get the next byte
cmp al,ah ;is this the byte we want?
jz ByteFound ;yes, were done with success
and al,al ;is this the terminating 0 byte?
jz ByteNotFound ;yes, were done with failure
loop SearchMaxLengthLoop ;its neither, so check the next
; four bytes, if any
ByteNotFound:
clc ;return not found status
ret
ByteFound:
dec si ;point back to the location at which
; we found the searched-for byte
stc ;return found status
ret
SearchMaxLengthendp
end Start
How much difference? Listing 7.2 runs in 121 µs40 percent faster than Listing 7.1, even though Listing 7.2 still uses LOOP rather than DEC CX/JNZ. (The loop in Listing 7.2 could be unrolled further, too; its just a question of how much more memory you want to trade for ever-decreasing performance benefits.) Thats typical of local optimization; it wont often yield the order-of-magnitude improvements that algorithmic improvements can produce, but it can get you a critical 50 percent or 100 percent improvement when youve exhausted all other avenues.
| The point is simply this: You can gain far more by stepping back a bit and thinking of the fastest overall way for the CPU to perform a task than you can by saving a cycle here or there using different instructions. Try to think at the level of sequences of instructions rather than individual instructions, and learn to treat x86 instructions as building blocks with unique characteristics rather than as instructions dedicated to specific tasks.
|
Graphics Programming Black Book © 2001 Michael Abrash