Previous Table of Contents Next


Listings 8.5 and 8.6 together go the final step and change the rules in favor of assembly language. Listing 8.5 creates the same list of linked blocks as Listing 8.1. However, instead of storing an array of structures within each block, it stores two arrays in each block, one consisting of ID numbers and the other consisting of the corresponding values, as shown in Figure 8.3. No information is lost; the data is merely rearranged.

LISTING 8.5 L8-5.C

/* Program to search an array spanning a linked list of variable-
   sized blocks, for all entries with a specified ID number,
   and return the average of the values of all such entries. Each of
   the variable-sized blocks may contain any number of data entries,
   stored in the form of two separate arrays, one for ID numbers and
   one for values. */

#include <stdio.h>
#ifdef __TURBOC__
#include <alloc.h>
#else
#include <malloc.h>
#endif

void main(void);
void exit(int);
extern unsigned int FindIDAverage2(unsigned int,
                                   struct BlockHeader *);


Figure 8.3
  Linked array storage format (version 2).

/* Structure that starts each variable-sized block */
struct BlockHeader {
   struct BlockHeader *NextBlock; /* Pointer to next block, or NULL
                                     if this is the last block in the
                                     linked list */
   unsigned int BlockCount;       /* The number of DataElement entries
                                     in this variable-sized block */
};

void main(void) {
   int i,j;
   unsigned int IDToFind;
   struct BlockHeader *BaseArrayBlockPointer,*WorkingBlockPointer;
   int *WorkingDataPointer;
   struct BlockHeader **LastBlockPointer;

   printf(”ID # for which to find average: “);
   scanf(”%d”,&IDToFind);

   /* Build an array across 5 blocks, for testing */
   /* Anchor the linked list to BaseArrayBlockPointer */
   LastBlockPointer = &BaseArrayBlockPointer;
   /* Create 5 blocks of varying sizes */
   for (i = 1; i < 6; i++) {
      /* Try to get memory for the next block */
      if ((WorkingBlockPointer =
          (struct BlockHeader *) malloc(sizeof(struct BlockHeader) +
           sizeof(int) * 2 * i * 10)) == NULL) {
         exit(1);
      }
      /* Set the number of data elements in this block */
      WorkingBlockPointer->BlockCount = i * 10;
      /* Link the new block into the chain */
      *LastBlockPointer = WorkingBlockPointer;
      /* Point to the first data field */
      WorkingDataPointer = (int *) ((char *)WorkingBlockPointer +
            sizeof(struct BlockHeader));
      /* Fill the data fields with ID numbers and values */
      for (j = 0; j < (i * 10); j++, WorkingDataPointer++) {
         *WorkingDataPointer = j;
         *(WorkingDataPointer + i * 10) = i * 1000 + j;
      }
      /* Remember where to set link from this block to the next */
      LastBlockPointer = &WorkingBlockPointer->NextBlock;
   }
   /* Set the last block’s “next block” pointer to NULL to indicate
      that there are no more blocks */
   WorkingBlockPointer->NextBlock = NULL;
   printf(”Average of all elements with ID %d: %u\n”,
         IDToFind, FindIDAverage2(IDToFind, BaseArrayBlockPointer));
   exit(0);
}

LISTING 8.6 L8-6.ASM

; Alternative optimized assembly language version of FindIDAverage
; requires data organized as two arrays within each block rather
; than as an array of two-value element structures. This allows the
; use of REP SCASW for ID searching.

SearchedForIDequ4               ;Passed parameter offsets in the
BlockPointerequ6                ; stack frame (skip over pushed BP
                                ; and the return address)
NextBlockequ0                   ;Field offsets in struct BlockHeader
BlockCountequ2
BLOCK_HEADER_SIZEequ4           ;Number of bytes in struct BlockHeader

        .model  small
        .code
        public  _FindIDAverage2
_FindIDAverage2 proc    near
        push    bp              ;Save caller’s stack frame
        mov     bp,sp           ;Point to our stack frame
        push    di              ;Preserve C register variables
        push    si
        mov     di,ds           ;Prepare for SCASW
        mov     es,di
        cld
        mov     si,[bp+BlockPointer]    ;Pointer to first block
        mov     ax,[bp+SearchedForID]   ;ID we’re looking for
        sub     dx,dx                       ;IDMatchSum = 0
        mov     bp,dx                       ;IDMatchCount = 0
                                            ;***stack frame no longer available***
; Search through all the linked blocks until the last block
; (marked with a NULL pointer to the next block) has been searched.
BlockLoop:
; Search through all the DataElement entries within this block
; and accumulate data from all that match the desired ID.
        mov     cx,[si+BlockCount]
        jcxz    DoNextBlock;Skip this block if there’s no data
                                   ; to search through
        mov     bx,cx              ;We’ll use BX to point to the
        shl     bx,1               ; corresponding value entry in the
; case of an ID match (BX is the
; length in bytes of the ID array)
; Point to the first DataElement entry within this block.
        lea     di,[si+BLOCK_HEADER_SIZE]
IntraBlockLoop:
        repnz   scasw              ;Search for the ID
        jnz     DoNextBlock        ;No match, the block is done
        inc     bp                 ;We have a match; IDMatchCount++;
        add     dx,[di+bx-2];IDMatchSum += DataPointer->Value;
; (SCASW has advanced DI 2 bytes)
        and     cx,cx              ;Is there more data to search through?
        jnz     IntraBlockLoop     ;yes
; Point to the next block and continue if that pointer isn’t NULL.
DoNextBlock:
        mov     si,[si+NextBlock] ;Get pointer to the next block
        and     si,si           ;Is it a NULL pointer?
        jnz     BlockLoop       ;No, continue
; Calculate the average of all matches.
        sub     ax,ax           ;Assume we found no matches
        and     bp,bp
        jz      Done            ;We didn’t find any matches, return 0
        xchg    ax,dx           ;Prepare for division
        div     bp              ;Return IDMatchSum / IDMatchCount
Done:   pop     si              ;Restore C register variables
        pop     di
        pop     bp              ;Restore caller’s stack frame
        ret
_FindIDAverage2 ENDP
        end

The whole point of this rearrangement is to allow us to use REP SCASW to search through each block, and that’s exactly what FindIDAverage2 in Listing 8.6 does. The result: Listing 8.6 calculates the average about three times as fast as the original C implementation and more than twice as fast as Listing 8.4, heavily optimized as the latter code is.

I trust you get the picture. The sort of instruction-by-instruction optimization that so many of us love to do as a kind of puzzle is fun, but compilers can do it nearly as well as you can, and in the future will surely do it better. What a compiler can’t do is tie together the needs of the program specification on the high end and the processor on the low end, resulting in critical code that runs just about as fast as the hardware permits. The only software that can do that is located north of your sternum and slightly aft of your nose. Dust it off and put it to work—and your code will never again be confused with anything by Hamilton, Joe, Frank, eynolds or Bo Donaldson and the Heywoods.


Previous Table of Contents Next

Graphics Programming Black Book © 2001 Michael Abrash