; FIR_asm_func_opt.asm 
; asm function called from C to implement fixed-point FIR 
; A4 = x[n] address, B4 = h[0] address, A6 = filter order N 
; input samples organized as x(n)...x(n-N)
; coefficients as h[0]...h[N]     

                .def    _FIR_asm_func_opt
;----------------------------------------------------------------------
; _FIR_asm_func_opt -- C-callable fixed-point FIR filter (TMS320C6x)
;
; Expected C prototype (inferred from register usage, confirm in caller):
;       int FIR_asm_func_opt(short *x, short *h, int N);
;
; Computes  y = sum_{k=0..N} h[k] * x[n-k]  with signed 16x16 multiplies
; and a 32-bit accumulator, and shifts the delay line by one sample
; while doing so (x[n-k] <- previous x[n-k+1] for k = 1..N).
;
; In:    A4 = address of x[n]   (samples stored x[n], x[n-1], ..., x[n-N])
;        B4 = address of h[0]   (coefficients stored h[0], ..., h[N])
;        A6 = filter order N    (N <= 0: only the h[0]*x[n] term is
;                                computed and the delay line is untouched)
;        B3 = return address
; Out:   A4 = accumulated sum (no scaling or saturation applied here)
; Clobb: A1, A2, A3, A7, A8, B0, B2, B4
;
; The code is hand-scheduled around the pipeline: LDH has 4 delay slots,
; MPY has 1 and B has 5.  Recount the slots before inserting, removing
; or reordering any instruction.
;----------------------------------------------------------------------
_FIR_asm_func_opt:                      ; asm function called from C
        MV     .L1    A6,A1             ; A1 = N, loop trip count
        ZERO   .S1    A8                ; A8 = accumulator, start at 0
        LDH    .D1    *A4++,A2          ; A2 = x[n],  A4 -> x[n-1]
   ||   LDH    .D2    *B4++,B2          ; B2 = h[0],  B4 -> h[1]
        CMPGT  .L2x   1,A6,B0           ; B0 = (N < 1); fills LDH delay slot 1
  [B0]  B      .S2    FIR_DONE          ; N <= 0: no further taps, skip loop
                                        ;   (fills LDH delay slot 2)
        NOP           2                 ; LDH delay slots 3-4, B slots 1-2
        MPY    .M1x   A2,B2,A7          ; A7 = x[n]*h[0]          (B slot 3)
        NOP                             ; MPY delay slot          (B slot 4)
        ADD    .L1    A7,A8,A8          ; accumulate in A8        (B slot 5)

LOOP:                                   ; one pass per tap k = 1..N
        MV     .L1    A2,A3             ; A3 = sample read last pass, i.e.
                                        ;   the value that moves down a slot
        LDH    .D1    *A4,A2            ; A2 = x[n-k]  (A4 not advanced yet)
   ||   LDH    .D2    *B4++,B2          ; B2 = h[k]
        SUB    .S1    A1,1,A1           ; decrement loop count
  [A1]  B      .S2    LOOP              ; repeat while count != 0
        NOP           2                 ; LDH delay slots 3-4, B slots 1-2
        MPY    .M1x   A2,B2,A7          ; A7 = h[k]*x[n-k]        (B slot 3)
        STH    .D1    A3,*A4++          ; overwrite x[n-k] with the newer
                                        ;   sample, then step A4  (B slot 4)
        ADD    .L1    A7,A8,A8          ; accumulate in A8        (B slot 5)

FIR_DONE:
        B      .S2    B3                ; return to the calling routine
        MV     .L1    A8,A4             ; result returned in A4
        NOP           4                 ; remaining B delay slots
