; FIR_asm_circ_func.asm 
; asm function called from C to implement fixed-point FIR filter
; using circular addressing
; A4=newest sample, B4=coefficient address, A6=filter order 
; input samples organized: x[n]...x[n-N]
; coefficients as h[0]...h[N]

                .def          _FIR_asm_circ_func  ; C-callable entry point
                .def          last_addr           ; saved delay-line pointer (persists across calls)
                .def          delays              ; sample delay line

; Delay line for the FIR routine plus the word that remembers where the
; next sample goes.  The routine reaches `delays` through A7 in circular
; mode with a 256-byte block, so the buffer has to start on a 256-byte
; boundary and be exactly 256 bytes (128 halfword samples) long.
                .sect         "circdata"  ; circular data section
                .align        256         ; block-size alignment required for circular addressing
delays          .space        256         ; 128 halfword slots, zero-filled
; Initial pointer = delays+254, the last halfword-aligned slot of the
; buffer.  (last_addr-1 would be the odd address delays+255, which is
; not a valid halfword address for STH/LDH.)
last_addr       .int          last_addr-2 ; -> last slot of delays

                .text                   ; code section
;-----------------------------------------------------------------------
; _FIR_asm_circ_func -- fixed-point FIR filter, called from C
;
; Computes  y[n] = sum_{k=0..N} h[k] * x[n-k]  on 16-bit samples and
; 16-bit coefficients, keeping the sample history in the `delays`
; buffer (128 halfword slots) addressed circularly through A7.
;
; In:    A4 = newest input sample x[n] (low 16 bits are stored)
;        B4 = address of h[0]; coefficients are halfwords h[0]..h[N]
;        A6 = filter order N (N+1 taps); N+1 must not exceed 128, the
;             number of slots in the delay line
;        B3 = return address
; Out:   A4 = 32-bit accumulated sum (no scaling, no saturation)
; State: last_addr holds the slot the NEXT call will write; it is
;        advanced by one slot per call, so each new sample overwrites
;        the oldest one.
; Clobb: A1, A2, A6, A7, A8, A9, B2, B4, B5, B6
;        (A10-A15 / B10-B15 are not touched)
; AMR:   switched to circular mode for A7 during the call and restored
;        to the caller's value before returning.
;-----------------------------------------------------------------------
_FIR_asm_circ_func:
                MVC           AMR,B5          ; B5 = caller's AMR, put back on exit
                ADD           A6,1,A1         ; A1 = N+1 = tap count = loop count
                ZERO          A8              ; A8 = accumulator

                ; AMR = 0x00070040: A7 mode field = 01 (circular, BK0),
                ; BK0 = 7 -> block size 2^(7+1) = 256 bytes (128 shorts)
                MVKL          0x00070040,B6
                MVKH          0x00070040,B6
                MVC           B6,AMR          ; enable circular addressing on A7

                MVKL          last_addr,A9    ; A9 = &last_addr (lower 16 bits)
                MVKH          last_addr,A9    ; A9 = &last_addr (upper 16 bits)

                LDW           *A9,A7          ; A7 = slot for the newest sample
                NOP           4               ; LDW delay slots
                STH           A4,*A7++        ; store x[n]; A7 -> following slot (wraps)
                STW           A7,*A9          ; that slot is where the next call writes

                ; Walk the delay line backwards from the newest sample while
                ; walking the coefficients forwards, so x[n-k] meets h[k].
LOOP:                                     ; start of FIR loop
                LDH    .D1    *--A7,A2    ; A2 = x[n-k], k = 0,...,N (circular pre-decrement)
        ||      LDH    .D2    *B4++,B2    ; B2 = h[k],   k = 0,...,N
                SUB    .S1    A1,1,A1     ; decrement loop count
  [A1]          B      .S2    LOOP        ; branch to loop if count # 0
                NOP           2           ; 3rd and 4th delay slots of LDH
                MPY    .M1x   A2,B2,A6    ; A6 = h[k]*x[n-k]
                NOP                       ; MPY delay slot
                ADD    .L1    A6,A8,A8    ; accumulate in A8 (last branch delay slot)

                MVC           B5,AMR      ; restore caller's addressing mode
                B      .S2    B3          ; return to calling routine
                MV     .L1    A8,A4       ; result returned in A4
                NOP               4       ; remaining branch delay slots
