        TTL     z:\refracted\BitmapScaler\blitStretchClippedFixed.asm
;       (TTL takes the rest of its line as the listing title, so keep
;       comments off that line.)
        EXPORT  |blitStretchClippedFixed|       ; the only symbol this file exports

;       FS is the shift count used to turn the fixed-point coordinates
;       (fcx/fcy, stepped by fdx/fdy) into integer source column/row
;       indices, i.e. the number of fractional bits.
        GBLA    FS      ; Declare the fixed point precision as an assembly-time variable
FS      SETA    16      ;       = 16 fractional bits

;       The .pdata area is declared here but nothing is placed in it by
;       this file.
        AREA    |.pdata|, PDATA

;       Everything below is assembled into the code area.
        AREA    |.text|, CODE

;       ***********************************************************************
;       2005/08/19.Lunchtime -- First version (Refractor)
;       ***********************************************************************
;	Original C code by "mm40"
;	ARM code by "refractor" -- contact via private message at http://forums.pocketmatrix.com
;
;	Compile with:
;		armasm -arch 4 -cpu strongarm1 -o blitStretchClippedFixed.asm.obj blitStretchClippedFixed.asm
;
;	Then add blitStretchClippedFixed.asm.obj to your library list and reference it like this from C:
;
;	extern "C" {
;		void blitStretchClippedFixed(
;			word *dst, int dw,      // dest surface + pitch
;			word *src, int sw,      // source surface + pitch
;			int x, int y,           // position and size of scaling
;       		int mx, int my,
;			dword fdx, dword fdy,
;			dword fsx, dword fcy);
;	}
;
;/*******************************************************************************/
;/*     r0                      =       *dst                                    */
;/*     r1                      =       dw                                      */
;/*     r2                      =       *src                                    */
;/*     r3                      =       sw                                      */
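;/*     r13[00] =               x       (loaded into r4 for setup)              */
;/*     r13[04] =               y       (loaded into r5 for setup)              */
;/*     r13[08] =               mx      (loaded into r6 for setup)              */
;/*     r13[0C] =               my      (loaded into r7 for setup)              */
;/*     r13[10] =               fdx     (const, kept in r4 for the loops)       */
;/*     r13[14] =               fdy     (const, kept in r5 for the loops)       */
;/*     r13[18] =               fsx     (const, reloaded into r6 per row)       */
;/*     r13[1C] =               fcy     (kept in r7, stepped by fdy per row)    */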
;/*******************************************************************************/
;-------------------------------------------------------------------------------
; void blitStretchClippedFixed(word *dst, int dw, word *src, int sw,
;                              int x, int y, int mx, int my,
;                              dword fdx, dword fdy, dword fsx, dword fcy)
;
; Fills destination rows y..my-1, columns x..mx-1 with halfword pixels read
; from the source at fixed-point coordinates: no filtering, one ldrh/strh
; per destination pixel.  For every destination row the source row is
; fcy >> FS and the source column starts at fsx and advances by fdx per
; pixel; fcy advances by fdy per row.
;
; In:    r0 = dst, r1 = dw (destination pitch in pixels),
;        r2 = src, r3 = sw (source pitch in pixels),
;        [sp+0x00..0x1c] = x, y, mx, my, fdx, fdy, fsx, fcy
; Saved: r4-r11 and lr are pushed on entry and restored on exit.
; Uses:  r0-r3 and r12 as working registers.
;
; Notes:
;   * The row cursor is kept in pixel units (dst >> 1) and doubled again to
;     form store addresses, so bit 0 of dst is dropped.
;   * All bounds checks are unsigned (bcs / bcc).  A row or column count
;     that is zero or negative therefore writes nothing.
;
; Register roles inside the loops:
;     r0  row cursor (pixels)          r7  fcy
;     r1  dw                           r8  row cursor limit (pixels)
;     r2  src                          r9  scaledw = mx - x
;     r3  sw * 2 (bytes)               r10 store address (bytes)
;     r4  fdx                          r11 source row address (bytes)
;     r5  fdy                          r12 scratch / fetched pixel
;     r6  fcx (restarts at fsx)        lr  end-of-row store address (bytes)
;-------------------------------------------------------------------------------
BSCF_SAVED_BYTES        EQU     9*4             ; r4-r11 + lr pushed by the prologue
BSCF_ARG_FSX            EQU     0x18            ; fsx relative to sp as it was on entry
BSCF_FSX_OFFSET         EQU     BSCF_SAVED_BYTES + BSCF_ARG_FSX ; = 0x3c from the current sp

|blitStretchClippedFixed| PROC
        mov     r12, sp                         ; r12 -> first stacked argument (x)
        stmdb   sp!, {r4-r11, lr}               ; preserve everything the loops overwrite

        ldmia   r12!, {r4-r7}                   ; r4 = x, r5 = y, r6 = mx, r7 = my; r12 -> fdx

        sub     r9, r6, r4                      ; scaledw = mx - x
        mla     r8, r5, r1, r4                  ; y * dw + x
        sub     r7, r7, r5                      ; row count = my - y
        mov     r3, r3, lsl #1                  ; source pitch: pixels -> bytes
        add     r0, r8, r0, lsr #1              ; row cursor = (dst >> 1) + y * dw + x
        mla     r8, r1, r7, r0                  ; row limit  = row cursor + dw * (my - y)

        ldmia   r12, {r4-r7}                    ; r4 = fdx, r5 = fdy, r6 = fsx, r7 = fcy

        cmp     r0, r8                          ; no rows to draw?
        bcs     |$blitStretchClippedFixed_Done|

|$blitStretchClippedFixed_Row|
        mov     r11, r7, lsr #FS                ; integer source row = fcy >> FS
        mov     r10, r0, lsl #1                 ; store address of the row's first pixel
        mla     r11, r3, r11, r2                ; source row address = src + row * (sw * 2)
        add     lr, r10, r9, lsl #1             ; store address just past the row's last pixel

        cmp     r10, lr                         ; empty row?  then only step to the next one
        bcs     |$blitStretchClippedFixed_NextRow|

|$blitStretchClippedFixed_Pixel|
        mov     r12, r6, lsr #FS                ; integer source column = fcx >> FS
        add     r12, r11, r12, lsl #1           ; address of that source pixel
        ldrh    r12, [r12]                      ; fetch it
        add     r6, r6, r4                      ; fcx += fdx
        strh    r12, [r10], #2                  ; store and advance one destination pixel

        cmp     r10, lr
        bcc     |$blitStretchClippedFixed_Pixel|

        ldr     r6, [sp, #BSCF_FSX_OFFSET]      ; fcx = fsx again, ready for the next row

|$blitStretchClippedFixed_NextRow|
        add     r7, r7, r5                      ; fcy += fdy
        add     r0, r0, r1                      ; row cursor += dw

        cmp     r0, r8
        bcc     |$blitStretchClippedFixed_Row|

|$blitStretchClippedFixed_Done|
        ldmia   sp!, {r4-r11, pc}               ; restore the saved registers and return

        ENDP                                    ; |blitStretchClippedFixed|

        END;