Почему оптимизация clang ломает мой встроенный ассемблерный код?

Пытаясь изучить ассемблер ARM, я написал простой тестовый проект, который уменьшает масштаб изображения с помощью встроенного ассемблера и инструкций NEON. Посмотреть его можно здесь:

https://github.com/rmaz/NEON-Image-Downscaling

После некоторых усилий мне удалось заставить его работать — счастливые дни. Вот только работает он лишь при уровнях оптимизации ниже -O2. Я посмотрел на сгенерированный ассемблерный код, но не вижу очевидной причины, по которой это происходит. Может ли кто-нибудь подсказать, в чём дело? Вот функция, содержащая встроенный ассемблерный код:

static void inline resizeRow(uint32_t *dst, uint32_t *src, uint32_t pixelsPerRow)
{
    const uint32_t * rowB = src + pixelsPerRow;

    // force the number of pixels per row to a mutliple of 8
    pixelsPerRow = 8 * (pixelsPerRow / 8);    

    __asm__ volatile("Lresizeloop:                      \n" // start loop
                     "vld1.32       {d0-d3}, [%1]!      \n" // load 8 pixels from the top row
                     "vld1.32       {d4-d7}, [%2]!      \n" // load 8 pixels from the bottom row
                     "vhadd.u8      q0, q0, q2          \n" // average the pixels vertically
                     "vhadd.u8      q1, q1, q3          \n"
                     "vtrn.32       q0, q2              \n" // transpose to put the horizontally adjacent pixels in different registers
                     "vtrn.32       q1, q3              \n"
                     "vhadd.u8      q0, q0, q2          \n" // average the pixels horizontally
                     "vhadd.u8      q1, q1, q3          \n"
                     "vtrn.32       d0, d1              \n" // fill the registers with pixels
                     "vtrn.32       d2, d3              \n"
                     "vswp          d1, d2              \n"
                     "vst1.64       {d0-d1}, [%0]!      \n" // store the result
                     "subs          %3, %3, #8          \n" // subtract 8 from the pixel count
                     "bne           Lresizeloop         \n" // repeat until the row is complete
                     : "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow)
                     : "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow)
                     : "q0", "q1", "q2", "q3"
                     );
}

Рабочий код, сгенерированный при -O1 для вызывающей функции и цикла, выглядит так:

@ Thumb code (".code 16" / ".thumb_func") for -[BDPViewController downscaleImageNeon:].
@ On entry r0 = self, r1 = _cmd, r2 = image (see the DEBUG_VALUE notes below).
.align  2
   .code   16                      @ @"\01-[BDPViewController downscaleImageNeon:]"
   .thumb_func "-[BDPViewController downscaleImageNeon:]"
"-[BDPViewController downscaleImageNeon:]":
   .cfi_startproc
Lfunc_begin4:
   .loc    1 86 0                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0
@ BB#0:
   .loc    1 86 1 prologue_end     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1
    @ Prologue: save r4-r7/lr and r8/r10/r11, point r7 into the saved-register
    @ area and reserve 20 bytes of stack for spills.
    push    {r4, r5, r6, r7, lr}
    add r7, sp, #12
    push.w  {r8, r10, r11}
    sub sp, #20
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0
   .loc    1 88 20                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20
Ltmp41:
    movw    r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
Ltmp42:
    mov r6, r2
Ltmp43:
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0
    movt    r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
LPC4_0:
    add r0, pc
    @ r11 = selector loaded through L_OBJC_SELECTOR_REFERENCES_2; it is sent to
    @ image (r6) before each of the CGImageGet* calls below.
    ldr.w   r11, [r0]
    mov r0, r6
    blx _objc_retain
    mov r4, r0
    mov r0, r6
    mov r1, r11
Ltmp44:
    blx _objc_msgSend
    blx _CGImageGetWidth
    mov r5, r0
Ltmp45:
    @DEBUG_VALUE: width <- R5+0
   .loc    1 89 21                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21
    mov r0, r6
    mov r1, r11
    str r5, [sp, #16]           @ 4-byte Spill
    blx _objc_msgSend
    blx _CGImageGetHeight
    mov r10, r0
Ltmp46:
    @DEBUG_VALUE: height <- R10+0
   .loc    1 90 26                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26
    mov r0, r6
    mov r1, r11
    blx _objc_msgSend
    blx _CGImageGetBytesPerRow
    str r0, [sp, #12]           @ 4-byte Spill
Ltmp47:
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
   .loc    1 91 35                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35
    mov r0, r6
    mov r1, r11
    blx _objc_msgSend
    blx _CGImageGetAlphaInfo
    str r0, [sp, #4]            @ 4-byte Spill
Ltmp48:
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
   .loc    1 94 45                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
    mov r0, r6
    mov r1, r11
    blx _objc_msgSend
    mov r6, r0
Ltmp49:
    mov r0, r4
    blx _objc_release
    mov r0, r6
   .loc    1 98 29                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
    @ r8 = height * width, used further down as the calloc element count (>> 2)
    mul r8, r10, r5
Ltmp50:
    @DEBUG_VALUE: width <- [sp+#16]+#0
   .loc    1 94 45                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
    blx _CGImageGetDataProvider
    blx _CGDataProviderCopyData
Ltmp51:
    @DEBUG_VALUE: data <- R0+0
    str r0, [sp, #8]            @ 4-byte Spill
Ltmp52:
    @DEBUG_VALUE: data <- [sp+#8]+#0
   .loc    1 95 29                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29
    blx _CFDataGetBytePtr
    mov r4, r0
Ltmp53:
    @DEBUG_VALUE: buffer <- R4+0
   .loc    1 98 29                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
    @ calloc((height * width) >> 2, 4); the result becomes outputBuffer (r5)
    lsr.w   r0, r8, #2
    movs    r1, #4
    blx _calloc
    mov r5, r0
Ltmp54:
    @DEBUG_VALUE: outputBuffer <- R5+0
    mov r0, r10
Ltmp55:
    @DEBUG_VALUE: height <- R0+0
   .loc    1 101 29                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
    @ skip the row loop entirely when height == 0
    cmp r0, #0
Ltmp56:
    @DEBUG_VALUE: rowIndex <- 0+0
    beq LBB4_3
@ BB#1:                                 @ %.lr.ph
Ltmp57:
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: width <- [sp+#16]+#0
    @DEBUG_VALUE: height <- R0+0
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
    @DEBUG_VALUE: data <- [sp+#8]+#0
    @DEBUG_VALUE: buffer <- R4+0
    @DEBUG_VALUE: outputBuffer <- R5+0
    @DEBUG_VALUE: rowIndex <- 0+0
    @ Loop setup: r8 = 0 (row counter), r11 = bytesPerRow << 1,
    @ r10 = bytesPerRow >> 1.
    ldr r1, [sp, #12]           @ 4-byte Reload
Ltmp58:
    @DEBUG_VALUE: bytesPerRow <- R1+0
    mov.w   r8, #0
    lsl.w   r11, r1, #1
   .loc    1 104 74                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74
Ltmp59:
    lsr.w   r10, r1, #1
Ltmp60:
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
LBB4_2:                                 @ =>This Inner Loop Header: Depth=1
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: width <- [sp+#16]+#0
    @DEBUG_VALUE: height <- R0+0
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
    @DEBUG_VALUE: data <- [sp+#8]+#0
    @DEBUG_VALUE: outputBuffer <- R5+0
    @DEBUG_VALUE: rowIndex <- 0+0
    @ Arguments for _resizeRow: r0 = outputBuffer + (r8 >> 1) * r10,
    @ r1 = r4 (current source position), r2 = width. height is parked in r6
    @ across the call.
    lsr.w   r1, r8, #1
Ltmp61:
    mov r6, r0
Ltmp62:
    @DEBUG_VALUE: height <- R6+0
    mla r0, r1, r10, r5
Ltmp63:
    @DEBUG_VALUE: destRow <- R1+0
   .loc    1 105 9                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9
    ldr r2, [sp, #16]           @ 4-byte Reload
    mov r1, r4
Ltmp64:
    bl  _resizeRow
    mov r0, r6
Ltmp65:
    @DEBUG_VALUE: height <- R0+0
   .loc    1 101 50                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50
    @ step: r8 += 2, r4 += r11; keep looping while r8 < height (unsigned)
    add.w   r8, r8, #2
Ltmp66:
    @DEBUG_VALUE: rowIndex <- R8+0
   .loc    1 101 29                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
    add r4, r11
    cmp r8, r0
    blo LBB4_2
Ltmp67:
LBB4_3:                                 @ %._crit_edge
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: width <- [sp+#16]+#0
    @DEBUG_VALUE: height <- R0+0
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
    @DEBUG_VALUE: data <- [sp+#8]+#0
    @DEBUG_VALUE: outputBuffer <- R5+0
   .loc    1 109 28                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28
    @ Arguments for _createBitmapContext: r0 = outputBuffer, r1 = width >> 1,
    @ r2 = height >> 1, r3 = bytesPerRow >> 1, [sp] = imageAlpha.
    ldr r1, [sp, #4]            @ 4-byte Reload
Ltmp68:
    lsrs    r2, r0, #1
    str r1, [sp]
    mov r6, r5
Ltmp69:
    @DEBUG_VALUE: outputBuffer <- R6+0
    ldr r1, [sp, #16]           @ 4-byte Reload
    ldr r0, [sp, #12]           @ 4-byte Reload
Ltmp70:
    lsrs    r1, r1, #1
    lsrs    r3, r0, #1
    mov r0, r5
    bl  _createBitmapContext
    mov r4, r0
Ltmp71:
    @DEBUG_VALUE: context <- R4+0
   .loc    1 110 30                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
    blx _CGBitmapContextCreateImage
   .loc    1 111 66                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
    movw    r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
   .loc    1 110 30                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
    mov r5, r0
Ltmp72:
    @DEBUG_VALUE: scaledImage <- R5+0
   .loc    1 111 66                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
    movt    r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
    movw    r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
    movt    r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
LPC4_1:
    add r1, pc
LPC4_2:
    add r0, pc
    mov r2, r5
    ldr r1, [r1]
    ldr r0, [r0]
    blx _objc_msgSend
Ltmp73:
    @DEBUG_VALUE: returnImage <- R0+0
    @ InlineAsm Start
    mov r7, r7      @ marker for objc_retainAutoreleaseReturnValue
    @ InlineAsm End
    blx _objc_retainAutoreleasedReturnValue
Ltmp74:
    mov r8, r0
    @ Cleanup: release scaledImage (r5), context (r4) and data ([sp, #8]),
    @ then free outputBuffer (r6).
   .loc    1 112 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5
    mov r0, r5
    blx _CGImageRelease
   .loc    1 113 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5
    mov r0, r4
    blx _CGContextRelease
   .loc    1 114 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5
    ldr r0, [sp, #8]            @ 4-byte Reload
    blx _CFRelease
   .loc    1 115 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5
    mov r0, r6
    blx _free
Ltmp75:
   .loc    1 118 1                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1
    @ Epilogue: r0 = r8 (value kept from objc_retainAutoreleasedReturnValue),
    @ restore the stack and saved registers, then tail-branch to
    @ _objc_autoreleaseReturnValue.
    mov r0, r8
    add sp, #20
    pop.w   {r8, r10, r11}
    pop.w   {r4, r5, r6, r7, lr}
Ltmp76:
    b.w _objc_autoreleaseReturnValue
Ltmp77:
Lfunc_end4:
   .cfi_endproc

   @ Thumb code (".code 16") for resizeRow.
   @ On entry r0 = dst, r1 = src, r2 = pixelsPerRow (see the DEBUG_VALUE notes).
   .align  2
   .code   16                      @ @resizeRow
   .thumb_func _resizeRow
_resizeRow:
   .cfi_startproc
Lfunc_begin5:
   .loc    1 26 0                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0
@ BB#0:
    @DEBUG_VALUE: resizeRow:dst <- R0+0
    @DEBUG_VALUE: resizeRow:src <- R1+0
    @DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0
   .loc    1 27 47 prologue_end    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47
    @ r3 = rowB = src + pixelsPerRow * 4 bytes
    add.w   r3, r1, r2, lsl #2
Ltmp78:
    @DEBUG_VALUE: rowB <- R3+0
   .loc    1 30 5                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5
    @ clear the low three bits: pixelsPerRow rounded down to a multiple of 8
    bic r2, r2, #7
Ltmp79:
   .loc    1 32 5                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5
    @ Each pass of the loop below loads 32 bytes from [r1] and 32 bytes from
    @ [r3] (both post-incremented), stores 16 bytes to [r0] (post-incremented)
    @ and subtracts 8 from r2, repeating until r2 reaches zero.
    @ NOTE(review): the counter is tested only after the subtraction, so r2 == 0
    @ on entry does not terminate the loop on the first pass.
    @ InlineAsm Start
    Lresizeloop:                      
vld1.32       {d0-d3}, [r1]!      
vld1.32       {d4-d7}, [r3]!      
vhadd.u8      q0, q0, q2          
vhadd.u8      q1, q1, q3          
vtrn.32       q0, q2              
vtrn.32       q1, q3              
vhadd.u8      q0, q0, q2          
vhadd.u8      q1, q1, q3          
vtrn.32       d0, d1              
vtrn.32       d2, d3              
vswp          d1, d2              
vst1.64       {d0-d1}, [r0]!      
subs          r2, r2, #8          
bne           Lresizeloop         

    @ InlineAsm End
Ltmp80:
   .loc    1 51 1                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1
    @ return to the caller
    bx  lr
Ltmp81:
Lfunc_end5:
   .cfi_endproc

А неработающий код, сгенерированный при -O2, выглядит так:

   @ Thumb code (".code 16" / ".thumb_func") for -[BDPViewController downscaleImageNeon:].
   @ On entry r0 = self, r1 = _cmd, r2 = image (see the DEBUG_VALUE notes below).
   .align  2
   .code   16                      @ @"\01-[BDPViewController downscaleImageNeon:]"
   .thumb_func "-[BDPViewController downscaleImageNeon:]"
"-[BDPViewController downscaleImageNeon:]":
   .cfi_startproc
Lfunc_begin4:
   .loc    1 86 0                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0
@ BB#0:
   .loc    1 86 1 prologue_end     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1
    @ Prologue: save r4-r7/lr and r8/r10/r11, point r7 into the saved-register
    @ area and reserve 20 bytes of stack for spills.
    push    {r4, r5, r6, r7, lr}
    add r7, sp, #12
    push.w  {r8, r10, r11}
    sub sp, #20
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0
   .loc    1 88 20                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20
Ltmp41:
    movw    r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
Ltmp42:
    mov r6, r2
Ltmp43:
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0
    movt    r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4))
LPC4_0:
    add r0, pc
    @ r11 = selector loaded through L_OBJC_SELECTOR_REFERENCES_2; it is sent to
    @ image (r6) before each of the CGImageGet* calls below.
    ldr.w   r11, [r0]
    mov r0, r6
    blx _objc_retain
    mov r4, r0
    mov r0, r6
    mov r1, r11
Ltmp44:
    blx _objc_msgSend
    blx _CGImageGetWidth
    mov r5, r0
Ltmp45:
    @DEBUG_VALUE: width <- R5+0
   .loc    1 89 21                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21
    mov r0, r6
    mov r1, r11
    str r5, [sp, #16]           @ 4-byte Spill
    blx _objc_msgSend
    blx _CGImageGetHeight
    mov r10, r0
Ltmp46:
    @DEBUG_VALUE: height <- R10+0
   .loc    1 90 26                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26
    mov r0, r6
    mov r1, r11
    blx _objc_msgSend
    blx _CGImageGetBytesPerRow
    str r0, [sp, #12]           @ 4-byte Spill
Ltmp47:
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
   .loc    1 91 35                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35
    mov r0, r6
    mov r1, r11
    blx _objc_msgSend
    blx _CGImageGetAlphaInfo
    str r0, [sp, #4]            @ 4-byte Spill
Ltmp48:
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
   .loc    1 94 45                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
    mov r0, r6
    mov r1, r11
    blx _objc_msgSend
    mov r6, r0
Ltmp49:
    mov r0, r4
    blx _objc_release
    mov r0, r6
   .loc    1 98 29                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
    @ r8 = height * width, used further down as the calloc element count (>> 2)
    mul r8, r10, r5
Ltmp50:
    @DEBUG_VALUE: width <- [sp+#16]+#0
   .loc    1 94 45                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45
    blx _CGImageGetDataProvider
    blx _CGDataProviderCopyData
Ltmp51:
    @DEBUG_VALUE: data <- R0+0
    str r0, [sp, #8]            @ 4-byte Spill
Ltmp52:
    @DEBUG_VALUE: data <- [sp+#8]+#0
   .loc    1 95 29                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29
    blx _CFDataGetBytePtr
    mov r4, r0
Ltmp53:
    @DEBUG_VALUE: buffer <- R4+0
   .loc    1 98 29                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29
    @ calloc((height * width) >> 2, 4); the result becomes outputBuffer (r5)
    lsr.w   r0, r8, #2
    movs    r1, #4
    blx _calloc
    mov r5, r0
Ltmp54:
    @DEBUG_VALUE: outputBuffer <- R5+0
    mov r0, r10
Ltmp55:
    @DEBUG_VALUE: height <- R0+0
   .loc    1 101 29                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
    @ skip the row loop entirely when height == 0
    cmp r0, #0
Ltmp56:
    @DEBUG_VALUE: rowIndex <- 0+0
    beq LBB4_3
@ BB#1:                                 @ %.lr.ph
Ltmp57:
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: width <- [sp+#16]+#0
    @DEBUG_VALUE: height <- R0+0
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
    @DEBUG_VALUE: data <- [sp+#8]+#0
    @DEBUG_VALUE: buffer <- R4+0
    @DEBUG_VALUE: outputBuffer <- R5+0
    @DEBUG_VALUE: rowIndex <- 0+0
    @ Loop setup: r8 = 0 (row counter), r11 = bytesPerRow << 1,
    @ r10 = bytesPerRow >> 1.
    ldr r1, [sp, #12]           @ 4-byte Reload
Ltmp58:
    @DEBUG_VALUE: bytesPerRow <- R1+0
    mov.w   r8, #0
    lsl.w   r11, r1, #1
   .loc    1 104 74                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74
Ltmp59:
    lsr.w   r10, r1, #1
Ltmp60:
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
LBB4_2:                                 @ =>This Inner Loop Header: Depth=1
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: width <- [sp+#16]+#0
    @DEBUG_VALUE: height <- R0+0
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
    @DEBUG_VALUE: data <- [sp+#8]+#0
    @DEBUG_VALUE: outputBuffer <- R5+0
    @DEBUG_VALUE: rowIndex <- 0+0
    @ Arguments for _resizeRow: r0 = outputBuffer + (r8 >> 1) * r10,
    @ r1 = r4 (current source position), r2 = width. height is parked in r6
    @ across the call.
    lsr.w   r1, r8, #1
Ltmp61:
    mov r6, r0
Ltmp62:
    @DEBUG_VALUE: height <- R6+0
    mla r0, r1, r10, r5
Ltmp63:
    @DEBUG_VALUE: destRow <- R1+0
   .loc    1 105 9                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9
    ldr r2, [sp, #16]           @ 4-byte Reload
    mov r1, r4
Ltmp64:
    bl  _resizeRow
    mov r0, r6
Ltmp65:
    @DEBUG_VALUE: height <- R0+0
   .loc    1 101 50                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50
    @ step: r8 += 2, r4 += r11; keep looping while r8 < height (unsigned)
    add.w   r8, r8, #2
Ltmp66:
    @DEBUG_VALUE: rowIndex <- R8+0
   .loc    1 101 29                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29
    add r4, r11
    cmp r8, r0
    blo LBB4_2
Ltmp67:
LBB4_3:                                 @ %._crit_edge
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0
    @DEBUG_VALUE: width <- [sp+#16]+#0
    @DEBUG_VALUE: height <- R0+0
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0
    @DEBUG_VALUE: data <- [sp+#8]+#0
    @DEBUG_VALUE: outputBuffer <- R5+0
   .loc    1 109 28                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28
    @ Arguments for _createBitmapContext: r0 = outputBuffer, r1 = width >> 1,
    @ r2 = height >> 1, r3 = bytesPerRow >> 1, [sp] = imageAlpha.
    ldr r1, [sp, #4]            @ 4-byte Reload
Ltmp68:
    lsrs    r2, r0, #1
    str r1, [sp]
    mov r6, r5
Ltmp69:
    @DEBUG_VALUE: outputBuffer <- R6+0
    ldr r1, [sp, #16]           @ 4-byte Reload
    ldr r0, [sp, #12]           @ 4-byte Reload
Ltmp70:
    lsrs    r1, r1, #1
    lsrs    r3, r0, #1
    mov r0, r5
    bl  _createBitmapContext
    mov r4, r0
Ltmp71:
    @DEBUG_VALUE: context <- R4+0
   .loc    1 110 30                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
    blx _CGBitmapContextCreateImage
   .loc    1 111 66                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
    movw    r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
   .loc    1 110 30                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30
    mov r5, r0
Ltmp72:
    @DEBUG_VALUE: scaledImage <- R5+0
   .loc    1 111 66                @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66
    movt    r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4))
    movw    r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
    movt    r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4))
LPC4_1:
    add r1, pc
LPC4_2:
    add r0, pc
    mov r2, r5
    ldr r1, [r1]
    ldr r0, [r0]
    blx _objc_msgSend
Ltmp73:
    @DEBUG_VALUE: returnImage <- R0+0
    @ InlineAsm Start
    mov r7, r7      @ marker for objc_retainAutoreleaseReturnValue
    @ InlineAsm End
    blx _objc_retainAutoreleasedReturnValue
Ltmp74:
    mov r8, r0
    @ Cleanup: release scaledImage (r5), context (r4) and data ([sp, #8]),
    @ then free outputBuffer (r6).
   .loc    1 112 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5
    mov r0, r5
    blx _CGImageRelease
   .loc    1 113 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5
    mov r0, r4
    blx _CGContextRelease
   .loc    1 114 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5
    ldr r0, [sp, #8]            @ 4-byte Reload
    blx _CFRelease
   .loc    1 115 5                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5
    mov r0, r6
    blx _free
Ltmp75:
   .loc    1 118 1                 @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1
    @ Epilogue: r0 = r8 (value kept from objc_retainAutoreleasedReturnValue),
    @ restore the stack and saved registers, then tail-branch to
    @ _objc_autoreleaseReturnValue.
    mov r0, r8
    add sp, #20
    pop.w   {r8, r10, r11}
    pop.w   {r4, r5, r6, r7, lr}
Ltmp76:
    b.w _objc_autoreleaseReturnValue
Ltmp77:
Lfunc_end4:
   .cfi_endproc

   @ Thumb code (".code 16") for resizeRow.
   @ On entry r0 = dst, r1 = src, r2 = pixelsPerRow (see the DEBUG_VALUE notes).
   .align  2
   .code   16                      @ @resizeRow
   .thumb_func _resizeRow
_resizeRow:
   .cfi_startproc
Lfunc_begin5:
   .loc    1 26 0                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0
@ BB#0:
    @DEBUG_VALUE: resizeRow:dst <- R0+0
    @DEBUG_VALUE: resizeRow:src <- R1+0
    @DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0
   .loc    1 27 47 prologue_end    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47
    @ r3 = rowB = src + pixelsPerRow * 4 bytes
    add.w   r3, r1, r2, lsl #2
Ltmp78:
    @DEBUG_VALUE: rowB <- R3+0
   .loc    1 30 5                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5
    @ clear the low three bits: pixelsPerRow rounded down to a multiple of 8
    bic r2, r2, #7
Ltmp79:
   .loc    1 32 5                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5
    @ Each pass of the loop below loads 32 bytes from [r1] and 32 bytes from
    @ [r3] (both post-incremented), stores 16 bytes to [r0] (post-incremented)
    @ and subtracts 8 from r2, repeating until r2 reaches zero.
    @ NOTE(review): the counter is tested only after the subtraction, so r2 == 0
    @ on entry does not terminate the loop on the first pass.
    @ InlineAsm Start
    Lresizeloop:                      
vld1.32       {d0-d3}, [r1]!      
vld1.32       {d4-d7}, [r3]!      
vhadd.u8      q0, q0, q2          
vhadd.u8      q1, q1, q3          
vtrn.32       q0, q2              
vtrn.32       q1, q3              
vhadd.u8      q0, q0, q2          
vhadd.u8      q1, q1, q3          
vtrn.32       d0, d1              
vtrn.32       d2, d3              
vswp          d1, d2              
vst1.64       {d0-d1}, [r0]!      
subs          r2, r2, #8          
bne           Lresizeloop         

    @ InlineAsm End
Ltmp80:
   .loc    1 51 1                  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1
    @ return to the caller
    bx  lr
Ltmp81:
Lfunc_end5:
   .cfi_endproc

5
задан Tark 16 August 2012 в 18:03
поделиться