Skip to content

Commit

Permalink
Reduce CPU usage when FFOSD not in use in 12bpp modes
Browse files Browse the repository at this point in the history
  • Loading branch information
IanSB committed Apr 19, 2021
1 parent 63b3375 commit fddfad7
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 14 deletions.
41 changes: 37 additions & 4 deletions src/capture_line_default_twelvebits_8bpp_16bpp.S
Original file line number Diff line number Diff line change
Expand Up @@ -94,24 +94,27 @@ preload_capture_line_default_eightbits_8bpp:
capture_line_default_twelvebits_16bpp:
push {lr}
SETUP_VSYNC_DEBUG_16BPP_R11
tst r3, #BIT_OSD
bne OSD_capture_line_default_twelvebits_16bpp

SKIP_PSYNC_NO_OLD_CPLD
SETUP_TWELVE_BITS_MASK_R14
loop_16bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8

Expand All @@ -121,6 +124,36 @@ loop_16bpp:
bne loop_16bpp

pop {r0, pc}

// OSD variant of the twelve-bit 16bpp capture loop.
// Reached from capture_line_default_twelvebits_16bpp when BIT_OSD is set in r3
// (tst r3, #BIT_OSD / bne), i.e. after that routine's push {lr} and
// SETUP_VSYNC_DEBUG_16BPP_R11 have already run.
// Each pass of the loop waits for eight psync edges. Samples are taken in
// LO/HI pairs: the LO macro builds the low 16 bits in r10, the HI macro merges
// the next sample into bits 31..16, giving one word each in r5, r6, r7 and r10.
// r1 is the pass counter: decremented once per pass, loop exits when it hits zero.
OSD_capture_line_default_twelvebits_16bpp:
SKIP_PSYNC_NO_OLD_CPLD
SETUP_TWELVE_BITS_MASK_R14
OSD_loop_16bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8

// presumably stores the four captured words r5, r6, r7, r10 - macro body not shown here, confirm in macros.S
WRITE_R5_R6_R7_R10_16BPP

subs r1, r1, #1
bne OSD_loop_16bpp

// NOTE(review): two words are popped (r0 and the return address) while the visible
// entry code pushes only lr; the matching push of r0 is not visible in this view - confirm.
pop {r0, pc}

// Alternate entry point: runs SETUP_DUMMY_PARAMETERS and then jumps straight into
// capture_line_default_twelvebits_16bpp (plain branch, so that routine returns to our caller).
// Presumably the cache-preload entry invoked from rgb_to_fb.S - confirm against the caller.
preload_capture_line_default_twelvebits_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_default_twelvebits_16bpp
Expand Down
40 changes: 36 additions & 4 deletions src/capture_line_fast_twelvebits_8bpp_16bpp.S
Original file line number Diff line number Diff line change
Expand Up @@ -94,24 +94,27 @@ preload_capture_line_fast_eightbits_8bpp:
capture_line_fast_twelvebits_16bpp:
push {lr}
SETUP_VSYNC_DEBUG_16BPP_R11
tst r3, #BIT_OSD
bne OSD_capture_line_fast_twelvebits_16bpp

SKIP_PSYNC_NO_OLD_CPLD_FAST
SETUP_TWELVE_BITS_MASK_R14
loop_16bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8
stmia r0!, {r5, r6, r7, r10}
Expand All @@ -122,6 +125,35 @@ loop_16bpp:
mov r0, r2
pop {pc}

// OSD variant of the fast twelve-bit 16bpp capture loop.
// Reached from capture_line_fast_twelvebits_16bpp when BIT_OSD is set in r3
// (tst r3, #BIT_OSD / bne), i.e. after that routine's push {lr} and
// SETUP_VSYNC_DEBUG_16BPP_R11 have already run.
// Each pass of the loop waits for eight psync edges. Samples are taken in
// LO/HI pairs: the LO macro builds the low 16 bits in r10, the HI macro merges
// the next sample into bits 31..16, giving one word each in r5, r6, r7 and r10.
// Those four words are stored at r0, which is advanced by 16 bytes per pass.
// r1 is the pass counter: decremented once per pass, loop exits when it hits zero.
OSD_capture_line_fast_twelvebits_16bpp:
SKIP_PSYNC_NO_OLD_CPLD_FAST
SETUP_TWELVE_BITS_MASK_R14
OSD_loop_16bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8
// store the four packed words and post-increment the destination pointer
stmia r0!, {r5, r6, r7, r10}

subs r1, r1, #1
bne OSD_loop_16bpp

// the value held in r2 is handed back in r0; pop {pc} returns via the lr pushed at entry
mov r0, r2
pop {pc}

// Alternate entry point: runs SETUP_DUMMY_PARAMETERS and then jumps straight into
// capture_line_fast_twelvebits_16bpp (plain branch, so that routine returns to our caller).
// Presumably the cache-preload entry invoked from rgb_to_fb.S - confirm against the caller.
preload_capture_line_fast_twelvebits_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_twelvebits_16bpp
Expand Down
26 changes: 22 additions & 4 deletions src/macros.S
Original file line number Diff line number Diff line change
Expand Up @@ -798,21 +798,39 @@ wait_wr\@:
// Pixel in GPIO 13.. 2 -> 15.. 0
and r9, r8, r14
eor r10, \reg, r9, lsr #(PIXEL_BASE)
.endm

// Merge a second sample into the upper half of a packed word (no MUX_MASK test).
// In:       r8  = sample word, r14 = bit mask applied to it,
//           r10 = lower half, as left by the matching ..._LO macro
// Out:      \reg = r10 EOR ((r8 AND r14) << (16 - PIXEL_BASE))
// Clobbers: r9 (and \reg). Flags are untouched (no S-suffixed instructions).
.macro CAPTURE_TWELVE_BITS_16BPP_HI reg
// Pixel in GPIO 13.. 2 -> 31.. 16
and r9, r8, r14
eor \reg, r10, r9, lsl #(16 - PIXEL_BASE)
.endm

// Build the lower half of a packed word and test the sample against MUX_MASK.
// In:       r8  = sample word, r14 = bit mask applied to it, \reg = value EORed into the result
// Out:      r10 = \reg EOR ((r8 AND r14) >> PIXEL_BASE)
//           If any MUX_MASK bit is set in r8: r10 |= 0xff00 and r3 |= BIT_PROBE
// Clobbers: r9, condition flags (tst)
// NOTE(review): this text comes from a diff rendering without +/- markers; one of the
// conditional lines below may be a removed line rather than part of the final macro - confirm.
.macro OSD_TEST_CAPTURE_TWELVE_BITS_16BPP_LO reg
// Pixel in GPIO 13.. 2 -> 15.. 0
and r9, r8, r14
eor r10, \reg, r9, lsr #(PIXEL_BASE)
tst r8, #MUX_MASK
orrne r10, #0xff00
orrne r3, #BIT_PROBE
.endm

// Build the lower half of a packed word; when MUX_MASK is set in the sample,
// flag it in r3 and force the whole lower half to ones.
// In:       r8  = sample word, r14 = bit mask applied to it, \reg = value EORed into the result
// Out:      r10 = \reg EOR ((r8 AND r14) >> PIXEL_BASE)
//           If any MUX_MASK bit is set in r8: r3 |= BIT_PROBE and r10 |= 0x0000ffff
// Clobbers: r9, condition flags (tst)
.macro OSD_CAPTURE_TWELVE_BITS_16BPP_LO reg
// Pixel in GPIO 13.. 2 -> 15.. 0
and r9, r8, r14
eor r10, \reg, r9, lsr #(PIXEL_BASE)
tst r8, #MUX_MASK
orrne r3, #BIT_PROBE
// 0xffff is ORed in as two byte-wide immediates (presumably because it does not fit a single ARM immediate)
orrne r10, #0xff00
orrne r10, #0x00ff
.endm

// Merge a second sample into the upper half of a packed word; when MUX_MASK is set
// in the sample, flag it in r3 and force the whole upper half to ones.
// In:       r8  = sample word, r14 = bit mask applied to it,
//           r10 = lower half, as left by the matching ..._LO macro
// Out:      \reg = r10 EOR ((r8 AND r14) << (16 - PIXEL_BASE))
//           If any MUX_MASK bit is set in r8: r3 |= BIT_PROBE and \reg |= 0xffff0000
// Clobbers: r9, condition flags (tst)
// NOTE(review): two .macro headers appear back to back and the 0xff000000 ORR appears
// twice; this looks like a diff rendering in which the first header and the first ORR are
// removed lines shown next to their replacements - confirm against the real macros.S.
.macro CAPTURE_TWELVE_BITS_16BPP_HI reg
.macro OSD_CAPTURE_TWELVE_BITS_16BPP_HI reg
// Pixel in GPIO 13.. 2 -> 31.. 16
and r9, r8, r14
eor \reg, r10, r9, lsl #(16 - PIXEL_BASE)

tst r8, #MUX_MASK
orrne \reg, \reg, #(0xff000000)
orrne r3, #BIT_PROBE
orrne \reg, \reg, #(0xff000000)
orrne \reg, \reg, #(0x00ff0000)
.endm

Expand Down
2 changes: 0 additions & 2 deletions src/rgb_to_fb.S
Original file line number Diff line number Diff line change
Expand Up @@ -681,13 +681,11 @@ skip_line_loop_exit:
ldr r12, capture_address
sub r12, r12, #4
// Call preload capture line function (runs all paths of capture code to preload it into cache - OSD version)
// waits for csync so loses one line
blx r12
pop {r3}
ldr r12, capture_address
sub r12, r12, #4
// Call preload capture line function (runs all paths of capture code to preload it into cache)
// waits for csync so loses one line
blx r12
pop {r1-r5, r11}
mov r6, #0
Expand Down

0 comments on commit fddfad7

Please sign in to comment.