From 3d9802260e7f1fecf9bc75fddb87f810848869ba Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 10:55:22 +0000 Subject: [PATCH 01/19] Document purpose and timing of existing M0 code. This commit does not modify the code; it only updates comments. --- firmware/hackrf_usb/sgpio_m0.s | 195 +++++++++++++++++++++++++-------- 1 file changed, 147 insertions(+), 48 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 9bbca574..9423d363 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -1,9 +1,93 @@ /* - * This file is part of GreatFET + * Copyright 2019-2022 Great Scott Gadgets * - * Specialized SGPIO interrupt handler for Rhododendron. + * This file is part of HackRF. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. */ +/* + +Introduction +============ + +This file contains the code that runs on the Cortex-M0 core of the LPC43xx. + +The M0 core is used to implement all the timing-critical usage of the SGPIO +peripheral, which interfaces to the MAX5864 ADC/DAC via the CPLD. + +The M0 reads or writes 32 bytes at a time from the SGPIO registers, +transferring these bytes to or from a shared USB bulk buffer. The M4 core +handles transferring data between this buffer and the USB host. + +The SGPIO peripheral is set up and enabled by the M4 core. All the M0 needs to +do is handle the SGPIO exchange interrupt, which indicates that new data can +now be read from or written to the SGPIO shadow registers. + +Timing +====== + +This code has tight timing constraints. + +We have to complete a read or write from SGPIO every 163 cycles. + +The CPU clock is 204MHz. We exchange 32 bytes at a time in the SGPIO +registers, which is 16 samples worth of IQ data. At the maximum sample rate of +20MHz, the SGPIO update rate is 20 / 16 = 1.25MHz. So we have 204 / 1.25 = +163.2 cycles available. + +Access to the SGPIO peripheral is slow, due to the asynchronous bridge that +connects it to the AHB bus matrix. Section 20.4.1 of the LPC43xx user manual +(UM10503) specifies the access latencies as: + +Read: 4 x MCLK + 4 x CLK_PERIPH_SGPIO +Write: 4 x MCLK + 2 x CLK_PERIPH_SGPIO + +In our case both these clocks are at 204MHz so reads add 8 cycles and writes +add 6. These are latencies that add to the usual M0 instruction timings, so an +ldr from SGPIO takes 10 cycles, and an str to SGPIO takes 8 cycles. + +These latencies are assumed to apply to all accesses to the SGPIO peripheral's +address space, which includes its interrupt control registers as well as the +shadow registers. + +There are two key code paths, with the following worst-case timings: + +RX: 159 cycles +TX: 144 cycles + +Design +====== + +Due to the timing constraints, this code is highly optimised. + +This is the only code that runs on the M0, so it does not need to follow +calling conventions, nor use features of the architecture in standard ways. + +The SGPIO handling does not run as an ISR. It polls the interrupt status. +This saves the cycle costs of interrupt entry and exit, and allows all +registers to be used freely. + +All possible registers, including the stack pointer and link register, can be +used to store values needed in the code, to minimise memory loads and stores. + +There are no function calls. There is no stack usage. All values are in +registers and fixed memory addresses. + +*/ // Constants that point to registers we'll need to modify in the SGPIO block. .equ SGPIO_REGISTER_BLOCK_BASE, 0x40101000 @@ -19,36 +103,53 @@ .equ TARGET_BUFFER_TX, 0x20007004 .equ TARGET_BUFFER_MASK, 0x7fff +// Entry point. At this point, the libopencm3 startup code has set things up as +// normal; .data and .bss are initialised, the stack is set up, etc. However, +// we don't actually use any of that. All the code in this file would work +// fine if the M0 jumped straight to main at reset. .global main .thumb_func -main: +main: // Cycle counts: + // The worst case timing is assumed to occur when reading the interrupt + // status register *just* misses the flag being set - so we include the + // cycles required to check it a second time. + // + // We also assume that we can spend a full 10 cycles doing an ldr from + // SGPIO the first time (2 for ldr, plus 8 for SGPIO-AHB bus latency), + // and still miss a flag that was set at the start of those 10 cycles. + // + // This latter asssumption is probably slightly pessimistic, since the + // sampling of the flag on the SGPIO side must occur some time after + // the ldr instruction begins executing on the M0. However, we avoid + // relying on any assumptions about the timing details of a read over + // the SGPIO to AHB bridge. // Spin until we're ready to handle an SGPIO packet: // Grab the exchange interrupt staus... - ldr r0, =SGPIO_EXCHANGE_INTERRUPT_STATUS_REG - ldr r0, [r0] + ldr r0, =SGPIO_EXCHANGE_INTERRUPT_STATUS_REG // 2, twice + ldr r0, [r0] // 10, twice // ... check to see if it has any interrupt bits set... - lsr r0, #1 + lsr r0, #1 // 1, twice // ... and if not, jump back to the beginning. - bcc main + bcc main // 3, then 1 // Clear the interrupt pending bits for the SGPIO slices we're working with. - ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG - ldr r1, =0xffff - str r1, [r0] + ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG // 2 + ldr r1, =0xffff // 2 + str r1, [r0] // 8 // Grab the base address of the SGPIO shadow registers... - ldr r7, =SGPIO_SHADOW_REGISTERS_BASE + ldr r7, =SGPIO_SHADOW_REGISTERS_BASE // 2 // ... and grab the address of the buffer segment we want to write to / read from. - ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer - ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer - ldr r2, [r3] // r2 = position_in_buffer - add r6, r0, r2 // r6 = buffer_target = &buffer + position_in_buffer + ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer // 2 + ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer // 2 + ldr r2, [r3] // r2 = position_in_buffer // 2 + add r6, r0, r2 // r6 = buffer_target = &buffer + position_in_buffer // 1 - mov r8, r3 // Store &position_in_buffer. + mov r8, r3 // Store &position_in_buffer. // 1 // Our slice chain is set up as follows (ascending data age; arrows are reversed for flow): // L -> F -> K -> C -> J -> E -> I -> A @@ -56,53 +157,51 @@ main: // 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0 // Load direction (TX or RX) - ldr r0, =TARGET_BUFFER_TX - ldr r0, [r0] + ldr r0, =TARGET_BUFFER_TX // 2 + ldr r0, [r0] // 2 // TX? - lsr r0, #1 - bcc direction_rx + lsr r0, #1 // 1 + bcc direction_rx // 1 thru, 3 taken direction_tx: - ldm r6!, {r0-r5} - str r0, [r7, #44] - str r1, [r7, #20] - str r2, [r7, #40] - str r3, [r7, #8 ] - str r4, [r7, #36] - str r5, [r7, #16] + ldm r6!, {r0-r5} // 7 + str r0, [r7, #44] // 8 + str r1, [r7, #20] // 8 + str r2, [r7, #40] // 8 + str r3, [r7, #8 ] // 8 + str r4, [r7, #36] // 8 + str r5, [r7, #16] // 8 - ldm r6!, {r0-r1} - str r0, [r7, #32] - str r1, [r7, #0] + ldm r6!, {r0-r1} // 3 + str r0, [r7, #32] // 8 + str r1, [r7, #0] // 8 - b done + b done // 3 direction_rx: - // 8 cycles - ldr r0, [r7, #44] // 2 - ldr r1, [r7, #20] // 2 - ldr r2, [r7, #40] // 2 - ldr r3, [r7, #8 ] // 2 - ldr r4, [r7, #36] // 2 - ldr r5, [r7, #16] // 2 - stm r6!, {r0-r5} // 7 + ldr r0, [r7, #44] // 10 + ldr r1, [r7, #20] // 10 + ldr r2, [r7, #40] // 10 + ldr r3, [r7, #8 ] // 10 + ldr r4, [r7, #36] // 10 + ldr r5, [r7, #16] // 10 + stm r6!, {r0-r5} // 7 - // 6 cycles - ldr r0, [r7, #32] // 2 - ldr r1, [r7, #0] // 2 - stm r6!, {r0-r1} + ldr r0, [r7, #32] // 10 + ldr r1, [r7, #0] // 10 + stm r6!, {r0-r1} // 3 done: // Finally, update the buffer location... - ldr r0, =TARGET_BUFFER_MASK - and r0, r6, r0 // r0 = (position_in_buffer + size_copied) % buffer_size + ldr r0, =TARGET_BUFFER_MASK // 2 + and r0, r6, r0 // r0 = (pos_in_buffer + size_copied) % buffer_size // 1 // ... restore &position_in_buffer, and store the new position there... - mov r1, r8 - str r0, [r1] // position_in_buffer = (position_in_buffer + size_copied) % buffer_size + mov r1, r8 // 1 + str r0, [r1] // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size // 2 - b main + b main // 3 From 39c6f3385e92c0dc38ed4f10136a960c2842fb9f Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 11:19:26 +0000 Subject: [PATCH 02/19] Remove usb_bulk_buffer.c containing unused variable definition. This removes the definition of the offset variable, volatile uint32_t usb_bulk_buffer_offset = 0; which is actually superfluous. This variable, along with its neighbour usb_bulk_buffer_tx, is placed explicitly by the linker script. Its type is defined by the declaration in usb_bulk_buffer.h. There is no need to define it here, and doing so gives the misleading impression that its initial value can be changed by modifying this line! The initialization to zero never actually takes effect, because the variable is not placed in the .data or .bss sections which are initialised by the startup code. The offset and tx variables are both set in set_transceiver_mode before SGPIO streaming is started, so the M0 code does not use them uninitialised. --- firmware/hackrf_usb/CMakeLists.txt | 1 - firmware/hackrf_usb/usb_bulk_buffer.c | 25 ------------------------- 2 files changed, 26 deletions(-) delete mode 100644 firmware/hackrf_usb/usb_bulk_buffer.c diff --git a/firmware/hackrf_usb/CMakeLists.txt b/firmware/hackrf_usb/CMakeLists.txt index 651c6527..1e58db95 100644 --- a/firmware/hackrf_usb/CMakeLists.txt +++ b/firmware/hackrf_usb/CMakeLists.txt @@ -35,7 +35,6 @@ set(SRC_M4 hackrf_usb.c "${PATH_HACKRF_FIRMWARE_COMMON}/tuning.c" "${PATH_HACKRF_FIRMWARE_COMMON}/streaming.c" - usb_bulk_buffer.c "${PATH_HACKRF_FIRMWARE_COMMON}/usb.c" "${PATH_HACKRF_FIRMWARE_COMMON}/usb_request.c" "${PATH_HACKRF_FIRMWARE_COMMON}/usb_standard_request.c" diff --git a/firmware/hackrf_usb/usb_bulk_buffer.c b/firmware/hackrf_usb/usb_bulk_buffer.c deleted file mode 100644 index ea3e6b77..00000000 --- a/firmware/hackrf_usb/usb_bulk_buffer.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2012 Jared Boone - * Copyright 2013 Benjamin Vernoux - * - * This file is part of HackRF. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, - * Boston, MA 02110-1301, USA. - */ - -#include "usb_bulk_buffer.h" - -volatile uint32_t usb_bulk_buffer_offset = 0; From 5b2a3907282977f590091598902c0643f31af232 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 10:18:33 +0000 Subject: [PATCH 03/19] Move M0 offset and tx variables into a state struct. These variables are already placed together; this commit just groups them into a struct and declares this in a new header. This commit should not result in any change to the firmware binary. Only the names of symbols are changed. --- firmware/common/LPC43xx_M4_memory.ld | 3 +- firmware/hackrf_usb/m0_state.h | 36 +++++++++++++++++++++++ firmware/hackrf_usb/usb_api_sweep.c | 5 ++-- firmware/hackrf_usb/usb_api_transceiver.c | 17 ++++++----- firmware/hackrf_usb/usb_bulk_buffer.h | 4 --- 5 files changed, 49 insertions(+), 16 deletions(-) create mode 100644 firmware/hackrf_usb/m0_state.h diff --git a/firmware/common/LPC43xx_M4_memory.ld b/firmware/common/LPC43xx_M4_memory.ld index 6d501e3c..85b9a6c4 100644 --- a/firmware/common/LPC43xx_M4_memory.ld +++ b/firmware/common/LPC43xx_M4_memory.ld @@ -34,6 +34,5 @@ MEMORY } usb_bulk_buffer = ORIGIN(ram_usb); -usb_bulk_buffer_offset = ORIGIN(ram_shared); -usb_bulk_buffer_tx = ORIGIN(ram_shared)+4; +m0_state = ORIGIN(ram_shared); PROVIDE(__ram_m0_start__ = ORIGIN(ram_m0)); diff --git a/firmware/hackrf_usb/m0_state.h b/firmware/hackrf_usb/m0_state.h new file mode 100644 index 00000000..102a3ada --- /dev/null +++ b/firmware/hackrf_usb/m0_state.h @@ -0,0 +1,36 @@ +/* + * Copyright 2022 Great Scott Gadgets + * + * This file is part of HackRF. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +#ifndef __M0_STATE_H__ +#define __M0_STATE_H__ + +struct m0_state { + uint32_t offset; + uint32_t tx; +}; + +/* Address of m0_state is set in ldscripts. If you change the name of this + * variable, it won't be where it needs to be in the processor's address space, + * unless you also adjust the ldscripts. + */ +extern volatile struct m0_state m0_state; + +#endif/*__M0_STATE_H__*/ diff --git a/firmware/hackrf_usb/usb_api_sweep.c b/firmware/hackrf_usb/usb_api_sweep.c index 1a73e8dd..648a812d 100644 --- a/firmware/hackrf_usb/usb_api_sweep.c +++ b/firmware/hackrf_usb/usb_api_sweep.c @@ -25,6 +25,7 @@ #include #include "usb_api_transceiver.h" #include "usb_bulk_buffer.h" +#include "m0_state.h" #include "tuning.h" #include "usb_endpoint.h" #include "streaming.h" @@ -99,7 +100,7 @@ void sweep_mode(void) { while (TRANSCEIVER_MODE_RX_SWEEP == transceiver_mode()) { // Set up IN transfer of buffer 0. - if ( usb_bulk_buffer_offset >= 16384 && phase == 1) { + if ( m0_state.offset >= 16384 && phase == 1) { transfer = true; buffer = &usb_bulk_buffer[0x0000]; phase = 0; @@ -107,7 +108,7 @@ void sweep_mode(void) { } // Set up IN transfer of buffer 1. - if ( usb_bulk_buffer_offset < 16384 && phase == 0) { + if ( m0_state.offset < 16384 && phase == 0) { transfer = true; buffer = &usb_bulk_buffer[0x4000]; phase = 1; diff --git a/firmware/hackrf_usb/usb_api_transceiver.c b/firmware/hackrf_usb/usb_api_transceiver.c index 1f40c86d..a580bca7 100644 --- a/firmware/hackrf_usb/usb_api_transceiver.c +++ b/firmware/hackrf_usb/usb_api_transceiver.c @@ -27,6 +27,7 @@ #include #include "usb_bulk_buffer.h" +#include "m0_state.h" #include "usb_api_cpld.h" // Remove when CPLD update is handled elsewhere @@ -262,20 +263,20 @@ void set_transceiver_mode(const transceiver_mode_t new_transceiver_mode) { led_off(LED3); led_on(LED2); rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_RX); - usb_bulk_buffer_tx = false; + m0_state.tx = false; break; case TRANSCEIVER_MODE_TX: led_off(LED2); led_on(LED3); rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_TX); - usb_bulk_buffer_tx = true; + m0_state.tx = true; break; case TRANSCEIVER_MODE_OFF: default: led_off(LED2); led_off(LED3); rf_path_set_direction(&rf_path, RF_PATH_DIRECTION_OFF); - usb_bulk_buffer_tx = false; + m0_state.tx = false; } @@ -284,7 +285,7 @@ void set_transceiver_mode(const transceiver_mode_t new_transceiver_mode) { hw_sync_enable(_hw_sync_mode); - usb_bulk_buffer_offset = 0; + m0_state.offset = 0; } } @@ -330,7 +331,7 @@ void rx_mode(void) { while (TRANSCEIVER_MODE_RX == _transceiver_mode) { // Set up IN transfer of buffer 0. - if (16384 <= usb_bulk_buffer_offset && 1 == phase) { + if (16384 <= m0_state.offset && 1 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_in, &usb_bulk_buffer[0x0000], @@ -340,7 +341,7 @@ void rx_mode(void) { phase = 0; } // Set up IN transfer of buffer 1. - if (16384 > usb_bulk_buffer_offset && 0 == phase) { + if (16384 > m0_state.offset && 0 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_in, &usb_bulk_buffer[0x4000], @@ -368,7 +369,7 @@ void tx_mode(void) { while (TRANSCEIVER_MODE_TX == _transceiver_mode) { // Set up OUT transfer of buffer 0. - if (16384 <= usb_bulk_buffer_offset && 1 == phase) { + if (16384 <= m0_state.offset && 1 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_out, &usb_bulk_buffer[0x0000], @@ -378,7 +379,7 @@ void tx_mode(void) { phase = 0; } // Set up OUT transfer of buffer 1. - if (16384 > usb_bulk_buffer_offset && 0 == phase) { + if (16384 > m0_state.offset && 0 == phase) { usb_transfer_schedule_block( &usb_endpoint_bulk_out, &usb_bulk_buffer[0x4000], diff --git a/firmware/hackrf_usb/usb_bulk_buffer.h b/firmware/hackrf_usb/usb_bulk_buffer.h index 0900fd4d..6e120714 100644 --- a/firmware/hackrf_usb/usb_bulk_buffer.h +++ b/firmware/hackrf_usb/usb_bulk_buffer.h @@ -32,8 +32,4 @@ */ extern uint8_t usb_bulk_buffer[32768]; -extern volatile uint32_t usb_bulk_buffer_offset; - -extern bool usb_bulk_buffer_tx; - #endif/*__USB_BULK_BUFFER_H__*/ From dc0f8f48c587d3289068809d502b8a2e187c1351 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 12:18:05 +0000 Subject: [PATCH 04/19] Use defines for offsets into SGPIO shadow registers. This is just to make the SGPIO code less cryptic, and to place the explanation of the offsets closer to where they are defined. --- firmware/hackrf_usb/sgpio_m0.s | 50 ++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 9423d363..1cfe6729 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -103,6 +103,19 @@ registers and fixed memory addresses. .equ TARGET_BUFFER_TX, 0x20007004 .equ TARGET_BUFFER_MASK, 0x7fff +// Our slice chain is set up as follows (ascending data age; arrows are reversed for flow): +// L -> F -> K -> C -> J -> E -> I -> A +// Which has equivalent shadow register offsets: +// 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0 +.equ SLICE0, 44 +.equ SLICE1, 20 +.equ SLICE2, 40 +.equ SLICE3, 8 +.equ SLICE4, 36 +.equ SLICE5, 16 +.equ SLICE6, 32 +.equ SLICE7, 0 + // Entry point. At this point, the libopencm3 startup code has set things up as // normal; .data and .bss are initialised, the stack is set up, etc. However, // we don't actually use any of that. All the code in this file would work @@ -151,11 +164,6 @@ main: mov r8, r3 // Store &position_in_buffer. // 1 - // Our slice chain is set up as follows (ascending data age; arrows are reversed for flow): - // L -> F -> K -> C -> J -> E -> I -> A - // Which has equivalent shadow register offsets: - // 44 -> 20 -> 40 -> 8 -> 36 -> 16 -> 32 -> 0 - // Load direction (TX or RX) ldr r0, =TARGET_BUFFER_TX // 2 ldr r0, [r0] // 2 @@ -167,31 +175,31 @@ main: direction_tx: ldm r6!, {r0-r5} // 7 - str r0, [r7, #44] // 8 - str r1, [r7, #20] // 8 - str r2, [r7, #40] // 8 - str r3, [r7, #8 ] // 8 - str r4, [r7, #36] // 8 - str r5, [r7, #16] // 8 + str r0, [r7, #SLICE0] // 8 + str r1, [r7, #SLICE1] // 8 + str r2, [r7, #SLICE2] // 8 + str r3, [r7, #SLICE3] // 8 + str r4, [r7, #SLICE4] // 8 + str r5, [r7, #SLICE5] // 8 ldm r6!, {r0-r1} // 3 - str r0, [r7, #32] // 8 - str r1, [r7, #0] // 8 + str r0, [r7, #SLICE6] // 8 + str r1, [r7, #SLICE7] // 8 b done // 3 direction_rx: - ldr r0, [r7, #44] // 10 - ldr r1, [r7, #20] // 10 - ldr r2, [r7, #40] // 10 - ldr r3, [r7, #8 ] // 10 - ldr r4, [r7, #36] // 10 - ldr r5, [r7, #16] // 10 + ldr r0, [r7, #SLICE0] // 10 + ldr r1, [r7, #SLICE1] // 10 + ldr r2, [r7, #SLICE2] // 10 + ldr r3, [r7, #SLICE3] // 10 + ldr r4, [r7, #SLICE4] // 10 + ldr r5, [r7, #SLICE5] // 10 stm r6!, {r0-r5} // 7 - ldr r0, [r7, #32] // 10 - ldr r1, [r7, #0] // 10 + ldr r0, [r7, #SLICE6] // 10 + ldr r1, [r7, #SLICE7] // 10 stm r6!, {r0-r1} // 3 done: From f61a03deadaba30fbceacb3f2bbc067e6c738d49 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 12:19:01 +0000 Subject: [PATCH 05/19] Assign names to registers which are used for a single purpose. This is just to improve readability; there is no change to the code. --- firmware/hackrf_usb/sgpio_m0.s | 51 +++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 1cfe6729..1c47eb11 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -116,6 +116,11 @@ registers and fixed memory addresses. .equ SLICE6, 32 .equ SLICE7, 0 +/* Allocations of single-use registers */ + +sgpio_data .req r7 +buf_ptr .req r6 + // Entry point. At this point, the libopencm3 startup code has set things up as // normal; .data and .bss are initialised, the stack is set up, etc. However, // we don't actually use any of that. All the code in this file would work @@ -154,13 +159,13 @@ main: str r1, [r0] // 8 // Grab the base address of the SGPIO shadow registers... - ldr r7, =SGPIO_SHADOW_REGISTERS_BASE // 2 + ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // 2 // ... and grab the address of the buffer segment we want to write to / read from. ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer // 2 ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer // 2 ldr r2, [r3] // r2 = position_in_buffer // 2 - add r6, r0, r2 // r6 = buffer_target = &buffer + position_in_buffer // 1 + add buf_ptr, r0, r2 // buf_ptr = &buffer + position_in_buffer // 1 mov r8, r3 // Store &position_in_buffer. // 1 @@ -174,39 +179,39 @@ main: direction_tx: - ldm r6!, {r0-r5} // 7 - str r0, [r7, #SLICE0] // 8 - str r1, [r7, #SLICE1] // 8 - str r2, [r7, #SLICE2] // 8 - str r3, [r7, #SLICE3] // 8 - str r4, [r7, #SLICE4] // 8 - str r5, [r7, #SLICE5] // 8 + ldm buf_ptr!, {r0-r5} // 7 + str r0, [sgpio_data, #SLICE0] // 8 + str r1, [sgpio_data, #SLICE1] // 8 + str r2, [sgpio_data, #SLICE2] // 8 + str r3, [sgpio_data, #SLICE3] // 8 + str r4, [sgpio_data, #SLICE4] // 8 + str r5, [sgpio_data, #SLICE5] // 8 - ldm r6!, {r0-r1} // 3 - str r0, [r7, #SLICE6] // 8 - str r1, [r7, #SLICE7] // 8 + ldm buf_ptr!, {r0-r1} // 3 + str r0, [sgpio_data, #SLICE6] // 8 + str r1, [sgpio_data, #SLICE7] // 8 b done // 3 direction_rx: - ldr r0, [r7, #SLICE0] // 10 - ldr r1, [r7, #SLICE1] // 10 - ldr r2, [r7, #SLICE2] // 10 - ldr r3, [r7, #SLICE3] // 10 - ldr r4, [r7, #SLICE4] // 10 - ldr r5, [r7, #SLICE5] // 10 - stm r6!, {r0-r5} // 7 + ldr r0, [sgpio_data, #SLICE0] // 10 + ldr r1, [sgpio_data, #SLICE1] // 10 + ldr r2, [sgpio_data, #SLICE2] // 10 + ldr r3, [sgpio_data, #SLICE3] // 10 + ldr r4, [sgpio_data, #SLICE4] // 10 + ldr r5, [sgpio_data, #SLICE5] // 10 + stm buf_ptr!, {r0-r5} // 7 - ldr r0, [r7, #SLICE6] // 10 - ldr r1, [r7, #SLICE7] // 10 - stm r6!, {r0-r1} // 3 + ldr r0, [sgpio_data, #SLICE6] // 10 + ldr r1, [sgpio_data, #SLICE7] // 10 + stm buf_ptr!, {r0-r1} // 3 done: // Finally, update the buffer location... ldr r0, =TARGET_BUFFER_MASK // 2 - and r0, r6, r0 // r0 = (pos_in_buffer + size_copied) % buffer_size // 1 + and r0, buf_ptr, r0 // r0 = (pos_in_buffer + size_copied) % buffer_size // 1 // ... restore &position_in_buffer, and store the new position there... mov r1, r8 // 1 From c6362381d17464d04693f48c473322bec80a1e0d Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 12:25:41 +0000 Subject: [PATCH 06/19] Initialise register with a constant value before SGIO loop. Previously this register was reloaded with the same value during each loop. Initialising it once, outside the loop, saves two cycles. Note the separation of the loop start ("loop") from the entry point ("main"). Code between these labels will be run once, at startup. --- firmware/hackrf_usb/sgpio_m0.s | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 1c47eb11..713b1092 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -66,8 +66,8 @@ shadow registers. There are two key code paths, with the following worst-case timings: -RX: 159 cycles -TX: 144 cycles +RX: 157 cycles +TX: 142 cycles Design ====== @@ -128,6 +128,9 @@ buf_ptr .req r6 .global main .thumb_func main: // Cycle counts: + // Grab the base address of the SGPIO shadow registers... + ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // 2 +loop: // The worst case timing is assumed to occur when reading the interrupt // status register *just* misses the flag being set - so we include the // cycles required to check it a second time. @@ -151,16 +154,13 @@ main: lsr r0, #1 // 1, twice // ... and if not, jump back to the beginning. - bcc main // 3, then 1 + bcc loop // 3, then 1 // Clear the interrupt pending bits for the SGPIO slices we're working with. ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG // 2 ldr r1, =0xffff // 2 str r1, [r0] // 8 - // Grab the base address of the SGPIO shadow registers... - ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // 2 - // ... and grab the address of the buffer segment we want to write to / read from. ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer // 2 ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer // 2 @@ -217,4 +217,4 @@ done: mov r1, r8 // 1 str r0, [r1] // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size // 2 - b main // 3 + b loop // 3 From 9206a8b7526d75f91dc06dfcb6fab3dbcbc8c6b3 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 12:21:06 +0000 Subject: [PATCH 07/19] Free up two registers by accessing SGPIO in two 16-byte chunks. The current code does reads and writes in two chunks: one of 6 words, followed by one of 2. Instead, use two chunks of 4 words each. This takes the same number of total cycles, but frees up two registers for other uses. Note that we can't do things in one chunk, because we'd need eight registers to hold the data, plus a ninth to hold the buffer pointer. The ldm/stm instructions can only use the eight low registers, r0-r7. So we have to use two chunks, and the most register-efficient way to do that is to use two equal chunks. --- firmware/hackrf_usb/sgpio_m0.s | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 713b1092..f062739d 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -179,17 +179,17 @@ loop: direction_tx: - ldm buf_ptr!, {r0-r5} // 7 + ldm buf_ptr!, {r0-r3} // 5 str r0, [sgpio_data, #SLICE0] // 8 str r1, [sgpio_data, #SLICE1] // 8 str r2, [sgpio_data, #SLICE2] // 8 str r3, [sgpio_data, #SLICE3] // 8 - str r4, [sgpio_data, #SLICE4] // 8 - str r5, [sgpio_data, #SLICE5] // 8 - ldm buf_ptr!, {r0-r1} // 3 - str r0, [sgpio_data, #SLICE6] // 8 - str r1, [sgpio_data, #SLICE7] // 8 + ldm buf_ptr!, {r0-r3} // 5 + str r0, [sgpio_data, #SLICE4] // 8 + str r1, [sgpio_data, #SLICE5] // 8 + str r2, [sgpio_data, #SLICE6] // 8 + str r3, [sgpio_data, #SLICE7] // 8 b done // 3 @@ -199,13 +199,13 @@ direction_rx: ldr r1, [sgpio_data, #SLICE1] // 10 ldr r2, [sgpio_data, #SLICE2] // 10 ldr r3, [sgpio_data, #SLICE3] // 10 - ldr r4, [sgpio_data, #SLICE4] // 10 - ldr r5, [sgpio_data, #SLICE5] // 10 - stm buf_ptr!, {r0-r5} // 7 + stm buf_ptr!, {r0-r3} // 5 - ldr r0, [sgpio_data, #SLICE6] // 10 - ldr r1, [sgpio_data, #SLICE7] // 10 - stm buf_ptr!, {r0-r1} // 3 + ldr r0, [sgpio_data, #SLICE4] // 10 + ldr r1, [sgpio_data, #SLICE5] // 10 + ldr r2, [sgpio_data, #SLICE6] // 10 + ldr r3, [sgpio_data, #SLICE7] // 10 + stm buf_ptr!, {r0-r3} // 5 done: From 8f43dc1be57d497ab3e2dabec50e35cc46e42ef4 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 12:36:22 +0000 Subject: [PATCH 08/19] Use a register to hold base address of SGPIO interrupt registers. This allows us to use ldr/str with an immediate offset to access the SGPIO interrupt registers, rather than first having to load a register with the specific address we want to access. This change saves a total of 6 cycles, by eliminating two loads (2 cycles each), one of which could be executed twice. --- firmware/hackrf_usb/sgpio_m0.s | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index f062739d..6d910300 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -66,8 +66,8 @@ shadow registers. There are two key code paths, with the following worst-case timings: -RX: 157 cycles -TX: 142 cycles +RX: 151 cycles +TX: 136 cycles Design ====== @@ -92,10 +92,12 @@ registers and fixed memory addresses. // Constants that point to registers we'll need to modify in the SGPIO block. .equ SGPIO_REGISTER_BLOCK_BASE, 0x40101000 .equ SGPIO_SHADOW_REGISTERS_BASE, 0x40101100 -.equ SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG, 0x40101F30 -.equ SGPIO_EXCHANGE_INTERRUPT_STATUS_REG, 0x40101F2C +.equ SGPIO_EXCHANGE_INTERRUPT_BASE, 0x40101F00 .equ SGPIO_GPIO_INPUT, 0x40101210 +// Offsets into the interrupt control registers. +.equ INT_CLEAR, 0x30 +.equ INT_STATUS, 0x2C // Buffer that we're funneling data to/from. .equ TARGET_DATA_BUFFER, 0x20008000 @@ -119,7 +121,8 @@ registers and fixed memory addresses. /* Allocations of single-use registers */ sgpio_data .req r7 -buf_ptr .req r6 +sgpio_int .req r6 +buf_ptr .req r5 // Entry point. At this point, the libopencm3 startup code has set things up as // normal; .data and .bss are initialised, the stack is set up, etc. However, @@ -128,7 +131,8 @@ buf_ptr .req r6 .global main .thumb_func main: // Cycle counts: - // Grab the base address of the SGPIO shadow registers... + // Initialise registers used for constant values. + ldr sgpio_int, =SGPIO_EXCHANGE_INTERRUPT_BASE // 2 ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // 2 loop: // The worst case timing is assumed to occur when reading the interrupt @@ -147,8 +151,7 @@ loop: // Spin until we're ready to handle an SGPIO packet: // Grab the exchange interrupt staus... - ldr r0, =SGPIO_EXCHANGE_INTERRUPT_STATUS_REG // 2, twice - ldr r0, [r0] // 10, twice + ldr r0, [sgpio_int, #INT_STATUS] // 10, twice // ... check to see if it has any interrupt bits set... lsr r0, #1 // 1, twice @@ -157,9 +160,8 @@ loop: bcc loop // 3, then 1 // Clear the interrupt pending bits for the SGPIO slices we're working with. - ldr r0, =SGPIO_EXCHANGE_INTERRUPT_CLEAR_REG // 2 ldr r1, =0xffff // 2 - str r1, [r0] // 8 + str r1, [sgpio_int, #INT_CLEAR] // 8 // ... and grab the address of the buffer segment we want to write to / read from. ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer // 2 From 2f26ebffd4e4c5baf801906dec1e45ac9fa40254 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 12:58:10 +0000 Subject: [PATCH 09/19] Keep buffer base & size mask in high registers. The high registers (r8-r14) cannot be used directly by most of the instructions in the Cortex-M0 instruction set. One of the few instructions that can use them is mov, which can use any pair of registers. This allows saving two cycles, by replacing two loads (2 cycles each) with moves (1 cycle each), after stashing the required values in high registers at startup. --- firmware/hackrf_usb/sgpio_m0.s | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 6d910300..e0e09319 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -66,8 +66,8 @@ shadow registers. There are two key code paths, with the following worst-case timings: -RX: 151 cycles -TX: 136 cycles +RX: 149 cycles +TX: 134 cycles Design ====== @@ -120,6 +120,8 @@ registers and fixed memory addresses. /* Allocations of single-use registers */ +buf_base .req r12 +buf_mask .req r11 sgpio_data .req r7 sgpio_int .req r6 buf_ptr .req r5 @@ -134,6 +136,10 @@ main: // Initialise registers used for constant values. ldr sgpio_int, =SGPIO_EXCHANGE_INTERRUPT_BASE // 2 ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // 2 + ldr r0, =TARGET_DATA_BUFFER // 2 + mov buf_base, r0 // 1 + ldr r0, =TARGET_BUFFER_MASK // 2 + mov buf_mask, r0 // 1 loop: // The worst case timing is assumed to occur when reading the interrupt // status register *just* misses the flag being set - so we include the @@ -164,7 +170,7 @@ loop: str r1, [sgpio_int, #INT_CLEAR] // 8 // ... and grab the address of the buffer segment we want to write to / read from. - ldr r0, =TARGET_DATA_BUFFER // r0 = &buffer // 2 + mov r0, buf_base // r0 = &buffer // 1 ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer // 2 ldr r2, [r3] // r2 = position_in_buffer // 2 add buf_ptr, r0, r2 // buf_ptr = &buffer + position_in_buffer // 1 @@ -212,7 +218,7 @@ direction_rx: done: // Finally, update the buffer location... - ldr r0, =TARGET_BUFFER_MASK // 2 + mov r0, buf_mask // 1 and r0, buf_ptr, r0 // r0 = (pos_in_buffer + size_copied) % buffer_size // 1 // ... restore &position_in_buffer, and store the new position there... From f8ea1e8e56284669d0cf98e84a80893d43f482be Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 13:08:15 +0000 Subject: [PATCH 10/19] Use stack pointer to hold base address of state structure. Keeping the base address of this structure in a register allows us to use offsets to load individual fields from it, without needing their individual addresses. However, the ldr instruction can only use immediate offsets relative to the low registers (r0-r7), or the stack pointer (r13). Low registers are in short supply and are needed for other instructions which can only use r0-r7, so we use the stack pointer here. It's safe to do this because we do not use the stack. There are no function calls, interrupt handlers or push/pop instructions in the M0 code. This change saves four cycles by eliminating loads of the addresses for the offset & tx registers, plus a further two by eliminating the need to stash one of these addresses in r8. --- firmware/hackrf_usb/sgpio_m0.s | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index e0e09319..f77b81da 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -66,8 +66,8 @@ shadow registers. There are two key code paths, with the following worst-case timings: -RX: 149 cycles -TX: 134 cycles +RX: 143 cycles +TX: 128 cycles Design ====== @@ -101,10 +101,15 @@ registers and fixed memory addresses. // Buffer that we're funneling data to/from. .equ TARGET_DATA_BUFFER, 0x20008000 -.equ TARGET_BUFFER_POSITION, 0x20007000 -.equ TARGET_BUFFER_TX, 0x20007004 .equ TARGET_BUFFER_MASK, 0x7fff +// Base address of the state structure. +.equ STATE_BASE, 0x20007000 + +// Offsets into the state structure. +.equ OFFSET, 0x00 +.equ TX, 0x04 + // Our slice chain is set up as follows (ascending data age; arrows are reversed for flow): // L -> F -> K -> C -> J -> E -> I -> A // Which has equivalent shadow register offsets: @@ -120,6 +125,7 @@ registers and fixed memory addresses. /* Allocations of single-use registers */ +state .req r13 buf_base .req r12 buf_mask .req r11 sgpio_data .req r7 @@ -140,6 +146,9 @@ main: mov buf_base, r0 // 1 ldr r0, =TARGET_BUFFER_MASK // 2 mov buf_mask, r0 // 1 + ldr r0, =STATE_BASE // 2 + mov state, r0 // 1 + loop: // The worst case timing is assumed to occur when reading the interrupt // status register *just* misses the flag being set - so we include the @@ -171,15 +180,11 @@ loop: // ... and grab the address of the buffer segment we want to write to / read from. mov r0, buf_base // r0 = &buffer // 1 - ldr r3, =TARGET_BUFFER_POSITION // r3 = &position_in_buffer // 2 - ldr r2, [r3] // r2 = position_in_buffer // 2 + ldr r2, [state, #OFFSET] // r2 = position_in_buffer // 2 add buf_ptr, r0, r2 // buf_ptr = &buffer + position_in_buffer // 1 - mov r8, r3 // Store &position_in_buffer. // 1 - // Load direction (TX or RX) - ldr r0, =TARGET_BUFFER_TX // 2 - ldr r0, [r0] // 2 + ldr r0, [state, #TX] // 2 // TX? lsr r0, #1 // 1 @@ -221,8 +226,7 @@ done: mov r0, buf_mask // 1 and r0, buf_ptr, r0 // r0 = (pos_in_buffer + size_copied) % buffer_size // 1 - // ... restore &position_in_buffer, and store the new position there... - mov r1, r8 // 1 - str r0, [r1] // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size // 2 + // ... and store the new position. + str r0, [state, #OFFSET] // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size // 2 b loop // 3 From bd7d0b919447e3c04ad492f5d4829d196f122cb6 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 13:14:48 +0000 Subject: [PATCH 11/19] Correct a misleading comment. The effect of the lsr instruction here is to shift the LSB of r0 into the processor's carry flag. The subsequent bcc instruction ("branch if carry clear") will then branch if this bit was zero. The LSB corresponds to the exchange interrupt flag for slice A only. The other interrupt flag bits are not checked here, contrary to the comment. --- firmware/hackrf_usb/sgpio_m0.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index f77b81da..2bc1505b 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -168,7 +168,7 @@ loop: // Grab the exchange interrupt staus... ldr r0, [sgpio_int, #INT_STATUS] // 10, twice - // ... check to see if it has any interrupt bits set... + // ... check to see if bit #0 (slice A) was set, by shifting it into the carry bit... lsr r0, #1 // 1, twice // ... and if not, jump back to the beginning. From c1c665d5b82931a225a668b26a4f3241d3f6ac9c Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 13:16:30 +0000 Subject: [PATCH 12/19] Stash which interrupt bits were set, and use them to clear. The lsr instruction here shifts the value in r0 right by one bit, putting the LSB into the carry flag. By setting the destination register to r1, we can retain the original unshifted value in r0, and later write this to the INT_CLEAR register in order to clear all bits that were set. This saves two cycles by avoiding the need to load an 0xFFFF value to write to INT_CLEAR. --- firmware/hackrf_usb/sgpio_m0.s | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 2bc1505b..a6e9589c 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -66,8 +66,8 @@ shadow registers. There are two key code paths, with the following worst-case timings: -RX: 143 cycles -TX: 128 cycles +RX: 141 cycles +TX: 126 cycles Design ====== @@ -169,14 +169,13 @@ loop: ldr r0, [sgpio_int, #INT_STATUS] // 10, twice // ... check to see if bit #0 (slice A) was set, by shifting it into the carry bit... - lsr r0, #1 // 1, twice + lsr r1, r0, #1 // 1, twice // ... and if not, jump back to the beginning. bcc loop // 3, then 1 - // Clear the interrupt pending bits for the SGPIO slices we're working with. - ldr r1, =0xffff // 2 - str r1, [sgpio_int, #INT_CLEAR] // 8 + // Clear the interrupt pending bits that were set. + str r0, [sgpio_int, #INT_CLEAR] // 8 // ... and grab the address of the buffer segment we want to write to / read from. mov r0, buf_base // r0 = &buffer // 1 From e531fb507bb65d437d4267f78656c7abb0a7498a Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 13:26:57 +0000 Subject: [PATCH 13/19] Use faster way to calculate buffer pointer. One of the few instructions that can use the high registers (r8-r14) is the add instruction, which can add any two registers, as long as one of them is also used as the destination register. By using this form of add , we can add buf_base (in a high register) to the offset within the buffer (in a low register), to get the desired pointer value (buf_ptr) which we want to access. This saves one cycle by eliminating the need to move buf_base to a low register first. --- firmware/hackrf_usb/sgpio_m0.s | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index a6e9589c..2fd87695 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -66,8 +66,8 @@ shadow registers. There are two key code paths, with the following worst-case timings: -RX: 141 cycles -TX: 126 cycles +RX: 140 cycles +TX: 125 cycles Design ====== @@ -178,9 +178,8 @@ loop: str r0, [sgpio_int, #INT_CLEAR] // 8 // ... and grab the address of the buffer segment we want to write to / read from. - mov r0, buf_base // r0 = &buffer // 1 - ldr r2, [state, #OFFSET] // r2 = position_in_buffer // 2 - add buf_ptr, r0, r2 // buf_ptr = &buffer + position_in_buffer // 1 + ldr buf_ptr, [state, #OFFSET] // buf_ptr = position_in_buffer // 2 + add buf_ptr, buf_base // buf_ptr = &buffer + position_in_buffer // 1 // Load direction (TX or RX) ldr r0, [state, #TX] // 2 From 5df28efb3f19687a8bccc13413a3324782d23279 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 13:39:46 +0000 Subject: [PATCH 14/19] Assign names to registers used for temporary purposes. This is just to improve readability; there is no change to the code. --- firmware/hackrf_usb/sgpio_m0.s | 35 ++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 2fd87695..cbef0364 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -140,14 +140,15 @@ buf_ptr .req r5 .thumb_func main: // Cycle counts: // Initialise registers used for constant values. + value .req r0 ldr sgpio_int, =SGPIO_EXCHANGE_INTERRUPT_BASE // 2 ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // 2 - ldr r0, =TARGET_DATA_BUFFER // 2 - mov buf_base, r0 // 1 - ldr r0, =TARGET_BUFFER_MASK // 2 - mov buf_mask, r0 // 1 - ldr r0, =STATE_BASE // 2 - mov state, r0 // 1 + ldr value, =TARGET_DATA_BUFFER // 2 + mov buf_base, value // 1 + ldr value, =TARGET_BUFFER_MASK // 2 + mov buf_mask, value // 1 + ldr value, =STATE_BASE // 2 + mov state, value // 1 loop: // The worst case timing is assumed to occur when reading the interrupt @@ -164,28 +165,33 @@ loop: // relying on any assumptions about the timing details of a read over // the SGPIO to AHB bridge. + int_status .req r0 + scratch .req r1 + // Spin until we're ready to handle an SGPIO packet: // Grab the exchange interrupt staus... - ldr r0, [sgpio_int, #INT_STATUS] // 10, twice + ldr int_status, [sgpio_int, #INT_STATUS] // 10, twice // ... check to see if bit #0 (slice A) was set, by shifting it into the carry bit... - lsr r1, r0, #1 // 1, twice + lsr scratch, int_status, #1 // 1, twice // ... and if not, jump back to the beginning. bcc loop // 3, then 1 // Clear the interrupt pending bits that were set. - str r0, [sgpio_int, #INT_CLEAR] // 8 + str int_status, [sgpio_int, #INT_CLEAR] // 8 // ... and grab the address of the buffer segment we want to write to / read from. ldr buf_ptr, [state, #OFFSET] // buf_ptr = position_in_buffer // 2 add buf_ptr, buf_base // buf_ptr = &buffer + position_in_buffer // 1 + tx .req r0 + // Load direction (TX or RX) - ldr r0, [state, #TX] // 2 + ldr tx, [state, #TX] // 2 // TX? - lsr r0, #1 // 1 + lsr tx, #1 // 1 bcc direction_rx // 1 thru, 3 taken direction_tx: @@ -219,12 +225,13 @@ direction_rx: stm buf_ptr!, {r0-r3} // 5 done: + offset .req r0 // Finally, update the buffer location... - mov r0, buf_mask // 1 - and r0, buf_ptr, r0 // r0 = (pos_in_buffer + size_copied) % buffer_size // 1 + mov offset, buf_mask // 1 + and offset, buf_ptr // offset = (pos_in_buffer + size_copied) % buffer_size // 1 // ... and store the new position. - str r0, [state, #OFFSET] // pos_in_buffer = (pos_in_buffer + size_copied) % buffer_size // 2 + str offset, [state, #OFFSET] // 2 b loop // 3 From 14065bb69dd4cfed30e42497a4d22802b796d774 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 13:47:34 +0000 Subject: [PATCH 15/19] Initialise M0 state at startup. This is not currently essential, since the current M4 code will not trigger an SGPIO interrupt until the offset and tx fields are set. In future though, we want to explicitly set up the M0 state here. --- firmware/hackrf_usb/sgpio_m0.s | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index cbef0364..d06c295e 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -150,6 +150,12 @@ main: ldr value, =STATE_BASE // 2 mov state, value // 1 + // Initialise state. + zero .req r0 + mov zero, #0 // 1 + str zero, [state, #OFFSET] // 2 + str zero, [state, #TX] // 2 + loop: // The worst case timing is assumed to occur when reading the interrupt // status register *just* misses the flag being set - so we include the From 030898315d708d9a9c92e8649f535bc114eabf27 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Mon, 20 Dec 2021 14:03:05 +0000 Subject: [PATCH 16/19] Remove unused constants. Neither of these constants was used in the code. --- firmware/hackrf_usb/sgpio_m0.s | 2 -- 1 file changed, 2 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index d06c295e..8d7c76ed 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -90,10 +90,8 @@ registers and fixed memory addresses. */ // Constants that point to registers we'll need to modify in the SGPIO block. -.equ SGPIO_REGISTER_BLOCK_BASE, 0x40101000 .equ SGPIO_SHADOW_REGISTERS_BASE, 0x40101100 .equ SGPIO_EXCHANGE_INTERRUPT_BASE, 0x40101F00 -.equ SGPIO_GPIO_INPUT, 0x40101210 // Offsets into the interrupt control registers. .equ INT_CLEAR, 0x30 From 59be1fef5a3e1df9e525694f660ab85d4ce2086f Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Tue, 21 Dec 2021 21:09:45 +0000 Subject: [PATCH 17/19] Add pseudocode for all instructions. This is intended to make the code possible to follow without knowledge of the ARM instruction set. --- firmware/hackrf_usb/sgpio_m0.s | 90 +++++++++++++++++----------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index 8d7c76ed..f6c90361 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -139,20 +139,20 @@ buf_ptr .req r5 main: // Cycle counts: // Initialise registers used for constant values. value .req r0 - ldr sgpio_int, =SGPIO_EXCHANGE_INTERRUPT_BASE // 2 - ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // 2 - ldr value, =TARGET_DATA_BUFFER // 2 - mov buf_base, value // 1 - ldr value, =TARGET_BUFFER_MASK // 2 - mov buf_mask, value // 1 - ldr value, =STATE_BASE // 2 - mov state, value // 1 + ldr sgpio_int, =SGPIO_EXCHANGE_INTERRUPT_BASE // sgpio_int = SGPIO_INT_BASE // 2 + ldr sgpio_data, =SGPIO_SHADOW_REGISTERS_BASE // sgpio_data = SGPIO_REG_SS // 2 + ldr value, =TARGET_DATA_BUFFER // value = TARGET_DATA_BUFFER // 2 + mov buf_base, value // buf_base = value // 1 + ldr value, =TARGET_BUFFER_MASK // value = TARGET_DATA_MASK // 2 + mov buf_mask, value // buf_mask = value // 1 + ldr value, =STATE_BASE // value = STATE_BASE // 2 + mov state, value // state = value // 1 // Initialise state. zero .req r0 - mov zero, #0 // 1 - str zero, [state, #OFFSET] // 2 - str zero, [state, #TX] // 2 + mov zero, #0 // zero = 0 // 1 + str zero, [state, #OFFSET] // state.offset = zero // 2 + str zero, [state, #TX] // state.tx = zero // 2 loop: // The worst case timing is assumed to occur when reading the interrupt @@ -174,68 +174,68 @@ loop: // Spin until we're ready to handle an SGPIO packet: // Grab the exchange interrupt staus... - ldr int_status, [sgpio_int, #INT_STATUS] // 10, twice + ldr int_status, [sgpio_int, #INT_STATUS] // int_status = SGPIO_STATUS_1 // 10, twice // ... check to see if bit #0 (slice A) was set, by shifting it into the carry bit... - lsr scratch, int_status, #1 // 1, twice + lsr scratch, int_status, #1 // scratch = int_status >> 1 // 1, twice // ... and if not, jump back to the beginning. - bcc loop // 3, then 1 + bcc loop // if !carry: goto loop // 3, then 1 // Clear the interrupt pending bits that were set. - str int_status, [sgpio_int, #INT_CLEAR] // 8 + str int_status, [sgpio_int, #INT_CLEAR] // SGPIO_CLR_STATUS_1 = int_status // 8 // ... and grab the address of the buffer segment we want to write to / read from. - ldr buf_ptr, [state, #OFFSET] // buf_ptr = position_in_buffer // 2 - add buf_ptr, buf_base // buf_ptr = &buffer + position_in_buffer // 1 + ldr buf_ptr, [state, #OFFSET] // buf_ptr = state.offset // 2 + add buf_ptr, buf_base // buf_ptr += buf_base // 1 tx .req r0 // Load direction (TX or RX) - ldr tx, [state, #TX] // 2 + ldr tx, [state, #TX] // tx = state.tx // 2 // TX? - lsr tx, #1 // 1 - bcc direction_rx // 1 thru, 3 taken + lsr tx, #1 // tx >>= 1 // 1 + bcc direction_rx // if !carry: goto direction_rx // 1 thru, 3 taken direction_tx: - ldm buf_ptr!, {r0-r3} // 5 - str r0, [sgpio_data, #SLICE0] // 8 - str r1, [sgpio_data, #SLICE1] // 8 - str r2, [sgpio_data, #SLICE2] // 8 - str r3, [sgpio_data, #SLICE3] // 8 + ldm buf_ptr!, {r0-r3} // r0-r3 = buf_ptr[0:16]; buf_ptr += 16 // 5 + str r0, [sgpio_data, #SLICE0] // SGPIO_REG_SS[SLICE0] = r0 // 8 + str r1, [sgpio_data, #SLICE1] // SGPIO_REG_SS[SLICE1] = r1 // 8 + str r2, [sgpio_data, #SLICE2] // SGPIO_REG_SS[SLICE2] = r2 // 8 + str r3, [sgpio_data, #SLICE3] // SGPIO_REG_SS[SLICE3] = r3 // 8 - ldm buf_ptr!, {r0-r3} // 5 - str r0, [sgpio_data, #SLICE4] // 8 - str r1, [sgpio_data, #SLICE5] // 8 - str r2, [sgpio_data, #SLICE6] // 8 - str r3, [sgpio_data, #SLICE7] // 8 + ldm buf_ptr!, {r0-r3} // r0-r3 = buf_ptr[0:16]; buf_ptr += 16 // 5 + str r0, [sgpio_data, #SLICE4] // SGPIO_REG_SS[SLICE4] = r0 // 8 + str r1, [sgpio_data, #SLICE5] // SGPIO_REG_SS[SLICE5] = r1 // 8 + str r2, [sgpio_data, #SLICE6] // SGPIO_REG_SS[SLICE6] = r2 // 8 + str r3, [sgpio_data, #SLICE7] // SGPIO_REG_SS[SLICE7] = r3 // 8 - b done // 3 + b done // goto done // 3 direction_rx: - ldr r0, [sgpio_data, #SLICE0] // 10 - ldr r1, [sgpio_data, #SLICE1] // 10 - ldr r2, [sgpio_data, #SLICE2] // 10 - ldr r3, [sgpio_data, #SLICE3] // 10 - stm buf_ptr!, {r0-r3} // 5 + ldr r0, [sgpio_data, #SLICE0] // r0 = SGPIO_REG_SS[SLICE0] // 10 + ldr r1, [sgpio_data, #SLICE1] // r1 = SGPIO_REG_SS[SLICE1] // 10 + ldr r2, [sgpio_data, #SLICE2] // r2 = SGPIO_REG_SS[SLICE2] // 10 + ldr r3, [sgpio_data, #SLICE3] // r3 = SGPIO_REG_SS[SLICE3] // 10 + stm buf_ptr!, {r0-r3} // buf_ptr[0:16] = r0-r3; buf_ptr += 16 // 5 - ldr r0, [sgpio_data, #SLICE4] // 10 - ldr r1, [sgpio_data, #SLICE5] // 10 - ldr r2, [sgpio_data, #SLICE6] // 10 - ldr r3, [sgpio_data, #SLICE7] // 10 - stm buf_ptr!, {r0-r3} // 5 + ldr r0, [sgpio_data, #SLICE4] // r0 = SGPIO_REG_SS[SLICE4] // 10 + ldr r1, [sgpio_data, #SLICE5] // r1 = SGPIO_REG_SS[SLICE5] // 10 + ldr r2, [sgpio_data, #SLICE6] // r2 = SGPIO_REG_SS[SLICE6] // 10 + ldr r3, [sgpio_data, #SLICE7] // r3 = SGPIO_REG_SS[SLICE7] // 10 + stm buf_ptr!, {r0-r3} // buf_ptr[0:16] = r0-r3; buf_ptr += 16 // 5 done: offset .req r0 // Finally, update the buffer location... - mov offset, buf_mask // 1 - and offset, buf_ptr // offset = (pos_in_buffer + size_copied) % buffer_size // 1 + mov offset, buf_mask // offset = buf_mask // 1 + and offset, buf_ptr // offset &= buf_ptr // 1 // ... and store the new position. - str offset, [state, #OFFSET] // 2 + str offset, [state, #OFFSET] // state.offset = offset // 2 - b loop // 3 + b loop // goto loop // 3 From 42a7c5ede9888276268d13a9d90bee77505ec245 Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Tue, 21 Dec 2021 21:14:25 +0000 Subject: [PATCH 18/19] Add a label at the end of the code to indicate the literal pool. This makes objdump disassembly of the code a bit clearer, by separating the constants from code following the last label. --- firmware/hackrf_usb/sgpio_m0.s | 3 +++ 1 file changed, 3 insertions(+) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index f6c90361..dcb147b7 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -239,3 +239,6 @@ done: str offset, [state, #OFFSET] // state.offset = offset // 2 b loop // goto loop // 3 + +// The linker will put a literal pool here, so add a label for clearer objdump output: +constants: From 98df8c23be8836e6329e668dd149668f37c061da Mon Sep 17 00:00:00 2001 From: Martin Ling Date: Tue, 21 Dec 2021 21:41:10 +0000 Subject: [PATCH 19/19] Fix a typo. --- firmware/hackrf_usb/sgpio_m0.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firmware/hackrf_usb/sgpio_m0.s b/firmware/hackrf_usb/sgpio_m0.s index dcb147b7..0d1ab215 100644 --- a/firmware/hackrf_usb/sgpio_m0.s +++ b/firmware/hackrf_usb/sgpio_m0.s @@ -173,7 +173,7 @@ loop: scratch .req r1 // Spin until we're ready to handle an SGPIO packet: - // Grab the exchange interrupt staus... + // Grab the exchange interrupt status... ldr int_status, [sgpio_int, #INT_STATUS] // int_status = SGPIO_STATUS_1 // 10, twice // ... check to see if bit #0 (slice A) was set, by shifting it into the carry bit...