After some input, I found a solution: I redefined the HAL functions and stripped out everything that was slow:
/**
  * @brief  Set or clear the selected pin(s) with a single write to BSRR.
  * @param  GPIOx    GPIO port whose BSRR register is written.
  * @param  GPIO_Pin Pin mask to act on.
  * @param  PinState GPIO_PIN_RESET writes the mask shifted into the upper
  *                  half-word of BSRR; any other value writes it unshifted.
  */
void HAL_GPIO_WritePin_Fast(GPIO_TypeDef* GPIOx, uint16_t GPIO_Pin, GPIO_PinState PinState)
{
  /* Widen first so the 16-bit shift below is done on a 32-bit value */
  uint32_t bsrr_value = (uint32_t)GPIO_Pin;

  if (PinState == GPIO_PIN_RESET)
  {
    bsrr_value <<= 16U;
  }

  GPIOx->BSRR = bsrr_value;
}
/**
  * @brief  Transmit an amount of data in blocking mode, with the timeout
  *         handling and the handle-state check stripped out for speed.
  * @param  hspi    SPI handle; hspi->Init selects 8/16-bit frames, 1-line
  *                 direction and CRC, hspi->Instance is the peripheral used.
  * @param  pData   Buffer to send; read as uint16_t elements in 16-bit mode.
  * @param  Size    Number of data frames to send.
  * @param  Timeout Unused: no timeout is enforced, the TXE polling loops
  *                 spin until the flag is set.
  * @retval HAL_ERROR when pData is NULL, Size is 0, or hspi->ErrorCode is not
  *         HAL_SPI_ERROR_NONE at the end of the transfer; HAL_OK otherwise.
  * @note   Nothing here polls the BSY flag after the last write to DR.
  *         NOTE(review): the last frame is presumably still being shifted out
  *         when this returns - confirm before releasing chip select.
  * @note   NOTE(review): __HAL_LOCK is a macro defined elsewhere and may
  *         return early on its own when the handle is already locked - confirm.
  */
HAL_StatusTypeDef HAL_SPI_Transmit_fast(SPI_HandleTypeDef *hspi, uint8_t *pData, uint16_t Size, uint32_t Timeout)
{
  HAL_StatusTypeDef errorcode = HAL_OK;

  /* Timeout management was removed on purpose; keep the parameter quiet */
  (void)Timeout;

  /* Process Locked */
  __HAL_LOCK(hspi);

  /* Two compares, no tick access. Without this guard the "first frame"
     branch below (always taken in slave mode) would read pData and
     decrement TxXferCount below zero when Size is 0. */
  if ((pData == NULL) || (Size == 0U))
  {
    errorcode = HAL_ERROR;
    goto error;
  }

  /* Set the transaction information */
  hspi->State       = HAL_SPI_STATE_BUSY_TX;
  hspi->ErrorCode   = HAL_SPI_ERROR_NONE;
  hspi->pTxBuffPtr  = (uint8_t *)pData;
  hspi->TxXferSize  = Size;
  hspi->TxXferCount = Size;

  /* Init field not used in handle to zero */
  hspi->pRxBuffPtr  = (uint8_t *)NULL;
  hspi->RxXferSize  = 0U;
  hspi->RxXferCount = 0U;
  hspi->TxISR       = NULL;
  hspi->RxISR       = NULL;

  /* Configure communication direction : 1Line */
  if (hspi->Init.Direction == SPI_DIRECTION_1LINE)
  {
    SPI_1LINE_TX(hspi);
  }

#if (USE_SPI_CRC != 0U)
  /* Reset CRC Calculation */
  if (hspi->Init.CRCCalculation == SPI_CRCCALCULATION_ENABLE)
  {
    SPI_RESET_CRC(hspi);
  }
#endif /* USE_SPI_CRC */

  /* Check if the SPI is already enabled */
  if ((hspi->Instance->CR1 & SPI_CR1_SPE) != SPI_CR1_SPE)
  {
    /* Enable SPI peripheral */
    __HAL_SPI_ENABLE(hspi);
  }

  /* Transmit data in 16 Bit mode */
  if (hspi->Init.DataSize == SPI_DATASIZE_16BIT)
  {
    /* Slave mode, or a single frame: write it without polling TXE first */
    if ((hspi->Init.Mode == SPI_MODE_SLAVE) || (hspi->TxXferCount == 0x01))
    {
      hspi->Instance->DR = *((uint16_t *)pData);
      pData += sizeof(uint16_t);
      hspi->TxXferCount--;
    }

    while (hspi->TxXferCount > 0U)
    {
      /* Write the next frame only once TXE is set; otherwise poll again */
      if (__HAL_SPI_GET_FLAG(hspi, SPI_FLAG_TXE))
      {
        hspi->Instance->DR = *((uint16_t *)pData);
        pData += sizeof(uint16_t);
        hspi->TxXferCount--;
      }
    }
  }
  /* Transmit data in 8 Bit mode */
  else
  {
    /* Slave mode, or a single frame: write it without polling TXE first */
    if ((hspi->Init.Mode == SPI_MODE_SLAVE) || (hspi->TxXferCount == 0x01))
    {
      /* DR is accessed through a byte-wide pointer in 8-bit mode */
      *((__IO uint8_t *)&hspi->Instance->DR) = (*pData);
      pData += sizeof(uint8_t);
      hspi->TxXferCount--;
    }

    while (hspi->TxXferCount > 0U)
    {
      /* Write the next frame only once TXE is set; otherwise poll again */
      if (__HAL_SPI_GET_FLAG(hspi, SPI_FLAG_TXE))
      {
        *((__IO uint8_t *)&hspi->Instance->DR) = (*pData);
        pData += sizeof(uint8_t);
        hspi->TxXferCount--;
      }
    }
  }

  /* Clear overrun flag in 2 Lines communication mode because received is not read */
  if (hspi->Init.Direction == SPI_DIRECTION_2LINES)
  {
    __HAL_SPI_CLEAR_OVRFLAG(hspi);
  }

#if (USE_SPI_CRC != 0U)
  /* Enable CRC Transmission */
  if (hspi->Init.CRCCalculation == SPI_CRCCALCULATION_ENABLE)
  {
    SET_BIT(hspi->Instance->CR1, SPI_CR1_CRCNEXT);
  }
#endif /* USE_SPI_CRC */

  if (hspi->ErrorCode != HAL_SPI_ERROR_NONE)
  {
    errorcode = HAL_ERROR;
  }

error:
  hspi->State = HAL_SPI_STATE_READY;
  /* Process Unlocked */
  __HAL_UNLOCK(hspi);
  return errorcode;
}
That's definitely an option, though probably not the most elegant one :) It did speed up the transfer dramatically, though:
Edit:
berendi's solution is even faster:
Here's the code for multiple bytes:
/* Fill the five-byte transmit buffer: 0x06 followed by four 0x07 */
spiTxBuf[0] = 0x06;
for (int fill = 1; fill < 5; fill++) {
    spiTxBuf[fill] = 0x07;
}

/* Write bit 24 of GPIOA->BSRR, i.e. pin 8 in the upper half-word.
   NOTE(review): presumably pulls chip select low on PA8 - confirm wiring. */
GPIOA->BSRR = 1 << (16 + 8);

for (int idx = 0; idx < 5; idx++) {
    /* Byte-wide access to the data register */
    *(volatile uint8_t *)&SPI1->DR = spiTxBuf[idx];

    /* Spin until TXE is set before writing the next byte */
    while ((SPI1->SR & SPI_SR_TXE) == RESET) {
    }
}

/* Spin until TXE is set and BSY is clear at the same time */
while ((SPI1->SR & (SPI_SR_TXE | SPI_SR_BSY)) != SPI_SR_TXE) {
}

/* Write bit 8 of GPIOA->BSRR, i.e. pin 8 in the lower half-word */
GPIOA->BSRR = 1 << 8;

HAL_Delay(100);