Implement DELAY_NS with CYCCNT on Cortex-M7 (#12283)
This commit is contained in:
		
							parent
							
								
									f5498168ae
								
							
						
					
					
						commit
						cafabf2055
					
				@ -30,6 +30,7 @@
 | 
			
		||||
#include "HAL.h"
 | 
			
		||||
 | 
			
		||||
#include "../../inc/MarlinConfig.h"
 | 
			
		||||
#include "../shared/Delay.h"
 | 
			
		||||
 | 
			
		||||
#if ENABLED(EEPROM_EMULATED_WITH_SRAM)
 | 
			
		||||
  #if STM32F7xx
 | 
			
		||||
@ -80,6 +81,11 @@ uint16_t HAL_adc_result;
 | 
			
		||||
// HAL initialization task
 | 
			
		||||
void HAL_init(void) {
 | 
			
		||||
 | 
			
		||||
  // Needed for DELAY_NS() / DELAY_US() on CORTEX-M7
 | 
			
		||||
  #if (defined(__arm__) || defined(__thumb__)) && __CORTEX_M == 7
 | 
			
		||||
    enableCycleCounter();
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  FastIO_init();
 | 
			
		||||
 | 
			
		||||
  #if ENABLED(SDSUPPORT)
 | 
			
		||||
 | 
			
		||||
@ -153,8 +153,6 @@ extern uint16_t HAL_adc_result;
 | 
			
		||||
// Public functions
 | 
			
		||||
// --------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Memory related
 | 
			
		||||
#define __bss_end __bss_end__
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -26,7 +26,7 @@
 | 
			
		||||
#ifdef __MK20DX256__
 | 
			
		||||
 | 
			
		||||
#include "HAL.h"
 | 
			
		||||
#include "../Delay.h"
 | 
			
		||||
#include "../shared/Delay.h"
 | 
			
		||||
 | 
			
		||||
#include <Wire.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -19,6 +19,7 @@
 | 
			
		||||
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 *
 | 
			
		||||
 */
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Busy wait delay cycles routines:
 | 
			
		||||
@ -28,57 +29,81 @@
 | 
			
		||||
 *  DELAY_US(count): Delay execution in microseconds
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifndef MARLIN_DELAY_H
 | 
			
		||||
#define MARLIN_DELAY_H
 | 
			
		||||
 | 
			
		||||
#include "../../core/macros.h"
 | 
			
		||||
#include "../../core/millis_t.h"
 | 
			
		||||
 | 
			
		||||
#if defined(__arm__) || defined(__thumb__)
 | 
			
		||||
 | 
			
		||||
  // https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
 | 
			
		||||
  #if __CORTEX_M == 7
 | 
			
		||||
 | 
			
		||||
  #define nop() __asm__ __volatile__("nop;\n\t":::)
 | 
			
		||||
    // Cortex-M7 can use the cycle counter of the DWT unit
 | 
			
		||||
    // http://www.anthonyvh.com/2017/05/18/cortex_m-cycle_counter/
 | 
			
		||||
 | 
			
		||||
  FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
 | 
			
		||||
    #if ARCH_PIPELINE_RELOAD_CYCLES < 2
 | 
			
		||||
      #define EXTRA_NOP_CYCLES A("nop")
 | 
			
		||||
    #else
 | 
			
		||||
      #define EXTRA_NOP_CYCLES ""
 | 
			
		||||
    #endif
 | 
			
		||||
    FORCE_INLINE static void enableCycleCounter() {
 | 
			
		||||
      CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
 | 
			
		||||
 | 
			
		||||
    __asm__ __volatile__(
 | 
			
		||||
      A(".syntax unified") // is to prevent CM0,CM1 non-unified syntax
 | 
			
		||||
      L("1")
 | 
			
		||||
      A("subs %[cnt],#1")
 | 
			
		||||
      EXTRA_NOP_CYCLES
 | 
			
		||||
      A("bne 1b")
 | 
			
		||||
      : [cnt]"+r"(cy)   // output: +r means input+output
 | 
			
		||||
      :                 // input:
 | 
			
		||||
      : "cc"            // clobbers:
 | 
			
		||||
    );
 | 
			
		||||
  }
 | 
			
		||||
      // Unlock DWT.
 | 
			
		||||
      DWT->LAR = 0xC5ACCE55;
 | 
			
		||||
 | 
			
		||||
  // Delay in cycles
 | 
			
		||||
  FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
 | 
			
		||||
 | 
			
		||||
    if (__builtin_constant_p(x)) {
 | 
			
		||||
      #define MAXNOPS 4
 | 
			
		||||
 | 
			
		||||
      if (x <= (MAXNOPS)) {
 | 
			
		||||
        switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
 | 
			
		||||
      }
 | 
			
		||||
      else { // because of +1 cycle inside delay_4cycles
 | 
			
		||||
        const uint32_t rem = (x - 1) % (MAXNOPS);
 | 
			
		||||
        switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
 | 
			
		||||
        if ((x = (x - 1) / (MAXNOPS)))
 | 
			
		||||
          __delay_4cycles(x); // if need more then 4 nop loop is more optimal
 | 
			
		||||
      }
 | 
			
		||||
      #undef MAXNOPS
 | 
			
		||||
      DWT->CYCCNT = 0;
 | 
			
		||||
      DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
 | 
			
		||||
    }
 | 
			
		||||
    else if ((x >>= 2))
 | 
			
		||||
      __delay_4cycles(x);
 | 
			
		||||
  }
 | 
			
		||||
  #undef nop
 | 
			
		||||
 | 
			
		||||
    // Return the current value of the DWT cycle counter register (DWT->CYCCNT).
    // - 'static' gives this header-defined helper internal linkage, matching the
    //   other FORCE_INLINE helpers in this header.
    // - The former 'volatile' on the return type was dropped: a qualifier on a
    //   by-value return type is ignored by the compiler (GCC: -Wignored-qualifiers).
    // NOTE(review): presumably the counter must have been started first
    // (enableCycleCounter) — confirm the call order in HAL_init.
    FORCE_INLINE static uint32_t getCycleCount() { return DWT->CYCCNT; }
 | 
			
		||||
 | 
			
		||||
    // Busy-wait until the cycle counter has advanced by x cycles.
    // The target value is computed once from the current count; the loop then
    // re-reads the counter until PENDING() stops reporting the target as ahead.
    // NOTE(review): PENDING() is defined outside this view — presumably a
    // wrap-safe "not yet reached" comparison; confirm.
    FORCE_INLINE static void DELAY_CYCLES(const uint32_t x) {
      const uint32_t deadline = getCycleCount() + x;
      for (;;) {
        if (!PENDING(getCycleCount(), deadline)) break;
      }
    }
 | 
			
		||||
 | 
			
		||||
  #else
 | 
			
		||||
 | 
			
		||||
    // https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
 | 
			
		||||
 | 
			
		||||
    #define nop() __asm__ __volatile__("nop;\n\t":::)
 | 
			
		||||
 | 
			
		||||
    // Count-down busy loop: decrements cy until it reaches zero.
    // Per the name and the "+1 cycle" note, each pass is meant to cost 4 cycles
    // with one extra cycle of overhead — NOTE(review): timing is taken from the
    // original comments, not verifiable from this code alone.
    // cy must be non-zero: the counter is decremented before it is tested.
    FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
      // Add one NOP to the loop body unless ARCH_PIPELINE_RELOAD_CYCLES is 2 or more
      #if ARCH_PIPELINE_RELOAD_CYCLES >= 2
        #define EXTRA_NOP_CYCLES ""
      #else
        #define EXTRA_NOP_CYCLES A("nop")
      #endif

      // A() / L() are defined elsewhere — presumably they emit one asm
      // instruction line / one local label; confirm against core/macros.h.
      __asm__ __volatile__(
        A(".syntax unified")  // avoid the non-unified syntax of CM0/CM1
        L("1")
        A("subs %[cnt],#1")   // decrement and update the flags
        EXTRA_NOP_CYCLES
        A("bne 1b")           // loop while the counter is non-zero
        : [cnt]"+r"(cy)       // outputs: cy is read and written
        :                     // inputs: none
        : "cc"                // clobbers: condition flags
      );
    }
 | 
			
		||||
 | 
			
		||||
    // Delay in cycles
 | 
			
		||||
    FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
 | 
			
		||||
 | 
			
		||||
      if (__builtin_constant_p(x)) {
 | 
			
		||||
        #define MAXNOPS 4
 | 
			
		||||
 | 
			
		||||
        if (x <= (MAXNOPS)) {
 | 
			
		||||
          switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
 | 
			
		||||
        }
 | 
			
		||||
        else { // because of +1 cycle inside delay_4cycles
 | 
			
		||||
          const uint32_t rem = (x - 1) % (MAXNOPS);
 | 
			
		||||
          switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
 | 
			
		||||
          if ((x = (x - 1) / (MAXNOPS)))
 | 
			
		||||
            __delay_4cycles(x); // if need more then 4 nop loop is more optimal
 | 
			
		||||
        }
 | 
			
		||||
        #undef MAXNOPS
 | 
			
		||||
      }
 | 
			
		||||
      else if ((x >>= 2))
 | 
			
		||||
        __delay_4cycles(x);
 | 
			
		||||
    }
 | 
			
		||||
    #undef nop
 | 
			
		||||
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
#elif defined(__AVR__)
 | 
			
		||||
 | 
			
		||||
@ -144,5 +169,3 @@
 | 
			
		||||
 | 
			
		||||
// Delay in microseconds
 | 
			
		||||
// Busy-wait x microseconds: x is multiplied by the whole number of CPU cycles
// per microsecond (F_CPU / 1000000UL, integer division) and handed to DELAY_CYCLES.
#define DELAY_US(x) DELAY_CYCLES( (x) * (F_CPU / 1000000UL) )
 | 
			
		||||
 | 
			
		||||
#endif // MARLIN_DELAY_H
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user