// 18 channels of R/C-style PWM input (nominal 1ms to 2ms positive pulses at nominally 50Hz to 100Hz)
// inputs are on D2-D13, A0-A5,
// outputs inverted SBUS on TX1.  Note: many DIY-MORE strong boards have 0 and 1 labelled wrongly (swapped) on the silk screen
// ceptimus April 2022

// One entry of the edge-event queue.  The pin-change interrupt routines write these four bytes in exactly this order
// (timestamp low byte, timestamp high byte, pins, id), so the field order and the 4-byte size must not be changed.
struct edgeEvent {
  uint16_t tcnt; // a snapshot of TCNT1 (half-microsecond timestamp) taken inside the interrupt routine
  uint8_t pins; // a snapshot of the port's 8 input pins (PINB, PINC or PIND) taken inside the interrupt routine
  uint8_t id; // 0, 1, or 2 to identify which pinchange interrupt added the event (0 = port B, 1 = port C, 2 = port D)
};

// A FIFO of edge events, written by the asm interrupt routines and read by loop().
// It holds 64 events so that the whole queue is 256 bytes (64 * 4-byte event) and the write offset fits in a single byte that wraps naturally.
edgeEvent q[64];
volatile uint8_t qInput = 0;  // byte offset into q used by the asm interrupt routines.  Each interrupt increases its value by 4
uint8_t qOutput = 0; // byte offset into q used by the (non-interrupt) code which reads events from the queue; also advanced by 4 per event

uint16_t startEdge[18]; // timestamps (in half-microsecond units) of the most recent leading edge seen on each channel
uint16_t pulseWidth[18]; // latest received pulse width for each channel, in half-microsecond units

// SBUS format is 25-byte frames at 100kBaud 8E2.  Each byte is 12 bit times (1 start + 8 data + 1 parity + 2 stop),
// so a frame with no gaps is 12 * 25 = 300 bit times, which takes exactly 3ms at 100kBaud.
uint8_t sbusFrame[25]; // the frame under construction; updated by encodeSBUS() whenever a pulse width is measured
uint8_t serialBuffer[25]; // copy of sbusFrame for serial output.  Must be a copy to prevent glitches in data when PWM pulses terminate during transmit

// One-time hardware initialisation:
//  - starts Timer/Counter1 free-running as the half-microsecond timestamp source,
//  - configures the 18 PWM input pins (with pull-ups) and enables their pin-change interrupts,
//  - fills sbusFrame with a valid frame holding centre values for every channel,
//  - configures USART0 for transmit-only 100000 baud 8E2 by writing its registers directly,
//  - disables the timer0 overflow interrupt.
void setup() {
  // using 16-bit Timer/Counter1 to provide 16-bit timestamps with half-microsecond resolution.  counts from 0x0000 to 0xFFFF and wraps around every 32768 microseconds
  TCCR1A = 0x00;
  TCCR1B = 0x02;

  DDRB   &= 0b11000000; // port B (pins D8 - D13) inputs
  PORTB  |= 0b00111111; // input pullup to avoid phantom edges from unused PWM inputs
  PCMSK0 |= 0b00111111; // allow pinchange interrupts for pins D8 - D13 (handled by PCINT0)
  DDRC   &= 0b11000000; // port C (pins A0 - A5) inputs
  PORTC  |= 0b00111111; // input pullup to avoid phantom edges from unused PWM inputs
  PCMSK1 |= 0b00111111; // allow pinchange interrupts for pins A0 - A5 (handled by PCINT1)
  DDRD   &= 0b00000011; // port D (pins D2 - D7) inputs
  PORTD  |= 0b11111100; // input pullup to avoid phantom edges from unused PWM inputs
  PCMSK2 |= 0b11111100; // allow pinchange interrupts for pins D2 - D7 (handled by PCINT2)
  PCICR  |= 0b00000111; // enable pinchange interrupts PCINT0, PCINT1, PCINT2

  delay(50); // settle time for any start-up interrupts to fire before initializing defaults

  // 1st byte of an SBUS frame is 0x0F, last byte of frame is 0x00.  bytes 2 to 23 hold 16 channels of 11 bits each, packed together.  byte 24 holds digital channels 17 and 18 plus a couple of other flags
  sbusFrame[0] = 0x0F;
  sbusFrame[23] = sbusFrame[24] = 0x00;

  // all 18 PWM channels may not be connected: default all channels to nominal 'centre'
  for (uint8_t channel = 0; channel < 18; channel++) {
    pulseWidth[channel] = 3000; // SBUS values are 11-bit 0 to 2047, corresponding to half-microsecond pulse widths 1976 to 4023 (988us to 2011us)
    encodeSBUS(channel, pulseWidth[channel]);
  }
  qInput = 0; // discard any events queued by start-up interrupts (qOutput is still 0, so the queue is now empty)

  // serial port setup for 100000 baud, 8E2. not using Serial.begin() to avoid enabling Serial interrupts which would make PWM edge detection slightly more jittery
#define BAUD 100000
#define UBRR_DOUBLESPEED (F_CPU/8/BAUD-1)
  // The shift must happen BEFORE the narrowing cast: casting first would discard the high byte and always write 0 to UBRR0H.
  UBRR0H = (uint8_t)(UBRR_DOUBLESPEED >> 8);
  UBRR0L = (uint8_t)UBRR_DOUBLESPEED;
  UCSR0A = (1<<U2X0); // double USART transmission speed
  UCSR0B = (1<<TXEN0); // enable transmit
  UCSR0C = (2<<UPM00)|(1<<USBS0)|(3<<UCSZ00); // even parity, 2 stop bits, 8 data bits

  // not using millis() so timer0 can be disabled to remove another potential source of jitter on the PWM signals
  TIMSK0 &= ~_BV(TOIE0); // disable timer0 overflow interrupt
}

// Main loop body, called repeatedly.  Does two things on every pass:
//  1. Drains the edge-event queue filled by the pin-change interrupts.  For each event it works out which of the
//     port's six input pins changed since that port's previous event: a rising edge records the timestamp in
//     startEdge[], a falling edge stores the measured width in pulseWidth[] and re-encodes that channel into sbusFrame.
//  2. Feeds the USART one byte at a time (polled, no serial interrupt).  Once a frame has been fully handed over,
//     a new frame is started when at least 12200us have elapsed since the previous frame was started.
void loop() {
  static uint16_t previousFrameTimer = 0; // remembers TCNT1 value (half-microsecond timestamp) when previous SBUS frame was transmitted
  // Index of the next serialBuffer byte to send; 25 means "no frame in progress".  Starts at 25 so that nothing is
  // transmitted until serialBuffer has been loaded from sbusFrame (starting at 0 would send 25 zero bytes after reset).
  static uint8_t serialBufferIndex = 25;
  static uint8_t prevB = 0, prevC = 0, prevD = 0; // state of pins for ports B, C, D on previous event for that port

  while (qOutput != qInput) { // process any events placed in the queue by the interrupt handlers
    edgeEvent event = q[qOutput >> 2]; // divide by 4 to convert a byte offset to an edgeEvent index

    uint8_t *prev;        // previous pin snapshot for the port that raised this event
    uint8_t firstChannel; // channel number carried by the lowest-numbered input pin of that port
    uint8_t mask;         // bit mask of that lowest-numbered input pin within the snapshot
    switch (event.id) {
    case 0:  // port B: pins D8 - D13 are bits 0-5, channels 6-11
      prev = &prevB;
      firstChannel = 6;
      mask = 0x01;
      break;
    case 1:  // port C: pins A0 - A5 are bits 0-5, channels 12-17
      prev = &prevC;
      firstChannel = 12;
      mask = 0x01;
      break;
    default: // port D: pins D2 - D7 are bits 2-7, channels 0-5
      prev = &prevD;
      firstChannel = 0;
      mask = 0x04;
      break;
    }

    uint8_t changed = *prev ^ event.pins; // which bit(s) changed since previous event on same port
    *prev = event.pins;

    for (uint8_t channel = firstChannel; channel < firstChannel + 6; channel++) {
      if (changed & mask) { // this channel's pin changed state
        if (event.pins & mask) { // pin went high - remember start time
          startEdge[channel] = event.tcnt;
        } else { // pin went low, so store pulseWidth since it went high (unsigned subtraction copes with timer wrap-around)
          pulseWidth[channel] = event.tcnt - startEdge[channel];
          encodeSBUS(channel, pulseWidth[channel]);
        }
      }
      mask <<= 1;
    }
    qOutput += 4; // step to the next 4-byte event; the byte offset wraps after 256
  }

  if (UCSR0A & (1<<UDRE0)) { // serial transmit data register empty - a byte can be written to the serial port
    if (serialBufferIndex < 25) { // there are bytes still to transmit in the current frame
      UDR0 = serialBuffer[serialBufferIndex]; // write byte to transmit data buffer register
      serialBufferIndex++;
    } else { // no frame in progress.  is it time to send a new frame?
      // send the latest SBUS data when at least 12200us have elapsed since previous send
      noInterrupts();
      uint16_t currentTimer = TCNT1; // capture 16-bit timer value without allowing an interrupt to spoil 16-bit atomic read
      interrupts();
      uint16_t elapsed = currentTimer - previousFrameTimer;
      if (elapsed >= 24400) { // 24400 half-microseconds = 12200us
        // snapshot the frame so that pulses finishing mid-transmission cannot alter bytes not yet sent
        for (uint8_t i = 0; i < 25; i++) {
          serialBuffer[i] = sbusFrame[i];
        }
        serialBufferIndex = 0;
        previousFrameTimer = currentTimer;
      }
    }
  }
}

// Converts one measured pulse width into its SBUS value and writes it into sbusFrame.
//   channel    - 0 to 17.  Channels 0-15 are 11-bit servo channels; 16 and 17 are the two on/off 'digital' channels.
//                Values above 17 are not checked and must not be passed.
//   pulseWidth - pulse width in half-microseconds.  1976 or less maps to 0, 4023 or more maps to 2047,
//                and values in between map to (pulseWidth - 1976).
// Only the bits belonging to the given channel are modified; neighbouring channels sharing a byte are preserved.
void encodeSBUS(uint8_t channel, uint16_t pulseWidth) { // channel 0 - 17, pulseWidth 1976 to 4023 in half-microseconds
  uint16_t sbus;
  if (pulseWidth <= 1976) {
    sbus = 0;
  } else if (pulseWidth >= 4023) {
    sbus = 2047;
  } else {
    sbus = pulseWidth - 1976;
  }
  if (channel == 16) { // 1st of 2 'switch' or 'digital' channels: bit 0 of the flags byte, on when in the upper half of the range
    if (sbus >= 1024) {
      sbusFrame[23] |= 0x01;
    } else {
      sbusFrame[23] &= ~0x01;
    }
  } else if (channel == 17) { // 2nd of 2 switch channels: bit 1 of the flags byte
    if (sbus >= 1024) {
      sbusFrame[23] |= 0x02;
    } else {
      sbusFrame[23] &= ~0x02;
    }
  } else { // 'normal' servo channel: each channel is 11 bits but they are closely packed together, least significant bit first.
    // 8 channels * 11 bits = 88 bits = exactly 11 bytes, so the bit layout repeats every 8 channels.
    uint8_t k = channel > 7 ? 12 : 1; // offset into sbusFrame array: 1 for channels 0-7, 12 for channels 8-15
    switch (channel & 7) { // pattern repeats (except for k-offset) after 8 channels
      case 0: // 8 bits in byte 0, 3 bits in the low end of byte 1
        sbusFrame[0+k] = (uint8_t)(sbus & 0x00FF);
        sbusFrame[1+k] = (sbusFrame[1+k] & 0xF8) | (uint8_t)(sbus >> 8);
        break;
      case 1: // 5 bits in the high end of byte 1, 6 bits in the low end of byte 2
        sbusFrame[1+k] = (sbusFrame[1+k] & 0x07) | (uint8_t)(sbus << 3);
        // keep the top two bits of byte 2 itself: they are the low bits of the next channel
        sbusFrame[2+k] = (sbusFrame[2+k] & 0xC0) | (uint8_t)(sbus >> 5);
        break;
      case 2: // 2 bits in the high end of byte 2, all of byte 3, 1 bit in the low end of byte 4
        sbusFrame[2+k] = (sbusFrame[2+k] & 0x3F) | (uint8_t)(sbus << 6);
        sbusFrame[3+k] = (uint8_t)(sbus >> 2);
        sbusFrame[4+k] = (sbusFrame[4+k] & 0xFE) | (uint8_t)(sbus >> 10);
        break;
      case 3: // 7 bits in the high end of byte 4, 4 bits in the low end of byte 5
        sbusFrame[4+k] = (sbusFrame[4+k] & 0x01) | (uint8_t)(sbus << 1);
        sbusFrame[5+k] = (sbusFrame[5+k] & 0xF0) | (uint8_t)(sbus >> 7);
        break;
      case 4: // 4 bits in the high end of byte 5, 7 bits in the low end of byte 6
        sbusFrame[5+k] = (sbusFrame[5+k] & 0x0F) | (uint8_t)(sbus << 4);
        sbusFrame[6+k] = (sbusFrame[6+k] & 0x80) | (uint8_t)(sbus >> 4);
        break;
      case 5: // 1 bit in the high end of byte 6, all of byte 7, 2 bits in the low end of byte 8
        sbusFrame[6+k] = (sbusFrame[6+k] & 0x7F) | (uint8_t)(sbus << 7);
        sbusFrame[7+k] = (uint8_t)(sbus >> 1);
        sbusFrame[8+k] = (sbusFrame[8+k] & 0xFC) | (uint8_t)(sbus >> 9);
        break;
      case 6: // 6 bits in the high end of byte 8, 5 bits in the low end of byte 9
        sbusFrame[8+k] = (sbusFrame[8+k] & 0x03) | (uint8_t)(sbus << 2);
        sbusFrame[9+k] = (sbusFrame[9+k] & 0xE0) | (uint8_t)(sbus >> 6);
        break;
      case 7: // 3 bits in the high end of byte 9, all of byte 10
        sbusFrame[9+k] = (sbusFrame[9+k] & 0x1F) | (uint8_t)(sbus << 5);
        sbusFrame[10+k] = (uint8_t)(sbus >> 3);
        break;
    }
  }
}

// Pin-change interrupt handler for Port B, hand-written in assembly.
// Appends one 4-byte event to the queue q at byte offset qInput: TCNT1 low byte, TCNT1 high byte, a snapshot of PINB, then id 0.
// qInput is advanced by 4 before the event is written; being a single byte it wraps after 256 bytes (64 events).
// There is no check for the queue being full.
// Declared ISR_NAKED, so the asm itself saves and restores everything it uses (Z, r25, r24, SREG) and ends with reti.
ISR(PCINT0_vect, ISR_NAKED) { // pin change interrupt for Port B: pins D8 - D13
  asm (
    "push r31           \n" // preserve Z
    "push r30           \n"
    "push r25           \n" // preserve R25
    "push r24           \n" // preserve R24
    "in   r24, __SREG__ \n"
    "push r24           \n" // preserve SREG // 11 cycles of asm to here

    "ldi r30, lo8(q)    \n" // Z points to the start of the queue
    "ldi r31, hi8(q)    \n"
    "lds r24,(qInput)   \n" // r24 = current byte offset into the queue
    "ldi r25, 0         \n" // zero high byte so the 8-bit offset can be added to the 16-bit Z with carry
    "add r30, r24       \n" // Z now points to event within the queue
    "adc r31, r25       \n"
    "ldi r25, 4         \n" // size of one event in bytes
    "add r24, r25       \n" // bump qInput ready for next event
    "sts (qInput), r24  \n"
    "lds r24, 0x84      \n" // TCNT1 lo
    "lds r25, 0x85      \n" // TCNT1 hi
    "st  Z+, r24        \n"
    "st  Z+, r25        \n" // write the 16-bit TCNT1 timestamp to the event
    "in  r24, 0x03      \n" // read PINB
    "st  Z+, r24        \n" // write snapshot of pins to event
    "ldi r24, 0         \n" // id for Port B events
    "st  Z+, r24        \n" // write id to event  // + 28 cycles of asm to here
    
    "pop  r24           \n"
    "out  __SREG__, r24 \n" // restore SREG
    "pop  r24           \n" // restore R24
    "pop  r25           \n" // restore R25
    "pop  r30           \n" // restore Z
    "pop  r31           \n"
    "reti               \n" // + 15 cycles of asm to here.  Total interrupt cycles 54 + interrupt call overhead (7 cycles?) - so about 61 cycles total or just under 4us with 16MHz clock 
  );
}

// Pin-change interrupt handler for Port C, hand-written in assembly.
// Appends one 4-byte event to the queue q at byte offset qInput: TCNT1 low byte, TCNT1 high byte, a snapshot of PINC, then id 1.
// qInput is advanced by 4 before the event is written; being a single byte it wraps after 256 bytes (64 events).
// There is no check for the queue being full.
// Declared ISR_NAKED, so the asm itself saves and restores everything it uses (Z, r25, r24, SREG) and ends with reti.
ISR(PCINT1_vect, ISR_NAKED) { // pin change interrupt for Port C: pins A0 - A5
  asm (
    "push r31           \n" // preserve Z
    "push r30           \n"
    "push r25           \n" // preserve R25
    "push r24           \n" // preserve R24
    "in   r24, __SREG__ \n"
    "push r24           \n" // preserve SREG // 11 cycles of asm to here

    "ldi r30, lo8(q)    \n" // Z points to the start of the queue
    "ldi r31, hi8(q)    \n"
    "lds r24,(qInput)   \n" // r24 = current byte offset into the queue
    "ldi r25, 0         \n" // zero high byte so the 8-bit offset can be added to the 16-bit Z with carry
    "add r30, r24       \n" // Z now points to event within the queue
    "adc r31, r25       \n"
    "ldi r25, 4         \n" // size of one event in bytes
    "add r24, r25       \n" // bump qInput ready for next event
    "sts (qInput), r24  \n"
    "lds r24, 0x84      \n" // TCNT1 lo
    "lds r25, 0x85      \n" // TCNT1 hi
    "st  Z+, r24        \n"
    "st  Z+, r25        \n" // write the 16-bit TCNT1 timestamp to the event
    "in  r24, 0x06      \n" // read PINC
    "st  Z+, r24        \n" // write snapshot of pins to event
    "ldi r24, 1         \n" // id for Port C events
    "st  Z+, r24        \n" // write id to event  // + 28 cycles of asm to here
    
    "pop  r24           \n"
    "out  __SREG__, r24 \n" // restore SREG
    "pop  r24           \n" // restore R24
    "pop  r25           \n" // restore R25
    "pop  r30           \n" // restore Z
    "pop  r31           \n"
    "reti               \n" // + 15 cycles of asm to here.  Total interrupt cycles 54 + interrupt call overhead (7 cycles?) - so about 61 cycles total or just under 4us with 16MHz clock 
  );
}

// Pin-change interrupt handler for Port D, hand-written in assembly.
// Appends one 4-byte event to the queue q at byte offset qInput: TCNT1 low byte, TCNT1 high byte, a snapshot of PIND, then id 2.
// qInput is advanced by 4 before the event is written; being a single byte it wraps after 256 bytes (64 events).
// There is no check for the queue being full.
// Declared ISR_NAKED, so the asm itself saves and restores everything it uses (Z, r25, r24, SREG) and ends with reti.
ISR(PCINT2_vect, ISR_NAKED) { // pin change interrupt for Port D: pins D2 - D7
  asm (
    "push r31           \n" // preserve Z
    "push r30           \n"
    "push r25           \n" // preserve R25
    "push r24           \n" // preserve R24
    "in   r24, __SREG__ \n"
    "push r24           \n" // preserve SREG // 11 cycles of asm to here

    "ldi r30, lo8(q)    \n" // Z points to the start of the queue
    "ldi r31, hi8(q)    \n"
    "lds r24,(qInput)   \n" // r24 = current byte offset into the queue
    "ldi r25, 0         \n" // zero high byte so the 8-bit offset can be added to the 16-bit Z with carry
    "add r30, r24       \n" // Z now points to event within the queue
    "adc r31, r25       \n"
    "ldi r25, 4         \n" // size of one event in bytes
    "add r24, r25       \n" // bump qInput ready for next event
    "sts (qInput), r24  \n"
    "lds r24, 0x84      \n" // TCNT1 lo
    "lds r25, 0x85      \n" // TCNT1 hi
    "st  Z+, r24        \n"
    "st  Z+, r25        \n" // write the 16-bit TCNT1 timestamp to the event
    "in  r24, 0x09      \n" // read PIND
    "st  Z+, r24        \n" // write snapshot of pins to event
    "ldi r24, 2         \n" // id for Port D events
    "st  Z+, r24        \n" // write id to event  // + 28 cycles of asm to here
    
    "pop  r24           \n"
    "out  __SREG__, r24 \n" // restore SREG
    "pop  r24           \n" // restore R24
    "pop  r25           \n" // restore R25
    "pop  r30           \n" // restore Z
    "pop  r31           \n"
    "reti               \n" // + 15 cycles of asm to here.  Total interrupt cycles 54 + interrupt call overhead (7 cycles?) - so about 61 cycles total or just under 4us with 16MHz clock 
  );
}
