Arduino 1K Bootloader

The bootloader was originally based on the Diecimila bootloader that shipped with Arduino 13 (although 13 wasn't out yet), then gutted and turned inside out until it was small enough.

Switching to C

The original bootloader contains a gnarly block of assembly code beginning with while(bit_is_set(EECR,EEPE)); asm volatile( around line 540; it continues for another 100 lines.

DCB found a nicely written avr-libc reference bootloader and swapped out large swaths of assembler for C. The result looked nicer, but didn't yield enough size savings. A number of individuals hacked and snipped at the bootloader, but it was still too large.

Hitting the compiler turbo button

S. Abeyasinghe points out that, by using some compiler options, a stripped-down bootloader can be shrunk from 1090 bytes to 938 bytes!

avr-gcc -c -g -Os -mmcu=atmega168 -DF_CPU=16000000L -funsigned-char -fno-split-wide-types -fno-inline-small-functions -mcall-prologues -ffunction-sections -fdata-sections -Wall "-DMAX_TIME_COUNT=F_CPU>>4" -DNUM_LED_FLASHES=1 -o ATmegaBOOT_168.o ATmegaBOOT_168.c
avr-gcc -Os -mmcu=atmega168 -Wl,--relax -Wl,-gc-sections -Wl,--section-start=.text=0x3800 -nostartfiles -o ATmegaBOOT_168_diecimila.elf ATmegaBOOT_168.o
avr-objcopy -j .text -j .data -O ihex ATmegaBOOT_168_diecimila.elf ATmegaBOOT_168_diecimila.hex

At this point we have a functioning minimalist bootloader, but without any creature comforts like status LEDs.

Making sensible trade-offs

Now we have a very minimal STK500 bootloader, but it's missing most of the user-friendly features, has no makefile, and is impossible to diff against the current official bootloader. Thankfully there are a few bytes of breathing room. A number of software hackers and members of the Arduino team revised the bootloader until its features were back up to par. Bugs were added, bugs were found, bugs were squashed.

The resulting diff is below, or you can grab the full code.

--- C:/arduino-0013/hardware/bootloaders/atmega168/ATmegaBOOT_168.c      Fri Feb 06 13:56:00 2009
+++ C:/Documents and Settings/spiffed.AIU/Desktop/Arduino-loader-168-328/ATmegaBOOT_168.c   Mon Feb 09 10:59:41 2009
@@ -6,6 +6,11 @@
 /*                                                        */
 /* ATmegaBOOT.c                                           */
 /*                                                        */
+/* 20090201: hacked for 1k bootloader for ATmega168/328   */
+/*           swapped bulk of assembler for examples at:   */
+/* http://www.stderr.org/doc/avr-libc/avr-libc-user-manual/group__avr__boot.html   */
+/*           by D. Brink, dcb AT opengauge.org            */
+/*  with updated compiler options by S. Abeyasinghe spoink AT gmail.com */
 /* 20070626: hacked for Arduino Diecimila (which auto-    */
 /*           resets when a USB connection is made to it)  */
 /*           by D. Mellis                                 */
@@ -66,10 +71,11 @@
 #include <avr/interrupt.h>
 #include <avr/wdt.h>
 #include <util/delay.h>
+#include <avr/boot.h>
 
 /* the current avr-libc eeprom functions do not support the ATmega168 */
 /* own eeprom write/read functions are used instead */
-#if !defined(__AVR_ATmega168__) || !defined(__AVR_ATmega328P__)
+#if !defined(__AVR_ATmega168__) && !defined(__AVR_ATmega328P__)
 #include <avr/eeprom.h>
 #endif
 
@@ -213,15 +219,27 @@
 #endif
 
 
+/* define null response */
+#define NULL_RESPONSE 0xFA
+
+
+// ladyada mods - start after programming done & on power cycle!
+#if defined(__AVR_ATmega168__) || defined(__AVR_ATmega328P__)
+#define QUICKSTART_MODS 1
+#endif
+
+
 /* function prototypes */
 void putch(char);
 char getch(void);
-void getNch(uint8_t);
+static inline void getNch(uint8_t); // static inline saves a few bytes
 void byte_response(uint8_t);
-void nothing_response(void);
+static inline void nothing_response(void);
 char gethex(void);
 void puthex(char);
 void flash_led(uint8_t);
+static inline void boot_program_page(uint32_t, uint8_t*);
+int main (void) __attribute__ ((naked,section (".init9")));
 
 /* some variables */
 union address_union {
@@ -234,9 +252,9 @@
        uint8_t  byte[2];
 } length;
 
-struct flags_struct {
-     unsigned eeprom : 1;
-     unsigned rampz  : 1;
+struct flags_struct { // changed from a packed struct to save some bytes
+     uint8_t eeprom;
+     uint8_t rampz;
 } flags;
 
 uint8_t buff[256];
@@ -248,28 +266,55 @@
 uint8_t bootuart = 0;
 
 uint8_t error_count = 0;
+uint8_t firstchar = 0;
+unsigned int pagenumber = 0;
 
 void (*app_start)(void) = 0x0000;
 
 
+void boot_program_page (uint32_t page, uint8_t *buf)
+{
+     uint16_t i;
+
+     eeprom_busy_wait ();
+
+     boot_page_erase (page);
+     boot_spm_busy_wait ();      // Wait until the memory is erased.
+
+     for (i=0; i<SPM_PAGESIZE; i+=2)
+     {
+             // Set up little-endian word.
+
+             uint16_t w = *buf++;
+             w += (*buf++) << 8;
+
+             boot_page_fill (page + i, w);
+     }
+
+     boot_page_write (page);     // Store buffer in flash page.
+     boot_spm_busy_wait();       // Wait until the memory is written.
+
+     // Reenable RWW-section again. We need this if we want to jump back
+     // to the application after bootloading.
+
+     boot_rww_enable ();
+}
+
+
 /* main program starts here */
 int main(void)
 {
+     asm volatile ( "clr __zero_reg__" );
+     SP=RAMEND;
+
        uint8_t ch,ch2;
        uint16_t w;
 
-#ifdef WATCHDOG_MODS
-     ch = MCUSR;
-     MCUSR = 0;
-
+#ifdef QUICKSTART_MODS
+     //ch = MCUSR;
+     //MCUSR = 0;
        WDTCSR |= _BV(WDCE) | _BV(WDE);
        WDTCSR = 0;
-
-     // Check if the WDT was used to reset, in which case we dont bootload and skip straight to the code. woot.
-     if (! (ch &  _BV(EXTRF))) // if its a not an external reset...
-             app_start();  // skip bootloader
-#else
-     asm volatile("nop\n\t");
 #endif
 
        /* set pin direction for bootloader pin and enable pullup */
@@ -346,14 +391,14 @@
        UCSRB = _BV(TXEN)|_BV(RXEN);        
 #elif defined(__AVR_ATmega168__) || defined(__AVR_ATmega328P__)
        UBRR0L = (uint8_t)(F_CPU/(BAUD_RATE*16L)-1);
-     UBRR0H = (F_CPU/(BAUD_RATE*16L)-1) >> 8;
+     //UBRR0H = (F_CPU/(BAUD_RATE*16L)-1) >> 8;     // always 0 for 19200 baud or higher
        UCSR0B = (1<<RXEN0) | (1<<TXEN0);
        UCSR0C = (1<<UCSZ00) | (1<<UCSZ01);
 
        /* Enable internal pull-up resistor on pin D0 (RX), in order
        to supress line noise that prevents the bootloader from
        timing out (DAM: 20070509) */
-     DDRD &= ~_BV(PIND0);
+     //DDRD &= ~_BV(PIND0);        // already 0 by default
        PORTD |= _BV(PIND0);
 #elif defined __AVR_ATmega8__
        /* m8 */
@@ -379,7 +424,7 @@
        // 4x for UART0, 5x for UART1
        flash_led(NUM_LED_FLASHES + bootuart);
 #else
-     flash_led(NUM_LED_FLASHES);
+     flash_led(NUM_LED_FLASHES * 2);
 #endif
 
        /* 20050803: by DojoCorp, this is one of the parts provoking the
@@ -397,7 +442,10 @@
 
        /* Hello is anyone home ? */ 
        if(ch=='0') {
+             firstchar = 1;       // we got an appropriate bootloader instr.
                nothing_response();
+     } else if (firstchar == 0) {
+             app_start();
        }
 
 
@@ -465,10 +513,10 @@
        /* Leave programming mode  */
        else if(ch=='Q') {
                nothing_response();
-#ifdef WATCHDOG_MODS
-             // autoreset via watchdog (sneaky!)
-             WDTCSR = _BV(WDE);
-             while (1); // 16 ms
+#ifdef QUICKSTART_MODS
+             flash_led(2);
+             // start immediately -ada
+             app_start();
 #endif
        }
 
@@ -529,119 +577,8 @@
                                }                 
                        }
                        else {                                            //Write to FLASH one page at a time
-                             if (address.byte[1]>127) address_high = 0x01;  //Only possible with m128, m256 will need 3rd address byte. FIXME
-                             else address_high = 0x00;
-#ifdef __AVR_ATmega128__
-                             RAMPZ = address_high;
-#endif
-                             address.word = address.word << 1;                //address * 2 -> byte location
-                             /* if ((length.byte[0] & 0x01) == 0x01) length.word++;        //Even up an odd number of bytes */
-                             if ((length.byte[0] & 0x01)) length.word++;   //Even up an odd number of bytes
-                             cli();                                        //Disable interrupts, just to be sure
-                             // HACKME: EEPE used to be EEWE
-                             while(bit_is_set(EECR,EEPE));                       //Wait for previous EEPROM writes to complete
-                             asm volatile(
-                                      "clr      r17             \n\t"      //page_word_count
-                                      "lds      r30,address     \n\t"      //Address of FLASH location (in bytes)
-                                      "lds      r31,address+1  \n\t"
-                                      "ldi      r28,lo8(buff) \n\t"      //Start of buffer array in RAM
-                                      "ldi      r29,hi8(buff) \n\t"
-                                      "lds      r24,length      \n\t"      //Length of data to be written (in bytes)
-                                      "lds      r25,length+1   \n\t"
-                                      "length_loop:             \n\t"      //Main loop, repeat for number of words in block                                                                                                                 
-                                      "cpi      r17,0x00        \n\t"      //If page_word_count=0 then erase page
-                                      "brne     no_page_erase   \n\t"                                               
-                                      "wait_spm1:               \n\t"
-                                      "lds      r16,%0          \n\t"      //Wait for previous spm to complete
-                                      "andi     r16,1           \n\t"
-                                      "cpi      r16,1           \n\t"
-                                      "breq     wait_spm1       \n\t"
-                                      "ldi      r16,0x03        \n\t"      //Erase page pointed to by Z
-                                      "sts      %0,r16          \n\t"
-                                      "spm                      \n\t"                                                       
-#ifdef __AVR_ATmega163__
-                                      ".word 0xFFFF             \n\t"
-                                      "nop                      \n\t"
-#endif
-                                      "wait_spm2:               \n\t"
-                                      "lds      r16,%0          \n\t"      //Wait for previous spm to complete
-                                      "andi     r16,1           \n\t"
-                                      "cpi      r16,1           \n\t"
-                                      "breq     wait_spm2       \n\t"                                                                       
-
-                                      "ldi      r16,0x11        \n\t"      //Re-enable RWW section
-                                      "sts      %0,r16          \n\t"                                                                       
-                                      "spm                      \n\t"
-#ifdef __AVR_ATmega163__
-                                      ".word 0xFFFF             \n\t"
-                                      "nop                      \n\t"
-#endif
-                                      "no_page_erase:           \n\t"                                                       
-                                      "ld       r0,Y+           \n\t"      //Write 2 bytes into page buffer
-                                      "ld       r1,Y+           \n\t"                                                       
-                                                              
-                                      "wait_spm3:               \n\t"
-                                      "lds      r16,%0          \n\t"      //Wait for previous spm to complete
-                                      "andi     r16,1           \n\t"
-                                      "cpi      r16,1           \n\t"
-                                      "breq     wait_spm3       \n\t"
-                                      "ldi      r16,0x01        \n\t"      //Load r0,r1 into FLASH page buffer
-                                      "sts      %0,r16          \n\t"
-                                      "spm                      \n\t"
-                                                              
-                                      "inc      r17             \n\t"      //page_word_count++
-                                      "cpi r17,%1               \n\t"
-                                      "brlo     same_page       \n\t"      //Still same page in FLASH
-                                      "write_page:              \n\t"
-                                      "clr      r17             \n\t"      //New page, write current one first
-                                      "wait_spm4:               \n\t"
-                                      "lds      r16,%0          \n\t"      //Wait for previous spm to complete
-                                      "andi     r16,1           \n\t"
-                                      "cpi      r16,1           \n\t"
-                                      "breq     wait_spm4       \n\t"
-#ifdef __AVR_ATmega163__
-                                      "andi     r30,0x80        \n\t"      // m163 requires Z6:Z1 to be zero during page write
-#endif                                                                                                                
-                                      "ldi      r16,0x05        \n\t"      //Write page pointed to by Z
-                                      "sts      %0,r16          \n\t"
-                                      "spm                      \n\t"
-#ifdef __AVR_ATmega163__
-                                      ".word 0xFFFF             \n\t"
-                                      "nop                      \n\t"
-                                      "ori      r30,0x7E        \n\t"      // recover Z6:Z1 state after page write (had to be zero during write)
-#endif
-                                      "wait_spm5:               \n\t"
-                                      "lds      r16,%0          \n\t"      //Wait for previous spm to complete
-                                      "andi     r16,1           \n\t"
-                                      "cpi      r16,1           \n\t"
-                                      "breq     wait_spm5       \n\t"                                                                       
-                                      "ldi      r16,0x11        \n\t"      //Re-enable RWW section
-                                      "sts      %0,r16          \n\t"                                                                       
-                                      "spm                      \n\t"                                                       
-#ifdef __AVR_ATmega163__
-                                      ".word 0xFFFF             \n\t"
-                                      "nop                      \n\t"
-#endif
-                                      "same_page:               \n\t"                                                       
-                                      "adiw     r30,2          \n\t"      //Next word in FLASH
-                                      "sbiw     r24,2          \n\t"      //length-2
-                                      "breq     final_write     \n\t"      //Finished
-                                      "rjmp     length_loop     \n\t"
-                                      "final_write:             \n\t"
-                                      "cpi      r17,0           \n\t"
-                                      "breq     block_done      \n\t"
-                                      "adiw     r24,2          \n\t"      //length+2, fool above check on length after short page write
-                                      "rjmp     write_page      \n\t"
-                                      "block_done:              \n\t"
-                                      "clr      __zero_reg__    \n\t"      //restore zero register
-#if defined(__AVR_ATmega168__)  || defined(__AVR_ATmega328P__)
-                                      : "=m" (SPMCSR) : "M" (PAGE_SIZE) : "r0","r16","r17","r24","r25","r28","r29","r30","r31"
-#else
-                                      : "=m" (SPMCR) : "M" (PAGE_SIZE) : "r0","r16","r17","r24","r25","r28","r29","r30","r31"
-#endif
-                                      );
-                             /* Should really add a wait for RWW section to be enabled, don't actually need it since we never */
-                             /* exit the bootloader without a power cycle anyhow */
+                             boot_program_page(pagenumber,(uint8_t *)&buff);
+                             pagenumber+=SPM_PAGESIZE;
                        }
                        putch(0x14);
                        putch(0x10);
@@ -661,8 +598,8 @@
                else flags.rampz = 0;
 #endif
                address.word = address.word << 1;                // address * 2 -> byte location
+             flags.eeprom = 0;
                if (getch() == 'E') flags.eeprom = 1;
-             else flags.eeprom = 0;
                if (getch() == ' ') {                         // Command terminator
                        putch(0x14);
                        for (w=0;w < length.word;w++) {                      // Can handle odd and even lengths okay
@@ -679,10 +616,12 @@
                                }
                                else {
 
-                                     if (!flags.rampz) putch(pgm_read_byte_near(address.word));
 #if defined __AVR_ATmega128__
+                                     if (!flags.rampz) putch(pgm_read_byte_near(address.word));
                                        else putch(pgm_read_byte_far(address.word + 0x10000));
                                        // Hmmmm, yuck  FIXME when m256 arrvies
+#else
+                                     putch(pgm_read_byte_near(address.word));
 #endif
                                        address.word++;
                                }
@@ -905,16 +844,23 @@
        }
        return 0;
 #elif defined(__AVR_ATmega168__)  || defined(__AVR_ATmega328P__)
-     uint32_t count = 0;
+     uint16_t count = 0;
+     
+     LED_PORT &= ~_BV(LED);
+     
        while(!(UCSR0A & _BV(RXC0))){
                /* 20060803 DojoCorp:: Addon coming from the previous Bootloader*/               
                /* HACKME:: here is a good place to count times*/
                count++;
-             if (count > MAX_TIME_COUNT)
+             _delay_us(384);
+             if (count > (MAX_TIME_COUNT>>8))
                        app_start();
        }
+     
+     LED_PORT |= _BV(LED);
+     
        return UDR0;
-#else
+#else 
        /* m8,16,32,169,8515,8535,163 */
        uint32_t count = 0;
        while(!(UCSRA & _BV(RXC))){
@@ -958,7 +904,8 @@
 {
        if (getch() == ' ') {
                putch(0x14);
-             putch(val);
+             if (val != NULL_RESPONSE)
+                     putch(val);
                putch(0x10);
        } else {
                if (++error_count == MAX_ERROR_COUNT)
@@ -969,21 +916,14 @@
 
 void nothing_response(void)
 {
-     if (getch() == ' ') {
-             putch(0x14);
-             putch(0x10);
-     } else {
-             if (++error_count == MAX_ERROR_COUNT)
-                     app_start();
-     }
+     byte_response(NULL_RESPONSE);
 }
 
+
 void flash_led(uint8_t count)
 {
        while (count--) {
-             LED_PORT |= _BV(LED);
-             _delay_ms(100);
-             LED_PORT &= ~_BV(LED);
+             LED_PORT ^= _BV(LED);       // toggle!
                _delay_ms(100);
        }
 }