Arduino 1K Bootloader

The bootloader was originally based on the Diecimila bootloader that shipped with Arduino 13 (although 13 wasn't out yet), then gutted and turned inside out until it was small enough.

Switching to C

The original bootloader contains a gnarly block of assembly code beginning with while(bit_is_set(EECR,EEPE)); asm volatile( around line 540; this block continues on for another 100 lines.

DCB found a nicely written avr-libc reference bootloader and swapped out large swaths of assembler for C, which looked nicer but didn't garner enough size savings. A number of individuals hacked and snipped at the bootloader, but it was still too large.

Hitting the compiler turbo button

S. Abeyasinghe pointed out that, by using some compiler options, a stripped-down bootloader can be shrunk from 1090 bytes to 938 bytes!

avr-gcc -c -g -Os -mmcu=atmega168 -DF_CPU=16000000L -funsigned-char -fno-split-wide-types -fno-inline-small-functions -mcall-prologues -ffunction-sections -fdata-sections -Wall "-DMAX_TIME_COUNT=F_CPU>>4" -DNUM_LED_FLASHES=1 -o ATmegaBOOT_168.o ATmegaBOOT_168.c
avr-gcc -Os -mmcu=atmega168 -Wl,--relax -Wl,-gc-sections -Wl,--section-start=.text=0x3800 -nostartfiles -o ATmegaBOOT_168_diecimila.elf ATmegaBOOT_168.o
avr-objcopy -j .text -j .data -O ihex ATmegaBOOT_168_diecimila.elf ATmegaBOOT_168_diecimila.hex

At this point we have a functioning minimalist bootloader, but without any creature comforts like status LEDs.

Making sensible trade-offs

Now we have a very minimal STK500 bootloader, but it's missing most of the user-friendly features, has no makefile, and is impossible to diff against the current official bootloader. Thankfully there are a few bytes of breathing room. A number of software hackers and members of the Arduino team revised the bootloader until its features were back up to par. Bugs were added, bugs were found, bugs were squashed.

The resulting diff is below or you can grab the full code.

--- C:/arduino-0013/hardware/bootloaders/atmega168/ATmegaBOOT_168.c	Fri Feb 06 13:56:00 2009
+++ C:/Documents and Settings/spiffed.AIU/Desktop/Arduino-loader-168-328/ATmegaBOOT_168.c	Mon Feb 09 10:59:41 2009
@@ -6,6 +6,11 @@
 /*                                                        */
 /* ATmegaBOOT.c                                           */
 /*                                                        */
+/* 20090201: hacked for 1k bootloader for ATmega168/328   */
+/*           swapped bulk of assembler for examples at:   */
+/* http://www.stderr.org/doc/avr-libc/avr-libc-user-manual/group__avr__boot.html   */
+/*           by D. Brink, dcb AT opengauge.org            */
+/*  with updated compiler options by S. Abeyasinghe spoink AT gmail.com */
 /* 20070626: hacked for Arduino Diecimila (which auto-    */
 /*           resets when a USB connection is made to it)  */
 /*           by D. Mellis                                 */
@@ -66,10 +71,11 @@
 #include <avr/interrupt.h>
 #include <avr/wdt.h>
 #include <util/delay.h>
+#include <avr/boot.h>
 
 /* the current avr-libc eeprom functions do not support the ATmega168 */
 /* own eeprom write/read functions are used instead */
-#if !defined(__AVR_ATmega168__) || !defined(__AVR_ATmega328P__)
+#if !defined(__AVR_ATmega168__) && !defined(__AVR_ATmega328P__)
 #include <avr/eeprom.h>
 #endif
 
@@ -213,15 +219,27 @@
 #endif
 
 
+/* define null response */
+#define NULL_RESPONSE 0xFA
+
+
+// ladyada mods - start after programming done & on power cycle!
+#if defined(__AVR_ATmega168__) || defined(__AVR_ATmega328P__)
+#define QUICKSTART_MODS 1
+#endif
+
+
 /* function prototypes */
 void putch(char);
 char getch(void);
-void getNch(uint8_t);
+static inline void getNch(uint8_t); // static inline saves a few bytes
 void byte_response(uint8_t);
-void nothing_response(void);
+static inline void nothing_response(void);
 char gethex(void);
 void puthex(char);
 void flash_led(uint8_t);
+static inline void boot_program_page(uint32_t, uint8_t*);
+int main (void) __attribute__ ((naked,section (".init9")));
 
 /* some variables */
 union address_union {
@@ -234,9 +252,9 @@
 	uint8_t  byte[2];
 } length;
 
-struct flags_struct {
-	unsigned eeprom : 1;
-	unsigned rampz  : 1;
+struct flags_struct { // changed from a packed struct to save some bytes
+	uint8_t eeprom;
+	uint8_t rampz;
 } flags;
 
 uint8_t buff[256];
@@ -248,28 +266,55 @@
 uint8_t bootuart = 0;
 
 uint8_t error_count = 0;
+uint8_t firstchar = 0;
+unsigned int pagenumber = 0;
 
 void (*app_start)(void) = 0x0000;
 
 
+void boot_program_page (uint32_t page, uint8_t *buf)
+{
+	uint16_t i;
+
+	eeprom_busy_wait ();
+
+	boot_page_erase (page);
+	boot_spm_busy_wait ();      // Wait until the memory is erased.
+
+	for (i=0; i<SPM_PAGESIZE; i+=2)
+	{
+		// Set up little-endian word.
+
+		uint16_t w = *buf++;
+		w += (*buf++) << 8;
+
+		boot_page_fill (page + i, w);
+	}
+
+	boot_page_write (page);     // Store buffer in flash page.
+	boot_spm_busy_wait();       // Wait until the memory is written.
+
+	// Reenable RWW-section again. We need this if we want to jump back
+	// to the application after bootloading.
+
+	boot_rww_enable ();
+}
+
+
 /* main program starts here */
 int main(void)
 {
+	asm volatile ( "clr __zero_reg__" );
+	SP=RAMEND;
+
 	uint8_t ch,ch2;
 	uint16_t w;
 
-#ifdef WATCHDOG_MODS
-	ch = MCUSR;
-	MCUSR = 0;
-
+#ifdef QUICKSTART_MODS
+	//ch = MCUSR;
+	//MCUSR = 0;
 	WDTCSR |= _BV(WDCE) | _BV(WDE);
 	WDTCSR = 0;
-
-	// Check if the WDT was used to reset, in which case we dont bootload and skip straight to the code. woot.
-	if (! (ch &  _BV(EXTRF))) // if its a not an external reset...
-		app_start();  // skip bootloader
-#else
-	asm volatile("nop\n\t");
 #endif
 
 	/* set pin direction for bootloader pin and enable pullup */
@@ -346,14 +391,14 @@
 	UCSRB = _BV(TXEN)|_BV(RXEN);	
 #elif defined(__AVR_ATmega168__) || defined(__AVR_ATmega328P__)
 	UBRR0L = (uint8_t)(F_CPU/(BAUD_RATE*16L)-1);
-	UBRR0H = (F_CPU/(BAUD_RATE*16L)-1) >> 8;
+	//UBRR0H = (F_CPU/(BAUD_RATE*16L)-1) >> 8;     // always 0 for 19200 baud or higher
 	UCSR0B = (1<<RXEN0) | (1<<TXEN0);
 	UCSR0C = (1<<UCSZ00) | (1<<UCSZ01);
 
 	/* Enable internal pull-up resistor on pin D0 (RX), in order
 	to supress line noise that prevents the bootloader from
 	timing out (DAM: 20070509) */
-	DDRD &= ~_BV(PIND0);
+	//DDRD &= ~_BV(PIND0);        // already 0 by default
 	PORTD |= _BV(PIND0);
 #elif defined __AVR_ATmega8__
 	/* m8 */
@@ -379,7 +424,7 @@
 	// 4x for UART0, 5x for UART1
 	flash_led(NUM_LED_FLASHES + bootuart);
 #else
-	flash_led(NUM_LED_FLASHES);
+	flash_led(NUM_LED_FLASHES * 2);
 #endif
 
 	/* 20050803: by DojoCorp, this is one of the parts provoking the
@@ -397,7 +442,10 @@
 
 	/* Hello is anyone home ? */ 
 	if(ch=='0') {
+		firstchar = 1;       // we got an appropriate bootloader instr.
 		nothing_response();
+	} else if (firstchar == 0) {
+		app_start();
 	}
 
 
@@ -465,10 +513,10 @@
 	/* Leave programming mode  */
 	else if(ch=='Q') {
 		nothing_response();
-#ifdef WATCHDOG_MODS
-		// autoreset via watchdog (sneaky!)
-		WDTCSR = _BV(WDE);
-		while (1); // 16 ms
+#ifdef QUICKSTART_MODS
+		flash_led(2);
+		// start immediately -ada
+		app_start();
 #endif
 	}
 
@@ -529,119 +577,8 @@
 				}			
 			}
 			else {					        //Write to FLASH one page at a time
-				if (address.byte[1]>127) address_high = 0x01;	//Only possible with m128, m256 will need 3rd address byte. FIXME
-				else address_high = 0x00;
-#ifdef __AVR_ATmega128__
-				RAMPZ = address_high;
-#endif
-				address.word = address.word << 1;	        //address * 2 -> byte location
-				/* if ((length.byte[0] & 0x01) == 0x01) length.word++;	//Even up an odd number of bytes */
-				if ((length.byte[0] & 0x01)) length.word++;	//Even up an odd number of bytes
-				cli();					//Disable interrupts, just to be sure
-				// HACKME: EEPE used to be EEWE
-				while(bit_is_set(EECR,EEPE));			//Wait for previous EEPROM writes to complete
-				asm volatile(
-					 "clr	r17		\n\t"	//page_word_count
-					 "lds	r30,address	\n\t"	//Address of FLASH location (in bytes)
-					 "lds	r31,address+1	\n\t"
-					 "ldi	r28,lo8(buff)	\n\t"	//Start of buffer array in RAM
-					 "ldi	r29,hi8(buff)	\n\t"
-					 "lds	r24,length	\n\t"	//Length of data to be written (in bytes)
-					 "lds	r25,length+1	\n\t"
-					 "length_loop:		\n\t"	//Main loop, repeat for number of words in block							 							 
-					 "cpi	r17,0x00	\n\t"	//If page_word_count=0 then erase page
-					 "brne	no_page_erase	\n\t"						 
-					 "wait_spm1:		\n\t"
-					 "lds	r16,%0		\n\t"	//Wait for previous spm to complete
-					 "andi	r16,1           \n\t"
-					 "cpi	r16,1           \n\t"
-					 "breq	wait_spm1       \n\t"
-					 "ldi	r16,0x03	\n\t"	//Erase page pointed to by Z
-					 "sts	%0,r16		\n\t"
-					 "spm			\n\t"							 
-#ifdef __AVR_ATmega163__
-					 ".word 0xFFFF		\n\t"
-					 "nop			\n\t"
-#endif
-					 "wait_spm2:		\n\t"
-					 "lds	r16,%0		\n\t"	//Wait for previous spm to complete
-					 "andi	r16,1           \n\t"
-					 "cpi	r16,1           \n\t"
-					 "breq	wait_spm2       \n\t"									 
-
-					 "ldi	r16,0x11	\n\t"	//Re-enable RWW section
-					 "sts	%0,r16		\n\t"						 			 
-					 "spm			\n\t"
-#ifdef __AVR_ATmega163__
-					 ".word 0xFFFF		\n\t"
-					 "nop			\n\t"
-#endif
-					 "no_page_erase:		\n\t"							 
-					 "ld	r0,Y+		\n\t"	//Write 2 bytes into page buffer
-					 "ld	r1,Y+		\n\t"							 
-								 
-					 "wait_spm3:		\n\t"
-					 "lds	r16,%0		\n\t"	//Wait for previous spm to complete
-					 "andi	r16,1           \n\t"
-					 "cpi	r16,1           \n\t"
-					 "breq	wait_spm3       \n\t"
-					 "ldi	r16,0x01	\n\t"	//Load r0,r1 into FLASH page buffer
-					 "sts	%0,r16		\n\t"
-					 "spm			\n\t"
-								 
-					 "inc	r17		\n\t"	//page_word_count++
-					 "cpi r17,%1	        \n\t"
-					 "brlo	same_page	\n\t"	//Still same page in FLASH
-					 "write_page:		\n\t"
-					 "clr	r17		\n\t"	//New page, write current one first
-					 "wait_spm4:		\n\t"
-					 "lds	r16,%0		\n\t"	//Wait for previous spm to complete
-					 "andi	r16,1           \n\t"
-					 "cpi	r16,1           \n\t"
-					 "breq	wait_spm4       \n\t"
-#ifdef __AVR_ATmega163__
-					 "andi	r30,0x80	\n\t"	// m163 requires Z6:Z1 to be zero during page write
-#endif							 							 
-					 "ldi	r16,0x05	\n\t"	//Write page pointed to by Z
-					 "sts	%0,r16		\n\t"
-					 "spm			\n\t"
-#ifdef __AVR_ATmega163__
-					 ".word 0xFFFF		\n\t"
-					 "nop			\n\t"
-					 "ori	r30,0x7E	\n\t"	// recover Z6:Z1 state after page write (had to be zero during write)
-#endif
-					 "wait_spm5:		\n\t"
-					 "lds	r16,%0		\n\t"	//Wait for previous spm to complete
-					 "andi	r16,1           \n\t"
-					 "cpi	r16,1           \n\t"
-					 "breq	wait_spm5       \n\t"									 
-					 "ldi	r16,0x11	\n\t"	//Re-enable RWW section
-					 "sts	%0,r16		\n\t"						 			 
-					 "spm			\n\t"					 		 
-#ifdef __AVR_ATmega163__
-					 ".word 0xFFFF		\n\t"
-					 "nop			\n\t"
-#endif
-					 "same_page:		\n\t"							 
-					 "adiw	r30,2		\n\t"	//Next word in FLASH
-					 "sbiw	r24,2		\n\t"	//length-2
-					 "breq	final_write	\n\t"	//Finished
-					 "rjmp	length_loop	\n\t"
-					 "final_write:		\n\t"
-					 "cpi	r17,0		\n\t"
-					 "breq	block_done	\n\t"
-					 "adiw	r24,2		\n\t"	//length+2, fool above check on length after short page write
-					 "rjmp	write_page	\n\t"
-					 "block_done:		\n\t"
-					 "clr	__zero_reg__	\n\t"	//restore zero register
-#if defined(__AVR_ATmega168__)  || defined(__AVR_ATmega328P__)
-					 : "=m" (SPMCSR) : "M" (PAGE_SIZE) : "r0","r16","r17","r24","r25","r28","r29","r30","r31"
-#else
-					 : "=m" (SPMCR) : "M" (PAGE_SIZE) : "r0","r16","r17","r24","r25","r28","r29","r30","r31"
-#endif
-					 );
-				/* Should really add a wait for RWW section to be enabled, don't actually need it since we never */
-				/* exit the bootloader without a power cycle anyhow */
+				boot_program_page(pagenumber,(uint8_t *)&buff);
+				pagenumber+=SPM_PAGESIZE;
 			}
 			putch(0x14);
 			putch(0x10);
@@ -661,8 +598,8 @@
 		else flags.rampz = 0;
 #endif
 		address.word = address.word << 1;	        // address * 2 -> byte location
+		flags.eeprom = 0;
 		if (getch() == 'E') flags.eeprom = 1;
-		else flags.eeprom = 0;
 		if (getch() == ' ') {		                // Command terminator
 			putch(0x14);
 			for (w=0;w < length.word;w++) {		        // Can handle odd and even lengths okay
@@ -679,10 +616,12 @@
 				}
 				else {
 
-					if (!flags.rampz) putch(pgm_read_byte_near(address.word));
 #if defined __AVR_ATmega128__
+					if (!flags.rampz) putch(pgm_read_byte_near(address.word));
 					else putch(pgm_read_byte_far(address.word + 0x10000));
 					// Hmmmm, yuck  FIXME when m256 arrvies
+#else
+					putch(pgm_read_byte_near(address.word));
 #endif
 					address.word++;
 				}
@@ -905,16 +844,23 @@
 	}
 	return 0;
 #elif defined(__AVR_ATmega168__)  || defined(__AVR_ATmega328P__)
-	uint32_t count = 0;
+	uint16_t count = 0;
+	
+	LED_PORT &= ~_BV(LED);
+	
 	while(!(UCSR0A & _BV(RXC0))){
 		/* 20060803 DojoCorp:: Addon coming from the previous Bootloader*/               
 		/* HACKME:: here is a good place to count times*/
 		count++;
-		if (count > MAX_TIME_COUNT)
+		_delay_us(384);
+		if (count > (MAX_TIME_COUNT>>8))
 			app_start();
 	}
+	
+	LED_PORT |= _BV(LED);
+	
 	return UDR0;
-#else
+#else 
 	/* m8,16,32,169,8515,8535,163 */
 	uint32_t count = 0;
 	while(!(UCSRA & _BV(RXC))){
@@ -958,7 +904,8 @@
 {
 	if (getch() == ' ') {
 		putch(0x14);
-		putch(val);
+		if (val != NULL_RESPONSE)
+			putch(val);
 		putch(0x10);
 	} else {
 		if (++error_count == MAX_ERROR_COUNT)
@@ -969,21 +916,14 @@
 
 void nothing_response(void)
 {
-	if (getch() == ' ') {
-		putch(0x14);
-		putch(0x10);
-	} else {
-		if (++error_count == MAX_ERROR_COUNT)
-			app_start();
-	}
+	byte_response(NULL_RESPONSE);
 }
 
+
 void flash_led(uint8_t count)
 {
 	while (count--) {
-		LED_PORT |= _BV(LED);
-		_delay_ms(100);
-		LED_PORT &= ~_BV(LED);
+		LED_PORT ^= _BV(LED);       // toggle!
 		_delay_ms(100);
 	}
 }