
#include "macros.inc"

; Register aliases for the three arguments of memset, one alias per byte
; (hi = high byte, lo = low byte of each 16-bit argument).
; NOTE(review): the register numbers presumably follow the avr-gcc calling
; convention for (void *, int, size_t) arguments - confirm against the ABI.
#define dest_hi r25
#define dest_lo r24
; The high byte of val is never read by the routine (only the low byte is
; stored), so its alias is intentionally left commented out.
; #define val_hi r23
#define val_lo r22
#define len_hi r21
#define len_lo r20

; void *memset(void *dest, int val, size_t len)
;
; Fill len bytes starting at dest with the low byte of val.
;
; In:    dest (two bytes, hi:lo) = start address of the area to fill
;        val (low byte only)     = fill byte; the high byte is ignored
;        len (two bytes, hi:lo)  = byte count, handled as unsigned 16-bit
; Out:   the dest registers are never written, so dest is returned as is
; Clobb: the X pointer (post-incremented by every store), both len
;        registers (used as the down-counter) and the status flags
;
; Two variants are selected at preprocessing time through OPTIMIZE_SPEED:
; a two-bytes-per-iteration loop (faster) or a one-byte-per-iteration loop
; (smaller).  Both share the final brcc/ret and rely on the borrow (carry)
; produced by the 16-bit subtraction to detect that the counter ran out.
;
; NOTE(review): the word and cycle counts quoted below depend on how the
; LOAD_X macro from macros.inc expands - confirm if that macro changes.
; NOTE(review): the .memset_* labels do not use the .L prefix, so they are
; presumably kept as local symbols in the object file - confirm if that
; matters for the build.

	.text
	.global	_U(memset)
	.type	_U(memset), @function
_U(memset):
	; X = dest; dest itself stays untouched for the return value
	LOAD_X(dest_lo, dest_hi)
#if OPTIMIZE_SPEED
; 11 words, (12 + len * 4 - (len & 1)) cycles
	; Dispatch on bit 0 of len: an odd count enters the loop in the
	; middle so that a single byte is stored before the pairs.
	sbrs	len_lo, 0		; odd len: skip the next instruction
	rjmp	.memset_start		; even len: go straight to the counter test
	rjmp	.memset_odd		; odd len: store one byte first
.memset_loop:
	st	X+, val_lo		; first byte of the pair
.memset_odd:
	st	X+, val_lo		; second byte of the pair, or the lone odd byte
.memset_start:
	; len -= 2 as a 16-bit subtraction.  A borrow means fewer than two
	; bytes were left: either len was 0 (even case) or only the odd byte
	; that has already been stored remained (counter was 1).
	subi	len_lo, lo8(2)
	sbci	len_hi, hi8(2)
#else
; 8 words, (11 + len * 6) cycles
	rjmp	.memset_start		; test the counter first so len == 0 stores nothing
.memset_loop:
	st	X+, val_lo		; store one byte and advance X
.memset_start:
	; len -= 1 as a 16-bit subtraction; a borrow means len was already 0
	subi	len_lo, lo8(1)
	sbci	len_hi, hi8(1)
#endif
	; carry clear = no borrow = at least one more iteration to run
	brcc	.memset_loop
; return dest (unchanged)
	ret
.memset_end:
	; symbol size = distance from the entry label to the end label
	.size	_U(memset), .memset_end - _U(memset)

