;
; Runtime code for scc65.
; assemble with atari.m65 and global.m65

	.globl	__start		; program lo memory boundary
__start:
;
; this is the first piece of actual generated code...
;
	jmp	start		; so we don't need a start vector
;
; routines for inc/dec'ing sp
;

;	.globl	decsp2
;decsp2:				; dec sp by 2
;	pha			; save a
;	lda	sp		; get sp lo
;	sec
;	sbc	#2		; sub 2
;	sta	sp
;	lda	sp+1		; get sp hi
;	sbc	#0		; sub carry
;	sta	sp+1
;	pla			; get a back
;	rts

	.globl	addysp		; add Y to SP
addysp:
	pha			; save A
	clc
	tya			; get the value
	adc	sp		; add lo byte
	sta	sp		; put it back
	bcc	addysp_1	; if no carry, we're done
	inc	sp+1		; inc hi byte
addysp_1:
	pla			; get A back
;	rts			; done
	jmp	tstax		; return condition codes

;	.globl	incsp1
;incsp1:
;	ldy	#1
;	bne	addysp

	.globl	incsp2		; inc sp by 2
incsp2:
; do this by hand, cause it gets used a lot
;	ldy	#2
;	bne	addysp
	inc	sp
	bne	*+4
	inc	sp+1
; might as well do incsp1 here, as we have the code...
	.globl	incsp1
incsp1:
	inc	sp
	bne	*+4
	inc	sp+1
	rts
		
	.globl	incsp3		; inc sp by 3
incsp3:
	ldy	#3
	bne	addysp

	.globl	incsp4		; inc sp by 4
incsp4:
	ldy	#4
	bne	addysp

	.globl	incsp5		; inc sp by 5
incsp5:
	ldy	#5
	bne	addysp

	.globl	incsp6		; inc sp by 6
incsp6:
	ldy	#6
	bne	addysp

	.globl	incsp7		; inc sp by 7
incsp7:
	ldy	#7
	bne	addysp

	.globl	incsp8		; inc sp by 8
incsp8:
	ldy	#8
	bne	addysp

	.globl	subysp		; sub Y from SP
subysp:
	pha			; save A
	sty	tmp1		; save the value
	lda	sp		; get lo byte
	sec
	sbc	tmp1		; sub y val
	sta	sp		; put it back
; wrong!
;	bcs	subysp_1	; if carry, we're done
;	dec	sp+1		; dec hi byte
	lda	sp+1
	sbc	#0
	sta	sp+1
subysp_1:
	pla			; get A back
	rts			; done

	.globl	decsp1
decsp1:
	ldy	#1
	bne	subysp

	.globl	decsp2
decsp2:
;
; do this one by hand, cause it gets used a lot
;
;	ldy	#2
;	bne	subysp
	pha
	sec
	lda	sp
	sbc	#2
	sta	sp
	lda	sp+1
	sbc	#0
	sta	sp+1
	pla
	rts

	.globl	decsp3
decsp3:
	ldy	#3
	bne	subysp

	.globl	decsp4
decsp4:
	ldy	#4
	bne	subysp

	.globl	decsp5
decsp5:
	ldy	#5
	bne	subysp

	.globl	decsp6
decsp6:
	ldy	#6
	bne	subysp

	.globl	decsp7
decsp7:
	ldy	#7
	bne	subysp

	.globl	decsp8
decsp8:
	ldy	#8
	bne	subysp

;
; ops for loading/indexing stack slots
;
	.globl	ldaxysp		; load AX from SP@(Y)
ldaxysp:
	lda	(sp),y		; get lo byte
	pha
	iny
	lda	(sp),y		; get hi byte
	tax			; into x
	pla			; get lo byte back
;	rts
	jmp	tstax		; return cc

;	.globl	ldsrysp		; load SREG from SP@(Y)
;ldsrysp:
;	pha			; save A
;	lda	(sp),y		; get lo byte
;	sta	sreg		; store it
;	iny
;	lda	(sp),y		; get hi byte
;	sta	sreg+1		; store it
;	pla			; get A back
;	jsr	ldaxysp		; just do the things we optimized
;	jsr	swapsreg	;  out.
;	rts

	.globl	ldaysp		; load A from SP@(Y)
ldaysp:				; zzz maybe do this on line?
	ldx	#0
	lda	(sp),y
	bpl	ldaysp_1	; pos...
	ldx	#$FF
ldaysp_1:
	rts

	.globl	ldauysp		; load A unsigned from SP@(Y)
ldauysp:			; zzz maybe do this on line?
	ldx	#0
	lda	(sp),y
	rts

	.globl	locysp		; compute location of SP@(Y) in AX
locysp:
	ldx	sp+1		; get hi byte
	clc			; set up for add
	tya			; get offset
	adc	sp		; compute lo byte
	bcc	locysp_1
	inx			; add in carry
locysp_1:
	rts

	.globl	plocysp		; push addr y(sp)
plocysp:
	jsr	locysp
	jmp	pushax
	
;
; routines for inc/dec'ing AX
;
	.globl	incax8
incax8:
	ldy	#8
	jmp	indexax
	.globl	incax7
incax7:
	ldy	#7
	jmp	indexax
	.globl	incax6
incax6:
	ldy	#6
	jmp	indexax
	.globl	incax5
incax5:
	ldy	#5
	jmp	indexax
	.globl	incax4
incax4:
	ldy	#4
	jmp	indexax
	.globl	incax3
incax3:
	ldy	#7
	jmp	indexax

	.globl	incax2		; inc AX by 2
incax2:
	jsr	incax1		; inc it once, and fall thru

	.globl	incax1		; inc AX by 1
incax1:
	tay			; use Y as a temp
	iny
	tya
	bne	incax1_1	; if not zero, we're done
	inx			; inc top half
incax1_1:
;	rts			; done
	jmp	tstax		; return cond codes

;	.globl	incaxi1		; load AX indirect, and increment by 1
;incaxi1:
;	jsr	ldaxi
;	jmp	incax1

;	.globl	incaxi2		; load AX indirect, and increment by 2
;incaxi2:
;	jsr	ldaxi
;	jmp	incax2

;	.globl	inci1spp	; load AX indirect, inc 1, store thru SP@+
;inci1spp:
;	jsr	incaxi1
;	jmp	staxspp

;	.globl	inci2spp	; load AX indirect, inc 2, store thru SP@+
;inci2spp:
;	jsr	incaxi2
;	jmp	staxspp
	
;
	.globl	decax2		; dec AX by 2
decax2:
	jsr	decax1		; dec it once, and fall thru

	.globl	decax1		; dec AX by 1
decax1:
	tay			; use Y as a temp
	dey
	tya
	cmp	#$FF		; wrap?
	bne	decax1_1	; if not -1, we're done
	dex			; dec top half
decax1_1:
;	rts			; done
	jmp	tstax		; return cond codes

;	.globl	decaxi1		; load AX indirect, and decrement by 1
;decaxi1:
;	jsr	ldaxi
;	jmp	decax1

;	.globl	decaxi2		; load AX indirect, and decrement by 2
;decaxi2:
;	jsr	ldaxi
;	jmp	decax2

;	.globl	deci1spp	; load AX indirect, dec 1, store thru SP@+
;deci1spp:
;	jsr	decaxi1
;	jmp	staxspp

;	.globl	deci2spp	; load AX indirect, dec 2, store thru SP@+
;deci2spp:
;	jsr	decaxi2
;	jmp	staxspp
	
;
; push/pop things on stack
;
	.globl	push0
push0:
	lda	#0
	tax
; ...
	.globl	pushax		; push AX
pushax:
	jsr	decsp2		; dec sp
	ldy	#0		; get index
	sta	(sp),y		; store lo byte
	pha			; save it
	txa			; get hi byte
	iny			; bump idx
	sta	(sp),y		; store hi byte
	pla			; get A back
	rts			; done

	.globl	popax		; pop stack into AX
popax:
	ldy	#0
	lda	(sp),y		; get lo byte
	pha			; stash it
	iny
	lda	(sp),y		; get hi byte
	tax			; into x
	pla			; get lo byte back
	jmp	incsp2		; bump stack and return
	
	.globl	popsreg		; pop stack into SREG
popsreg:
	pha			; save A
	ldy	#0
	lda	(sp),y		; get lo byte
	sta	sreg		; store it
	iny
	lda	(sp),y		; get hi byte
	sta	sreg+1		; store it
	pla			; get A back
	jmp	incsp2		; bump stack and return
	
	.globl	pushwysp	; push a word from SP@(Y)
pushwysp:
;	pha			; save A
;	txa
;	pha			; save X
	lda	(sp),y		; get lo byte
	pha			; Save it
	iny			; bump idx
	lda	(sp),y		; get hi byte
	tax			; into X
	pla			; get back lo byte
	jsr	pushax		; push that
;	pla			; get original X
;	tax			; into X
;	pla			; get back original A
	rts

	.globl	pushbysp	; push a byte from SP@(Y)
pushbysp:
	lda	(sp),y		; get lo byte
	ldx	#0
	jsr	pushax		; push that
	rts

;
; Various kinds of store operators
;
	.globl	staxspidx	; store AX at SP@@(Y)
staxspidx:
	jsr	staspic		; use common part
	pha
	iny
	lda	tmp2
	sta	(ptr1),y
	tax
	pla
	jmp	tstax
	.globl	staspidx
staspidx:
	jsr	staspic		; use common part
	ldx	tmp2
	jmp	tstax
staspic:
	sta	tmp1
	stx	tmp2
	sty	tmp3
	jsr	popax		; get the pointer
	sta	ptr1
	stx	ptr1+1
	ldy	tmp3
	lda	tmp1
	sta	(ptr1),y
	rts	

	.globl	staxysp
staxysp:
	sta	(sp),y
	pha
	txa
	iny
	sta	(sp),y
	pla
	jmp	tstax
	.globl	staysp
staysp:
	sta	(sp),y
	jmp	tstax

;	.globl	stasreg		; store A thru SREG
;stasreg:
;	ldy	#0
;	sta	(sreg),y
;	rts
;	.globl	staxsreg	; store AX thru SREG
;staxsreg:
;	ldy	#0
;	sta	(sreg),y	; store lo byte
;	pha			; save A
;	iny			; bump idx
;	txa			; get hi byte
;	sta	(sreg),y	; store it
;	pla			; get A back
;;	rts			; done
;	jmp	tstax		; return cc

	.globl	staxspp		; store AX thru (sp), and pop
staxspp:
	pha			; save A
	txa
	pha			; save X
	jsr	popax		; pop addr into AX
	sta	ptr1		; save into a pointer
	stx	ptr1+1
	ldy	#1
	pla			; get hi byte back
	sta	(ptr1),y	; store it
	tax			; back into X
	pla			; get lo byte
	dey
	sta	(ptr1),y	; store it
	jmp	tstax		; return cc

	.globl	staspp		; store A thru (sp), and pop
staspp:
	pha			; save A
	txa
	pha			; save X
	jsr	popax		; pop addr into AX
	sta	ptr1		; save into a pointer
	stx	ptr1+1
	pla			; get X
	tax
	pla			; get lo byte
	ldy	#0
	sta	(ptr1),y	; store it
	jmp	tstax		; return cc

	
;
; Operations on AX
;
	.globl	aslax		; shift AX left 1
aslax:
	asl	A
	pha
	txa
	rol	A
	tax
	pla
	rts

;	.globl	asrax		; shift AX right 1
;asrax:
;	pha
;	txa
;	pha
;	rol	A		; get carry set right
;	pla
;	ror	A		; do the real rotate
;	tax
;	pla
;	ror	A
;	rts

	.globl	ldaxi		; load AX indirect thru AX
ldaxi:
	sta	tmpptr
	stx	tmpptr+1
	ldy	#1
	lda	(tmpptr),y
	tax
	dey
	lda	(tmpptr),y
;	rts
	jmp	tstax		; return cond codes

;
; load A indirect thru AX
;
	.globl	ldai
ldai:
	sta	tmpptr		; set pointer
	stx	tmpptr+1
	ldy	#0		; set idx
	lda	(tmpptr),y	; get byte
	bpl	ldai_1		; pos...
	ldx	#$FF
	rts
ldai_1:
	ldx	#0
;	rts
	jmp	tstax		; return cond codes
;
;	ldaui: load A (unsigned) from (AX)
;
	.globl	ldaui
ldaui:
	sta	tmpptr		; set pointer
	stx	tmpptr+1
	ldx	#0		; set idx
	lda	(tmpptr,x)	; get byte
	rts

	.globl	negax		; negate ax
negax:
	pha			; save A
	txa			; get X
	eor	#$FF		; invert
	tax			; back to X
	pla			; get A back
	eor	#$FF		; invert
	clc
	adc	#1		; inc
	bcc	negax_1
	inx			; bump X
negax_1:
	rts

	.globl	lnegax		; logical complement AX
lnegax:
	stx	tmp3
	ora	tmp3
	beq	lneg1		; it's zero, so return 1
	lda	#0
	tax
	rts
lneg1:
	ldx	#0
	lda	#1
	rts

	.globl	complax		; one's complement AX
complax:
	eor	#$FF		; Not A
	pha
	txa
	eor 	#$FF		; Not X
	tax
	pla
	rts

	.globl	indexax		; index in Y, add Y to AX
indexax:
	sta	tmp1
	tya
	clc
	adc	tmp1
	bcc	*+3
	inx
	rts

	.globl	plocidx		; push location of AX@(Y)
plocidx:
	jsr	indexax
	jmp	pushax

;
; test (AX)
;
;	.globl	tstaxi
;tstaxi:
;	jsr	ldaxi
				; and fall thru...
;
; test AX for nonzero-ness.  return result in CC
;
	.globl	tstax
tstax:
;	cmp	#0
;	beq	tstax_0
;	bpl	tstax_p
;				; A was negative, try X
;tstax_0:			; A was 0, try X
;	cpx	#0
;tstax_9:
;	rts
;tstax_p:			; A was positive
;	cpx	#0		; try X
;	bpl	tstax

	cpx	#0		; test X
	beq	tstax_x0	; 0, go see what A has
	rts
tstax_x0:			; X was 0, try A
	cmp	#0		; test A
	bpl	tstax_9		; pos or zero is ok, just return
	ldy	#1		; force 'positive'
tstax_9:
	rts			; done, just return


	.globl	ldaxidx		; load AX from (AX)Y
ldaxidx:
	jsr	indexax		; compute address
	jmp	ldaxi		; load indirect

	.globl	ldaidx		; load A from (AX)Y
ldaidx:
	jsr	indexax		; compute address
	jmp	ldai		; load indirect

	.globl	ldauidx		; load A unsigned from (AX)Y
ldauidx:
	jsr	indexax		; compute address
	jmp	ldaui		; load indirect

	.globl	pushwidx	; push word at (AX)Y
pushwidx:
	jsr	indexax		; index
	jsr	ldaxi
	jmp	pushax

	.globl	pushbidx	; push byte at (AX)Y
pushbidx:
	jsr	indexax		; index
	jsr	ldai
	jmp	pushax

;
; operations on SREG
;
	.globl	asltos
asltos:
	jsr	popsreg		; for optimized code
	.globl	aslsreg		; shift SREG left AX times, result in AX
aslsreg:
	cpx	#0		; X nonzero?
	bne	aslsreg_0	; no, just return 0
	tay			; use Y as counter
	lda	sreg+1
	sta	tmp1		; use A and tmp1 as register
	lda	sreg
	cpy	#0		; shift count 0?
	beq	aslsreg_9	; done shifting
aslsreg_1:
	asl	A		; shift A
	rol	tmp1
	dey			; dec counter
	bne	aslsreg_1
aslsreg_9:
	ldx	tmp1		; get hi byte
;	rts			; done
	jmp	tstax
aslsreg_0:
	lda	#0
	tax
;	rts		
	jmp	tstax		; return status for optimized code

	.globl	asrtos
asrtos:
	jsr	popsreg		; for optimized code
	.globl	asrsreg		; shift SREG right by AX, result in AX
asrsreg:
	cpx	#0		; X nonzero?
	bne	aslsreg_0	; no, just return 0
;	tax			; use X as counter
;	php			; save Z flag, for zero counter
;	lda	sreg
;	sta	tmp1		; use tmp1 and A as register
;	lda	sreg+1
;	and	#$80		; keep top bit
;	sta	tmp2
;	plp			; get Z flag back
;	beq	asrsreg_9	; done shifting
;asrsreg_1:
;	lsr	A		; shift hi byte
;	ror	tmp1		; and lo byte
;	ora	tmp2		; keep top bit
;	dex			; dec counter
;	bne	asrsreg_1
;asrsreg_9:
;	tax			; put hi byte in X
;	lda	tmp1		; and get lo byte
;	rts

;	this isn't right... test after loading AX...
;	cmp	#0		; shift count 0?
;	beq	aslsreg_0	; yes, return 0
	tay			; use Y as shift counter
	lda	sreg
	ldx	sreg+1		; get the value into AX
	cpy	#0
	beq	asrsreg_9	; zero, return now
;	cpx	#0		; test AX for minus-ness
;	bpl	*+5
;	jsr	negax
	stx	tmp1		; leave hi byte in tmp1
asrsreg_1:
	ldx	tmp1		; get hi byte,
	cpx	#$80		;  compare, to set carry bit if neg
	ror	tmp1		; shift hi byte, preserving hi bit
	ror	A		; shift lo byte
	dey			; dec shift count
	bne	asrsreg_1	; 0? done
	ldx	tmp1		; get hi byte
;	ldy	sreg+1		; original value negative?
;	bpl	*+5
;	jsr	negax		; negate result
asrsreg_9:
	jmp	tstax		; return status for optimized code

	.globl	addtos
addtos:
	jsr	popsreg		; for optimized code
	.globl	addsreg		; add SREG to AX
addsreg:
	clc
	adc	sreg		; add lo byte
	pha
	txa
	adc	sreg+1		; add hi byte
	tax
	pla
;	rts
	jmp	tstax		; return status for optimized code

;	.globl	addimm		; add immediate following word
;addimm:
;	sta	tmp1		; save A
;	stx	tmp1+1		; and X
;	pla			; get return addr
;	sta	ptr1
;	pla
;	sta	ptr1+1
;	ldy	#1
;	lda	(ptr1),y	; get lo byte
;	clc
;	adc	tmp1		; add to old A value
;	sta	tmp1		; put it back
;	iny
;	lda	(ptr1),y	; get hi byte
;	adc	tmp1+1
;	sta	tmp1+1
;	clc			; adjust return addr
;	lda	ptr1
;	adc	#3		; so we can jump thru it
;	sta	ptr1
;	bcc	*+4
;	inc	ptr1+1
;	lda	tmp1		; get A back
;	ldx	tmp1+1
;	jmp	(ptr1)

	.globl	subtos
subtos:
	jsr	popsreg		; for optimized code
	.globl	subsreg		; sub AX from SREG, result in AX
subsreg:
	sta	tmp1
	stx	tmp2		; save AX
	lda	sreg
	sec
	sbc	tmp1		; sub lo byte
	pha
	lda	sreg+1
	sbc	tmp2		; sub hi byte
	tax
	pla
;	rts
	jmp	tstax		; return status for optimized code

;	.globl	swapsreg	; swap AX and SREG
;swapsreg:
;	sta	tmp1
;	stx	tmp2
;	lda	sreg		; get lo byte
;	ldx	sreg+1		; get hi byte
;	ldy	tmp1
;	sty	sreg
;	ldy	tmp2
;	sty	sreg+1
;	rts

	.globl	ortos
ortos:
	jsr	popsreg		; for optimized code
	.globl	orsreg		; OR sreg into AX
orsreg:
	ora	sreg
	pha
	txa
	ora	sreg+1
	tax
	pla
;	rts
	jmp	tstax		; return status for optimized code

	.globl	xortos
xortos:
	jsr	popsreg		; for optimized code
	.globl	xorsreg		; XOR sreg into AX
xorsreg:
	eor	sreg
	pha
	txa
	eor	sreg+1
	tax
	pla
;	rts
	jmp	tstax		; return status for optimized code

	.globl	andtos
andtos:
	jsr	popsreg		; for optimized code
	.globl	andsreg		; AND sreg into AX
andsreg:
	and	sreg
	pha
	txa
	and	sreg+1
	tax
	pla
;	rts
	jmp	tstax		; return status for optimized code

;	.globl	ldsr		; load sreg from following word
;ldsr:
;	sta	tmp1		; save A
;	pla			; get return addr
;	sta	ptr1
;	pla
;	sta	ptr1+1
;	ldy	#1
;	lda	(ptr1),y	; get lo byte
;	sta	sreg
;	iny
;	lda	(ptr1),y	; get hi byte
;	sta	sreg+1
;	clc			; adjust return addr
;	lda	ptr1
;	adc	#3		; so we can jump thru it
;	sta	ptr1
;	bcc	*+4
;	inc	ptr1+1
;	lda	tmp1		; get A back
;	jmp	(ptr1)

;
; comparisons
;
	.globl	axzerop
axzerop:
	cmp	#0
	bne	return1
	cpx	#0
	bne	return1
	beq	return0

	.globl	toseqax
toseqax:
	jsr	popsreg		; for optimized code
	.globl	sregeqax	; SREG == AX
sregeqax:
	cmp	sreg		; A == sreg lo?
	bne	return0		; nope, return 0
	cpx	sreg+1		; X == sreg hi?
	bne	return0
	beq	return1

	.globl	tosneax
tosneax:
	jsr	popsreg		; for optimized code
	.globl	sregneax	; SREG != AX
sregneax:
	cmp	sreg		; A == sreg lo?
	bne	return1		; nope, return 1
	cpx	sreg+1		; X == sreg hi?
	bne	return1
	beq	return0

	.globl	tosltax
tosltax:
	jsr	popsreg		; for optimized code
	.globl	sregltax	; SREG < AX
sregltax:			; really AX > SREG...
	cpx	sreg+1		; X < sreg hi?
	bmi	return0		; X < SR^ , return 0
	bne	return1		; not =, so >; return 1
	cmp	sreg		; A < sreg lo?
	bcc	return0		; A < SR\, return 0
	beq	return0
	bcs	return1

	.globl	tosultax
tosultax:
	jsr	popsreg		; for optimized code
	.globl	sregultax	; SREG u< AX
sregultax:			; AX u> SREG
	cpx	sreg+1
	bcc	return0		; tos^ u< ax^ , return 0
	bne	return1		; if ne, must be u>; return 1
	cmp	sreg
	bcc	return0
	beq	return0
	bcs	return1

	.globl	tosleax
tosleax:
	jsr	popsreg		; for optimized code
	.globl	sregleax	; SREG <= AX
sregleax:			; AX >= SR
	cpx	sreg+1
	bmi	return0		; X < SR^, return 0
	bne	return1		; X > SR^, return 1
	cmp	sreg
	bcc	return0		; u> , return 0
	bcs	return1

	.globl	tosuleax
tosuleax:
	jsr	popsreg		; for optimized code
	.globl	sreguleax	; SREG u<= AX
sreguleax:
	cpx	sreg+1
	bcc	return0		; X < SR^, return 0
	bne	return1		; X > SR^, return 1
	cmp	sreg
	bcc	return0
	bcs	return1

;
; return functions for comparison ops.  these guys are careful
; to leave the condition codes correct for the AX value.  Compiler
; depends on that when optimizing; beware!
;
return1:
	ldx	#0
	lda	#1
	rts

return0:
	lda	#0
	tax
	rts

	.globl	tosgtax
tosgtax:
	jsr	popsreg		; for optimized code
	.globl	sreggtax	; SREG > AX
sreggtax:			; AX < SR
	cpx	sreg+1
	bmi	return1		; < , return 1
	bne	return0		; not =, so >; return 0
	cmp	sreg
	bcc	return1		; < , return 1
	bcs	return0

	.globl	tosugtax
tosugtax:
	jsr	popsreg		; for optimized code
	.globl	sregugtax	; SREG u> AX
sregugtax:
	cpx	sreg+1
	bcc	return1		; < , return 1
	bne	return0		; not =, so >; return 0
	cmp	sreg
	bcc	return1		; < , return 1
	bcs	return0

	.globl	tosgeax
tosgeax:
	jsr	popsreg		; for optimized code
	.globl	sreggeax	; SREG >= AX
sreggeax:			; AX <= SR
	cpx	sreg+1
	bmi	return1		; < , return 1
	bne	return0		; not =, so >; return 0
	cmp	sreg
	bcc	return1		; < , return 1
	beq	return1
	bcs	return0

	.globl	tosugeax
tosugeax:
	jsr	popsreg		; for optimized code
	.globl	sregugeax	; AX u<= SREG
sregugeax:
	cpx	sreg+1
	bcc	return1		; < , return 1
	bne	return0		; not =, so >; return 0
	cmp	sreg
	bcc	return1		; < , return 1
	beq	return1
	bcs	return0

; kludgey constant for DIV, MOD
;onehalf:
;	.byte	$3F,$49,$99,$99,$99,$99


;
; function ops
;

	.globl	enterfun0
enterfun0:
	ldy	#0
	beq	enterfun

	.globl	enterfun1
enterfun1:
	ldy	#1
	bne	enterfun

	.globl	enterfun2
enterfun2:
	ldy	#2
	bne	enterfun

	.globl	enterfun3
enterfun3:
	ldy	#3
	bne	enterfun

	.globl	enterfun4
enterfun4:
	ldy	#4
	bne	enterfun

	.globl	enterfun5
enterfun5:
	ldy	#5
;	bne	enterfun

;
; expect frame size in Y, push fp
;
	.globl	enterfun
enterfun:
	tya			; get arg count
;	asl	A
;	clc
;	adc	sp		; add to sp
;	pha
;	lda	sp+1
;	adc	#0
;	tax
;	pla
	ldx	#0		; just push arg count
	jmp	pushax

	.globl	exitfun		; exit a function.  pop stack and rts
exitfun:
	pha			; save A a sec
	ldy	#0
	lda	(sp),y		; that's the pushed arg count
	asl	A		; loses big for large arg counts...
	tay
	iny
	iny			; count the word the count's stored in
	pla			; get it back
	jmp	addysp		; pop that many word-sized things


;
; random stuff
;

;
; call value on in AX
;
	.globl	callax
callax:
;	jsr	popax		; pop function ptr
;	jsr	tos2ax		; get tos
	sta	tmp1
	stx	tmp1+1
	jmp	(tmp1)		; jump there

;
; swap AX with TOS
;
	.globl	swapstk
swapstk:
	sta	tmpptr
	stx	tmpptr+1
	ldy	#1		; index
	lda	(sp),y
	tax
	lda	tmpptr+1
	sta	(sp),y
	dey
	lda	(sp),y
	pha
	lda	tmpptr
	sta	(sp),y
	pla
	rts			; whew!

	.globl	pushwaxi
pushwaxi:			; push word (ax)
	jsr	ldaxi
	jmp	pushax

	.globl	pushbaxi	; push byte at (ax)
pushbaxi:
	jsr	ldai
	jmp	pushax

;	.globl	ldaxysp
;ldaxysp:
;	jsr	locysp
;	jmp	ldaxi


;
; or ax with tos, pop
;
;	.globl	oraxtos
;oraxtos:
;	ldy	#0
;	ora	(sp),y
;	pha
;	txa
;	iny
;	ora	(sp),y
;	tax
;	pla
;	jmp	incsp2
;
; eor ax with tos, pop
;
;	.globl	eoraxtos
;eoraxtos:
;	ldy	#0
;	eor	(sp),y
;	pha
;	txa
;	iny
;	eor	(sp),y
;	tax
;	pla
;	jmp	inc2sp

;
; and ax with tos, pop
;
;	.globl	andaxtos
;andaxtos:
;	ldy	#0
;	and	(sp),y
;	pha
;	txa
;	iny
;	and	(sp),y
;	tax
;	pla
;	jmp	inc2sp

;
; add Y to sp, leave result in AX
;
;	.globl	addrysp
;addrysp:
;	tya			; get offset
;	clc			; set up for...
;	adc	sp		;  add low byte
;	pha
;	lda	sp+1		; get hi byte
;	adc	#0		; add carry
;	tax			; xfer to x
;	pla			; get a back
;	rts			; done

;
; handler for case jump inst.
;
	.globl	casejump
casejump:
;
; case table after the call to casejmp.  val in AX.
; table is of the form <addr,val>,<addr,val>,...,0.
; rts from casejmp for default case, else pop return addr
; addr and jmp to address
;
	sta	tmp3
	stx	tmp4		; save value
	pla			; get return addr
	sta	ptr1
	pla
	sta	ptr1+1		; store in ptr 1
	inc	ptr1		; and adjust it, because of way 6502 works
	bne	*+4
	inc	ptr1+1
case1:
	ldy	#1		; first see if at end
	lda	(ptr1),y	; test address byte hi
	bne	case2		; nope, not yet
	dey
	lda	(ptr1),y	; test address byte lo
	bne	case2
; oops, we're out of cases.  compute return address; 2 + ptr1
	clc
	lda	ptr1		; get lo byte
	adc	#2		; inc past the 0
	sta	ptr1
	bcc	*+4		; deal with carry
	inc	ptr1+1
	jmp	(ptr1)		; go there

case2:				; test case value against this value
	ldy	#3
	lda	(ptr1),y	; value hi
	cmp	tmp4		; match switchval hi?
	bne	case3		; no match, try next
	dey
	lda	(ptr1),y	; val lo
	cmp	tmp3		; match switchval lo?
	bne	case3
; match!
	dey			; point at hi byte of addr
	lda	(ptr1),y
	sta	ptr2+1		; stuff in ptr to jump thru
	dey			; point at lo byte of addr
	lda	(ptr1),y
	sta	ptr2
	jmp	(ptr2)		; go there

case3:				; no match, try next case clause
	clc
	lda	ptr1		; get case ptr
	adc	#4		; add 4
	sta	ptr1		; put it back
	bcc	*+4
	inc	ptr1+1		; deal with carry
	jmp	case1		; and go try again

;
; save and restore AX
;
	.globl	saveax
saveax:
	sta	svax
	stx	svax+1
	rts
	.globl	restax
restax:
	lda	svax
	ldx	svax+1
	jmp	tstax

;
; support routines for runtime
;
;tos2ax:
;	ldy	#0
;ytos2ax:
;	iny
;	lda	(sp),y
;	tax
;	dey
;	lda	(sp),y
;	rts

;	.globl	popax
;popax:
;	jsr	tos2ax
;	jsr	inc2sp
;	rts


	
;
; mult and div, using fp routines
;
;	.globl	umul
;umul:				; unsigned multiply
;	sta	fr0		; set up the first num
;	stx	fr0+1
;	jsr	ifp		; make a float out of it
;	jsr	fmove		; move to fr1
;	jsr	popax		; get second arg
;	sta	fr0		; set this one up
;	stx	fr0+1
;	jsr	ifp		; floatify...
;	jsr	fmul		; mult them
;	jsr	fpi		; back to int
;	lda	fr0
;	ldx	fr0+1		; load it up
;	rts			; and return

	.globl	multos
multos:
	jsr	popsreg
	.globl	smul		; signed multiply AX by SREG
smul:
	ldy	#0
	sty	tmp3		; zap negative flag
	cpx	#0
	bpl	*+7
	inc	tmp3
	jsr	negax
	sta	tmpptr		; use sreg and tmpptr
	stx	tmpptr+1	;  as regs
	lda	sreg
	ldx	sreg+1
	bpl	xsmul0
	dec	tmp3
	jsr	negax
	sta	sreg
	stx	sreg+1
xsmul0:
	lda	#0
	sta	tmp1		; tmp1/2 as accumulator
	sta	tmp1+1
	lda	#1		; use A as bit mask against SREG
xsmul1:
	bit	sreg		; this bit in sreg set?
	beq	xsmul2		; nope, try next
	jsr	xsmadd		; add AX val to accum
xsmul2:
	asl	tmpptr		; shift AX val left one
	rol	tmpptr+1
	asl	A		; shift bitmask left one
	bcc	xsmul1		; if bit not off end, keep working on lo byte
	lda	#1		; set up mask for hi byte
xsmul3:
	bit	sreg+1		; this bit in sreg set?
	beq	xsmul4		; nope, try next
	jsr	xsmadd		; add AX val to accum
xsmul4:
	asl	tmpptr		; shift AX val left one
	rol	tmpptr+1
	asl	A		; shift bitmask left one
	bcc	xsmul3		; if not off end, keep working on hi byte
	ldx	tmp1+1		; load up accum
	lda	tmp1
	ldy	tmp3		; negate flag?
	beq	xsmul9
	jsr	negax
xsmul9:
	rts			; done!

xsmadd:				; helper fun	
	pha			; save bit mask
	clc
	lda	tmpptr		; add AX value
	adc	tmp1		;  to accumulator
	sta	tmp1
	lda	tmpptr+1
	adc	tmp1+1
	sta	tmp1+1
	pla			; get bitmask back
	rts			; done



xsdiv0:				; error return
	lda	#0
	tax
	sta	sreg
	sta	sreg+1
	rts

	.globl	divtos
divtos:
	jsr	popsreg		; pop other value into sreg
	.globl	sdiv		; signed divide SREG by AX, 
sdiv:				;   result in AX, remainder in SREG
	ldy	#0
	sty	tmp3		; zap denominator-negative flag
	sty	tmp4		; and numerator-negative flag
	jsr	tstax		; AX zero?
	beq	xsdiv0		; yup, give up now
	bpl	xsdiv1		; positive, it's ok
	inc	tmp3		; denom negative
	jsr	negax		; negate it
xsdiv1:
	sta	ptr1		; use tmpptr1 and SREG as regs
	stx	ptr1+1
	lda	sreg
	ldx	sreg+1		; load up sreg
	jsr	tstax		; what sort of value?
	beq	xsdiv0		; zero, we lose
	bpl	xsdiv2		; positive, it's ok
	inc	tmp4		; set numerator-negative flag
	jsr	negax		; negate it
	sta	sreg
	stx	sreg+1
xsdiv2:
	ldy	#1		; bit mask
	sty	ptr2
	dey
	sty	ptr2+1
	sty	tmp1		; use tmp1 as accum
	sty	tmp1+1
;
; shift ptr1 and ptr2 left until ptr1 is greater than SREG,
; then shift right until it's <=
;
xsdiv3:
	lda	ptr1+1		; compare
	cmp	sreg+1		;  p1 to sreg
	beq	xsdiv3a		; eq, try second byte
	bcs	xsdiv4		; >, we're done shifting left
	bcc	xsdiv3b		; <, shift left
xsdiv3a:
	lda	ptr1
	cmp	sreg		; compare lo byte
	beq	xsdiv5		; =, skip the right shift
	bcs	xsdiv4		; >, go right one
xsdiv3b:
	asl	ptr1		; shift p1 left 1
	rol	ptr1+1
	asl	ptr2		; shift bitmask left one
	rol	ptr2+1
	jmp	xsdiv3		; round again.
xsdiv4:				; shift p1, mask right one
	lsr	ptr1+1		; shift p1
	ror	ptr1
	lsr	ptr2+1		; shift bitmask
	ror	ptr2
	bcs	xsdiv7a		; bit fell out end?!? ok, do exit processing
;
; whew! tmpptr1 (the divisor register) and tmpptr2 (the quotient bitmask)
; are now all set to enter the divide loop
;
xsdiv5:				; compare sreg to p1.  if >=, subtract
	lda	sreg+1
	cmp	ptr1+1
	beq	xsdiv5a		; try lo byte
	bcs	xsdiv6		; >, go subtract
	bcc	xsdiv7		; <, go shift right
xsdiv5a:
	lda	sreg
	cmp	ptr1
	bcc	xsdiv7		; < go shift right
xsdiv6:				; subtract p1 from sreg, and set bit in accum
	sec
	lda	sreg		; subtract 
	sbc	ptr1		;  p1 
	sta	sreg		;   from 
	lda	sreg+1		;    sreg
	sbc	ptr1+1
	sta	sreg+1
	lda	ptr2		; or 
	ora	tmp1		;  bitmask
	sta	tmp1		;   into 
	lda	ptr2+1		;    quotient
	ora	tmp1+1
	sta	tmp1+1
xsdiv7:
	lsr	ptr1+1		; shift p1 right
	ror	ptr1
	lsr	ptr2+1
	ror	ptr2
	bcc	xsdiv5		; if no carry, round again
xsdiv7a:
;
; done!
;
	lda	tmp4		; negate remainder?
	beq	xsdiv8
	lda	sreg
	ldx	sreg+1
	jsr	negax
	sta	sreg
	stx	sreg+1
xsdiv8:
	lda	tmp1
	ldx	tmp1+1
	ldy	tmp3		; numerator neg...
	cpy	tmp4		; same as denom-neg?
	beq	xsdiv9		; yes, skip the negate
	jsr	negax
xsdiv9:
	rts

	.globl	modtos
modtos:
	jsr	divtos
	lda	sreg
	ldx	sreg+1
	jmp	tstax

;
; library routines
;

	.globl	_strcpy
_strcpy:
	jsr	popax
	sta	ptr1
	stx	ptr1+1
	jsr	popax
	sta	ptr2
	stx	ptr2+1
	ldy	#0
strc1:
	lda	(ptr1),y
	sta	(ptr2),y
	beq	strc9
	iny
	bne	strc1
	inc	ptr1+1
	inc	ptr2+1
	bne	strc1
strc9:
	rts

	.globl	_strlen
_strlen:
	jsr	popax
	sta	ptr1
	stx	ptr1+1
	ldx	#0		; YX used as counter
	ldy	#0
strlen1:
	lda	(ptr1),y
	beq	strlen9
	iny
	bne	strlen1
	inc	ptr1+1
	inx
	bne	strlen1
strlen9:
	tya			; get low byte of counter, hi's all set
	rts

;
; find (char * str, int len, char c) -> index
;
;_find:
;	jsr	tos2ax
;	sta	ptr1
;	stx	ptr1+1
;	ldy	#2
;	jsr	ytos2ax
;	sta	tmp1
;	stx	tmp1+1
;	ldy	#4
;	jsr	ytos2ax			; gets char in A
;	ldx	#0
;	stx	ptr2			; use as counter here
;	stx	ptr2+1
;	ldy	#0
;find1:
;	cmp	(ptr1),y		; match?
;	beq	find9			; yup, return
; zzz
;find9:
;	rts



;
; startup
;
;argv:
;	.word	0,0,0,0,0,0,0,0
;	.word	0,0,0,0,0,0,0,0
start:
	tsx
	stx	origsp		; save system stk ptr
;
;	lda	#0		; init stk ptr
;	sta	sp
;	lda	#$80		; $80 for debugging...
;	sta	sp+1
;
	lda	memtop		; memtop for real code
	sta	sp
	lda	memtop+1
	sta	sp+1
;
; if running under SpartaDos, pass the command line to _main
;
	lda	$0700		; check the sparta flag
	cmp	#'S'
	beq	startcmd	; nope, check for dos xl
;
; Try to guess what OS we're running under.
; Use the algorithm suggested by Dick Curzon.
; Look thru $0A.  Should see a jmp, another jmp,
; and something that's not a jump.
;
	ldy	#0
	lda	(cpaloc),y
	cmp	#$4C		; a jmp?
	bne	start0		; nope, give up
	ldy	#3
	lda	(cpaloc),y
	cmp	#$4C		; a jmp?
	bne	start0		; nope
	ldy	#6
	lda	(cpaloc),y
	cmp	#$4C		; a jmp?
	beq	start0		; if yes, it's mydos or something

startcmd:
;
; parse command line etc.  for now, assume Sparta or Dos XL
;
	clc
	lda	cpaloc		; get pointer to
	adc	#cpcmdb		; cmd buf
	pha			; save lo byte
	lda	cpaloc+1
	adc	#0
	tax			; get hi byte
	pla			; get lo byte back
	jmp	start1
start0:
; no command line, just pass null
	lda	#0
	tax
start1:
	jsr	pushax		; push the ptr
	ldy	#1		; 1 arg
	jsr	__main
;
; fall thru to exit...
;	rts			; done!
;
	.globl	_exit
_exit:
	ldx	origsp
	txs
;	rts
	jmp	(dosvec)
;
