jep-hack/engine/16/macros.asm
Zeta_Null 2f8a41f833 First Commit
Upload literally everything from the pokecrystal16 expand-move-ID branch
2023-09-10 12:35:35 -04:00

466 lines
10 KiB
NASM

; Some functions are defined as macros so they can be reused for multiple 16-bit tables
; (they are not parameterized due to performance constraints; loads and non-evicting stores should be as fast as possible)
MACRO ___conversion_table_load
	; Body of a lookup routine: translates an 8-bit index into the 16-bit index it stands for.
	; macro arguments: \1 = WRAM prefix, \2 = constant prefix
	; in: a = 8-bit index
	; out: hl = 16-bit index; a = clobbered
	ld h, $FF
	ld l, a
	cp a, \2_MINIMUM_RESERVED_INDEX
	ret nc ; reserved indexes come back as $FF:index
	inc h ; h = 0 from here on
	and a, a
	ret z ; index zero comes back as $0000
	IF (\2_ENTRIES + 1) < \2_MINIMUM_RESERVED_INDEX
		; indexes past the last table slot (but below the reserved range) are never allocated: return zero
		cp a, \2_ENTRIES + 1
		ld l, h
		ret nc
	ENDC
	; form the entry pointer: low byte = 2 * index, high byte = HIGH(\1Entries), plus one if the doubling overflowed
	add a, a
	ld l, a
	IF \2_ENTRIES >= $80
		; the doubling can only carry when the table has $80 or more entries
		adc a, HIGH(\1Entries) ; a = l + HIGH(\1Entries) + carry
		sub a, l ; a = HIGH(\1Entries) + carry
		ld h, a
	ELSE
		ld h, HIGH(\1Entries)
	ENDC
	; read the entry with the table's WRAM bank selected, then put the previous bank back
	ldh a, [rSVBK]
	push af
	ld a, BANK(\1)
	ldh [rSVBK], a
	ld a, [hl+]
	ld h, [hl]
	ld l, a
	pop af
	ldh [rSVBK], a
	ret
ENDM
MACRO ___conversion_table_store
; Expands to the body of a function that converts a 16-bit index into an 8-bit one. If the 16-bit index is not
; stored in the table yet, a slot is allocated for it (after collecting garbage if the table is full).
; macro arguments: WRAM prefix, constant prefix
; in: hl: 16-bit index
; out: a: 8-bit index; hl: clobbered
; bc, de and rSVBK are saved after the early-exit checks and restored before returning.
; This macro must be followed by garbage collection code! The code will call the function at the end of the macro
; when garbage collection is needed. That code must preserve de and rSVBK.
ld a, l
cp \2_MINIMUM_RESERVED_INDEX
jr c, .not_reserved
; the low byte is in the reserved range: if the high byte is $FF, return the low byte unchanged
inc h
ret z
dec h
.not_reserved
; a = l here, so this tests the whole 16-bit value; zero is returned as zero
or h
ret z
; we know it's a real index (not zero or a negative reserved value), so check if it is already in the table
ldh a, [rSVBK]
push de
push bc
push af
; de = the 16-bit index being looked up, for the rest of the function
ld e, l
ld d, h
ld a, BANK(\1)
ldh [rSVBK], a
if \2_CACHE_SIZE
; if we have a cache, check it first, as it is the fastest way
if \2_CACHE_SIZE > 1
; cache slot = (low byte xor high byte) and (\2_CACHE_SIZE - 1)
; NOTE(review): the mask only reaches every slot if \2_CACHE_SIZE is a power of two, and the 8-bit add assumes
; the cache does not cross a 256-byte boundary - confirm against the constant and WRAM definitions
ld a, l
xor h
and \2_CACHE_SIZE - 1
add a, LOW(\1EntryCache)
ld l, a
ld h, HIGH(\1EntryCache)
ld a, [hl]
else
ld a, [\1EntryCache]
endc
; a = cached 8-bit index; ignore it if it is zero or beyond the last table slot
and a
jr z, .cache_miss
cp \2_ENTRIES + 1
jr nc, .cache_miss
; c = candidate 8-bit index; hl = its entry pointer, built the same way as in the search/allocation code below
; (low byte = 2 * index, high byte = HIGH(\1Entries) plus the carry of the doubling)
ld c, a
add a, a
ld l, a
if \2_ENTRIES >= $80
; if there are at least $80 entries, we have to deal with carry
adc HIGH(\1Entries)
sub l
ld h, a
else
ld h, HIGH(\1Entries)
endc
; the cached slot only counts as a hit if it still holds the requested 16-bit index
ld a, [hli]
cp e
jr nz, .cache_miss
ld a, [hl]
cp d
; cache hit: c already holds the result
jp z, .done
.cache_miss
endc
; it's not in the cache (perhaps because we don't even have one), so look for the index in the table
ld hl, \1Entries
ld b, h ;not $FF = don't add to the recent indexes list
DEF ___unroll = 8
if \2_ENTRIES % ___unroll
; Duff's device, gbz80 edition
; note that the block inside the rept is 8 bytes long
db $18, 8 * (___unroll - (\2_ENTRIES % ___unroll)) ;jr <number of bytes skipped>
endc
.search_loop
rept ___unroll
; compare one entry against de; hl ends up pointing past the entry either way
ld a, [hli]
cp e
ld a, [hli]
jr nz, :+
cp d
jr z, .found
:
endr
ld a, l
cp LOW(\1EntriesEnd)
jr nz, .search_loop
if \2_ENTRIES >= $80
bit 0, h
; if HIGH(\1EntriesEnd) is odd, we jump on z; otherwise, we jump on nz
; so we have to encode a "jr z/nz, .search_loop" instruction according to that value - jr z = $28, jr nz = $20
db 8 * (HIGH(\1EntriesEnd) & 1) + $20
db .search_loop - (@ + 1)
endc
; not found - we have to allocate the 16-bit ID on the table and return the new 8-bit ID for it
call .allocate ;returns hl = position + 2
push hl
; store de (low byte first) into the two bytes just before hl
dec hl
ld a, d
ld [hld], a
ld [hl], e
; count the newly used slot
if \2_ENTRIES >= $80
ld hl, \1UsedSlots
else
ld l, LOW(\1UsedSlots)
endc
inc [hl]
pop hl
ld b, -1 ;$FF = add to the recent indexes list
.found
; hl points past the entry
ld a, l
if \2_ENTRIES >= $7F
; if there are enough entries, the offset to \1 is 9 bits wide, capping at $1FE for the maximum $FE entries
; so we set the carry to the value of the top bit of the offset, and then rra this bit into a
srl h
; if HIGH(\1) is even, srl h is correct; otherwise, we have to invert it. Since we can't conditionally include a
; ccf instruction (as it would create a circular dependency between assembler and linker), we have no choice but
; to execute a ccf on odd HIGH(\1) and a nop on even HIGH(\1) by inserting the correct instruction dynamically
; using a db. Note that ccf = $3F and nop = $00.
db (HIGH(\1) & 1) * $3F
rra
else
; with less than $7F entries, the offset is 8 bits wide and always even, so handling it is simpler
rrca
endc
; a = (offset past the entry) / 2, which is one more than the entry's 8-bit index
dec a
ld c, a
; if there is a cache, store the found ID in it
if \2_CACHE_SIZE > 1
; same slot computation as the cache lookup above, this time from de
ld a, e
xor d
and \2_CACHE_SIZE - 1
add a, LOW(\1EntryCache)
ld l, a
ld h, HIGH(\1EntryCache)
ld [hl], c
elif \2_CACHE_SIZE == 1
ld [\1EntryCache], a
endc
; if the found ID was just allocated, record it as a recent allocation
inc b
jr nz, .done
; advance \1LastAllocatedIndex (wrapping around) and store the new ID at that position of \1LastAllocated
ld hl, \1LastAllocatedIndex
ld a, [hl]
inc a
if \2_SAVED_RECENT_INDEXES & (\2_SAVED_RECENT_INDEXES - 1)
; if the number of recent indexes is not a power of 2, constrain the index to range via a simple comparison...
cp \2_SAVED_RECENT_INDEXES
jr c, .no_recent_overflow
xor a
.no_recent_overflow
else
; ...but if it is a power of 2, it's simpler to just use an and
and \2_SAVED_RECENT_INDEXES - 1
endc
ld [hl], a
add a, LOW(\1LastAllocated)
ld l, a
ld h, HIGH(\1LastAllocated)
ld [hl], c
.done
; c = 8-bit index to return; restore the WRAM bank and the saved registers
pop af
ldh [rSVBK], a
ld a, c
pop bc
pop de
ret
.allocate
; finds an empty (zero) entry in the table, collecting garbage if there is none
; out: hl = address right after the empty entry; clobbers a, bc
; check if the table isn't already full
ld hl, \1
ld a, [hli]
cp \2_ENTRIES
jr nc, .no_room
; try starting at the last allocated position + 1
ld c, l ;fallback position in case we don't get a valid one; l = LOW(\1LastAllocatedIndex) = 1 here
ld a, [hl]
add a, LOW(\1LastAllocated)
ld l, a
ld h, HIGH(\1LastAllocated)
ld a, [hl]
; only use the most recent allocation as the starting position if it is a valid table index
and a
jr z, .start_allocation_loop
cp \2_ENTRIES + 1
jr nc, .start_allocation_loop
ld c, a
.start_allocation_loop
; go from position c until the end of the table, trying to find an empty position
; b = number of entries from position c through the last one
ld a, \2_ENTRIES + 1
sub c
ld b, a
ld l, c
sla l
ld h, HIGH(\1Entries)
if \2_ENTRIES >= $80
; sla l carries when c >= $80, in which case the entry is in the next 256-byte page
jr nc, .allocation_loop
inc h
endc
.allocation_loop
ld a, [hli]
or [hl]
inc hl ;even if there is no chance of carry, we need to preserve flags
ret z
dec b
jr nz, .allocation_loop
; no room was found; if we didn't start at c = 1, try with the rest of the table
; b = c - 1 = number of entries before the original starting position
ld b, c
ld c, 1
if \2_ENTRIES >= $7F
ld hl, \1Entries
else
ld l, LOW(\1Entries)
endc
dec b
jr nz, .allocation_loop
.no_room
; the table is really full; free up slots and try again (WARNING: the loop won't exit until some slots are freed)
call .collect_garbage
ld c, 1
jr .start_allocation_loop
.collect_garbage
; as stated at the top, this macro must be followed by garbage-collection code, which must preserve de and rSVBK
; all other registers are free/clobbers; this code will be called as a function when needed
ENDM
MACRO ___conversion_bitmap_initialize
	; Zeroes wConversionTableBitmap, then passes every byte from the start of the locked/recent lists up to the end
	; of their 256-byte page to the bit setting function.
	; macro arguments: WRAM prefix, constant prefix, bit setting function (must preserve de; may be a local label)
	; falls through; clobbers all registers (make sure to push de before invoking!)
	ld hl, wConversionTableBitmap
	xor a, a
	DEF ___unroll = 4
	; one bit per entry, cleared ___unroll bytes at a time (so the byte count is rounded up to a multiple of it)
	ld c, (\2_ENTRIES + 8 * ___unroll - 1) / (8 * ___unroll)
.initialization_clear_loop
	REPT ___unroll
		ld [hl+], a
	ENDR
	dec c
	jr nz, .initialization_clear_loop
	; start from the locked entries when the table has any, otherwise from the recent allocations
	IF \2_LOCKED_ENTRIES
		ld de, \1LockedEntries
	ELSE
		ld de, \1LastAllocated
	ENDC
.initialization_locked_loop
	; the loop stops when e wraps around to zero
	; NOTE(review): this presumably relies on those lists being the last thing in their 256-byte page - confirm
	; against the WRAM layout
	ld a, [de]
	call \3
	inc e
	jr nz, .initialization_locked_loop
ENDM
MACRO ___conversion_bitmap_check_structs
; Calls the bit setting function once per struct, with a = the byte at the struct pointer's offset within each of
; the \3 consecutive structs (the first one at \1, each following one \2 bytes further).
; macro arguments: struct pointer, struct length, struct count, bit setting function
; may clobber anything; falls through
; In the looped form (more than ___unroll structs), de holds the running pointer across the calls, so the bit
; setting function must preserve de; the number of remaining passes is kept in wTempLoopCounter.
DEF ___unroll = 8
if (\3) <= ___unroll
; few enough structs: emit one direct load and call per struct
FOR ___iteration, \3
ld a, [(\1) + ___iteration * (\2)]
call \4
endr
else
ld de, \1
ld hl, wTempLoopCounter
; number of passes through the unrolled loop, rounded up (the first pass may be a partial one)
ld [hl], ((\3) + ___unroll - 1) / ___unroll
if (\3) % ___unroll
; again, Duff's device - the body of the rept is 10 bytes long
; (ld a, [de] = 1, ld hl, n16 = 3, add hl, de = 1, ld d, h = 1, ld e, l = 1, call = 3)
db $18, 10 * (___unroll - ((\3) % ___unroll))
endc
.check_loop\@
rept ___unroll
; load the current struct's byte, advance de to the next struct, then report the loaded byte
ld a, [de]
ld hl, \2
add hl, de
ld d, h
ld e, l
call \4
endr
ld hl, wTempLoopCounter
dec [hl]
jr nz, .check_loop\@
endc
ENDM
MACRO ___conversion_bitmap_check_values
	; macro arguments: bit setting function, address, address, address...
	; For every address given after the first argument, loads the byte stored at that address into a and calls the
	; bit setting function with it.
	FOR ___unroll, 2, _NARG + 1
		ld a, [\<___unroll>]
		call \1
	ENDR
ENDM
MACRO ___conversion_bitmap_free_unused
	; Walks wConversionTableBitmap one bit per table entry (least significant bit first) and zeroes every entry whose
	; bit is clear, then stores the number of entries that remain non-empty in \1UsedSlots.
	; macro arguments: WRAM prefix, constant prefix
	; falls through; clobbers a, bc, de, hl
	; NOTE(review): the bitmap pointer is advanced with inc e, so the bitmap is presumably contained in a single
	; 256-byte page - confirm against the WRAM layout
	ld bc, \2_ENTRIES >> 3 ;b = 0 (decremented once per empty slot), c = number of full bitmap bytes
	ld de, wConversionTableBitmap
	ld a, [de]
	inc e
	ld hl, \1Entries
	if \2_ENTRIES >> 3
		; this loop tests its counter at the bottom, so it must not be emitted at all for tables with fewer than
		; 8 entries: entering it with c = 0 would run it 256 times and wipe memory past the table
.value_removal_loop
		push de
		ld e, a
		; no ___unroll here since we rely on there being 8 bits per byte
		FOR ___iteration, 8
			srl e
			ld a, [hli]
			jr nc, :+ ;bit clear: the entry is unused, so clear it
			or [hl]
			jr nz, :++ ;bit set and entry not empty: keep it
:
			; unused (or already empty) entry: zero both bytes and count it
			xor a
			ld [hld], a
			ld [hl], a
			dec b
:
			set 0, l ;so hl points to the second byte of the entry regardless of whether it was cleared
			if (\2_ENTRIES >= $80) && (___iteration == 6)
				inc hl ;the only iteration with any chance of carry is this one
			else
				inc l
			endc
		endr
		pop de
		ld a, [de]
		inc e
		dec c
		jr nz, .value_removal_loop
	endc
	if \2_ENTRIES & 7
		; handle the few remaining entries that couldn't be handled by the loop
		; (a holds the bitmap byte that covers them at this point)
		ld e, a
		FOR ___iteration, \2_ENTRIES & 7
			; same logic as the loop above
			srl e
			ld a, [hli]
			jr nc, :+
			or [hl]
			jr nz, :++
:
			xor a
			ld [hld], a
			ld [hl], a
			dec b
:
			if (___iteration + 1) < (\2_ENTRIES & 7)
				; no point incrementing the pointer if it is the last iteration
				set 0, l
				inc l ;no overflow is possible here
			endc
		endr
	endc
	; b contains minus the number of empty entries (cleared just now or already empty) at this point
	ld a, \2_ENTRIES
	add a, b
	ld [\1UsedSlots], a
ENDM
MACRO ___conversion_bitmap_set
	; Body of a routine that marks an 8-bit index as in use in wConversionTableBitmap.
	; macro argument: constant prefix
	; in: a: index - sets the corresponding bit in wConversionTableBitmap if the index is in range
	; clobbers a, bc, hl; returns without touching the bitmap when the index is zero or above \1_ENTRIES
	dec a
	cp a, \1_ENTRIES
	ret nc
	; what follows duplicates FlagAction in part, but calling the function is slow
	; rotate the zero-based index left by 5 in total: its upper five bits (the byte offset) land in bits 0-4
	swap a
	rlca
	ld b, a
	and a, $1F
	; NOTE(review): 8-bit add with no carry into h - presumably the bitmap never crosses a 256-byte boundary
	add a, LOW(wConversionTableBitmap)
	ld l, a
	ld h, HIGH(wConversionTableBitmap)
	ld a, b
	; nothing to do if every bit in this byte is already set
	ld c, [hl]
	inc c
	ret z
	; build the mask 1 << (zero-based index & 7) from the three low bits of the index
	rlca
	add a, a ;carry = bit 1 of the zero-based index
	ld b, a
	sbc a, a
	and a, 3
	inc a ;a = 4 if that bit was set, 1 otherwise
	sla b ;carry = bit 0 of the zero-based index
	jr nc, .bitmap_set_skip_shift
	add a, a
.bitmap_set_skip_shift
	bit 2, b ;bit 2 of the zero-based index is back at position 2 after the shifts
	jr z, .bitmap_set_skip_swap
	swap a ;move the mask up by four bits
.bitmap_set_skip_swap
	or a, [hl]
	ld [hl], a
	ret
ENDM
MACRO ___conversion_table_lock_ID
	; Body of a routine that writes an 8-bit index (or zero) into one position of the locked entries list.
	; macro arguments: WRAM prefix, constant prefix
	; in: h = 8-bit index or zero (to clear), l = position
	; out: a = original h, hl = clobbered, carry = set if error
	; The errors are a position of \2_LOCKED_ENTRIES or higher, and an index above \2_ENTRIES; nothing is written
	; in either case.
	ld a, l
	cp a, \2_LOCKED_ENTRIES
	ccf ;carry = position out of range
	ld a, h
	ret c
	cp a, \2_ENTRIES + 1
	ccf ;carry = index out of range
	ret c
	; switch to the table's WRAM bank, keeping the previous one on the stack
	ldh a, [rSVBK]
	push af
	ld a, BANK(\1)
	ldh [rSVBK], a
	; hl = address of the requested position; a = index to store there
	ld a, l
	add a, LOW(\1LockedEntries)
	ld l, a
	ld a, h
	ld h, HIGH(\1LockedEntries)
	ld [hl], a
	ld h, a ;keep the index around while a is used to restore the bank
	pop af ;carry was clear when pushed, so it remains clear
	ldh [rSVBK], a
	ld a, h
	ret
ENDM