NES: Add support for various tilemap layout (mirroring) configurations (#783)

* NES: Add support for various tilemap layout (mirroring) configurations

- Introduce platform configuration, mainly to define tilemap layout NES_TILEMAP_[F|H|V|S] in platform_cfg.s
  + Add NES_TILEMAP_S|H|V|F setting for tilemap layout, and hard-code to NES_TILEMAP_S for now
  + Add NES_LOMEM setting to enable current default of using part of stack instead of BSS for attribute shadow buffer
  + Edit Makefile to copy platform_cfg.s (if it exists) to platform directory after build

- Updates to attribute shadow buffer and dirty bits:
  + Add NUM_NT define for number of nametables used by a layout, and AT_SHADOW_WIDTH/_HEIGHT to denote the variable dimensions
  + Add convenience defines NT_2W / NT_2H to quickly test whether tilemap layout is two screens wide / high
  + Define _attribute_shadow and _attribute_row/_column_dirty in terms of NUM_NT
  + Update flush_attributes to support all layouts
  + Update get_bkg_xy_addr / set_bkg_tile_xy to support all layouts
  + Update set_bkg_attribute_xy[_nes16x16] to support all layouts
  + Update set_bkg_attributes[_nes16x16] to support all layouts, and correctly wrap to next AT in both directions
  + Update set_bkg_submap_attributes[_nes16x16] to support all layouts
  + Update set_bkg_submap to support all layouts, and contain common inner subroutine .set_bkg_common
  + Replace set_bkg_tiles with simpler implementation calling .set_bkg_common, and correctly wraps to next NT in both directions
  + Add set_bkg_based_tiles / set_bkg_based_submap implementations using the new common subroutine

- Updates to C include files:
  + Define DEVICE_SCREEN_BUFFER_WIDTH/_HEIGHT based on NES_TILEMAP_ setting, doubling high-level size of WIDTH / HEIGHT conditionally
  + Add typedefs scroll_x_t / scroll_y_t as uint8_t or uint16_t based on NES_TILEMAP_ setting
  + Make move_bkg use scroll_x/y_t typedefs, set 9th scroll bit in shadow_PPUCTRL where needed, and compensate for 239->0 y wrapping

- Updates to examples:
  + Update large_map example to use platform-agnostic settings for scroll wrapping and offset
  + Update rle_map to use a uint16_t for scroll position, to support NES_TILEMAP_H and NES_TILEMAP_F settings
This commit is contained in:
Michel Iwaniec
2025-05-19 12:44:45 +01:00
committed by GitHub
parent 2186bc60ec
commit 62de8abb87
15 changed files with 1004 additions and 545 deletions

View File

@@ -264,6 +264,9 @@ gbdk-lib-install-platforms:
for port in $(PORTS); do \
if [ -d "$(GBDKLIBDIR)/libc/targets/$$port/$$plat" ]; then \
cp $(GBDKLIBDIR)/libc/targets/$$port/$$plat/global.s $(BUILDDIR)/lib/$$plat/global.s; \
if [ -f "$(GBDKLIBDIR)/libc/targets/$$port/$$plat/platform_cfg.s" ]; then \
cp $(GBDKLIBDIR)/libc/targets/$$port/$$plat/platform_cfg.s $(BUILDDIR)/lib/$$plat/platform_cfg.s; \
fi \
fi \
done \
done

View File

@@ -9,18 +9,13 @@
#define camera_max_y ((bigmap_mapHeight - DEVICE_SCREEN_HEIGHT) * 8)
#define camera_max_x ((bigmap_mapWidth - DEVICE_SCREEN_WIDTH) * 8)
#if defined(SEGA)
#define WRAP_SCROLL_Y(y) ((y) % 224u)
// For SMS, artifacts are already invisible as screen buffer size is larger than screen size
#define SCROLL_Y_OFFSET 0
#elif defined(NINTENDO)
#define WRAP_SCROLL_Y(y) y
// For GB, artifacts are already invisible as screen buffer size is larger than screen size
#define SCROLL_Y_OFFSET 0
#else
#define WRAP_SCROLL_Y(y) ((y) % 240u)
// For other systems assume height of 240 and adjust Y-scroll 4 pixels down to partly hide artifacts in NTSC overscan
#define WRAP_SCROLL_Y(y) ((y) % (DEVICE_SCREEN_BUFFER_HEIGHT * 8))
// For systems where screen buffer height is equal to screen height, adjust Y-scroll 4 pixels down to partly hide artifacts in NTSC overscan
#if DEVICE_SCREEN_BUFFER_HEIGHT == DEVICE_SCREEN_HEIGHT
#define SCROLL_Y_OFFSET 4
#else
#define SCROLL_Y_OFFSET 0
#endif
#if defined(SEGA)

View File

@@ -16,7 +16,7 @@ INCBIN(map_compressed, "res/map.bin.rle")
INCBIN_EXTERN(map_compressed)
uint8_t data[MAP_DATA_HEIGHT]; // Collision map buffer
uint8_t scrollpos = 0; // Scroll position in pixels
uint16_t scrollpos = 0; // Scroll position in pixels
uint8_t datapos = 0; // x position in tiles inside the collision map
void main(void) {

View File

@@ -42,8 +42,33 @@ __REG(0x4014) OAMDMA;
#define DEVICE_SCREEN_Y_OFFSET 0
#define DEVICE_SCREEN_WIDTH 32
#define DEVICE_SCREEN_HEIGHT 30
// Tilemap (nametable) layout selection.
// The NES_TILEMAP_* macro defined at compile time picks the screen buffer size in tiles
// and the integer types used for the scroll coordinates taken by move_bkg().
// A dimension that spans two screens (64 tiles wide / 60 tiles high) gets a uint16_t
// scroll type; a single-screen dimension keeps uint8_t.
// When none of NES_TILEMAP_F / NES_TILEMAP_H / NES_TILEMAP_V is defined, the
// single-screen layout in the #else branch is used.
#if defined(NES_TILEMAP_F)
// Full tilemap
#define DEVICE_SCREEN_BUFFER_WIDTH 64
#define DEVICE_SCREEN_BUFFER_HEIGHT 60
typedef uint16_t scroll_x_t;
typedef uint16_t scroll_y_t;
#elif defined(NES_TILEMAP_H)
// Horizontally arranged tilemap
#define DEVICE_SCREEN_BUFFER_WIDTH 64
#define DEVICE_SCREEN_BUFFER_HEIGHT 30
typedef uint16_t scroll_x_t;
typedef uint8_t scroll_y_t;
#elif defined(NES_TILEMAP_V)
// Vertically arranged tilemap
#define DEVICE_SCREEN_BUFFER_WIDTH 32
#define DEVICE_SCREEN_BUFFER_HEIGHT 60
typedef uint8_t scroll_x_t;
typedef uint16_t scroll_y_t;
#else
// Single-screen tilemap
#define DEVICE_SCREEN_BUFFER_WIDTH 32
#define DEVICE_SCREEN_BUFFER_HEIGHT 30
typedef uint8_t scroll_x_t;
typedef uint8_t scroll_y_t;
#endif
#define DEVICE_SCREEN_MAP_ENTRY_SIZE 1
#define DEVICE_SPRITE_PX_OFFSET_X 0
#define DEVICE_SPRITE_PX_OFFSET_Y -1

View File

@@ -822,8 +822,6 @@ inline void set_bkg_submap_attributes(uint8_t x, uint8_t y, uint8_t w, uint8_t h
}
extern uint8_t _map_tile_offset;
/** Sets a rectangular region of Background Tile Map.
The offset value in __base_tile__ is added to
the tile ID for each map entry.
@@ -842,11 +840,7 @@ extern uint8_t _map_tile_offset;
@see set_bkg_tiles for more details
*/
inline void set_bkg_based_tiles(uint8_t x, uint8_t y, uint8_t w, uint8_t h, const uint8_t *tiles, uint8_t base_tile) {
_map_tile_offset = base_tile;
set_bkg_tiles(x, y, w, h, tiles);
_map_tile_offset = 0;
}
// Prototype only: the header no longer carries an inline body for this function (the
// implementation is provided out of line by the platform library). It is therefore declared
// as a plain external function; C requires a function declared inline with external linkage
// to be defined in the same translation unit.
void set_bkg_based_tiles(uint8_t x, uint8_t y, uint8_t w, uint8_t h, const uint8_t *tiles, uint8_t base_tile);
/** Sets a rectangular area of the Background Tile Map using a sub-region
@@ -881,8 +875,6 @@ void set_bkg_submap(uint8_t x, uint8_t y, uint8_t w, uint8_t h, const uint8_t *m
#define set_tile_submap set_bkg_submap
extern uint8_t _submap_tile_offset;
/** Sets a rectangular area of the Background Tile Map using a sub-region
from a source tile map. The offset value in __base_tile__ is added to
the tile ID for each map entry.
@@ -902,11 +894,7 @@ extern uint8_t _submap_tile_offset;
@see set_bkg_submap for more details
*/
inline void set_bkg_based_submap(uint8_t x, uint8_t y, uint8_t w, uint8_t h, const uint8_t *map, uint8_t map_w, uint8_t base_tile) {
_submap_tile_offset = base_tile;
set_bkg_submap(x, y, w, h, map, map_w);
_submap_tile_offset = 0;
}
// Prototype only: the header no longer carries an inline body for this function (the
// implementation is provided out of line by the platform library). It is therefore declared
// as a plain external function; C requires a function declared inline with external linkage
// to be defined in the same translation unit.
void set_bkg_based_submap(uint8_t x, uint8_t y, uint8_t w, uint8_t h, const uint8_t *map, uint8_t map_w, uint8_t base_tile);
/** Copies a rectangular region of Background Tile Map entries into a buffer.
@@ -982,8 +970,22 @@ uint8_t get_bkg_tile_xy(uint8_t x, uint8_t y) NO_OVERLAY_LOCALS;
@see SHOW_BKG, HIDE_BKG
*/
inline void move_bkg(uint8_t x, uint8_t y) {
bkg_scroll_x = x, bkg_scroll_y = y;
inline void move_bkg(scroll_x_t x, scroll_y_t y) {
// Sets the absolute background scroll position by writing the shadow scroll registers
// and, for layouts larger than one screen, the two low bits of shadow_PPUCTRL.
//
// x: horizontal scroll position in pixels; only bit 8 is used beyond the low byte.
// y: vertical scroll position in pixels; values of 240 and above have 240 subtracted
//    before the low byte is stored, and count as "second screen" for the PPUCTRL bit.
// NOTE(review): no range check or wrapping is done here beyond the single 240 subtraction;
// callers are presumably expected to keep x / y inside the screen buffer size in pixels.

// store low 8 bits to shadow scroll registers
bkg_scroll_x = (uint8_t)x;
bkg_scroll_y = (uint8_t)(y >= 240 ? (y - 240) : y);
// store 9th bit of x and y in shadow PPUCTRL register
// (bit 0 receives msb_x, bit 1 receives msb_y; "& 0xFC" clears both bits first, so a
// layout that only extends in one direction always writes 0 to the other bit)
#if DEVICE_SCREEN_BUFFER_WIDTH > 32 && DEVICE_SCREEN_BUFFER_HEIGHT > 30
// Buffer is two screens wide and two screens high: update both bits
uint8_t msb_x = (uint8_t)((x >> 8) & 1);
uint8_t msb_y = (uint8_t)(y >= 240 ? 1 : 0);
shadow_PPUCTRL = (shadow_PPUCTRL & 0xFC) | (msb_y << 1) | msb_x;
#elif DEVICE_SCREEN_BUFFER_WIDTH > 32
// Buffer is two screens wide only: update the x bit
uint8_t msb_x = (uint8_t)((x >> 8) & 1);
shadow_PPUCTRL = (shadow_PPUCTRL & 0xFC) | msb_x;
#elif DEVICE_SCREEN_BUFFER_HEIGHT > 30
// Buffer is two screens high only: update the y bit
uint8_t msb_y = (uint8_t)(y >= 240 ? 1 : 0);
shadow_PPUCTRL = (shadow_PPUCTRL & 0xFC) | (msb_y << 1);
#endif
// Single-screen buffer: shadow_PPUCTRL is left untouched
}
@@ -997,7 +999,7 @@ inline void move_bkg(uint8_t x, uint8_t y) {
@see move_bkg
*/
inline void scroll_bkg(int8_t x, int8_t y) {
    // Moves the background relative to its current position.
    //
    // x, y: signed pixel offsets added to the current scroll position.
    //
    // The current position is rebuilt from everything move_bkg() stores: the low bytes in
    // bkg_scroll_x / bkg_scroll_y plus, for layouts larger than one screen, the 9th bits
    // kept in the two low bits of shadow_PPUCTRL. Using the low bytes alone would drop the
    // second-screen bit on every call, and a negative step across 0 would turn into 255
    // (or 65535), which move_bkg() treats as "240 or above" instead of wrapping to the
    // bottom of the buffer.
    uint16_t pos_x = (uint16_t)(bkg_scroll_x + x);
    int16_t pos_y = (int16_t)(bkg_scroll_y + y);
#if DEVICE_SCREEN_BUFFER_WIDTH > 32
    // Buffer is two screens wide: bit 0 of shadow_PPUCTRL holds the 9th bit of x
    if (shadow_PPUCTRL & 0x01) {
        pos_x += 256;
    }
    pos_x &= 0x01FF;    // wrap to 0..511
#else
    pos_x &= 0x00FF;    // wrap to 0..255
#endif
#if DEVICE_SCREEN_BUFFER_HEIGHT > 30
    // Buffer is two screens high: bit 1 of shadow_PPUCTRL selects the lower screen
    if (shadow_PPUCTRL & 0x02) {
        pos_y += 240;
    }
    const int16_t wrap_y = 480;
#else
    const int16_t wrap_y = 240;
#endif
    // A screen is 240 pixels high, so y wraps at a multiple of 240 rather than at a power
    // of two. One correction is enough because the offset is at most 128 in magnitude.
    if (pos_y < 0) {
        pos_y += wrap_y;
    } else if (pos_y >= wrap_y) {
        pos_y -= wrap_y;
    }
    move_bkg(pos_x, pos_y);
}

View File

@@ -10,8 +10,6 @@
; OAM CPU page
_shadow_OAM = 0x200
; Attribute shadow (64 bytes, leaving 56 bytes available for CPU stack)
_attribute_shadow = 0x188
.macro WRITE_PALETTE_SHADOW
lda #>0x3F00
@@ -68,8 +66,8 @@ _shadow_PPUCTRL:: .ds 1
_shadow_PPUMASK:: .ds 1
_bkg_scroll_x:: .ds 1
_bkg_scroll_y:: .ds 1
_attribute_row_dirty:: .ds 1
_attribute_column_dirty:: .ds 1
_attribute_row_dirty:: .ds NUM_NT
_attribute_column_dirty:: .ds NUM_NT
__oam_valid_display_on:: .ds 1
__SYSTEM:: .ds 1
__hblank_writes_index:: .ds 1
@@ -77,6 +75,13 @@ __hblank_writes_index:: .ds 1
.define __crt0_NMITEMP "___SDCC_m6502_ret4"
.area _BSS
.ifdef NES_LOMEM
; For LOMEM configuration use part of stack page for attribute shadow, leaving 56 bytes for subroutine calls
_attribute_shadow = 0x188
.else
; Otherwise allocate attribute shadow in data segment, with 64 bytes for each NT/AT
_attribute_shadow:: .ds (64*NUM_NT)
.endif
__crt0_paletteShadow:: .ds 25
.mode:: .ds 1
__lcd_isr_PPUCTRL:: .ds (2*.MAX_DEFERRED_ISR_CALLS)

View File

@@ -1,10 +1,41 @@
.include "global.s"
.area GBDKOVR (PAG, OVR)
.x_save: .ds 1
.attribute_row_dirty: .ds 1
.attribute_column_dirty: .ds 1
.area _HOME
_flush_shadow_attributes::
ldx #0
.ifndef NES_TILEMAP_S
1$:
.endif
stx *.x_save
lda *_attribute_row_dirty,x
beq 2$
sta *.attribute_row_dirty
ldy .xy_shift_tab,x
jsr _flush_shadow_attributes_rows
jmp _flush_shadow_attributes_columns
ldx *.x_save
2$:
lda *_attribute_column_dirty,x
beq 3$
sta *.attribute_column_dirty
ldy .xy_shift_tab,x
jsr _flush_shadow_attributes_columns
3$:
ldx *.x_save
lda #0
sta _attribute_row_dirty,x
sta _attribute_column_dirty,x
.ifndef NES_TILEMAP_S
inx
cpx #NUM_NT
bne 1$
.endif
rts
;
; Writes every row of attributes from _shadow_attributes that's been marked
@@ -13,24 +44,23 @@ _flush_shadow_attributes::
_flush_shadow_attributes_rows:
lda #<PPU_AT0
sta *.tmp
lda #>PPU_AT0
lda .ppu_hi_tab,x
sta *.tmp+1
ldy #0
_flush_shadow_attributes_row_loop:
lsr *_attribute_row_dirty
lsr *.attribute_row_dirty
bcc 1$
jmp _flush_shadow_attributes_update_row
1$:
beq _flush_shadow_attributes_end
_flush_shadow_attributes_next_row:
; Y += 8
; Y += AT_SHADOW_WIDTH
tya
clc
adc #8
adc #AT_SHADOW_WIDTH
tay
; .tmp += 8
; .tmp += ATTRIBUTE_PACKED_WIDTH
lda *.tmp
adc #8
adc #ATTRIBUTE_PACKED_WIDTH
sta *.tmp
jmp _flush_shadow_attributes_row_loop
_flush_shadow_attributes_end:
@@ -40,6 +70,7 @@ _flush_shadow_attributes_end:
; Flushes all dirty rows of _attribute_shadow by writing them to PPU memory
;
_flush_shadow_attributes_update_row:
stx *REGTEMP+3
; Update all 8 bytes of row for now, as each row in _attribute_row_dirty only stores 1 bit
; TODO: Could store 8 bytes and update range, at expense of 7 more bytes.
lda *.tmp+1
@@ -54,6 +85,7 @@ _flush_shadow_attributes_update_row:
i = i + 1
.endm
jsr .ppu_stripe_end
ldx *REGTEMP+3
jmp _flush_shadow_attributes_next_row
;
@@ -64,11 +96,10 @@ _flush_shadow_attributes_update_row:
_flush_shadow_attributes_columns:
lda #<PPU_AT0
sta *.tmp
lda #>PPU_AT0
lda .ppu_hi_tab,x
sta *.tmp+1
ldy #0
_flush_shadow_attributes_columns_loop:
lsr *_attribute_column_dirty
lsr *.attribute_column_dirty
bcc 1$
jmp _flush_shadow_attributes_update_column
1$:
@@ -87,11 +118,11 @@ _flush_shadow_attributes_columns_end:
tax
lda *.tmp
clc
adc #(8*i)
adc #(ATTRIBUTE_PACKED_WIDTH*i)
jsr .ppu_stripe_begin_vertical
lda _attribute_shadow+8*i,y
lda _attribute_shadow+AT_SHADOW_WIDTH*i,y
jsr .ppu_stripe_write_byte
lda _attribute_shadow+8*i+32,y
lda _attribute_shadow+AT_SHADOW_WIDTH*i+(AT_SHADOW_WIDTH*4),y
jsr .ppu_stripe_write_byte
jsr .ppu_stripe_end
.endm
@@ -100,6 +131,7 @@ _flush_shadow_attributes_columns_end:
; Flushes all dirty columns of _attribute_shadow by writing them to PPU memory
;
_flush_shadow_attributes_update_column:
stx *REGTEMP+3
; Update all 8 bytes of column for now, as each column in _attribute_column_dirty only stores 1 bit
; As PPU has no increment-by-8 feature, split writes into 4 separate stripes 2 bytes each
; TODO: Could make a dedicated unrolled transfer routine in nmi handler that writes all 8 bytes as one stripe.
@@ -108,4 +140,57 @@ _flush_shadow_attributes_update_column:
WRITEVERT
i = i + 1
.endm
ldx *REGTEMP+3
jmp _flush_shadow_attributes_columns_next_column
; Shift MSB of attribute X / Y (attribute table index) from bits 1 and 0 to 7 and 3
;
; Lookup table indexed by attribute table number. _flush_shadow_attributes loads the
; entry into Y ("ldy .xy_shift_tab,x") before calling the row / column flush routines,
; which use Y as an index into _attribute_shadow.
; Only the table matching the NES_TILEMAP_* symbol that is defined gets assembled.
; Note that for NES_TILEMAP_V the second entry sets bit 6 (0b01000000) rather than bit 7.
.ifdef NES_TILEMAP_F
.xy_shift_tab:
.db 0b00000000
.db 0b00001000
.db 0b10000000
.db 0b10001000
.endif
.ifdef NES_TILEMAP_S
; Single-screen: every entry is zero
.xy_shift_tab:
.db 0b00000000
.db 0b00000000
.db 0b00000000
.db 0b00000000
.endif
.ifdef NES_TILEMAP_H
.xy_shift_tab:
.db 0b00000000
.db 0b00001000
.endif
.ifdef NES_TILEMAP_V
.xy_shift_tab:
.db 0b00000000
.db 0b01000000
.endif
; Get hi address of attribute table index
;
; Lookup table indexed by attribute table number, holding the high byte of the PPU
; address (PPU_AT0..PPU_AT3) of that attribute table. The row / column flush routines
; read it with "lda .ppu_hi_tab,x" and store the result to .tmp+1.
; Only the table matching the NES_TILEMAP_* symbol that is defined gets assembled.
.ifdef NES_TILEMAP_F
.ppu_hi_tab:
.db >PPU_AT0
.db >PPU_AT1
.db >PPU_AT2
.db >PPU_AT3
.endif
.ifdef NES_TILEMAP_S
; Single-screen: every entry points at PPU_AT0
.ppu_hi_tab:
.db >PPU_AT0
.db >PPU_AT0
.db >PPU_AT0
.db >PPU_AT0
.endif
.ifdef NES_TILEMAP_H
.ppu_hi_tab:
.db >PPU_AT0
.db >PPU_AT1
.endif
.ifdef NES_TILEMAP_V
; Vertical layout uses the first and third attribute table addresses
.ppu_hi_tab:
.db >PPU_AT0
.db >PPU_AT2
.endif

View File

@@ -1,3 +1,5 @@
.include "platform_cfg.s"
;; Maximum number of times LCD ISR can be repeatedly called
.MAX_LCD_ISR_CALLS = 4
;; Total number is +1 to support VBL ISR with the same logic
@@ -22,11 +24,49 @@
.SELECT = 0x20
.START = 0x10
;; Screen dimensions (in tiles)
;; Screen dimensions (in tiles)
.DEVICE_SCREEN_WIDTH = 32
.DEVICE_SCREEN_HEIGHT = 30
;; Buffer dimensions (in tiles)
;; Dependent on tilemap layout
;;
;; Each block below is assembled only when the matching NES_TILEMAP_* symbol is defined
;; (see platform_cfg.s). There is no fallback block: if none of the four symbols is
;; defined, none of the symbols below is assigned.
;;
;;   NUM_NT                        number of nametables / attribute tables in the layout
;;   NT_2W / NT_2H                 1 when the layout is two screens wide / high, else 0
;;   AT_SHADOW_WIDTH / _HEIGHT     dimensions of the attribute shadow buffer, in bytes
;;   .DEVICE_SCREEN_BUFFER_WIDTH / _HEIGHT   buffer dimensions in tiles
.ifdef NES_TILEMAP_F
;; Four-screen layout: 2x2 nametables
NUM_NT = 4
NT_2W = 1
NT_2H = 1
AT_SHADOW_WIDTH = 16
AT_SHADOW_HEIGHT = 16
.DEVICE_SCREEN_BUFFER_WIDTH = 64
.DEVICE_SCREEN_BUFFER_HEIGHT = 60
.endif
.ifdef NES_TILEMAP_H
;; Horizontal layout: two nametables side by side
NUM_NT = 2
NT_2W = 1
NT_2H = 0
AT_SHADOW_WIDTH = 16
AT_SHADOW_HEIGHT = 8
.DEVICE_SCREEN_BUFFER_WIDTH = 64
.DEVICE_SCREEN_BUFFER_HEIGHT = 30
.endif
.ifdef NES_TILEMAP_V
;; Vertical layout: two nametables stacked
NUM_NT = 2
NT_2W = 0
NT_2H = 1
AT_SHADOW_WIDTH = 8
AT_SHADOW_HEIGHT = 16
.DEVICE_SCREEN_BUFFER_WIDTH = 32
.DEVICE_SCREEN_BUFFER_HEIGHT = 60
.endif
.ifdef NES_TILEMAP_S
;; Single-screen layout: one nametable
NUM_NT = 1
NT_2W = 0
NT_2H = 0
AT_SHADOW_WIDTH = 8
AT_SHADOW_HEIGHT = 8
.DEVICE_SCREEN_BUFFER_WIDTH = 32
.DEVICE_SCREEN_BUFFER_HEIGHT = 30
.endif
.MAXCURSPOSX = 31
.MAXCURSPOSY = 29
@@ -36,6 +76,17 @@
;; NAMETABLES
PPU_NT0 = 0x2000
PPU_AT0 = 0x23C0
PPU_NT1 = 0x2400
PPU_AT1 = 0x27C0
PPU_NT2 = 0x2800
PPU_AT2 = 0x2BC0
PPU_NT3 = 0x2C00
PPU_AT3 = 0x2FC0
NT_WIDTH = 32
NT_HEIGHT = 30
AT_WIDTH = 8
AT_HEIGHT = 8
ATTRIBUTE_WIDTH = 16
ATTRIBUTE_HEIGHT = 15

View File

@@ -0,0 +1,33 @@
;;;
;;; Platform/mapper specific configuration settings.
;;;
;
; Tilemap layout (nametable mirroring) setting.
;
; Available settings:
;
; S: Single-screen layout/mirroring.
; H: Horizontal layout (vertical mirroring)
; V: Vertical layout (horizontal mirroring)
; F: Four-screen layout (no mirroring)
;
; Only *one* of these should be enabled.
; The same define should also be passed to LCC for compile-time settings in the C include files.
;
NES_TILEMAP_S = 1
;NES_TILEMAP_H = 1
;NES_TILEMAP_V = 1
;NES_TILEMAP_F = 1
;
; LOMEM setting
;
; This places the 64 bytes of attribute shadow (assuming single-screen layout) in the stack area
; saving 64 bytes of RAM memory for user variables, at the expense of a reduced stack for function calls.
;
; This setting is *only* valid when NES_TILEMAP_S = 1 (using it with other tilemap layouts will corrupt the stack)
;
NES_LOMEM = 1

View File

@@ -3,7 +3,7 @@
.include "global.s"
.area GBDKOVR (PAG, OVR)
_set_bkg_attribute_xy_nes16x16_PARM_3:: .ds 1
_set_bkg_attribute_xy_nes16x16_PARM_3:: .ds 1
.x_odd: .ds 1
.y_odd: .ds 1
.val: .ds 1
@@ -15,13 +15,23 @@ _set_bkg_attribute_xy_nes16x16::
ror *.x_odd
tay
txa
.ifne NT_2H
cmp #(NT_HEIGHT/2)
bcc 0$
sbc #(NT_HEIGHT/2) ; Assumes carry set by cmp
ora #2*AT_HEIGHT
0$:
.endif
lsr
ror *.y_odd
pha
asl
asl
asl
and #0x38
.ifne NT_2W
asl
.endif
and #(AT_SHADOW_WIDTH*AT_SHADOW_HEIGHT-1)
ora .identity,y
tay
lda *_set_bkg_attribute_xy_nes16x16_PARM_3
@@ -51,10 +61,57 @@ _set_bkg_attribute_xy_nes16x16::
; Set dirty bit for row.
; Assume writing rows, as the potential to optimize column writing is limited anyway.
pla
tay
lda .bitmask_dirty_tab,y
.ifne NT_2H
and #AT_HEIGHT-1
.endif
tax
lda .bitmask_dirty_tab,x
; Merge A with current attribute_row_dirty flag
.ifdef NES_TILEMAP_S
ora *_attribute_row_dirty
sta *_attribute_row_dirty
.endif
.ifdef NES_TILEMAP_H
pha
ldx #0
tya
and #AT_WIDTH
beq 10$
inx
10$:
pla
ora *_attribute_row_dirty,x
sta *_attribute_row_dirty,x
.endif
.ifdef NES_TILEMAP_V
pha
ldx #0
tya
and #(AT_HEIGHT*AT_SHADOW_WIDTH)
beq 10$
inx
10$:
pla
ora *_attribute_row_dirty,x
sta *_attribute_row_dirty,x
.endif
.ifdef NES_TILEMAP_F
pha
ldx #0
tya
and #AT_WIDTH
beq 10$
inx
10$:
cpy #(AT_HEIGHT*AT_SHADOW_WIDTH)
bcc 11$
inx
inx
11$:
pla
ora *_attribute_row_dirty,x
sta *_attribute_row_dirty,x
.endif
rts
.mask_tab:

View File

@@ -8,9 +8,11 @@
.ypos: .ds 1
.num_columns: .ds 1
.num_rows: .ds 1
.src: .ds 2
.dst: .ds 2
.height_second_pass: .ds 1
.height_first_pass: .ds 1
.attribute_x_odd: .ds 1
.starty_second_pass: .ds 1
.x_bits: .ds 1
.attribute_y_odd: .ds 1
.attribute_num_columns_odd: .ds 1
.attribute_num_rows_odd: .ds 1
@@ -19,63 +21,122 @@
.define .width "_set_bkg_attributes_nes16x16_PARM_3"
.define .height "_set_bkg_attributes_nes16x16_PARM_4"
.define .tiles "_set_bkg_attributes_nes16x16_PARM_5"
.define .src "_set_bkg_attributes_nes16x16_PARM_5"
;
; Fast version writing directly to PPU memory.
; Does not handle unaligned x & y and assumes even number of columns / rows
;
_set_bkg_attributes_nes16x16_fast::
1$:
lda *.ypos
asl
asl
asl
ora *.xpos
sta *.tmp
lda #>PPU_AT0
sta PPUADDR
lda *.tmp
ora #<PPU_AT0
sta PPUADDR
ldx *.tmp
ldy #0
2$:
lda [*.src],y
sta PPUDATA
sta _attribute_shadow,x
.macro INC_X ?lbl
; Increment dst index
inx
txa
bit *.x_bits
bne lbl
sec
sbc #AT_SHADOW_WIDTH
tax
lbl:
; Increment src index
iny
cpy *.num_columns
bne 2$
; .src += y
tya
clc
adc *.src
sta *.src
lda #0
adc *.src+1
sta *.src+1
inc *.ypos
dec *.num_rows
bne 1$
rts
.endm
_set_bkg_attributes_nes16x16::
pha
.ifne NT_2H
; Two vertical nametables -> wrap around to next
lda #0x10
.else
; One vertical nametable -> wrap around to self
lda #0x00
.endif
sta *.starty_second_pass
txa
ldy #(.DEVICE_SCREEN_HEIGHT/2) ; Row end
cmp #(.DEVICE_SCREEN_HEIGHT/2)
bcc 1$
sbc #(.DEVICE_SCREEN_HEIGHT/2) ; Assumes carry set
.ifne NT_2H
ldy #0x00
sty *.starty_second_pass
.endif
ldy #(2*.DEVICE_SCREEN_HEIGHT/2) ; Row end
1$:
clc
adc *.height
cmp #17
bcs .set_bkg_attributes_nes16x16_wrap_two_passes
; No wrap - just do single pass
pla
jmp _set_bkg_attributes_nes16x16_impl
.set_bkg_attributes_nes16x16_wrap_two_passes:
tya
; num_rows_first = MIN(row_end-y, h);
sec
sbc .identity,x
cmp *.height
bcc 1$
lda *.height
1$:
sta *.height_first_pass
lda *.height
sec
sbc *.height_first_pass
sta *.height_second_pass
; First pass
lda *.height_first_pass
beq 11$
sta *.height
pla
pha
jsr _set_bkg_attributes_nes16x16_impl
11$:
; Second pass
lda *.height_second_pass
beq 3$
lsr *.height_first_pass
sta *.height
bcs 2$
; num_rows_first was even, so attribute data for second pass starts at expected address
; Start second pass at AT==16 (next attribute table)
lda *.starty_second_pass
tax
pla
jmp __set_bkg_attributes_nes16x16_impl_skip_y_adjustment
2$:
; num_rows_first was odd, so attribute data for second pass is half-way through expected address
; Start second pass at AT y==15 to skip one half-row of attribute data (AT y==15 is always hidden)
lda *.starty_second_pass
.ifne NT_2H
; Two vertical nametables -> wrap around to last row of next nametable
eor #0x1F
.else
; One vertical nametable -> wrap around to last row of this nametable
eor #0x0F
.endif
tax
pla
jmp __set_bkg_attributes_nes16x16_impl_skip_y_adjustment
3$:
pla
rts
_set_bkg_attributes_nes16x16_impl::
; Adjust Y-coordinate to skip last unused half-row of 8x8 attribute table
cpx #(2*AT_HEIGHT-1)
bcc 42$
inx
42$:
__set_bkg_attributes_nes16x16_impl_skip_y_adjustment::
stx *.ypos
lsr ; Make xpos count 32x32 areas / full bytes
ror *.attribute_x_odd ; ...and potentially mark x as odd-numbered
sta *.xpos
stx *.ypos
lsr *.ypos ; Make ypos count 32x32 areas / full bytes
ror *.attribute_y_odd ; ...and potentially mark y as odd-numbered
lda *.tiles
sta *.src
lda *.tiles+1
sta *.src+1
lda *.height
lsr ; Make num_rows count 32x32 areas / full bytes
ror *.attribute_num_rows_odd ; ...and mark num_rows as odd-numbered
sta *.num_rows
lda #AT_SHADOW_WIDTH-1
sta *.x_bits
;
lda *.width
lsr ; Make num_columns count 32x32 areas / full bytes
@@ -86,14 +147,15 @@ _set_bkg_attributes_nes16x16::
asl
asl
asl
.ifne NT_2W
asl
.endif
ora *.xpos
clc
adc #<_attribute_shadow
sta *.dst
lda #>_attribute_shadow
adc #0
sta *.dst+1
pha
tax
jsr .attribute_set_dirty
pla
tax
; Branch into distinct routines based on whether x / y are aligned
; (even or odd width / height are handled internally by these routines)
bit *.attribute_y_odd
@@ -116,66 +178,51 @@ _set_bkg_attributes_nes16x16::
; Unaligned Y, aligned X
jmp unaligned_xy
; Boilerplate code for row-loop
.macro INC_SRC_AND_DST
; src += num_columns
lda *.src
clc
adc *.num_columns
sta *.src
lda *.src+1
adc #0
sta *.src+1
; dst += 8 (will never wrap boundary)
lda *.dst
adc #8
sta *.dst
.endm
;
; Version for when x & y are both aligned to attribute byte-grid
;
.macro ALIGNED_XY_RIGHT_EDGE at_mask at_mask_i ?lbl
bit *.attribute_num_columns_odd
bpl lbl
lda [*.dst],y
lda _attribute_shadow,x
and #at_mask_i
sta *.tmp
lda [*.src],y
and #at_mask
ora *.tmp
sta [*.dst],y
sta _attribute_shadow,x
lbl:
.endm
;
aligned_xy:
lda *.num_rows
beq aligned_xy_row_loop_end
aligned_xy_row_loop:
ldy #0
jsr .reset_x_coord
aligned_xy_column_loop:
lda [*.src],y
sta [*.dst],y
iny
sta _attribute_shadow,x
INC_X
cpy *.num_columns
bne aligned_xy_column_loop
; If columns were odd, we have one more byte where only left part should be modified
ALIGNED_XY_RIGHT_EDGE ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_TL, ATTRIBUTE_MASK_BR+ATTRIBUTE_MASK_TR
INC_SRC_AND_DST
dec *.num_rows
jsr .inc_row
bne aligned_xy_row_loop
aligned_xy_row_loop_end:
; If rows were odd, we have one additional row where only top part should be modified
bit *.attribute_num_rows_odd
bpl 1$
ldy #0
jsr .reset_x_coord
2$:
lda [*.dst],y
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_BR
sta *.tmp
lda [*.src],y
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_TR
ora *.tmp
sta [*.dst],y
iny
sta _attribute_shadow,x
INC_X
cpy *.num_columns
bne 2$
; If columns were odd, we have one more byte where only top corner should be modified
@@ -190,13 +237,13 @@ aligned_xy_column_loop:
.macro UNALIGNED_X_RIGHT_EDGE at_mask at_mask_i ?lbl
bit *.attribute_num_columns_odd
bmi lbl
lda [*.dst],y
lda _attribute_shadow,x
and #at_mask_i
sta *.tmp
lda *p
and #at_mask
ora *.tmp
sta [*.dst],y
sta _attribute_shadow,x
lbl:
.endm
unaligned_x:
@@ -206,11 +253,13 @@ unaligned_x:
; Odd columns get aligned at right edge - but we need to round num_columns upwards!
inc *.num_columns
8$:
lda *.num_rows
beq unaligned_x_row_loop_end
unaligned_x_row_loop:
ldy #0
jsr .reset_x_coord
; As we're writing output +1 X coordinate to the right, first byte in attribute shadow must be a read-modify-write
; Fill p with old values for TL / BL, to initialize it for subsequent code.
lda [*.dst],y
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_BL
sta *p
unaligned_x_column_loop:
@@ -222,28 +271,28 @@ unaligned_x_column_loop:
and #ATTRIBUTE_MASK_TR+ATTRIBUTE_MASK_BR
; ...and combined with p providing left half
ora *p
sta [*.dst],y
sta _attribute_shadow,x
; While right half of src (top-right, bottom right) is saved in p for *next* byte as *left* half
lda [*.src],y
lsr
lsr
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_BL
sta *p
iny
INC_X
cpy *.num_columns
bne unaligned_x_column_loop
; If columns were NOT odd, we have one more byte where only left part should be modified
UNALIGNED_X_RIGHT_EDGE ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_TL, ATTRIBUTE_MASK_BR+ATTRIBUTE_MASK_TR
INC_SRC_AND_DST
dec *.num_rows
jsr .inc_row
bne unaligned_x_row_loop
unaligned_x_row_loop_end:
; If rows were odd, we have one additional row where only top part should be modified
bit *.attribute_num_rows_odd
bpl 5$
ldy #0
jsr .reset_x_coord
; As we're writing output +1 X coordinate to the right, first byte in attribute shadow must be a read-modify-write
; Fill p with old values for TL / BL / BR, to initialize it for subsequent code.
lda [*.dst],y
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_BR
sta *p
2$:
@@ -255,14 +304,14 @@ unaligned_x_column_loop:
and #ATTRIBUTE_MASK_TR
; ...and combined with p providing left half
ora *p
sta [*.dst],y
sta _attribute_shadow,x
; While right half of src (top-right, bottom left, bottom right) is saved in p for *next* byte as *left* half
lda [*.src],y
lsr
lsr
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_BR
sta *p
iny
INC_X
cpy *.num_columns
bne 2$
; If columns were odd, we have one more byte where only top-left corner should be modified
@@ -276,7 +325,7 @@ unaligned_x_column_loop:
.macro UNALIGNED_Y_RIGHT_EDGE at_mask at_mask_i ?lbl
bit *.attribute_num_columns_odd
bpl lbl
lda [*.dst],y
lda _attribute_shadow,x
and #at_mask_i
sta *.tmp
lda [*.src],y
@@ -288,7 +337,7 @@ unaligned_x_column_loop:
ora *.tmp
ora *pRow,y
and #at_mask
sta [*.dst],y
sta _attribute_shadow,x
lda [*.src],y
lsr
lsr
@@ -309,16 +358,18 @@ unaligned_y:
; Even rows means additional row needed, but don't round num_rows upwards.
; As we're writing output +1 Y coordinate down, first row in attribute shadow must be a read-modify-write
; Fill pRow with old values for TL / TR, to initialize it for subsequent code.
ldy *.num_columns
dey
jsr .reset_x_coord
unaligned_y_row_init_loop:
lda [*.dst],y
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_TR
sta *pRow,y
dey
bpl unaligned_y_row_init_loop
INC_X
cpy *.num_columns
bne unaligned_y_row_init_loop
lda *.num_rows
beq unaligned_y_row_loop_end
unaligned_y_row_loop:
ldy #0
jsr .reset_x_coord
unaligned_y_column_loop:
lda [*.src],y
; Shift to move down one attribute coordinate, as y is unaligned
@@ -329,7 +380,7 @@ unaligned_y_column_loop:
asl
and #ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_BR
ora *pRow,y
sta [*.dst],y
sta _attribute_shadow,x
; ...and bottom part is saved as top part for *next* row
lda [*.src],y
lsr
@@ -339,28 +390,28 @@ unaligned_y_column_loop:
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_TR
sta *pRow,y
;
iny
INC_X
cpy *.num_columns
bne unaligned_y_column_loop
; If columns were odd, we have one more byte where only left part should be modified
UNALIGNED_Y_RIGHT_EDGE ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_TL, ATTRIBUTE_MASK_BR+ATTRIBUTE_MASK_TR
INC_SRC_AND_DST
dec *.num_rows
jsr .inc_row
bne unaligned_y_row_loop
unaligned_y_row_loop_end:
; If rows were NOT odd, we have one additional row where only top part should be modified
; pRow should be used as source data
bit *.attribute_num_rows_odd
bmi 1$
ldy #0
jsr .reset_x_coord
2$:
lda [*.dst],y
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_BR
sta *.tmp
lda *pRow,y
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_TR
ora *.tmp
sta [*.dst],y
iny
sta _attribute_shadow,x
INC_X
cpy *.num_columns
bne 2$
; If columns were odd, we have one more byte where only top-left corner should be modified
@@ -374,7 +425,7 @@ unaligned_y_column_loop:
.macro UNALIGNED_XY_RIGHT_EDGE at_mask at_mask_i ?lbl
bit *.attribute_num_columns_odd
bmi lbl
lda [*.dst],y
lda _attribute_shadow,x
and #at_mask_i
sta *.tmp
; top-left taken from previous row...
@@ -389,7 +440,7 @@ unaligned_y_column_loop:
and #ATTRIBUTE_MASK_BL
ora *.tmp
and #at_mask
sta [*.dst],y
sta _attribute_shadow,x
; ...finally, save *bottom-left in previous byte moved to top-left*
lda *p
lsr
@@ -417,30 +468,21 @@ unaligned_xy:
; Even rows means additional row needed, but don't round num_rows upwards.
; As we're writing output +1 Y coordinate down, first row in attribute shadow must be a read-modify-write
; Fill pRow with old values for TL / TR, to initialize it for subsequent code. Shift them in X by +1
ldy #0
lda [*.dst],y
and #ATTRIBUTE_MASK_TL
jsr .reset_x_coord
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_BL
sta *p
unaligned_xy_row_init_loop:
lda [*.dst],y
; TL -> TR
asl
asl
and #ATTRIBUTE_MASK_TR
; Combine with TR -> TL from previous
ora *p
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_TR
sta *pRow,y
lda [*.dst],y
; TR -> TL for next
lsr
lsr
and #ATTRIBUTE_MASK_TL
sta *p
iny
INC_X
cpy *.num_columns
bne unaligned_xy_row_init_loop
lda *.num_rows
beq unaligned_xy_row_loop_end
unaligned_xy_row_loop:
ldy #0
jsr .reset_x_coord
unaligned_xy_column_loop:
lda [*.src],y
; Shift to move down one attribute coordinate, and right one attribute coordinate, as x and y are both unaligned
@@ -461,7 +503,7 @@ unaligned_xy_column_loop:
asl
and #ATTRIBUTE_MASK_BL
ora *.tmp
sta [*.dst],y
sta _attribute_shadow,x
; ...finally, bottom-left part is saved for *next* row, but moved to top right and combined with *bottom-left in previous byte moved to top-left*
lda [*.src],y
lsr
@@ -480,28 +522,28 @@ unaligned_xy_column_loop:
lda [*.src],y
sta *p
;
iny
INC_X
cpy *.num_columns
bne unaligned_xy_column_loop
; If columns were NOT odd, we have one more byte where only left part should be modified
UNALIGNED_XY_RIGHT_EDGE ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_TL, ATTRIBUTE_MASK_BR+ATTRIBUTE_MASK_TR
INC_SRC_AND_DST
dec *.num_rows
jsr .inc_row
bne unaligned_xy_row_loop
unaligned_xy_row_loop_end:
; If rows were NOT odd, we have one additional row where only top part should be modified
; pRow should be used as source data - it has already been pre-shifted correctly
bit *.attribute_num_rows_odd
bmi 1$
ldy #0
jsr .reset_x_coord
2$:
lda [*.dst],y
lda _attribute_shadow,x
and #ATTRIBUTE_MASK_BL+ATTRIBUTE_MASK_BR
sta *.tmp
lda *pRow,y
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_TR
ora *.tmp
sta [*.dst],y
iny
sta _attribute_shadow,x
INC_X
cpy *.num_columns
bne 2$
; If columns were NOT odd, we have one more byte where only top-left corner should be modified
@@ -510,25 +552,129 @@ unaligned_xy_column_loop:
rts
.attribute_set_dirty:
; A = min(7, .num_rows + .attribute_num_rows_odd) << 3
; A = min(AT_HEIGHT-1, .num_rows + .attribute_num_rows_odd) << 3
lda *.attribute_num_rows_odd
cmp #0x80
lda *.num_rows
adc #0
cmp #7
cmp #AT_HEIGHT-1
bcc 1$
lda #7
lda #AT_HEIGHT-1
1$:
sta *.tmp
; X = A | ypos
; Y = A | ypos
lda *.ypos
.ifne NT_2H
and #AT_HEIGHT-1
; Special-case: if we are at last half-row, then ypos actually applies to *next* nametable
; So treat it as row == 0 for next nametable
cmp #AT_HEIGHT-1
bcc 2$
bit *.attribute_y_odd
bpl 2$
txa
adc #AT_SHADOW_WIDTH
and #(AT_SHADOW_WIDTH*AT_SHADOW_HEIGHT-1)
tax
lda #0
2$:
.endif
asl
asl
asl
ora *.tmp
tax
lda .row_dirty_table,x
tay
.ifdef NES_TILEMAP_S
lda .row_dirty_table,y
ora *_attribute_row_dirty
sta *_attribute_row_dirty
.endif
.ifdef NES_TILEMAP_H
ldx #0
jsr .mark_left_and_right_at_dirty
.endif
.ifdef NES_TILEMAP_V
cpx #(AT_SHADOW_WIDTH*AT_SHADOW_HEIGHT/2)
lda #0
rol
tax
lda .row_dirty_table,y
ora *_attribute_row_dirty,x
sta *_attribute_row_dirty,x
.endif
.ifdef NES_TILEMAP_F
cpx #(AT_SHADOW_WIDTH*AT_SHADOW_HEIGHT/2)
lda #0
rol
asl
tax
jsr .mark_left_and_right_at_dirty
.endif
rts
.ifne NT_2W
;
; Marks left and right attribute table dirty, depending on MSB of xpos and width of attributes to write
;
; Input: X = 0 for NES_TILEMAP_H
; 0 or 2 for NES_TILEMAP_F, indexing dirty flags for top/bottom attribute tables
; Y = index into .row_dirty_table of the row bitmask to merge in
;
; Reads .xpos and .num_columns. Uses DPTR and DPTR+1 as scratch bytes.
; Clobbers A; X is incremented twice (once per loop pass); Y is not modified.
; Only assembled when the layout is two screens wide (NT_2W != 0).
;
.mark_left_and_right_at_dirty:
.define .flip_xpos_msb "DPTR"
.define .x_wrapped_around "DPTR+1"
lda #AT_WIDTH
sta *.flip_xpos_msb
; Store wrapped-around flag to same bit as xpos MSB (AT_WIDTH)
; (bit is set when xpos and xpos + num_columns + 1 differ in the AT_WIDTH bit)
lda *.xpos
sec ; +1 to account for rounding odd coordinates upwards
adc *.num_columns
eor *.xpos
and #AT_WIDTH
sta *.x_wrapped_around
; First loop iteration: Mark left AT dirty if xpos < AT_WIDTH or wrap-around occurred
; Second loop iteration: Mark right AT dirty if xpos >= AT_WIDTH or wrap-around occurred
9$:
lda *.xpos
eor *.flip_xpos_msb
ora *.x_wrapped_around
and #AT_WIDTH
beq 10$
; Merge the row bitmask into the dirty flags of the attribute table selected by X
lda .row_dirty_table,y
ora *_attribute_row_dirty,x
sta *_attribute_row_dirty,x
10$:
; Advance to the next attribute table and toggle the flip mask:
; AT_WIDTH -> 0 after the first pass (loop again), 0 -> AT_WIDTH after the second (exit)
inx
lda *.flip_xpos_msb
eor #AT_WIDTH
sta *.flip_xpos_msb
beq 9$
rts
.endif
;
; Rewinds the attribute shadow index in X to the starting column of its current row.
;
; Input:  X = index into _attribute_shadow
; Output: Y = 0 (source column counter restarted)
;         X = A = (X with only its row bits kept) | .xpos
;
.reset_x_coord:
ldy #0
txa
; Keep the row part of the index, drop the column part
and #((AT_SHADOW_HEIGHT - 1) * AT_SHADOW_WIDTH)
; ...and put the starting column back in
ora *.xpos
tax
rts
;
; Per-row bookkeeping shared by the row loops: advances the source pointer by one row,
; moves the attribute shadow index in X down one row, and decrements .num_rows.
;
; Input:  X = index into _attribute_shadow
; Output: X = A = (X + AT_SHADOW_WIDTH) wrapped to the size of the attribute shadow
;         Z flag reflects the decremented .num_rows, so callers can branch with bne / beq
;
.inc_row:
; src += num_columns
lda *.src
clc
adc *.num_columns
sta *.src
lda *.src+1
adc #0
sta *.src+1
; Increment Y-coordinate of attribute shadow index
; NOTE(review): there is no clc before this adc, so it also adds the carry left by the
; high-byte addition above - presumably always clear, i.e. .src is assumed not to wrap
; past 0xFFFF.
txa
adc #AT_SHADOW_WIDTH
and #(AT_SHADOW_WIDTH*AT_SHADOW_HEIGHT-1)
tax
; Decrement num_rows for caller loop
dec *.num_rows
rts
;
@@ -558,7 +704,7 @@ unaligned_xy_column_loop:
.db 0b00111110
.db 0b01111110
.db 0b11111110
.db 0b11111110
.db 0b11111111
; Y = 2
.db 0b00000100
.db 0b00001100
@@ -566,50 +712,50 @@ unaligned_xy_column_loop:
.db 0b00111100
.db 0b01111100
.db 0b11111100
.db 0b11111100
.db 0b11111100
.db 0b11111101
.db 0b11111111
; Y = 3
.db 0b00001000
.db 0b00011000
.db 0b00111000
.db 0b01111000
.db 0b11111000
.db 0b11111000
.db 0b11111000
.db 0b11111000
.db 0b11111001
.db 0b11111011
.db 0b11111111
; Y = 4
.db 0b00010000
.db 0b00110000
.db 0b01110000
.db 0b11110000
.db 0b11110000
.db 0b11110000
.db 0b11110000
.db 0b11110000
.db 0b11110001
.db 0b11110011
.db 0b11110111
.db 0b11111111
; Y = 5
.db 0b00100000
.db 0b01100000
.db 0b11100000
.db 0b11100000
.db 0b11100000
.db 0b11100000
.db 0b11100000
.db 0b11100000
.db 0b11100001
.db 0b11100011
.db 0b11100111
.db 0b11101111
.db 0b11111111
; Y = 6
.db 0b01000000
.db 0b11000000
.db 0b11000000
.db 0b11000000
.db 0b11000000
.db 0b11000000
.db 0b11000000
.db 0b11000000
.db 0b11000001
.db 0b11000011
.db 0b11000111
.db 0b11001111
.db 0b11011111
.db 0b11111111
; Y = 7
.db 0b10000000
.db 0b10000000
.db 0b10000000
.db 0b10000000
.db 0b10000000
.db 0b10000000
.db 0b10000000
.db 0b10000000
.db 0b10000001
.db 0b10000011
.db 0b10000111
.db 0b10001111
.db 0b10011111
.db 0b10111111
.db 0b11111111

View File

@@ -1,134 +1,42 @@
.include "global.s"
; NOTE: This overlay arrangement MUST match that of set_bkg_based_submap
.area GBDKOVR (PAG, OVR)
_set_bkg_tiles_PARM_3:: .ds 1
_set_bkg_tiles_PARM_4:: .ds 1
_set_bkg_tiles_PARM_5:: .ds 2
.xpos: .ds 1
.ypos: .ds 1
.num_rows: .ds 1
.src_tiles: .ds 2
_set_bkg_tiles_PARM_3::
_set_bkg_based_tiles_PARM_3:: .ds 1
_set_bkg_tiles_PARM_4::
_set_bkg_based_tiles_PARM_4:: .ds 1
_set_bkg_tiles_PARM_5::
_set_bkg_based_tiles_PARM_5:: .ds 2
_set_bkg_based_tiles_PARM_6:: .ds 1
.padding:: .ds 1
.xpos: .ds 1
.ypos: .ds 1
.num_rows: .ds 1
.src_tiles: .ds 2
.remainder: .ds 1
.ppuhi: .ds 1
.area _ZP
__map_tile_offset:: .ds 1
.define .width "_set_bkg_submap_PARM_3"
.define .height "_set_bkg_submap_PARM_4"
.define .tiles "_set_bkg_submap_PARM_5"
.define .map_width "_set_bkg_submap_PARM_6"
.define .tile_offset "_set_bkg_based_submap_PARM_7"
.area _HOME
_set_bkg_tiles::
.define .width "_set_bkg_tiles_PARM_3"
.define .height "_set_bkg_tiles_PARM_4"
.define .tiles "_set_bkg_tiles_PARM_5"
ldy #0
sty *_set_bkg_based_tiles_PARM_6
_set_bkg_based_tiles::
sta *.xpos
stx *.ypos
lda .tiles
sta *.src_tiles
lda .tiles+1
sta *.src_tiles+1
lda *.height
sta *.num_rows
; Prefer vertical stripes if height > width
cmp *.width
beq _set_bkg_tiles_horizontalStripes
bcs _set_bkg_tiles_verticalStripes
_set_bkg_tiles_horizontalStripes:
1$:
lda #0
sta *.tmp+1
lda *.ypos
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
ora *.xpos
sta *.tmp
;
lda *.tmp+1
ora #0x20
tax
lda *.tmp
jsr .ppu_stripe_begin_horizontal
ldx *.width
ldy #0
2$:
lda [*.src_tiles],y
clc
adc *__map_tile_offset
iny
jsr .ppu_stripe_write_byte
dex
bne 2$
jsr .ppu_stripe_end
; .src_tiles += y
tya
clc
adc *.src_tiles
sta *.src_tiles
lda #0
adc *.src_tiles+1
sta *.src_tiles+1
inc *.ypos
dec *.num_rows
bne 1$
rts
.define .num_cols ".num_rows"
_set_bkg_tiles_verticalStripes::
lda *_set_bkg_based_tiles_PARM_6
sta *.tile_offset
lda *.width
sta *.num_cols
ldy #0
1$:
sta *.map_width
lda *.tiles
sta *.src_tiles
lda *.tiles+1
sta *.src_tiles+1
;
lda #0
sta *.tmp+1
lda *.ypos
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
ora *.xpos
sta *.tmp
;
lda *.tmp+1
ora #0x20
tax
lda *.tmp
jsr .ppu_stripe_begin_vertical
ldx *.height
2$:
lda [*.src_tiles],y
clc
adc *__map_tile_offset
jsr .ppu_stripe_write_byte
; .src_tiles += width
lda *.width
clc
adc *.src_tiles
sta *.src_tiles
lda #0
adc *.src_tiles+1
sta *.src_tiles+1
dex
bne 2$
jsr .ppu_stripe_end
iny
inc *.xpos
dec *.num_cols
bne 1$
rts
jmp .set_bkg_common

View File

@@ -3,13 +3,57 @@
.area GBDKOVR (PAG, OVR)
_set_bkg_tile_xy_PARM_3:: .ds 1 ; (shared with _set_vram_byte_PARM_2)
.bkg_tile_ppu_addr:: .ds 2
.ppuhi: .ds 1
.area _HOME
.ifdef NES_TILEMAP_S
.define PPUHI_MASK "#>PPU_NT0"
.else
.define PPUHI_MASK "*.ppuhi"
.endif
_get_bkg_xy_addr::
; XA = (PPU_NT0) | (X << 5) | A
; (A = x_pos, X = y_pos)
.ifne NT_2W
tay
.endif
and #NT_WIDTH-1
sta *.bkg_tile_ppu_addr
.ifne NT_2W
tya
; .ppuhi = (xpos >> 3) & 0b00000100
lsr
lsr
lsr
and #0b00000100
ora #>PPU_NT0
sta *.ppuhi
.else
.ifne NT_2H
lda #>PPU_NT0
sta *.ppuhi
.endif
.endif
.ifne NT_2H
; .ppuhi |= ((ypos / DEVICE_SCREEN_BUFFER_HEIGHT) << 3) & 0b00001000
ldy #0
txa
cmp #NT_HEIGHT
bcc 1$
sbc #NT_HEIGHT ; Assumes carry set by cmp
iny
1$:
tax
tya
asl
asl
asl
and #0b00001000
ora *.ppuhi
sta *.ppuhi
.endif
txa
asl
asl
@@ -22,7 +66,7 @@ _get_bkg_xy_addr::
lda *.bkg_tile_ppu_addr+1
rol
and #0x03
ora #(PPU_NT0 >> 8)
ora PPUHI_MASK
tax
lda *.bkg_tile_ppu_addr
rts

View File

@@ -1,31 +1,49 @@
.include "global.s"
.area GBDKOVR (PAG, OVR)
_set_bkg_submap_PARM_3:: .ds 1
_set_bkg_submap_PARM_4:: .ds 1
_set_bkg_submap_PARM_5:: .ds 2
_set_bkg_submap_PARM_6:: .ds 1
.xpos: .ds 1
.ypos: .ds 1
.num_rows: .ds 1
.src_tiles: .ds 2
.remainder: .ds 1
_set_bkg_submap_PARM_3::
_set_bkg_based_submap_PARM_3:: .ds 1
_set_bkg_submap_PARM_4::
_set_bkg_based_submap_PARM_4:: .ds 1
_set_bkg_submap_PARM_5::
_set_bkg_based_submap_PARM_5:: .ds 2
_set_bkg_submap_PARM_6::
_set_bkg_based_submap_PARM_6:: .ds 1
_set_bkg_based_submap_PARM_7:: .ds 1
.xpos: .ds 1
.ypos: .ds 1
.num_rows: .ds 1
.src_tiles: .ds 2
.remainder: .ds 1
.ppuhi: .ds 1
.stripe_loop_counter: .ds 1
.define .width "_set_bkg_submap_PARM_3"
.define .height "_set_bkg_submap_PARM_4"
.define .tiles "_set_bkg_submap_PARM_5"
.define .map_width "_set_bkg_submap_PARM_6"
.define .tile_offset "_set_bkg_based_submap_PARM_7"
.area _HOME
.ifdef NES_TILEMAP_S
.define PPUHI_MASK "#>PPU_NT0"
.else
.define PPUHI_MASK "*.ppuhi"
.endif
_set_bkg_submap::
.define .width "_set_bkg_submap_PARM_3"
.define .height "_set_bkg_submap_PARM_4"
.define .tiles "_set_bkg_submap_PARM_5"
.define .map_width "_set_bkg_submap_PARM_6"
ldy #0
sty *.tile_offset
_set_bkg_based_submap::
sta *.xpos
stx *.ypos
lda .tiles
CLC
ADC *.xpos
clc
adc *.xpos
sta *.src_tiles
lda .tiles+1
ADC #0
adc #0
sta *.src_tiles+1
; += ypos * map_width
lda *.ypos
@@ -37,18 +55,44 @@ _set_bkg_submap::
txa
adc *.src_tiles+1
sta *.src_tiles+1
.set_bkg_common::
;
lda *.height
sta *.num_rows
; xpos %= DEVICE_SCREEN_WIDTH
.ifne NT_2W
; .ppuhi = (xpos >> 3) & 0b00000100
lda *.xpos
and #.DEVICE_SCREEN_WIDTH-1
lsr
lsr
lsr
and #0b00000100
ora #>PPU_NT0
sta *.ppuhi
.else
.ifne NT_2H
lda #>PPU_NT0
sta *.ppuhi
.endif
.endif
; xpos %= NT_WIDTH
lda *.xpos
and #NT_WIDTH-1
sta *.xpos
; ypos %= DEVICE_SCREEN_HEIGHT
lda #0
clc
FAST_MOD8 *.ypos #.DEVICE_SCREEN_HEIGHT
ldx *.ypos
; ypos %= NT_HEIGHT
lda *.ypos
jsr .div_mod_height
sta *.ypos
.ifne NT_2H
; .ppuhi |= (ypos % DEVICE_SCREEN_BUFFER_HEIGHT) & 0b00001000
txa
asl
asl
asl
and #0b00001000
ora *.ppuhi
sta *.ppuhi
.endif
; Prefer vertical stripes if height > width
lda *.height
cmp *.width
@@ -61,7 +105,7 @@ _set_bkg_submap_horizontalStripes:
clc
adc *.width
sec
sbc #.DEVICE_SCREEN_WIDTH
sbc #NT_WIDTH
sta *.remainder
bmi 1$
lda *.width
@@ -69,81 +113,63 @@ _set_bkg_submap_horizontalStripes:
sbc *.remainder
sta *.width
1$:
; Decrement to allow treating 0-case same as negative
dec *.remainder
_set_bkg_submap_horizontalStripes_rowLoop:
; tmp = PPU_NT0 | (ypos << 5) | xpos
lda #0
sta *.tmp+1
lda *.ypos
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
asl
rol *.tmp+1
ora *.xpos
sta *.tmp
;
lda *.tmp+1
ora #>PPU_NT0
tax
lda *.tmp
jsr .ppu_stripe_begin_horizontal
ldy #0
ldx *.width
1$:
lda [*.src_tiles],y
iny
jsr .ppu_stripe_write_byte
dex
bne 1$
jsr .ppu_stripe_end
stx *.stripe_loop_counter
jsr .setup_stripe_address
ldy #0
ldx *.tmp+1
lda *.tmp
jsr .write_horizontal_stripe
; if wrapped around, write remainder
lda *.remainder
bpl _set_bkg_submap_horizontalStripes_remainder
bmi _set_bkg_submap_horizontalStripes_rowLoopEnd
bne _set_bkg_submap_horizontalStripes_remainder
_set_bkg_submap_horizontalStripes_rowLoopEnd:
; .src_tiles += .map_width
lda *.map_width
clc
adc *.src_tiles
sta *.src_tiles
lda #0
adc *.src_tiles+1
sta *.src_tiles+1
;inc *.ypos
bcc 2$
inc *.src_tiles+1
2$:
; ypos += 1, with wrap back to 0 if gone past bottom of nametable
lda *.ypos
clc
adc #1
cmp #.DEVICE_SCREEN_HEIGHT
cmp #NT_HEIGHT
bcc 1$
lda #0
adc #0xE1 ; 0x02-0x20-C ; (carry assumed set)
.ifne NT_2H
; Flip nametable Y bit after storing wrapped ypos
sta *.ypos
lda *.ppuhi
eor #0b00001000
sta *.ppuhi
bcs 3$ ; Carry still set, use BCS in place of JMP
.endif
1$:
sta *.ypos
3$:
dec *.num_rows
bne _set_bkg_submap_horizontalStripes_rowLoop
rts
_set_bkg_submap_horizontalStripes_remainder:
ldx *.remainder
stx *.stripe_loop_counter
.ifne NT_2W + NT_2H
lda *.tmp+1
ora #>PPU_NT0
ora *.ppuhi
eor #0b00000100
tax
.else
ldx *.tmp+1
.endif
lda *.tmp
and #0xE0 ; Always start remainder at X=0
jsr .ppu_stripe_begin_horizontal
ldx *.remainder
1$:
lda [*.src_tiles],y
iny
jsr .ppu_stripe_write_byte
dex
bpl 1$
jsr .ppu_stripe_end
jsr .write_horizontal_stripe
jmp _set_bkg_submap_horizontalStripes_rowLoopEnd
@@ -155,7 +181,7 @@ _set_bkg_submap_verticalStripes:
clc
adc *.height
sec
sbc #.DEVICE_SCREEN_HEIGHT
sbc #NT_HEIGHT
sta *.remainder
bmi 1$
lda *.height
@@ -163,9 +189,6 @@ _set_bkg_submap_verticalStripes:
sbc *.remainder
sta *.height
1$:
; Decrement to allow treating 0-case same as negative
dec *.remainder
;
lda *.src_tiles
sta *.tiles
lda *.src_tiles+1
@@ -175,12 +198,58 @@ _set_bkg_submap_verticalStripes:
sta *.num_cols
ldy #0
_set_bkg_submap_verticalStripes_columnLoop:
ldx *.height
stx *.stripe_loop_counter
lda *.tiles
sta *.src_tiles
lda *.tiles+1
sta *.src_tiles+1
;
lda #0
jsr .setup_stripe_address
ldx *.tmp+1
lda *.tmp
jsr .write_vertical_stripe
; if wrapped around, write remainder
lda *.remainder
bmi _set_bkg_submap_verticalStripes_columnLoopEnd
bne _set_bkg_submap_verticalStripes_remainder
_set_bkg_submap_verticalStripes_columnLoopEnd:
iny
inc *.xpos
dec *.num_cols
bne _set_bkg_submap_verticalStripes_columnLoop
rts
;
; Writes the wrapped-around part of a vertical stripe: .remainder tiles, starting
; at row 0 of the same column, then rejoins the column loop.
;
_set_bkg_submap_verticalStripes_remainder:
; Number of tiles left to write becomes the stripe length
ldx *.remainder
stx *.stripe_loop_counter
.ifne NT_2H
; X = PPU address high byte with bit 3 of .ppuhi toggled (the nametable Y bit),
; so the remainder is written to the other nametable vertically
lda *.ppuhi
eor #0b00001000
tax
.else
; Layout is one nametable high: stay in the same nametable
ldx PPUHI_MASK
.endif
lda *.tmp
and #NT_WIDTH-1 ; Always start remainder at Y=0
jsr .write_vertical_stripe
jmp _set_bkg_submap_verticalStripes_columnLoopEnd
;
; .div_mod_height
;
; Divides A by NT_HEIGHT using repeated subtraction.
;
; Input:  A = value to divide
; Output: A = value % NT_HEIGHT, X = value / NT_HEIGHT
;
.div_mod_height:
ldx #0
sec
1$:
sbc #NT_HEIGHT
; Borrow (carry clear) means the subtraction went below zero: stop
bcc 2$
inx
; Carry is still set here, so the next sbc subtracts exactly NT_HEIGHT
jmp 1$
2$:
; Carry is clear here, so this adds back exactly NT_HEIGHT to undo the last subtraction
adc #NT_HEIGHT
rts
.setup_stripe_address:
; tmp = ppuhi | (ypos << 5) | xpos
lda #(PPU_NT0 >> 13)
sta *.tmp+1
lda *.ypos
asl
@@ -197,53 +266,39 @@ _set_bkg_submap_verticalStripes_columnLoop:
sta *.tmp
;
lda *.tmp+1
ora #>PPU_NT0
tax
lda *.tmp
jsr .ppu_stripe_begin_vertical
ldx *.height
1$:
lda [*.src_tiles],y
jsr .ppu_stripe_write_byte
; .src_tiles += width
lda *.map_width
clc
adc *.src_tiles
sta *.src_tiles
lda #0
adc *.src_tiles+1
sta *.src_tiles+1
dex
bne 1$
jsr .ppu_stripe_end
; if wrapped around, write remainder
lda *.remainder
bpl _set_bkg_submap_verticalStripes_remainder
_set_bkg_submap_verticalStripes_columnLoopEnd:
iny
inc *.xpos
dec *.num_cols
bne _set_bkg_submap_verticalStripes_columnLoop
.ifne NT_2W + NT_2H
ora PPUHI_MASK
.endif
sta *.tmp+1
rts
_set_bkg_submap_verticalStripes_remainder:
ldx #>PPU_NT0
lda *.tmp
and #.DEVICE_SCREEN_WIDTH-1 ; Always start remainder at Y=0
jsr .ppu_stripe_begin_vertical
ldx *.remainder
;
; .write_horizontal_stripe
;
; Writes one horizontal stripe of tiles: begins a stripe, then for each tile loads
; a byte from [.src_tiles],y, adds .tile_offset and passes it to .ppu_stripe_write_byte.
;
; Input:  A, X = passed unchanged to .ppu_stripe_begin_horizontal
;                (callers load A from .tmp and X from .tmp+1, the stripe address)
;         Y = starting offset into .src_tiles
;         .stripe_loop_counter = number of tiles to write (a value of 0 would loop 256 times)
; Output: Y has been incremented once per tile written; .stripe_loop_counter = 0.
; NOTE(review): relies on .ppu_stripe_write_byte preserving Y - confirm.
;
.write_horizontal_stripe:
jsr .ppu_stripe_begin_horizontal
1$:
lda [*.src_tiles],y
iny
; Apply the tile offset (8-bit add, any carry out is discarded)
clc
adc *.tile_offset
jsr .ppu_stripe_write_byte
dec *.stripe_loop_counter
bne 1$
; Tail call: .ppu_stripe_end returns to our caller
jmp .ppu_stripe_end
.write_vertical_stripe:
jsr .ppu_stripe_begin_vertical
1$:
lda [*.src_tiles],y
clc
adc *.tile_offset
jsr .ppu_stripe_write_byte
; .src_tiles += width
lda *.map_width
clc
adc *.src_tiles
sta *.src_tiles
lda #0
adc *.src_tiles+1
sta *.src_tiles+1
dex
bpl 1$
jsr .ppu_stripe_end
jmp _set_bkg_submap_verticalStripes_columnLoopEnd
bcc 2$
inc *.src_tiles+1
2$:
dec *.stripe_loop_counter
bne 1$
jmp .ppu_stripe_end

View File

@@ -14,25 +14,27 @@
.num_columns: .ds 1
.attribute_mask_map: .ds 1
.attribute_mask_shadow: .ds 1
.row_shl_3: .ds 1
.area _HOME
.macro INC_X_WITH_WRAP
txa
clc
adc #1
and #ATTRIBUTE_PACKED_WIDTH-1
ora *.row_shl_3
.macro COORDS_TO_IDX
pha
; ypos bit 2-0 -> bit 5-3
lda *.ypos
and #AT_SHADOW_HEIGHT-1
.ifne NT_2W
asl
.endif
asl
asl
asl
sta *.tmp+2
; xpos bit 2-0 -> bit 2-0
lda *.xpos
and #AT_SHADOW_WIDTH-1
ora *.tmp+2
tax
.endm
.macro INC_XPOS_WITH_WRAP
ldx *.xpos
clc
adc #1
and #ATTRIBUTE_PACKED_WIDTH-1
stx *.xpos
pla
.endm
.macro INC_ROW_SRC
@@ -47,14 +49,70 @@
.endm
.macro INC_Y_WITH_WRAP
txa
lda *.ypos
clc
adc #(1 << 3)
and #0x3F
tax
adc #1
and #AT_SHADOW_HEIGHT-1
sta *.ypos
INC_ROW_SRC
.endm
;
; SET_DIRTY_ROW
;
; Sets the dirty bit for row .ypos in _attribute_row_dirty.
; The entry of _attribute_row_dirty to update is selected by comparing .ypos with
; ATTRIBUTE_PACKED_HEIGHT (when NT_2H) and the masked .xpos with ATTRIBUTE_PACKED_WIDTH
; (when NT_2W). When NT_2W is set, the same bit is also set in the entry whose
; index differs in bit 0.
; Clobbers A, X and Y.
;
.macro SET_DIRTY_ROW
lda *.xpos
and #AT_SHADOW_WIDTH-1
tax
; A accumulates the dirty-byte index, one bit per rol
lda #0
ldy *.ypos
.ifne NT_2H
; Carry = (ypos >= ATTRIBUTE_PACKED_HEIGHT), shifted into the index
cpy #ATTRIBUTE_PACKED_HEIGHT
rol
.endif
.ifne NT_2W
; Carry = (xpos >= ATTRIBUTE_PACKED_WIDTH), shifted into bit 0 of the index
cpx #ATTRIBUTE_PACKED_WIDTH
rol
.endif
tax
; OR the bit for this row into the selected dirty byte
lda .bitmask_table,y
ora _attribute_row_dirty,x
sta _attribute_row_dirty,x
.ifne NT_2W
; Also mark the same row in the entry with bit 0 of the index flipped
; (presumably because a row can span both horizontally adjacent tables - TODO confirm)
txa
eor #0x1
tax
lda .bitmask_table,y
ora _attribute_row_dirty,x
sta _attribute_row_dirty,x
.endif
.endm
;
; SET_DIRTY_COLUMN
;
; Sets the dirty bit for column (.xpos & (AT_SHADOW_WIDTH-1)) in _attribute_column_dirty.
; The entry of _attribute_column_dirty to update is selected by comparing .ypos with
; ATTRIBUTE_PACKED_HEIGHT (when NT_2H) and the masked .xpos with ATTRIBUTE_PACKED_WIDTH
; (when NT_2W). When NT_2H is set, the same bit is also set in the entry whose
; index differs in bit (1 << NT_2W).
; Clobbers A, X and Y.
;
.macro SET_DIRTY_COLUMN
lda *.xpos
and #AT_SHADOW_WIDTH-1
tay
; A accumulates the dirty-byte index, one bit per rol
lda #0
ldx *.ypos
.ifne NT_2H
; Carry = (ypos >= ATTRIBUTE_PACKED_HEIGHT), shifted into the index
cpx #ATTRIBUTE_PACKED_HEIGHT
rol
.endif
.ifne NT_2W
; Carry = (xpos >= ATTRIBUTE_PACKED_WIDTH), shifted into bit 0 of the index
cpy #ATTRIBUTE_PACKED_WIDTH
rol
.endif
tax
; OR the bit for this column into the selected dirty byte
lda .bitmask_table,y
ora _attribute_column_dirty,x
sta _attribute_column_dirty,x
.ifne NT_2H
; Also mark the same column in the entry with the ypos-derived index bit flipped
; (presumably because a column can span both vertically adjacent tables - TODO confirm)
txa
eor #(1 << NT_2W)
tax
lda .bitmask_table,y
ora _attribute_column_dirty,x
sta _attribute_column_dirty,x
.endif
.endm
_set_bkg_submap_attributes_nes16x16::
.define .width "_set_bkg_submap_attributes_nes16x16_PARM_3"
.define .height "_set_bkg_submap_attributes_nes16x16_PARM_4"
@@ -93,11 +151,8 @@ _set_bkg_submap_attributes_nes16x16::
sta *.ypos
ror *.y_odd
lda *.tiles
clc
adc *.xpos
sta *.src_tiles
lda *.tiles+1
adc #0
sta *.src_tiles+1
lda *.map_width
lsr
@@ -111,13 +166,9 @@ _set_bkg_submap_attributes_nes16x16::
txa
adc *.src_tiles+1
sta *.src_tiles+1
; xpos %= ATTRIBUTE_PACKED_WIDTH
lda *.xpos
and #ATTRIBUTE_PACKED_WIDTH-1
sta *.xpos
; ypos %= ATTRIBUTE_PACKED_HEIGHT
lda *.ypos
and #ATTRIBUTE_PACKED_HEIGHT-1
and #AT_SHADOW_HEIGHT-1
sta *.ypos
; Prefer vertical stripes if height > width
lda *.height
@@ -129,35 +180,46 @@ _set_bkg_submap_attributes_horizontalStripes:
lda *.height
sta *.num_rows
_set_bkg_submap_attributes_horizontalStripes_rowLoop:
;
ldy *.ypos
lda .bitmask_table,y
ora *_attribute_row_dirty
sta *_attribute_row_dirty
;
SET_DIRTY_ROW
lda *.xpos
pha
jsr .process_row
pla
sta *.xpos
jsr .inc_row
dec *.num_rows
bne _set_bkg_submap_attributes_horizontalStripes_rowLoop
bne _set_bkg_submap_attributes_horizontalStripes_rowLoop
rts
_set_bkg_submap_attributes_verticalStripes:
jsr .inc_height_if_wrap
lda *.width
sta *.num_columns
ldy #0
_set_bkg_submap_attributes_verticalStripes_columnLoop:
;
ldx *.xpos
lda .bitmask_table,x
ora *_attribute_column_dirty
sta *_attribute_column_dirty
;
SET_DIRTY_COLUMN
ldy *.xpos
lda *.ypos
pha
lda *.src_tiles
pha
lda *.src_tiles+1
pha
jsr .process_column
INC_XPOS_WITH_WRAP
iny
pla
sta *.src_tiles+1
pla
sta *.src_tiles
pla
sta *.ypos
; Increment X (only if odd/even bit flipped to 0)
lda *.x_odd
eor #0x80
sta *.x_odd
bmi 1$
inc *.xpos
1$:
dec *.num_columns
bne _set_bkg_submap_attributes_verticalStripes_columnLoop
bne _set_bkg_submap_attributes_verticalStripes_columnLoop
rts
.inc_row:
@@ -165,7 +227,7 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
cmp #ATTRIBUTE_PACKED_HEIGHT-1
beq 2$
lda *.y_odd
adc #0x80
eor #0x80
sta *.y_odd
bmi 1$
lda *.ypos
@@ -178,7 +240,7 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
rts
2$:
; Skip last 16x16 row of attribute table (empty due to alignment)
lda #0
lda #ATTRIBUTE_PACKED_HEIGHT
sta *.ypos
.inc_row_src:
INC_ROW_SRC
@@ -224,15 +286,7 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
lda *.width
lsr
sta *.num_columns
lda *.ypos
asl
asl
asl
sta *.row_shl_3
lda *.xpos
ora *.row_shl_3
tax
ldy #0
ldy *.xpos
bit *.x_odd
bpl 2$
; Do a partial update of only TR+BR for the first byte
@@ -244,10 +298,8 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
sta *.tmp
pla
eor #0xFF
and _attribute_shadow,x
ora *.tmp
sta _attribute_shadow,x
INC_X_WITH_WRAP
jsr .write_to_shadow
inc *.xpos
;;;
lda *.width
lsr
@@ -263,11 +315,9 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
iny
and *.attribute_mask_map
sta *.tmp
lda _attribute_shadow,x
and *.attribute_mask_shadow
ora *.tmp
sta _attribute_shadow,x
INC_X_WITH_WRAP
lda *.attribute_mask_shadow
jsr .write_to_shadow
inc *.xpos
dec *.num_columns
bne .column_loop
;
@@ -275,7 +325,7 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
lda *.width
lsr
ror
eor *.x_odd
eor *.x_odd
bpl 1$
; We have one remaining half-column (16 pixels wide)
; Do a partial update of only TL+BL for the last column
@@ -287,9 +337,7 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
sta *.tmp
pla
eor #0xFF
and _attribute_shadow,x
ora *.tmp
sta _attribute_shadow,x
jsr .write_to_shadow
1$:
rts
@@ -328,14 +376,6 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
lda *.height
lsr
sta *.num_rows
lda *.ypos
asl
asl
asl
sta *.row_shl_3
lda *.xpos
ora *.row_shl_3
tax
bit *.y_odd
bpl 2$
; Do a partial update of only BL+BR for the first byte
@@ -346,9 +386,7 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
sta *.tmp
pla
eor #0xFF
and _attribute_shadow,x
ora *.tmp
sta _attribute_shadow,x
jsr .write_to_shadow
INC_Y_WITH_WRAP
;;;
lda *.height
@@ -364,11 +402,10 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
lda [*.src_tiles],y
and *.attribute_mask_map
sta *.tmp
lda _attribute_shadow,x
and *.attribute_mask_shadow
ora *.tmp
sta _attribute_shadow,x
INC_Y_WITH_WRAP
lda *.attribute_mask_shadow
jsr .write_to_shadow
inc *.ypos
INC_ROW_SRC
dec *.num_rows
bne .row_loop
;
@@ -384,14 +421,18 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
and #ATTRIBUTE_MASK_TL+ATTRIBUTE_MASK_TR
pha
and [*.src_tiles],y
iny
sta *.tmp
pla
eor #0xFF
jsr .write_to_shadow
1$:
rts
;
; .write_to_shadow
;
; Read-modify-write of one byte of _attribute_shadow:
;   _attribute_shadow[X] = (_attribute_shadow[X] & A) | .tmp
; where X is produced by the COORDS_TO_IDX macro.
;
; Input:  A = mask of shadow bits to keep, .tmp = new bits to OR in
; NOTE(review): assumes COORDS_TO_IDX derives X from .xpos/.ypos and leaves A
; intact on exit - confirm against the macro definition.
;
.write_to_shadow:
COORDS_TO_IDX
and _attribute_shadow,x
ora *.tmp
sta _attribute_shadow,x
; Local label; no branch to it is visible within this routine
1$:
rts
.bitmask_table:
@@ -403,3 +444,12 @@ _set_bkg_submap_attributes_verticalStripes_columnLoop:
.db 0b00100000
.db 0b01000000
.db 0b10000000
;
.db 0b00000001
.db 0b00000010
.db 0b00000100
.db 0b00001000
.db 0b00010000
.db 0b00100000
.db 0b01000000
.db 0b10000000