mirror of
https://github.com/86Box/86Box.git
synced 2025-01-23 17:52:01 -05:00
Rework (S)VGA renderer to be internally chunky + attempt to optimise for speed
This commit is contained in:
parent
cf36889bd2
commit
44d70ab943
1 changed files with 88 additions and 83 deletions
|
@ -454,11 +454,10 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits)
|
|||
int x;
|
||||
uint32_t addr;
|
||||
uint32_t *p;
|
||||
uint8_t edat[4];
|
||||
uint32_t changed_offset;
|
||||
|
||||
const bool blinked = svga->blink & 0x10;
|
||||
const bool attrblink = ((svga->attrregs[0x10] & 0x08) != 0);
|
||||
const bool blinked = svga->blink & 0x10;
|
||||
const bool attrblink = ((svga->attrregs[0x10] & 0x08) != 0);
|
||||
|
||||
/*
|
||||
The following is likely how it works on an IBM VGA - that is, it works with its BIOS.
|
||||
|
@ -466,33 +465,50 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits)
|
|||
- S3 Trio: mode 13h (320x200x8), incbypow2 given as 2 treated as 0
|
||||
- ET4000/W32i: mode 2Eh (640x480x8), incevery given as 2 treated as 1
|
||||
*/
|
||||
const bool forcepacked = combine8bits && (svga->force_old_addr || svga->packed_chain4);
|
||||
const bool forcepacked = combine8bits && (svga->force_old_addr || svga->packed_chain4);
|
||||
|
||||
/*
|
||||
SVGA cards with a high-resolution 8bpp mode may actually bypass the VGA shifter logic.
|
||||
- HT-216 (+ other Video7 chipsets?) has 0x3C4.0xC8 bit 4 which, when set to 1, loads
|
||||
bytes directly, bypassing the shifters.
|
||||
*/
|
||||
const bool highres8bpp = combine8bits && highres;
|
||||
const bool highres8bpp = combine8bits && highres;
|
||||
|
||||
const bool dwordload = ((svga->seqregs[0x01] & 0x10) != 0);
|
||||
const bool wordload = ((svga->seqregs[0x01] & 0x04) != 0) && !dwordload;
|
||||
const bool wordincr = ((svga->crtc[0x17] & 0x08) != 0);
|
||||
const bool dwordincr = ((svga->crtc[0x14] & 0x20) != 0) && !wordincr;
|
||||
const bool dwordshift = ((svga->crtc[0x14] & 0x40) != 0);
|
||||
const bool wordshift = ((svga->crtc[0x17] & 0x40) == 0) && !dwordshift;
|
||||
const bool dwordload = ((svga->seqregs[0x01] & 0x10) != 0);
|
||||
const bool wordload = ((svga->seqregs[0x01] & 0x04) != 0) && !dwordload;
|
||||
const bool wordincr = ((svga->crtc[0x17] & 0x08) != 0);
|
||||
const bool dwordincr = ((svga->crtc[0x14] & 0x20) != 0) && !wordincr;
|
||||
const bool dwordshift = ((svga->crtc[0x14] & 0x40) != 0);
|
||||
const bool wordshift = ((svga->crtc[0x17] & 0x40) == 0) && !dwordshift;
|
||||
const uint32_t incbypow2 = forcepacked ? 0 : (dwordshift ? 2 : wordshift ? 1 : 0);
|
||||
const uint32_t incevery = forcepacked ? 1 : (dwordincr ? 4 : wordincr ? 2 : 1);
|
||||
const uint32_t loadevery = forcepacked ? 1 : (dwordload ? 4 : wordload ? 2 : 1);
|
||||
const uint32_t incevery = forcepacked ? 1 : (dwordincr ? 4 : wordincr ? 2 : 1);
|
||||
const uint32_t loadevery = forcepacked ? 1 : (dwordload ? 4 : wordload ? 2 : 1);
|
||||
|
||||
const bool shift4bit = ((svga->gdcreg[0x05] & 0x40) == 0x40 ) || highres8bpp;
|
||||
const bool shift2bit = ((svga->gdcreg[0x05] & 0x60) == 0x20 ) && !shift4bit;
|
||||
const bool shift4bit = ((svga->gdcreg[0x05] & 0x40) == 0x40) || highres8bpp;
|
||||
const bool shift2bit = ((svga->gdcreg[0x05] & 0x60) == 0x20) && !shift4bit;
|
||||
|
||||
const int dwshift = highres ? 0 : 1;
|
||||
const int dotwidth = 1 << dwshift;
|
||||
const int charwidth = dotwidth * (combine8bits ? 4 : 8);
|
||||
const uint8_t blinkmask = (attrblink ? 0x8 : 0x0);
|
||||
const uint8_t blinkval = (attrblink && blinked ? 0x8 : 0x0);
|
||||
const int dwshift = highres ? 0 : 1;
|
||||
const int dotwidth = 1 << dwshift;
|
||||
const int charwidth = dotwidth * (combine8bits ? 4 : 8);
|
||||
const uint32_t planemask = 0x11111111 * (uint32_t) (svga->plane_mask);
|
||||
const uint32_t blinkmask = (attrblink ? 0x88888888 : 0x0);
|
||||
const uint32_t blinkval = (attrblink && blinked ? 0x88888888 : 0x0);
|
||||
|
||||
/*
|
||||
This is actually a 8x 3-bit lookup table,
|
||||
preshifted by 2 bits to allow shifting by multiples of 4 bits.
|
||||
|
||||
Anyway, when we perform a planar-to-chunky conversion,
|
||||
we keep the pixel values in a scrambled order.
|
||||
This lookup table unscrambles them.
|
||||
|
||||
WARNING: Octal values are used here!
|
||||
*/
|
||||
const uint32_t shift_values = (shift4bit
|
||||
? ((067452301) << 2)
|
||||
: shift2bit
|
||||
? ((026370415) << 2)
|
||||
: ((002461357) << 2));
|
||||
|
||||
if ((svga->displine + svga->y_add) < 0)
|
||||
return;
|
||||
|
@ -502,8 +518,7 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits)
|
|||
else
|
||||
changed_offset = svga->remap_func(svga, svga->ma) >> 12;
|
||||
|
||||
if (!(svga->changedvram[changed_offset] || svga->changedvram[changed_offset + 1] ||
|
||||
svga->fullchange))
|
||||
if (!(svga->changedvram[changed_offset] || svga->changedvram[changed_offset + 1] || svga->fullchange))
|
||||
return;
|
||||
p = &svga->monitor->target_buffer->line[svga->displine + svga->y_add][svga->x_add];
|
||||
|
||||
|
@ -513,6 +528,7 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits)
|
|||
|
||||
uint32_t incr_counter = 0;
|
||||
uint32_t load_counter = 0;
|
||||
uint32_t edat = 0;
|
||||
for (x = 0; x <= (svga->hdisp + svga->scrollcache); x += charwidth) {
|
||||
if (load_counter == 0) {
|
||||
/* Find our address */
|
||||
|
@ -529,7 +545,7 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits)
|
|||
if (svga->ma & (4 << 15))
|
||||
addr |= 0x4;
|
||||
} else {
|
||||
if (svga->ma & (4<<13))
|
||||
if (svga->ma & (4 << 13))
|
||||
addr |= 0x4;
|
||||
}
|
||||
} else {
|
||||
|
@ -548,42 +564,31 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits)
|
|||
addr &= svga->vram_display_mask;
|
||||
|
||||
/* Load VRAM */
|
||||
*(uint32_t *)&edat[0] = *(uint32_t *)&svga->vram[addr];
|
||||
edat = *(uint32_t *) &svga->vram[addr];
|
||||
|
||||
if (shift4bit) {
|
||||
/*
|
||||
Remap VGA 4bpp-chunky data into fully planar data
|
||||
Plane 3 LSbit is aligned with MSbit
|
||||
*/
|
||||
uint8_t tmpdat[4] = {0, 0, 0, 0};
|
||||
for (int j = 0; j < 4; j++) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
tmpdat[j] <<= 1;
|
||||
tmpdat[j] |= (edat[i>>1] >> (((0x1&~i)<<2)+j)) & 0x1;
|
||||
}
|
||||
/*
|
||||
EGA and VGA actually use 4bpp planar as its native format.
|
||||
But 4bpp chunky is generally easier to deal with on a modern CPU.
|
||||
shift4bit is the native format for this renderer (4bpp chunky).
|
||||
*/
|
||||
if (!shift4bit) {
|
||||
if (shift2bit) {
|
||||
/* Group 2x 2bpp values into 4bpp values */
|
||||
edat = (edat & 0xCCCC3333) | ((edat << 14) & 0x33330000) | ((edat >> 14) & 0x0000CCCC);
|
||||
} else {
|
||||
/* Group 4x 1bpp values into 4bpp values */
|
||||
edat = (edat & 0xAA55AA55) | ((edat << 7) & 0x55005500) | ((edat >> 7) & 0x00AA00AA);
|
||||
edat = (edat & 0xCCCC3333) | ((edat << 14) & 0x33330000) | ((edat >> 14) & 0x0000CCCC);
|
||||
}
|
||||
*(uint32_t *) (&edat[0]) = *(uint32_t *) (&tmpdat[0]);
|
||||
}
|
||||
|
||||
if (shift2bit) {
|
||||
// Remap CGA 2bpp-chunky data into fully planar data
|
||||
uint8_t dat0 = egaremap2bpp[edat[1]] | (egaremap2bpp[edat[0]] << 4);
|
||||
uint8_t dat1 = egaremap2bpp[edat[1] >> 1] | (egaremap2bpp[edat[0] >> 1] << 4);
|
||||
uint8_t dat2 = egaremap2bpp[edat[3]] | (egaremap2bpp[edat[2]] << 4);
|
||||
uint8_t dat3 = egaremap2bpp[edat[3] >> 1] | (egaremap2bpp[edat[2] >> 1] << 4);
|
||||
edat[0] = dat0;
|
||||
edat[1] = dat1;
|
||||
edat[2] = dat2;
|
||||
edat[3] = dat3;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
According to the 82C451 VGA clone chipset datasheet, all 4 planes chain in a ring.
|
||||
So, rotate them all around.
|
||||
Planar version: edat = (edat >> 8) | (edat << 24);
|
||||
Here's the chunky version...
|
||||
*/
|
||||
*(uint32_t *)&edat[0]
|
||||
= ((*(uint32_t *)&edat[0]) >> 8)
|
||||
| ((*(uint32_t *)&edat[0]) << 24);
|
||||
edat = ((edat >> 1) & 0x77777777) | ((edat << 3) & 0x88888888);
|
||||
}
|
||||
load_counter += 1;
|
||||
if (load_counter >= loadevery)
|
||||
|
@ -593,50 +598,50 @@ svga_render_indexed_gfx(svga_t *svga, bool highres, bool combine8bits)
|
|||
if (incr_counter >= incevery) {
|
||||
incr_counter = 0;
|
||||
svga->ma += 4;
|
||||
// DISCREPANCY TODO FIXME 2/4bpp used vram_mask, 8bpp used vram_display_mask --GM
|
||||
/* DISCREPANCY TODO FIXME 2/4bpp used vram_mask, 8bpp used vram_display_mask --GM */
|
||||
svga->ma &= svga->vram_display_mask;
|
||||
}
|
||||
|
||||
uint32_t current_shift = shift_values;
|
||||
uint32_t out_edat = edat;
|
||||
/*
|
||||
Now that we've converted it all to planar, convert it (back?) to chunky!
|
||||
Apply blink
|
||||
FIXME: Confirm blink behaviour on real hardware
|
||||
|
||||
The VGA 4bpp graphics blink logic was a pain to work out.
|
||||
|
||||
If plane 3 is enabled in the attribute controller, then:
|
||||
- if bit 3 is 0, then we force the output of it to be 1.
|
||||
- if bit 3 is 1, then the output blinks.
|
||||
This can be tested with Lotus 1-2-3 release 2.3 with the WYSIWYG addon.
|
||||
|
||||
If plane 3 is disabled in the attribute controller, then the output blinks.
|
||||
This can be tested with QBASIC SCREEN 10 - anything using color #2 should
|
||||
blink and nothing else.
|
||||
|
||||
If you can simplify the following and have it still work, give yourself a medal.
|
||||
*/
|
||||
out_edat = ((out_edat & planemask & ~blinkmask) | ((out_edat | ~planemask) & blinkmask & blinkval)) ^ blinkmask;
|
||||
|
||||
for (int i = 0; i < 8; i += 2) {
|
||||
const int inshift = 6 - i;
|
||||
uint8_t dat
|
||||
= (edatlookup[(edat[0] >> inshift) & 3][(edat[1] >> inshift) & 3])
|
||||
| (edatlookup[(edat[2] >> inshift) & 3][(edat[3] >> inshift) & 3] << 2);
|
||||
|
||||
/* FIXME: Confirm blink behaviour on real hardware
|
||||
|
||||
The VGA 4bpp graphics blink logic was a pain to work out.
|
||||
|
||||
If plane 3 is enabled in the attribute controller, then:
|
||||
- if bit 3 is 0, then we force the output of it to be 1.
|
||||
- if bit 3 is 1, then the output blinks.
|
||||
This can be tested with Lotus 1-2-3 release 2.3 with the WYSIWYG addon.
|
||||
|
||||
If plane 3 is disabled in the attribute controller, then the output blinks.
|
||||
This can be tested with QBASIC SCREEN 10 - anything using color #2 should
|
||||
blink and nothing else.
|
||||
|
||||
If you can simplify the following and have it still work, give yourself a medal.
|
||||
/*
|
||||
c0 denotes the first 4bpp pixel shifted, while c1 denotes the second.
|
||||
For 8bpp modes, the first 4bpp pixel is the upper 4 bits.
|
||||
*/
|
||||
uint32_t c0 = (dat >> 4) & 0xF;
|
||||
uint32_t c1 = dat & 0xF;
|
||||
c0 = ((c0 & svga->plane_mask & ~blinkmask) |
|
||||
((c0 | ~svga->plane_mask) & blinkmask & blinkval)) ^ blinkmask;
|
||||
c1 = ((c1 & svga->plane_mask & ~blinkmask) |
|
||||
((c1 | ~svga->plane_mask) & blinkmask & blinkval)) ^ blinkmask;
|
||||
uint32_t c0 = (out_edat >> (current_shift & 0x1C)) & 0xF;
|
||||
current_shift >>= 3;
|
||||
uint32_t c1 = (out_edat >> (current_shift & 0x1C)) & 0xF;
|
||||
current_shift >>= 3;
|
||||
|
||||
if (combine8bits) {
|
||||
uint32_t ccombined = (c0 << 4) | c1;
|
||||
uint32_t p0 = svga->map8[ccombined];
|
||||
const int outoffs = (i >> 1) << dwshift;
|
||||
uint32_t ccombined = (c0 << 4) | c1;
|
||||
uint32_t p0 = svga->map8[ccombined];
|
||||
const int outoffs = (i >> 1) << dwshift;
|
||||
for (int subx = 0; subx < dotwidth; subx++)
|
||||
p[outoffs + subx] = p0;
|
||||
} else {
|
||||
uint32_t p0 = svga->pallook[svga->egapal[c0]];
|
||||
uint32_t p1 = svga->pallook[svga->egapal[c1]];
|
||||
uint32_t p0 = svga->pallook[svga->egapal[c0]];
|
||||
uint32_t p1 = svga->pallook[svga->egapal[c1]];
|
||||
const int outoffs = i << dwshift;
|
||||
for (int subx = 0; subx < dotwidth; subx++)
|
||||
p[outoffs + subx] = p0;
|
||||
|
|
Loading…
Add table
Reference in a new issue