Reduce InstanceData footprint

The InstanceData type in the rendering subsystem previously consisted of
16 floats, occupying 64 bytes per instance. This meant that for every
character or background cell drawn, 64 bytes were sent to the GPU. For a
400x100 cell grid (40,000 cells), that adds up to roughly 2.5MB.

This patch reduces InstanceData's size to 26 bytes, a reduction of
roughly 60%. For the same 400x100 grid, the worst case drops to about
1MB transferred.

The motivation for this patch comes from macOS. Once the terminal grid
reached a certain size, rendering performance dropped sharply (render
times went from ~3ms to ~16ms). I don't want to speculate too much on
the underlying issue, but suffice it to say that this patch alleviates
the problem in my testing.
This commit is contained in:
Joe Wilm 2017-07-01 09:58:37 -07:00 committed by Joe Wilm
parent 5a220b73db
commit 3cdba29124
4 changed files with 74 additions and 50 deletions

7
Cargo.lock generated
View File

@ -13,6 +13,7 @@ dependencies = [
"font 0.1.0",
"gl_generator 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"glutin 0.6.1 (git+https://github.com/jwilm/glutin?rev=cc64178d39a1fa06b2c5403117e5e0ef24deeac4)",
"half 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.23 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
@ -448,6 +449,11 @@ dependencies = [
"x11-dl 2.14.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "half"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "heapsize"
version = "0.3.9"
@ -1291,6 +1297,7 @@ dependencies = [
"checksum gl_generator 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0940975a4ca12b088d32b5d5134826c47d2e73de4b0b459b05244c01503eccbb"
"checksum gleam 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)" = "9590e0e578d528a080c5abac678e7efbe349a73c7316faafd4073edf5f462d01"
"checksum glutin 0.6.1 (git+https://github.com/jwilm/glutin?rev=cc64178d39a1fa06b2c5403117e5e0ef24deeac4)" = "<none>"
"checksum half 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "63d68db75012a85555434ee079e7e6337931f87a087ab2988becbadf64673a7f"
"checksum heapsize 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "556cd479866cf85c3f671209c85e8a6990211c916d1002c2fcb2e9b7cf60bc36"
"checksum inotify 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e8458c07bdbdaf309c80e2c3304d14c3db64e7465d4f07cf589ccb83fd0ff31a"
"checksum itoa 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "eb2f404fbc66fd9aac13e998248505e7ecb2ad8e44ab6388684c5fb11c6c251c"

View File

@ -33,6 +33,7 @@ clap = "2.20"
fnv = "1.0.5"
unicode-width = "0.1.4"
arraydeque = "0.2"
half = "1.0"
clippy = { version = "0.0.104", optional = true }
[target.'cfg(any(target_os = "linux", target_os = "freebsd", target_os="dragonfly", target_os="openbsd"))'.dependencies]

View File

@ -38,6 +38,7 @@ extern crate errno;
extern crate fnv;
extern crate font;
extern crate glutin;
extern crate half;
extern crate libc;
extern crate mio;
extern crate notify;

View File

@ -27,6 +27,7 @@ use gl::types::*;
use gl;
use index::{Line, Column, RangeInclusive};
use notify::{Watcher as WatcherApi, RecommendedWatcher as Watcher, op};
use half::f16;
use config::{self, Config, Delta};
use term::{self, cell, RenderableCell};
@ -125,14 +126,14 @@ pub struct ShaderProgram {
#[derive(Debug, Clone)]
pub struct Glyph {
tex_id: GLuint,
top: f32,
left: f32,
width: f32,
height: f32,
uv_bot: f32,
uv_left: f32,
uv_width: f32,
uv_height: f32,
top: i16,
left: i16,
width: i16,
height: i16,
uv_bot: f16,
uv_left: f16,
uv_width: f16,
uv_height: f16,
}
/// Naïve glyph cache
@ -281,29 +282,33 @@ impl GlyphCache {
#[derive(Debug)]
#[repr(C)]
struct InstanceData {
// coords
col: f32,
row: f32,
// Grid coordinates of the cell
//
// Storing these as u16 limits the terminal's dimensions to what fits in
// a u16 (max value 65535). Practically speaking, this shouldn't be a
// problem.
col: u16,
row: u16,
// glyph offset
left: f32,
top: f32,
left: i16,
top: i16,
// glyph scale
width: f32,
height: f32,
width: i16,
height: i16,
// uv offset
uv_left: f32,
uv_bot: f32,
uv_left: f16,
uv_bot: f16,
// uv scale
uv_width: f32,
uv_height: f32,
uv_width: f16,
uv_height: f16,
// color
r: f32,
g: f32,
b: f32,
r: u8,
g: u8,
b: u8,
// background color
bg_r: f32,
bg_g: f32,
bg_b: f32,
bg_r: u8,
bg_g: u8,
bg_b: u8,
}
#[derive(Debug)]
@ -366,8 +371,8 @@ impl Batch {
}
self.instances.push(InstanceData {
col: cell.column.0 as f32,
row: cell.line.0 as f32,
col: cell.column.0 as u16,
row: cell.line.0 as u16,
top: glyph.top,
left: glyph.left,
@ -379,13 +384,13 @@ impl Batch {
uv_width: glyph.uv_width,
uv_height: glyph.uv_height,
r: cell.fg.r as f32,
g: cell.fg.g as f32,
b: cell.fg.b as f32,
r: cell.fg.r,
g: cell.fg.g,
b: cell.fg.b,
bg_r: cell.bg.r as f32,
bg_g: cell.bg.g as f32,
bg_b: cell.bg.b as f32,
bg_r: cell.bg.r,
bg_g: cell.bg.g,
bg_b: cell.bg.b,
});
}
@ -490,38 +495,48 @@ impl QuadRenderer {
(BATCH_MAX * size_of::<InstanceData>()) as isize,
ptr::null(), gl::STREAM_DRAW);
// coords
let mut size = 0;
gl::VertexAttribPointer(1, 2,
gl::FLOAT, gl::FALSE,
gl::UNSIGNED_SHORT, gl::FALSE,
size_of::<InstanceData>() as i32,
ptr::null());
gl::EnableVertexAttribArray(1);
gl::VertexAttribDivisor(1, 1);
size += 2 * size_of::<u16>();
// glyphoffset
gl::VertexAttribPointer(2, 4,
gl::FLOAT, gl::FALSE,
gl::SHORT, gl::FALSE,
size_of::<InstanceData>() as i32,
(2 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(2);
gl::VertexAttribDivisor(2, 1);
size += 4 * size_of::<i16>();
// uv
gl::VertexAttribPointer(3, 4,
gl::FLOAT, gl::FALSE,
gl::HALF_FLOAT, gl::FALSE,
size_of::<InstanceData>() as i32,
(6 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(3);
gl::VertexAttribDivisor(3, 1);
size += 4 * size_of::<f16>();
// color
gl::VertexAttribPointer(4, 3,
gl::FLOAT, gl::FALSE,
gl::UNSIGNED_BYTE, gl::FALSE,
size_of::<InstanceData>() as i32,
(10 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(4);
gl::VertexAttribDivisor(4, 1);
size += 3 * size_of::<u8>();
// background color
gl::VertexAttribPointer(5, 3,
gl::FLOAT, gl::FALSE,
gl::UNSIGNED_BYTE, gl::FALSE,
size_of::<InstanceData>() as i32,
(13 * size_of::<f32>()) as *const _);
size as *const _);
gl::EnableVertexAttribArray(5);
gl::VertexAttribDivisor(5, 1);
@ -1319,14 +1334,14 @@ impl Atlas {
Glyph {
tex_id: self.id,
top: glyph.top as f32,
width: width as f32,
height: height as f32,
left: glyph.left as f32,
uv_bot: uv_bot,
uv_left: uv_left,
uv_width: uv_width,
uv_height: uv_height,
top: glyph.top as i16,
width: width as i16,
height: height as i16,
left: glyph.left as i16,
uv_bot: f16::from_f32(uv_bot),
uv_left: f16::from_f32(uv_left),
uv_width: f16::from_f32(uv_width),
uv_height: f16::from_f32(uv_height),
}
}