From 6eead1f95cda85863b0476bb4129b71e94aefccf Mon Sep 17 00:00:00 2001
From: Joe Wilm
Date: Sun, 20 May 2018 16:04:14 -0700
Subject: [PATCH] Specialize Storage::swap for Row

Removes 4 movaps instructions from generated assembly.
---
 src/grid/mod.rs     |  2 +-
 src/grid/row.rs     |  1 -
 src/grid/storage.rs | 81 ++++++++++++++++++++++++++++++++++++++-------
 src/lib.rs          |  1 +
 4 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/src/grid/mod.rs b/src/grid/mod.rs
index 0998d640..252733f3 100644
--- a/src/grid/mod.rs
+++ b/src/grid/mod.rs
@@ -27,7 +27,7 @@ pub use self::row::Row;
 mod tests;
 
 mod storage;
-use self::storage::Storage;
+use self::storage::{Storage, Swap};
 
 /// Bidirection iterator
 pub trait BidirectionalIterator: Iterator {
diff --git a/src/grid/row.rs b/src/grid/row.rs
index a0805822..d23e9045 100644
--- a/src/grid/row.rs
+++ b/src/grid/row.rs
@@ -58,7 +58,6 @@ impl Row {
     }
 
     /// Resets contents to the contents of `other`
-    #[inline(never)]
     pub fn reset(&mut self, other: &T) {
         let occ = self.occ;
         for item in &mut self.inner[..occ] {
diff --git a/src/grid/storage.rs b/src/grid/storage.rs
index ea220159..a6d0d2a8 100644
--- a/src/grid/storage.rs
+++ b/src/grid/storage.rs
@@ -15,10 +15,78 @@ use std::ops::{Index, IndexMut};
 use std::slice;
 
 use index::Line;
+use grid::Row;
 
 /// Maximum number of invisible lines before buffer is resized
 const TRUNCATE_STEP: usize = 100;
 
+/// Swapping of two lines in a storage
+///
+/// This is a trait rather than an inherent method so that the
+/// implementation can be specialized for `Storage<Row<T>>`.
+pub trait Swap {
+    /// Swap the two lines at the given indices
+    fn swap(&mut self, _: usize, _: usize);
+}
+
+impl<T> Swap for Storage<T> {
+    /// Swap two lines in raw buffer
+    ///
+    /// # Panics
+    ///
+    /// `swap` will panic if either `a` or `b` are out-of-bounds of the
+    /// underlying storage.
+    default fn swap(&mut self, a: usize, b: usize) {
+        let a = self.compute_index(a);
+        let b = self.compute_index(b);
+
+        self.inner.swap(a, b);
+    }
+}
+
+impl<T> Swap for Storage<Row<T>> {
+    /// Custom swap implementation for `Row<T>`.
+    ///
+    /// Exploits the known size of `Row<T>` to produce a slightly more efficient
+    /// swap than going through `slice::swap`.
+    ///
+    /// The default implementation from swap generates 8 movups and 4 movaps
+    /// instructions. This implementation only uses 8 movups instructions.
+    fn swap(&mut self, a: usize, b: usize) {
+        use std::mem::{align_of, size_of};
+
+        // The qword copy below is only sound for a 32-byte `Row<T>` that is
+        // at least as aligned as `u64`. Both conditions are constants, so
+        // checking them in release builds as well is free.
+        assert!(size_of::<Row<T>>() == 32);
+        assert!(align_of::<Row<T>>() >= align_of::<u64>());
+
+        let a = self.compute_index(a);
+        let b = self.compute_index(b);
+        debug_assert!(a < self.inner.len() && b < self.inner.len());
+
+        // SAFETY: the asserts above guarantee that every row is exactly four
+        // aligned qwords. This relies on `compute_index` (not part of this
+        // patch) returning indices inside `inner`; debug builds verify that.
+        unsafe {
+            // Derive both row pointers from a single base pointer so that no
+            // new mutable borrow of `inner` happens while one is live.
+            let base = self.inner.as_mut_ptr();
+            let a_ptr = base.offset(a as isize) as *mut u64;
+            let b_ptr = base.offset(b as isize) as *mut u64;
+
+            // Copy 1 qword at a time
+            //
+            // The optimizer unrolls this loop and vectorizes it.
+            for i in 0..4 {
+                let tmp = *a_ptr.offset(i);
+                *a_ptr.offset(i) = *b_ptr.offset(i);
+                *b_ptr.offset(i) = tmp;
+            }
+        }
+    }
+}
+
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct Storage<T> {
     inner: Vec<T>,
@@ -205,19 +273,6 @@ impl Storage {
         self.inner.swap(a, b);
     }
 
-    /// Swap two lines in raw buffer
-    ///
-    /// # Panics
-    ///
-    /// `swap` will panic if either `a` or `b` are out-of-bounds of the
-    /// underlying storage.
-    pub fn swap(&mut self, a: usize, b: usize) {
-        let a = self.compute_index(a);
-        let b = self.compute_index(b);
-
-        self.inner.swap(a, b);
-    }
-
     /// Iterator over *logical* entries in the storage
     ///
     /// This *does not* iterate over hidden entries.
diff --git a/src/lib.rs b/src/lib.rs
index a8e18451..45734eba 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,7 @@
 // limitations under the License.
 //
 //! Alacritty - The GPU Enhanced Terminal
+#![feature(specialization)]
 #![cfg_attr(feature = "clippy", feature(plugin))]
 #![cfg_attr(feature = "clippy", plugin(clippy))]
 #![cfg_attr(feature = "clippy", deny(clippy))]