alacritty/src/io.rs

// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Unmerged utf8 chars iterator vendored from std::io
//!
use std::io::{BufRead, ErrorKind, Error};
use std::fmt;
use std::error as std_error;
use std::result;
use std::char;

static UTF8_CHAR_WIDTH: [u8; 256] = [
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];

/// Given a first byte, determine how many bytes are in this UTF-8 character
#[inline]
pub fn utf8_char_width(b: u8) -> usize {
    return UTF8_CHAR_WIDTH[b as usize] as usize;
}

/// An iterator over the `char`s of a reader.
///
/// This struct is generally created by calling [`utf8_chars()`][utf8_chars] on a reader.
/// Please see the documentation of `utf8_chars()` for more details.
///
/// [utf8_chars]: trait.BufRead.html#method.utf8_chars
pub struct Utf8Chars<R> {
    inner: R,
}

impl<R> Utf8Chars<R> {
    pub fn new(inner: R) -> Utf8Chars<R> {
        Utf8Chars { inner: inner }
    }
}

/// An enumeration of possible errors that can be generated from the `Utf8Chars`
/// adapter.
#[derive(Debug)]
pub enum Utf8CharsError {
    /// Variant representing that the underlying stream was read successfully
    /// but contains a byte sequence ill-formed in UTF-8.
    InvalidUtf8,

    /// Variant representing that the underlying stream contains the start
    /// of a byte sequence well-formed in UTF-8, but ends prematurely.
    ///
    /// Contains number of unused bytes
    IncompleteUtf8(usize),

    /// Variant representing that an I/O error occurred.
    Io(Error),
}

impl<R: BufRead> Iterator for Utf8Chars<R> {
    type Item = result::Result<char, Utf8CharsError>;

    // allow(unused_assignments) because consumed += 1 is not recognized as being used
    #[allow(unused_assignments)]
    fn next(&mut self) -> Option<result::Result<char, Utf8CharsError>> {
        macro_rules! read_byte {
            (EOF => $on_eof: expr) => {
                {
                    let byte;
                    loop {
                        match self.inner.fill_buf() {
                            Ok(buffer) => {
                                if let Some(&b) = buffer.first() {
                                    byte = b;
                                    break
                                } else {
                                    $on_eof
                                }
                            }
                            Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
                            Err(e) => return Some(Err(Utf8CharsError::Io(e))),
                        }
                    }
                    byte
                }
            }
        }

        let first = read_byte!(EOF => return None);
        self.inner.consume(1);

        let mut consumed = 1;

        macro_rules! continuation_byte {
            ($range: pat) => {
                {
                    match read_byte!(EOF => return Some(Err(Utf8CharsError::IncompleteUtf8(consumed)))) {
                        byte @ $range => {
                            self.inner.consume(1);
                            consumed += 1;
                            (byte & 0b0011_1111) as u32
                        }
                        _ => return Some(Err(Utf8CharsError::InvalidUtf8))
                    }
                }
            }
        }

        // Ranges can be checked against https://tools.ietf.org/html/rfc3629#section-4
        let code_point = match utf8_char_width(first) {
            1 => return Some(Ok(first as char)),
            2 => {
                let second = continuation_byte!(0x80...0xBF);
                ((first & 0b0001_1111) as u32) << 6 | second
            }
            3 => {
                let second = match first {
                    0xE0        => continuation_byte!(0xA0...0xBF),
                    0xE1...0xEC => continuation_byte!(0x80...0xBF),
                    0xED        => continuation_byte!(0x80...0x9F),
                    0xEE...0xEF => continuation_byte!(0x80...0xBF),
                    _ => unreachable!(),
                };
                let third = continuation_byte!(0x80...0xBF);
                ((first & 0b0000_1111) as u32) << 12 | second << 6 | third
            }
            4 => {
                let second = match first {
                    0xF0        => continuation_byte!(0x90...0xBF),
                    0xF0...0xF3 => continuation_byte!(0x80...0xBF),
                    0xF4        => continuation_byte!(0x80...0x8F),
                    _ => unreachable!(),
                };
                let third = continuation_byte!(0x80...0xBF);
                let fourth = continuation_byte!(0x80...0xBF);
                ((first & 0b0000_0111) as u32) << 18 | second << 12 | third << 6 | fourth
            }
            _ => return Some(Err(Utf8CharsError::InvalidUtf8))
        };
        unsafe {
            Some(Ok(char::from_u32_unchecked(code_point)))
        }
    }
}

impl std_error::Error for Utf8CharsError {
    fn description(&self) -> &str {
        match *self {
            Utf8CharsError::InvalidUtf8 => "invalid UTF-8 byte sequence",
            Utf8CharsError::IncompleteUtf8(_) => {
                "stream ended in the middle of an UTF-8 byte sequence"
            }
            Utf8CharsError::Io(ref e) => std_error::Error::description(e),
        }
    }
    fn cause(&self) -> Option<&std_error::Error> {
        match *self {
            Utf8CharsError::InvalidUtf8 | Utf8CharsError::IncompleteUtf8(_) => None,
            Utf8CharsError::Io(ref e) => e.cause(),
        }
    }
}

impl fmt::Display for Utf8CharsError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Utf8CharsError::InvalidUtf8 => {
                "invalid UTF-8 byte sequence".fmt(f)
            }
            Utf8CharsError::IncompleteUtf8(_) => {
                "stream ended in the middle of an UTF-8 byte sequence".fmt(f)
            }
            Utf8CharsError::Io(ref e) => e.fmt(f),
        }
    }
}
Vendor upcoming Utf8Chars iterator from libstd The upcoming Utf8Chars iterator was vendored from a libstd PR. The iterator works on BufRead types which is critical for improving performance. A small modification was made where the number of unused bytes is included with Utf8CharsError::IncompleteUtf8. The pty reader thread was updated to use this new type. Next steps will be moving the parsing there and either sending parse results in batches or updating the terminal directly from that thread. 2016-07-01 04:26:26 +00:00			`// Copyright 2015 The Rust Project Developers. See the COPYRIGHT`
			`// file at the top-level directory of this distribution and at`
			`// http://rust-lang.org/COPYRIGHT.`
			`//`
			`// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or`
			`// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license`
			`// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your`
			`// option. This file may not be copied, modified, or distributed`
			`// except according to those terms.`

			`//! Unmerged utf8 chars iterator vendored from std::io`
			`//!`
			`use std::io::{BufRead, ErrorKind, Error};`
			`use std::fmt;`
			`use std::error as std_error;`
			`use std::result;`
			`use std::char;`

			`static UTF8_CHAR_WIDTH: [u8; 256] = [`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,`
			`1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F`
			`0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,`
			`0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F`
			`0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,`
			`0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF`
			`0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,`
			`2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF`
			`3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF`
			`4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF`
			`];`

			`/// Given a first byte, determine how many bytes are in this UTF-8 character`
			`#[inline]`
			`pub fn utf8_char_width(b: u8) -> usize {`
			`return UTF8_CHAR_WIDTH[b as usize] as usize;`
			`}`

			/// An iterator over the `char`s of a reader.
			`///`
			/// This struct is generally created by calling [`utf8_chars()`][utf8_chars] on a reader.
			/// Please see the documentation of `utf8_chars()` for more details.
			`///`
			`/// [utf8_chars]: trait.BufRead.html#method.utf8_chars`
			`pub struct Utf8Chars<R> {`
			`inner: R,`
			`}`

			`impl<R> Utf8Chars<R> {`
			`pub fn new(inner: R) -> Utf8Chars<R> {`
			`Utf8Chars { inner: inner }`
			`}`
			`}`

			/// An enumeration of possible errors that can be generated from the `Utf8Chars`
			`/// adapter.`
			`#[derive(Debug)]`
			`pub enum Utf8CharsError {`
			`/// Variant representing that the underlying stream was read successfully`
			`/// but contains a byte sequence ill-formed in UTF-8.`
			`InvalidUtf8,`

			`/// Variant representing that the underlying stream contains the start`
			`/// of a byte sequence well-formed in UTF-8, but ends prematurely.`
			`///`
			`/// Contains number of unused bytes`
Improve pty reading and renderer synchronization The pty read thread now runs the parser and directly updates the terminal in the same thread. This obviates the need for a channel which sends every char read from the pty; this is a huge performance boon. Synchronization between the updater and the renderer is now achieved with a PriorityMutex. Previously, an atomic bool was (poorly) used to request the lock on terminal. The PriorityMutex is dead simple to use, and it _Just Works_. 2016-07-01 17:34:08 +00:00			`IncompleteUtf8(usize),`
Vendor upcoming Utf8Chars iterator from libstd The upcoming Utf8Chars iterator was vendored from a libstd PR. The iterator works on BufRead types which is critical for improving performance. A small modification was made where the number of unused bytes is included with Utf8CharsError::IncompleteUtf8. The pty reader thread was updated to use this new type. Next steps will be moving the parsing there and either sending parse results in batches or updating the terminal directly from that thread. 2016-07-01 04:26:26 +00:00
			`/// Variant representing that an I/O error occurred.`
			`Io(Error),`
			`}`

			`impl<R: BufRead> Iterator for Utf8Chars<R> {`
			`type Item = result::Result<char, Utf8CharsError>;`

			`// allow(unused_assignments) because consumed += 1 is not recognized as being used`
			`#[allow(unused_assignments)]`
			`fn next(&mut self) -> Option<result::Result<char, Utf8CharsError>> {`
			`macro_rules! read_byte {`
			`(EOF => $on_eof: expr) => {`
			`{`
			`let byte;`
			`loop {`
			`match self.inner.fill_buf() {`
			`Ok(buffer) => {`
			`if let Some(&b) = buffer.first() {`
			`byte = b;`
			`break`
			`} else {`
			`$on_eof`
			`}`
			`}`
			`Err(ref e) if e.kind() == ErrorKind::Interrupted => {}`
			`Err(e) => return Some(Err(Utf8CharsError::Io(e))),`
			`}`
			`}`
			`byte`
			`}`
			`}`
			`}`

			`let first = read_byte!(EOF => return None);`
			`self.inner.consume(1);`

			`let mut consumed = 1;`

			`macro_rules! continuation_byte {`
			`($range: pat) => {`
			`{`
			`match read_byte!(EOF => return Some(Err(Utf8CharsError::IncompleteUtf8(consumed)))) {`
			`byte @ $range => {`
			`self.inner.consume(1);`
			`consumed += 1;`
			`(byte & 0b0011_1111) as u32`
			`}`
			`_ => return Some(Err(Utf8CharsError::InvalidUtf8))`
			`}`
			`}`
			`}`
			`}`

			`// Ranges can be checked against https://tools.ietf.org/html/rfc3629#section-4`
			`let code_point = match utf8_char_width(first) {`
			`1 => return Some(Ok(first as char)),`
			`2 => {`
			`let second = continuation_byte!(0x80...0xBF);`
			`((first & 0b0001_1111) as u32) << 6 \| second`
			`}`
			`3 => {`
			`let second = match first {`
			`0xE0 => continuation_byte!(0xA0...0xBF),`
			`0xE1...0xEC => continuation_byte!(0x80...0xBF),`
			`0xED => continuation_byte!(0x80...0x9F),`
			`0xEE...0xEF => continuation_byte!(0x80...0xBF),`
			`_ => unreachable!(),`
			`};`
			`let third = continuation_byte!(0x80...0xBF);`
			`((first & 0b0000_1111) as u32) << 12 \| second << 6 \| third`
			`}`
			`4 => {`
			`let second = match first {`
			`0xF0 => continuation_byte!(0x90...0xBF),`
			`0xF0...0xF3 => continuation_byte!(0x80...0xBF),`
			`0xF4 => continuation_byte!(0x80...0x8F),`
			`_ => unreachable!(),`
			`};`
			`let third = continuation_byte!(0x80...0xBF);`
			`let fourth = continuation_byte!(0x80...0xBF);`
			`((first & 0b0000_0111) as u32) << 18 \| second << 12 \| third << 6 \| fourth`
			`}`
			`_ => return Some(Err(Utf8CharsError::InvalidUtf8))`
			`};`
			`unsafe {`
			`Some(Ok(char::from_u32_unchecked(code_point)))`
			`}`
			`}`
			`}`

			`impl std_error::Error for Utf8CharsError {`
			`fn description(&self) -> &str {`
			`match *self {`
			`Utf8CharsError::InvalidUtf8 => "invalid UTF-8 byte sequence",`
			`Utf8CharsError::IncompleteUtf8(_) => {`
			`"stream ended in the middle of an UTF-8 byte sequence"`
			`}`
			`Utf8CharsError::Io(ref e) => std_error::Error::description(e),`
			`}`
			`}`
			`fn cause(&self) -> Option<&std_error::Error> {`
			`match *self {`
			`Utf8CharsError::InvalidUtf8 \| Utf8CharsError::IncompleteUtf8(_) => None,`
			`Utf8CharsError::Io(ref e) => e.cause(),`
			`}`
			`}`
			`}`

			`impl fmt::Display for Utf8CharsError {`
			`fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {`
			`match *self {`
			`Utf8CharsError::InvalidUtf8 => {`
			`"invalid UTF-8 byte sequence".fmt(f)`
			`}`
			`Utf8CharsError::IncompleteUtf8(_) => {`
			`"stream ended in the middle of an UTF-8 byte sequence".fmt(f)`
			`}`
			`Utf8CharsError::Io(ref e) => e.fmt(f),`
			`}`
			`}`
			`}`