23 September 2017

Table of Contents

概述

bufio 有三个主要部分,分别是 bufio.Writer bufio.Reader bufio.Scanner
可以通过 bufio.NewWriter(xxx)、bufio.NewScanner(xxx)、bufio.NewReader(xxx) 来创建

bufio.Writer 主要用来写入一串东西到 Writer
bufio.Reader 和 bufio.Scanner 都是用来从 io.Reader 中读取一串东西

Reader 和 Scanner 的区别在于,Reader 需要一个 buf 来存储数据,Scanner 在实现里面已经自带了

r := bufio.NewScanner(os.Stdin)
r.Scan()
fmt.Printf("%v\n", r.Text())    // scanner 会过滤掉换行符


r := bufio.NewReader(os.Stdin)
buf := make([]byte, 64)
r.Read(buf)
fmt.Printf("%v\n", string(buf)) // 会输出两个换行符,buf 会将换行符读入自身

Scanner 还可以自定义读取的方式 SplitFunc

Constants

const (
      // MaxScanTokenSize is the maximum size used to buffer a token
      // unless the user provides an explicit buffer with Scanner.Buffer.
      // The actual maximum token size may be smaller as the buffer
      // may need to include, for instance, a newline.
      MaxScanTokenSize = 64 * 1024
)

Variables

var (
      ErrInvalidUnreadByte = errors.New("bufio: invalid use of UnreadByte")
      ErrInvalidUnreadRune = errors.New("bufio: invalid use of UnreadRune")
      ErrBufferFull        = errors.New("bufio: buffer full")
      ErrNegativeCount     = errors.New("bufio: negative count")
)

var (
      ErrTooLong         = errors.New("bufio.Scanner: token too long")
      ErrNegativeAdvance = errors.New("bufio.Scanner: SplitFunc returns negative advance count")
      ErrAdvanceTooFar   = errors.New("bufio.Scanner: SplitFunc returns advance count beyond input")
)

// ErrFinalToken is a special sentinel error value.
// It is intended to be returned by a Split function to indicate that the token being delivered with the error is the last token and scanning should stop after this one.
// After ErrFinalToken is received by Scan, scanning stops with no error.
// The value is useful to stop processing early or when it is necessary to deliver a final empty token.
// One could achieve the same behavior with a custom error value but providing one here is tidier.
// See the emptyFinalToken example for a use of this value.
var ErrFinalToken = errors.New("final token")

type ReadWriter

func NewReadWriter(r *Reader, w *Writer) *ReadWriter

type Reader

func NewReader(rd io.Reader) *Reader

func NewReaderSize(rd io.Reader, size int) *Reader

func (b *Reader) Buffered() int

func (b *Reader) Discard(n int) (discarded int, err error)

func (b *Reader) Peek(n int) ([]byte, error)

func (b *Reader) Read(p []byte) (n int, err error)

func (b *Reader) ReadByte() (byte, error)

func (b *Reader) ReadBytes(delim byte) ([]byte, error)

func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error)

func (b *Reader) ReadRune() (r rune, size int, err error)

func (b *Reader) ReadSlice(delim byte) (line []byte, err error)

func (b *Reader) ReadString(delim byte) (string, error)

func (b *Reader) Reset(r io.Reader)

func (b *Reader) UnreadByte() error

func (b *Reader) UnreadRune() error

func (b *Reader) WriteTo(w io.Writer) (n int64, err error)

type SplitFunc

type SplitFunc func(data []byte, atEOF bool) (advance int, token []byte, err error)

func ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanBytes is a split function for a Scanner that returns each byte as a token.

func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanLines is a split function for a Scanner that returns each line of text, stripped of any trailing end-of-line marker. The returned line may be empty. The end-of-line marker is one optional carriage return followed by one mandatory newline. In regular expression notation, it is `\r?\n`. The last non-empty line of input will be returned even if it has no newline.

func ScanRunes(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanRunes is a split function for a Scanner that returns each UTF-8-encoded rune as a token. The sequence of runes returned is equivalent to that from a range loop over the input as a string, which means that erroneous UTF-8 encodings translate to U+FFFD = "\xef\xbf\xbd". Because of the Scan interface, this makes it impossible for the client to distinguish correctly encoded replacement runes from encoding errors.

func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error)

ScanWords is a split function for a Scanner that returns each space-separated word of text, with surrounding spaces deleted. It will never return an empty string. The definition of space is set by unicode.IsSpace.

type Scanner

Scanner provides a convenient interface for reading data such as a file of newline-delimited lines of text.

Successive calls to the Scan method will step through the 'tokens' of a file, skipping the bytes between the tokens.

The specification of a token is defined by a split function of type SplitFunc;

the default split function breaks the input into lines with line termination stripped.

Split functions are defined in this package for scanning a file into lines, bytes, UTF-8-encoded runes, and space-delimited words. The client may instead provide a custom split function.

scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
      fmt.Printf("%v\n", scanner.Text())
}

func NewScanner(r io.Reader) *Scanner

func (s *Scanner) Buffer(buf []byte, max int)

Buffer sets the initial buffer to use when scanning and the maximum size of buffer that may be allocated during scanning. The maximum token size is the larger of max and cap(buf). If max <= cap(buf), Scan will use this buffer only and do no allocation.

By default, Scan uses an internal buffer and sets the maximum token size to MaxScanTokenSize.

Buffer panics if it is called after scanning has started.

buf 是一个 byte 数组,用来存储当前需要读取的内容,buf 的内容一般都要容得下读取内容。如果不够,会使用 maximum 来扩容
maximum 是一个数值,当 buf 的容量不足的时候,可以扩容到该值。

buf := make([]byte, 10)
scanner.Buffer(buf, 10)
// reading input: bufio.Scanner: token too long

buf := make([]byte, 1)
scanner.Buffer(buf, 10000)
// buf 容量不足时会自动扩容(最大到 10000),只有单个 token 超过 10000 字节时才会报错: reading input: bufio.Scanner: token too long

func (s *Scanner) Bytes() []byte

func (s *Scanner) Err() error

func (s *Scanner) Scan() bool

func (s *Scanner) Split(split SplitFunc)

设置 Split 函数

scanner.Split(split)

Split 有下面几种方式:

  • ScanBytes
  • ScanLines
  • ScanRunes
  • ScanWords

rune 和 byte 的区别是:ScanBytes 每次读取一个字节(8 bit),ScanRunes 每次读取一个 UTF-8 编码的字符(rune,可能占 1~4 个字节)

Split 函数决定了每调用一次 scanner.Scan() 所读取的单位:1 Byte、1 Line、1 Rune 或者 1 Word,默认是 1 Line

const input = "1234 5678 1234567901234567890 1234"

scanner := bufio.NewScanner(strings.NewReader(input))
split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
     advance, token, err = bufio.ScanWords(data, atEOF)

     if err == nil && token != nil {
             _, err = strconv.ParseInt(string(token), 10, 32)
     }
     return
}

scanner.Split(split)

for index := 0; scanner.Scan(); index += 1 {
     fmt.Printf("read %v: %s\n", index, scanner.Text())
}

if err := scanner.Err(); err != nil {
     fmt.Printf("Invalid input: %s", err)
}
// read 0: 1234
// read 1: 5678
// Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range

自定义 SplitFunc

const input = "1,2,3,4,"
scanner := bufio.NewScanner(strings.NewReader(input))

SplitComma := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
     fmt.Printf("data: %s\n", data)
     for i := 0; i < len(data); i++ {
             if data[i] == ',' {
                     return i + 1, data[:i], nil
             }
     }
     return 0, data, bufio.ErrFinalToken
}

scanner.Split(SplitComma)

for scanner.Scan() {
     fmt.Printf("read: %s\n", scanner.Text())
}

if err := scanner.Err(); err != nil {
     fmt.Fprintln(os.Stderr, "reading input:", err)
}

// data: 1,2,3,4,
// read: 1
//
// data: 2,3,4,
// read: 2
//
// data: 3,4,
// read: 3
//
// data: 4,
// read: 4
//
// data:
// read:

上面的函数返回了 ErrFinalToken

func (s *Scanner) Scan() bool {
     ...
     if err == ErrFinalToken {  // 这里有个判断,如果 err 是 ErrFinalToken,那么 Scan 函数会返回 true
             s.token = token
             s.done = true
             return true
     }
     ...
}

readLine

scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
     fmt.Println(scanner.Text()) // Println will add back the final '\n'
}
if err := scanner.Err(); err != nil {
     fmt.Fprintln(os.Stderr, "reading standard input:", err)
}
const input = "Now is the winter of our discontent,\nMade glorious summer by this sun of York.\n"

scanner := bufio.NewScanner(strings.NewReader(input))

// Set the split function for the scanning operation.
scanner.Split(bufio.ScanWords)

// Count the words.
count := 0
for scanner.Scan() {
     count++
}

if err := scanner.Err(); err != nil {
     fmt.Fprintln(os.Stderr, "reading input:", err)
}
fmt.Printf("%d\n", count)

func (s *Scanner) Text() string

type Writer

w := bufio.NewWriter(os.Stdout)
fmt.Fprint(w, "Hello, ")
fmt.Fprint(w, "world!\n")
fmt.Printf("%v\n", w.Buffered()) // 14
// w.Flush() // Don't forget to flush!

// 如果没有 Flush,就不会输出

func NewWriter(w io.Writer) *Writer

func NewWriterSize(w io.Writer, size int) *Writer

func (b *Writer) Available() int

func (b *Writer) Buffered() int

func (b *Writer) Flush() error

func (b *Writer) ReadFrom(r io.Reader) (n int64, err error)

func (b *Writer) Reset(w io.Writer)

func (b *Writer) Write(p []byte) (nn int, err error)

func (b *Writer) WriteByte(c byte) error

func (b *Writer) WriteRune(r rune) (size int, err error)

func (b *Writer) WriteString(s string) (int, error)