markdown: Fix large markdown files that got truncated

This commit is contained in:
Matthew Holt 2015-07-18 12:57:16 -06:00
parent a74b20f278
commit 00997db5ae

View file

@ -5,6 +5,7 @@ import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"github.com/BurntSushi/toml" "github.com/BurntSushi/toml"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"
@ -150,6 +151,7 @@ func (y *YAMLMetadataParser) Parse(b []byte) ([]byte, error) {
if err != nil { if err != nil {
return markdown, err return markdown, err
} }
m := make(map[string]interface{}) m := make(map[string]interface{})
if err := yaml.Unmarshal(b, &m); err != nil { if err := yaml.Unmarshal(b, &m); err != nil {
return markdown, err return markdown, err
@ -187,52 +189,46 @@ func (y *YAMLMetadataParser) Closing() []byte {
return []byte("---") return []byte("---")
} }
// extractMetadata extracts metadata content from a page. // extractMetadata separates metadata content from markdown content in b.
// it returns the metadata, the remaining bytes (markdown), // It returns the metadata, the remaining bytes (markdown), and an error, if any.
// and an error if any.
// Useful for MetadataParser with defined identifiers (YAML, TOML)
func extractMetadata(parser MetadataParser, b []byte) (metadata []byte, markdown []byte, err error) { func extractMetadata(parser MetadataParser, b []byte) (metadata []byte, markdown []byte, err error) {
b = bytes.TrimSpace(b) b = bytes.TrimSpace(b)
reader := bytes.NewBuffer(b) reader := bufio.NewReader(bytes.NewBuffer(b))
scanner := bufio.NewScanner(reader)
// Read first line // Read first line, which should indicate metadata or not
if !scanner.Scan() { line, err := reader.ReadBytes('\n')
// if no line is read, if err != nil || !bytes.Equal(bytes.TrimSpace(line), parser.Opening()) {
// assume metadata not present return nil, b, fmt.Errorf("first line missing expected metadata identifier")
return nil, b, nil
}
line := bytes.TrimSpace(scanner.Bytes())
if !bytes.Equal(line, parser.Opening()) {
return nil, b, fmt.Errorf("wrong identifier")
} }
// buffer for metadata contents // buffer for metadata contents
buf := bytes.Buffer{} metaBuf := bytes.Buffer{}
// Read remaining lines until closing identifier is found // Read remaining lines until closing identifier is found
for scanner.Scan() { for {
line := scanner.Bytes() line, err := reader.ReadBytes('\n')
if err != nil {
// if closing identifier found if err == io.EOF {
if bytes.Equal(bytes.TrimSpace(line), parser.Closing()) { // no closing metadata identifier found
return nil, nil, fmt.Errorf("metadata not closed ('%s' not found)", parser.Closing())
// get the scanner to return remaining bytes }
scanner.Split(func(data []byte, atEOF bool) (int, []byte, error) { return nil, nil, err
return len(data), data, nil
})
// scan the remaining bytes
scanner.Scan()
return buf.Bytes(), scanner.Bytes(), nil
} }
buf.Write(line)
buf.WriteString("\r\n") // if closing identifier found, the remaining bytes must be markdown content
if bytes.Equal(bytes.TrimSpace(line), parser.Closing()) {
break
}
metaBuf.Write(line)
metaBuf.WriteString("\r\n")
} }
// closing identifier not found // By now, the rest of the buffer contains markdown content
return buf.Bytes(), nil, fmt.Errorf("metadata not closed. '%v' not found", string(parser.Closing())) contentBuf := new(bytes.Buffer)
io.Copy(contentBuf, reader)
return metaBuf.Bytes(), contentBuf.Bytes(), nil
} }
// findParser finds the parser using line that contains opening identifier // findParser finds the parser using line that contains opening identifier