parent
7409bba0d8
commit
2eb2441f2b
7 changed files with 85 additions and 19 deletions
@ -0,0 +1,50 @@ |
||||
// Copyright 2019 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xml // import "miniflux.app/reader/xml"
|
||||
|
||||
import ( |
||||
"bytes" |
||||
"encoding/xml" |
||||
"fmt" |
||||
"io" |
||||
"io/ioutil" |
||||
|
||||
"miniflux.app/reader/encoding" |
||||
) |
||||
|
||||
// NewDecoder returns a XML decoder that filters illegal characters.
|
||||
func NewDecoder(data io.Reader) *xml.Decoder { |
||||
decoder := xml.NewDecoder(data) |
||||
decoder.Entity = xml.HTMLEntity |
||||
decoder.Strict = false |
||||
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { |
||||
utf8Reader, err := encoding.CharsetReader(charset, input) |
||||
if err != nil { |
||||
return nil, err |
||||
} |
||||
rawData, err := ioutil.ReadAll(utf8Reader) |
||||
if err != nil { |
||||
return nil, fmt.Errorf("Unable to read data: %q", err) |
||||
} |
||||
filteredBytes := bytes.Map(filterValidXMLChar, rawData) |
||||
return bytes.NewReader(filteredBytes), nil |
||||
} |
||||
|
||||
return decoder |
||||
} |
||||
|
||||
// filterValidXMLChar reports whether r is a legal XML character, using the
// same ranges as the check in the encoding/xml package it was copied from.
// It returns r unchanged when the rune is allowed and -1 otherwise, which is
// the shape expected by bytes.Map (see NewDecoder).
func filterValidXMLChar(r rune) rune {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		// Tab, line feed and carriage return are the only permitted
		// control characters.
		return r
	case 0x20 <= r && r <= 0xD7FF:
		return r
	case 0xE000 <= r && r <= 0xFFFD:
		return r
	case 0x10000 <= r && r <= 0x10FFFF:
		return r
	}
	return -1
}
@ -0,0 +1,29 @@ |
||||
// Copyright 2019 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xml // import "miniflux.app/reader/xml"
|
||||
|
||||
import ( |
||||
"encoding/xml" |
||||
"fmt" |
||||
"strings" |
||||
"testing" |
||||
) |
||||
|
||||
func TestIllegalCharacters(t *testing.T) { |
||||
type myxml struct { |
||||
XMLName xml.Name `xml:"rss"` |
||||
Version string `xml:"version,attr"` |
||||
Title string `xml:"title"` |
||||
} |
||||
|
||||
data := fmt.Sprintf(`<?xml version="1.0" encoding="windows-1251"?><rss version="2.0"><title>%s</title></rss>`, "\x10") |
||||
var x myxml |
||||
|
||||
decoder := NewDecoder(strings.NewReader(data)) |
||||
err := decoder.Decode(&x) |
||||
if err != nil { |
||||
t.Error(err) |
||||
} |
||||
} |
Loading…
Reference in new issue