Compare commits

...

3 Commits

Author SHA1 Message Date
Frédéric Guillot d610d091fe Avoid constraint error when having duplicate entries 5 years ago
Frédéric Guillot 36d7732234 Disable strict XML parsing 5 years ago
Frédéric Guillot ca48f7612a Ignore invalid content type 5 years ago
  1. 10
      http/client/response.go
  2. 3
      http/client/response_test.go
  3. 1
      reader/atom/parser.go
  4. 19
      reader/atom/parser_test.go
  5. 1
      reader/opml/parser.go
  6. 34
      reader/opml/parser_test.go
  7. 1
      reader/rdf/parser.go
  8. 19
      reader/rdf/parser_test.go
  9. 1
      reader/rss/parser.go
  10. 19
      reader/rss/parser_test.go
  11. 6
      storage/entry.go
  12. 9
      storage/feed.go

@ -8,7 +8,6 @@ import (
"bytes"
"io"
"io/ioutil"
"mime"
"regexp"
"strings"
"unicode/utf8"
@ -74,17 +73,12 @@ func (r *Response) IsModified(etag, lastModified string) bool {
// - Feeds with wrong encoding defined and already in UTF-8
func (r *Response) EnsureUnicodeBody() (err error) {
if r.ContentType != "" {
mediaType, _, mediaErr := mime.ParseMediaType(r.ContentType)
if mediaErr != nil {
return mediaErr
}
// JSON feeds are always in UTF-8.
if strings.Contains(mediaType, "json") {
if strings.Contains(r.ContentType, "json") {
return
}
if strings.Contains(mediaType, "xml") {
if strings.Contains(r.ContentType, "xml") {
buffer, _ := ioutil.ReadAll(r.Body)
r.Body = bytes.NewReader(buffer)

@ -121,6 +121,7 @@ func TestEnsureUnicodeWithHTMLDocuments(t *testing.T) {
{"urdu.xml", "text/xml; charset=utf-8", true},
{"content-type-only-win-8859-1.xml", "application/xml; charset=ISO-8859-1", true},
{"rdf_utf8.xml", "application/rss+xml; charset=utf-8", true},
{"rdf_utf8.xml", "application/rss+xml; charset: utf-8", true}, // Invalid Content-Type
{"charset-content-type-xml-iso88591.xml", "application/rss+xml; charset=ISO-8859-1", false},
{"windows_1251.xml", "text/xml", false},
{"smallfile.xml", "text/xml; charset=utf-8", true},
@ -136,7 +137,7 @@ func TestEnsureUnicodeWithHTMLDocuments(t *testing.T) {
r := &Response{Body: bytes.NewReader(content), ContentType: tc.contentType}
parseErr := r.EnsureUnicodeBody()
if parseErr != nil {
t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, err)
t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
}
isUnicode := utf8.ValidString(r.String())

@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
atomFeed := new(atomFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(atomFeed)

@ -577,3 +577,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
// TestParseWithInvalidCharacterEntity feeds Parse an Atom document whose link
// href contains an unescaped "&" and verifies that parsing succeeds and that
// the resulting site URL still contains the raw ampersand.
func TestParseWithInvalidCharacterEntity(t *testing.T) {
	const rawFeed = `
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/a&b"/>
</feed>
`
	const wantURL = "http://example.org/a&b"

	feed, parseErr := Parse(bytes.NewBufferString(rawFeed))
	if parseErr != nil {
		// Nothing else can be checked without a parsed feed.
		t.Fatal(parseErr)
	}

	if got := feed.SiteURL; got != wantURL {
		t.Errorf(`Incorrect URL, got: %q`, got)
	}
}

@ -17,6 +17,7 @@ func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) {
feeds := new(opml)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feeds)

@ -193,6 +193,40 @@ func TestParseOpmlVersion1WithoutOuterOutline(t *testing.T) {
}
}
}
// TestParseOpmlWithInvalidCharacterEntity parses an OPML document whose
// xmlUrl and htmlUrl attributes contain an unescaped "&" and verifies that
// exactly one subscription is returned with both URLs intact.
func TestParseOpmlWithInvalidCharacterEntity(t *testing.T) {
	data := `<?xml version="1.0"?>
<opml version="1.0">
<head>
<title>mySubscriptions.opml</title>
</head>
<body>
<outline title="Feed 1">
<outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/a&b" htmlUrl="http://example.org/c&d"></outline>
</outline>
</body>
</opml>
`

	var expected SubcriptionList
	expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/a&b", SiteURL: "http://example.org/c&d", CategoryName: ""})

	subscriptions, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the remaining checks are meaningless without a parsed list.
		t.Fatal(err)
	}

	// Fail fast on a length mismatch so the loop below can never index past
	// the end of expected.
	if len(subscriptions) != len(expected) {
		t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), len(expected))
	}

	for i := range subscriptions {
		if !subscriptions[i].Equals(expected[i]) {
			t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], expected[i])
		}
	}
}
func TestParseInvalidXML(t *testing.T) {
data := `garbage`
_, err := Parse(bytes.NewBufferString(data))

@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
feed := new(rdfFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feed)

@ -403,3 +403,22 @@ func TestParseFeedWithHTMLEntity(t *testing.T) {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
// TestParseFeedWithInvalidCharacterEntity feeds Parse an RDF document whose
// channel link contains an unescaped "&" and verifies that parsing succeeds
// and that the resulting site URL still contains the raw ampersand.
func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
	const rawFeed = `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
<channel>
<title>Example Feed</title>
<link>http://example.org/a&b</link>
</channel>
</rdf:RDF>`
	const wantURL = "http://example.org/a&b"

	feed, parseErr := Parse(bytes.NewBufferString(rawFeed))
	if parseErr != nil {
		// Nothing else can be checked without a parsed feed.
		t.Fatal(parseErr)
	}

	if got := feed.SiteURL; got != wantURL {
		t.Errorf(`Incorrect URL, got: %q`, got)
	}
}

@ -18,6 +18,7 @@ func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
feed := new(rssFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feed)

@ -633,3 +633,22 @@ func TestParseWithHTMLEntity(t *testing.T) {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
}
}
// TestParseWithInvalidCharacterEntity feeds Parse an RSS 2.0 document whose
// channel link contains an unescaped "&" and verifies that parsing succeeds
// and that the resulting site URL still contains the raw ampersand.
func TestParseWithInvalidCharacterEntity(t *testing.T) {
	const rawFeed = `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
<channel>
<link>https://example.org/a&b</link>
<title>Example Feed</title>
</channel>
</rss>`
	const wantURL = "https://example.org/a&b"

	feed, parseErr := Parse(bytes.NewBufferString(rawFeed))
	if parseErr != nil {
		// Nothing else can be checked without a parsed feed.
		t.Fatal(parseErr)
	}

	if got := feed.SiteURL; got != wantURL {
		t.Errorf(`Incorrect url, got: %q`, got)
	}
}

@ -84,7 +84,7 @@ func (s *Storage) createEntry(entry *model.Entry) error {
).Scan(&entry.ID, &entry.Status)
if err != nil {
return fmt.Errorf("unable to create entry %q (feed #%d): %v", entry.URL, entry.FeedID, err)
return fmt.Errorf("Unable to create entry %q (feed #%d): %v", entry.URL, entry.FeedID, err)
}
for i := 0; i < len(entry.Enclosures); i++ {
@ -137,9 +137,9 @@ func (s *Storage) updateEntry(entry *model.Entry) error {
// entryExists checks if an entry already exists based on its hash when refreshing a feed.
//
// The lookup is scoped to the entry's UserID and FeedID. It returns true only
// when the query yields a row; any Scan error (including the case where no row
// matches) is reported as "does not exist".
func (s *Storage) entryExists(entry *model.Entry) bool {
	var result int
	query := `SELECT 1 FROM entries WHERE user_id=$1 AND feed_id=$2 AND hash=$3`

	// NOTE(review): presumably s.db is a *sql.DB, in which case Scan returns
	// sql.ErrNoRows when nothing matches — confirm against the Storage type.
	if err := s.db.QueryRow(query, entry.UserID, entry.FeedID, entry.Hash).Scan(&result); err != nil {
		return false
	}

	return result == 1
}
// cleanupEntries deletes from the database entries marked as "removed" and not visible anymore in the feed.

@ -216,9 +216,12 @@ func (s *Storage) CreateFeed(feed *model.Feed) error {
for i := 0; i < len(feed.Entries); i++ {
feed.Entries[i].FeedID = feed.ID
feed.Entries[i].UserID = feed.UserID
err := s.createEntry(feed.Entries[i])
if err != nil {
return err
if !s.entryExists(feed.Entries[i]) {
err := s.createEntry(feed.Entries[i])
if err != nil {
return err
}
}
}

Loading…
Cancel
Save