Do not proxy image data url

pull/826/head^2
Frédéric Guillot 4 years ago committed by Frédéric Guillot
parent 5c3e78f605
commit 3afdf25012
  1. 1
      go.mod
  2. 14
      go.sum
  3. 20
      proxy/image_proxy.go
  4. 48
      proxy/image_proxy_test.go
  5. 15
      reader/sanitizer/sanitizer.go
  6. 22
      reader/sanitizer/sanitizer_test.go

@ -13,6 +13,7 @@ require (
github.com/prometheus/procfs v0.2.0 // indirect
github.com/rylans/getlang v0.0.0-20200505200108-4c3188ff8a2d
github.com/stretchr/testify v1.6.1 // indirect
github.com/tdewolff/minify/v2 v2.9.7 // indirect
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
golang.org/x/net v0.0.0-20200625001655-4c5254603344
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d

@ -34,6 +34,7 @@ github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QH
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927/go.mod h1:h/aW8ynjgkuj+NQRlZcDbAbM1ORAbXjXX77sX7T289U=
github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8=
@ -51,6 +52,7 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs=
github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU=
github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I=
@ -62,6 +64,7 @@ github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5Kwzbycv
github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4=
github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
@ -164,6 +167,7 @@ github.com/lib/pq v1.8.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2/go.mod h1:0KeJpeMD6o+O4hW7qJOT7vyQPKrWmj26uf5wMc/IiIs=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
@ -268,6 +272,7 @@ github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4k
github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY=
github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ=
github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw=
github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI=
@ -278,6 +283,13 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tdewolff/minify v1.1.0 h1:nxHQi1ML+g3ZbZHffiZ6eC7vMqNvSRfX3KB5Y5y/kfw=
github.com/tdewolff/minify v2.3.6+incompatible h1:2hw5/9ZvxhWLvBUnHE06gElGYz+Jv9R4Eys0XUzItYo=
github.com/tdewolff/minify/v2 v2.9.7 h1:r8ewdcX8VYUoNj+s9WSy4FtNNNqNPevWOkb/MksAtzQ=
github.com/tdewolff/minify/v2 v2.9.7/go.mod h1:AcJ/ggtHex5N/QiafLI8rlIO3qwSlgbPNLi27VZSYz8=
github.com/tdewolff/parse/v2 v2.5.4 h1:ggaQ1SVE8wErRrZwUs49I6iQ1zL/tFlb7KtYsk2I8Yk=
github.com/tdewolff/parse/v2 v2.5.4/go.mod h1:WzaJpRSbwq++EIQHYIRTpbYKNA3gn9it1Ik++q4zyho=
github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
@ -356,12 +368,14 @@ golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200724161237-0e2f3a69832c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200926100807-9d91bd62050c h1:38q6VNPWR010vN82/SB121GujZNIfAUb4YttE2rhGuc=
golang.org/x/sys v0.0.0-20200926100807-9d91bd62050c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=

@ -5,6 +5,7 @@
package proxy // import "miniflux.app/proxy"
import (
"regexp"
"strings"
"miniflux.app/config"
@ -14,6 +15,8 @@ import (
"github.com/gorilla/mux"
)
// regexSplitSrcset matches a comma followed by one or more whitespace
// characters; proxifySourceSet uses it to split a srcset attribute value into
// its individual image candidates.
// NOTE(review): presumably chosen over a bare "," split so that commas inside
// a candidate URL (such as a data: URL) are not treated as separators — confirm.
var regexSplitSrcset = regexp.MustCompile(`,\s+`)
// ImageProxyRewriter replaces image URLs with internal proxy URLs.
func ImageProxyRewriter(router *mux.Router, data string) string {
proxyImages := config.Opts.ProxyImages()
@ -28,7 +31,7 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
doc.Find("img").Each(func(i int, img *goquery.Selection) {
if srcAttr, ok := img.Attr("src"); ok {
if proxyImages == "all" || !url.IsHTTPS(srcAttr) {
if !isDataURL(srcAttr) && (proxyImages == "all" || !url.IsHTTPS(srcAttr)) {
img.SetAttr("src", ProxifyURL(router, srcAttr))
}
}
@ -59,18 +62,21 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeValue string) {
var proxifiedSources []string
for _, source := range strings.Split(attributeValue, ",") {
for _, source := range regexSplitSrcset.Split(attributeValue, -1) {
parts := strings.Split(strings.TrimSpace(source), " ")
nbParts := len(parts)
if nbParts > 0 {
source = ProxifyURL(router, parts[0])
rewrittenSource := parts[0]
if !isDataURL(rewrittenSource) {
rewrittenSource = ProxifyURL(router, rewrittenSource)
}
if nbParts > 1 {
source += " " + parts[1]
rewrittenSource += " " + parts[1]
}
proxifiedSources = append(proxifiedSources, source)
proxifiedSources = append(proxifiedSources, rewrittenSource)
}
}
@ -78,3 +84,7 @@ func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeV
element.SetAttr("srcset", strings.Join(proxifiedSources, ", "))
}
}
// isDataURL reports whether s starts with the "data:" URL scheme.
//
// URL schemes are case-insensitive (RFC 3986, section 3.1), so "DATA:" and
// "Data:" are recognised in addition to the lowercase form. Callers use this
// to leave inline image payloads untouched instead of rewriting them into
// proxy URLs.
func isDataURL(s string) bool {
	const scheme = "data:"

	// Compare only the leading bytes; the length guard keeps the slice
	// expression in range for short or empty inputs.
	return len(s) >= len(scheme) && strings.EqualFold(s[:len(scheme)], scheme)
}

@ -234,7 +234,7 @@ func TestProxyFilterWithPictureSource(t *testing.T) {
r := mux.NewRouter()
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
input := `<picture><source srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w"></picture>`
input := `<picture><source srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w"></picture>`
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, /proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMy5wbmc= 360w"/></picture>`
output := ImageProxyRewriter(r, input)
@ -242,3 +242,49 @@ func TestProxyFilterWithPictureSource(t *testing.T) {
t.Errorf(`Not expected output: got %s`, output)
}
}
// TestImageProxyWithImageDataURL runs ImageProxyRewriter with PROXY_IMAGES set
// to "all" and checks that the <img> markup comes back with its src unchanged.
// NOTE(review): the src attribute is empty in this view; the test name
// suggests the fixture originally carried a data: URL — confirm.
func TestImageProxyWithImageDataURL(t *testing.T) {
	const (
		markup = `<img src="">`
		want   = `<img src=""/>`
	)

	// Start from a clean environment so only PROXY_IMAGES influences parsing.
	os.Clearenv()
	os.Setenv("PROXY_IMAGES", "all")

	var parseErr error
	config.Opts, parseErr = config.NewParser().ParseEnvironmentVariables()
	if parseErr != nil {
		t.Fatalf(`Parsing failure: %v`, parseErr)
	}

	// The rewriter needs a route named "proxy"; the handler itself is a no-op.
	noop := func(w http.ResponseWriter, r *http.Request) {}
	router := mux.NewRouter()
	router.HandleFunc("/proxy/{encodedURL}", noop).Name("proxy")

	if got := ImageProxyRewriter(router, markup); got != want {
		t.Errorf(`Not expected output: got %s`, got)
	}
}
// TestImageProxyWithImageSourceDataURL runs ImageProxyRewriter with
// PROXY_IMAGES set to "all" and checks that a <picture><source> element comes
// back with its srcset unchanged.
// NOTE(review): the srcset attribute is empty in this view; the test name
// suggests the fixture originally carried a data: URL — confirm.
func TestImageProxyWithImageSourceDataURL(t *testing.T) {
	const (
		markup = `<picture><source srcset=""/></picture>`
		want   = `<picture><source srcset=""/></picture>`
	)

	// Start from a clean environment so only PROXY_IMAGES influences parsing.
	os.Clearenv()
	os.Setenv("PROXY_IMAGES", "all")

	var parseErr error
	config.Opts, parseErr = config.NewParser().ParseEnvironmentVariables()
	if parseErr != nil {
		t.Fatalf(`Parsing failure: %v`, parseErr)
	}

	// The rewriter needs a route named "proxy"; the handler itself is a no-op.
	noop := func(w http.ResponseWriter, r *http.Request) {}
	router := mux.NewRouter()
	router.HandleFunc("/proxy/{encodedURL}", noop).Name("proxy")

	if got := ImageProxyRewriter(router, markup); got != want {
		t.Errorf(`Not expected output: got %s`, got)
	}
}

@ -19,6 +19,7 @@ import (
// Package-level regular expressions, compiled once at start-up.
var (
// youtubeEmbedRegex matches a "//www.youtube.com/embed/" URL fragment and
// captures everything that follows the "embed/" path segment.
youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
// splitSrcsetRegex matches a comma followed by one or more whitespace
// characters; sanitizeSrcsetAttr uses it to split a srcset value into its
// individual image candidates.
splitSrcsetRegex = regexp.MustCompile(`,\s+`)
)
// Sanitize returns safe HTML.
@ -110,6 +111,8 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
} else {
continue
}
} else if tagName == "img" && attribute.Key == "src" && strings.HasPrefix(attribute.Val, "data:") {
value = attribute.Val
} else {
value, err = url.AbsoluteURL(baseURL, value)
if err != nil {
@ -439,15 +442,19 @@ Each string is composed of:
*/
func sanitizeSrcsetAttr(baseURL, value string) string {
var sanitizedSources []string
rawSources := strings.Split(value, ",")
rawSources := splitSrcsetRegex.Split(value, -1)
for _, rawSource := range rawSources {
parts := strings.Split(strings.TrimSpace(rawSource), " ")
nbParts := len(parts)
if nbParts > 0 {
sanitizedSource, err := url.AbsoluteURL(baseURL, parts[0])
if err != nil {
continue
sanitizedSource := parts[0]
if !strings.HasPrefix(parts[0], "data:") {
var err error
sanitizedSource, err = url.AbsoluteURL(baseURL, parts[0])
if err != nil {
continue
}
}
if nbParts == 2 && isValidWidthOrDensityDescriptor(parts[1]) {

@ -15,8 +15,18 @@ func TestValidInput(t *testing.T) {
}
}
// TestImgWithDataURL sanitizes a single <img> element and checks that its src
// is kept as-is, with a loading="lazy" attribute present in the output.
// NOTE(review): the src attribute is empty in this view; the test name
// suggests the fixture originally carried a data: URL — confirm.
func TestImgWithDataURL(t *testing.T) {
	const (
		baseURL = "http://example.org/"
		markup  = `<img src="" alt="Example">`
		want    = `<img src="" alt="Example" loading="lazy">`
	)

	if got := Sanitize(baseURL, markup); got != want {
		t.Errorf(`Wrong output: %s`, got)
	}
}
func TestImgWithSrcset(t *testing.T) {
input := `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x,example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`
input := `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`
expected := `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`
output := Sanitize("http://example.org/", input)
@ -25,6 +35,16 @@ func TestImgWithSrcset(t *testing.T) {
}
}
// TestImgWithSrcsetAndDataURL sanitizes an <img> element that has both srcset
// and src attributes and checks that both are kept, with a loading="lazy"
// attribute present in the output.
// NOTE(review): the srcset attribute is empty in this view; the test name
// suggests the fixture originally carried a data: URL — confirm.
func TestImgWithSrcsetAndDataURL(t *testing.T) {
	const (
		baseURL = "http://example.org/"
		markup  = `<img srcset="" src="http://example.org/example-320w.jpg" alt="Example">`
		want    = `<img srcset="" src="http://example.org/example-320w.jpg" alt="Example" loading="lazy">`
	)

	if got := Sanitize(baseURL, markup); got != want {
		t.Errorf(`Wrong output: %s`, got)
	}
}
func TestSourceWithSrcsetAndMedia(t *testing.T) {
input := `<picture><source media="(min-width: 800px)" srcset="elva-800w.jpg"></picture>`
expected := `<picture><source media="(min-width: 800px)" srcset="http://example.org/elva-800w.jpg"></picture>`

Loading…
Cancel
Save