[dw-free] Allow iframe tags for YouTube embed
[commit: http://hg.dwscoalition.org/dw-free/rev/7c201868736d]
http://bugs.dwscoalition.org/show_bug.cgi?id=3418
Allow embedded iframes from a whitelist of domains.
Patch adapted from LiveJournal by fu.
Files modified:
- cgi-bin/DW/Hooks/EmbedWhitelist.pm
- cgi-bin/LJ/CleanHTML.pm
- t/clean-embed.t
--------------------------------------------------------------------------------
diff -r c03034073124 -r 7c201868736d cgi-bin/DW/Hooks/EmbedWhitelist.pm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cgi-bin/DW/Hooks/EmbedWhitelist.pm Fri Feb 04 15:04:51 2011 +0800
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+#
+# This code was based on code originally created by the LiveJournal project
+# owned and operated by Live Journal, Inc. The code has been modified and expanded
+# by Dreamwidth Studios, LLC. These files were originally licensed under
+# the terms of the license supplied by Live Journal, Inc, which can
+# currently be found at:
+#
+# http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt
+#
+# In accordance with the original license, this code and all its
+# modifications are provided under the GNU General Public License.
+# A copy of that license can be found in the LICENSE file included as
+# part of this distribution.
+#
+#
+# DW::Hooks::EmbedWhitelist
+#
+# Keep a whitelist of trusted sites which we trust for certain kinds of embeds
+#
+# Authors:
+# Afuna <coder.dw@afunamatata.com>
+#
+# Copyright (c) 2011 by Dreamwidth Studios, LLC.
+
+package DW::Hooks::EmbedWhitelist;
+
+use strict;
+use LJ::Hooks;
+
+# Hook 'allow_iframe_embeds': returns 1 if the given iframe src URL is whitelisted, 0 otherwise.
+LJ::Hooks::register_hook( 'allow_iframe_embeds', sub {
+    my ( $embed_url, %opts ) = @_;    # %opts is accepted but not used here
+    return 0 unless $embed_url;       # missing or empty src is never trusted
+
+    ## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html)
+    ## anchored with \z rather than $: $ would also accept a URL followed by a trailing newline
+    return 1 if $embed_url =~ m!^https?://(?:[\w.-]*\.)?youtube\.com/embed/[-_a-zA-Z0-9]{11,}(?:\?.*)?\z!;
+
+    return 0;
+} );
+
+1;
diff -r c03034073124 -r 7c201868736d cgi-bin/LJ/CleanHTML.pm
--- a/cgi-bin/LJ/CleanHTML.pm Mon Feb 07 12:52:01 2011 +0800
+++ b/cgi-bin/LJ/CleanHTML.pm Fri Feb 04 15:04:51 2011 +0800
@@ -162,6 +162,10 @@ sub clean
if (ref $opts->{'remove'} eq "ARRAY") {
foreach (@{$opts->{'remove'}}) { $action{$_} = "deny"; $remove{$_} = 1; }
}
+ if (ref $opts->{'conditional'} eq "ARRAY") {
+ foreach (@{$opts->{'conditional'}}) { $action{$_} = "conditional"; }
+ }
+
$action{'script'} = "eat";
@@ -413,6 +417,23 @@ sub clean
$p->unget_token($token);
$p->get_tag("/$tag");
next;
+ }
+
+ # force this specific instance of the tag to be allowed (for conditional)
+ my $force_allow = 0;
+ if (defined $action{$tag} and $action{$tag} eq "conditional") {
+ if ( $tag eq "iframe" ) {
+ $force_allow = LJ::Hooks::run_hook( 'allow_iframe_embeds', $attr->{src} );
+ unless ( $force_allow ) {
+ ## eat this tag
+ if (!$attr->{'/'}) {
+ ## if not autoclosed tag (<iframe />),
+ ## then skip everything till the closing tag
+ $p->get_tag("/iframe");
+ }
+ next TOKEN;
+ }
+ }
}
# try to call HTMLCleaner's element-specific cleaner on this open tag
@@ -823,7 +844,10 @@ sub clean
my $allow;
if ($mode eq "allow") {
$allow = 1;
- if (defined $action{$tag} and $action{$tag} eq "deny") { $allow = 0; }
+ if ( defined $action{$tag} and $action{$tag} eq "deny" ) { $allow = 0; }
+ if ( defined $action{$tag} and $action{$tag} eq "conditional" ) {
+ $allow = $force_allow;
+ }
} else {
$allow = 0;
if (defined $action{$tag} and $action{$tag} eq "allow") { $allow = 1; }
@@ -963,7 +987,7 @@ sub clean
} else {
if ($mode eq "allow") {
$allow = 1;
- if (defined $action{$tag} and $action{$tag} eq "deny") { $allow = 0; }
+ if (defined $action{$tag} and ( $action{$tag} eq "deny" || $action{$tag} eq "conditional" ) ) { $allow = 0; }
} else {
$allow = 0;
if (defined $action{$tag} and $action{$tag} eq "allow") { $allow = 1; }
@@ -1020,6 +1044,12 @@ sub clean
} else {
$newdata .= "</$tag>";
}
+ }
+
+ if ( defined $action{$tag} and $action{$tag} eq "conditional" && $tagstack[-1] eq $tag ) {
+ $newdata .= "</$tag>";
+ pop @tagstack;
+ $opencount{$tag}--;
}
}
}
@@ -1446,6 +1476,30 @@ sub clean_event
});
}
+# Sanitize an embed module's HTML in place: strip <script>, keep <iframe> only when whitelisted
+sub clean_embed {
+    my ( $html_ref ) = @_;
+    return if !$$html_ref || !LJ::is_enabled( 'embedmodule-cleancontent' );
+
+    my %clean_opts = (
+        mode                    => 'allow',
+        allow                   => [ qw( object embed ) ],
+        deny                    => [ qw( script ) ],
+        remove                  => [ qw( script ) ],
+        conditional             => [ qw( iframe ) ],
+        addbreaks               => 0,
+        tablecheck              => 0,
+        ljcut_disable           => 1,
+        cleancss                => 1,
+        extractlinks            => 0,
+        noautolinks             => 1,
+        extractimages           => 0,
+        noexpandembedded        => 1,
+        transform_embed_nocheck => 1,
+    );
+    return clean( $html_ref, \%clean_opts );
+}
+
sub get_okay_comment_tags
{
return @comment_all;
diff -r c03034073124 -r 7c201868736d t/clean-embed.t
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t/clean-embed.t Fri Feb 04 15:04:51 2011 +0800
@@ -0,0 +1,162 @@
+# -*-perl-*-
+use strict;
+
+use Test::More tests => 23;
+use lib "$ENV{LJHOME}/cgi-bin";
+require 'ljlib.pl';
+
+use LJ::CleanHTML;
+
+my ( $orig_post, $clean_post );
+
+my $clean = sub {
+ my ( $opts ) = @_;
+ LJ::CleanHTML::clean_embed( \$orig_post, $opts );
+};
+
+note("no content");
+$orig_post = qq{};
+$clean_post = qq{};
+$clean->();
+is( $orig_post, $clean_post, "empty" );
+
+
+note("simple object");
+$orig_post = qq{<object></object>};
+$clean_post = qq{<object></object>};
+$clean->();
+is( $orig_post, $clean_post, "basic <object>" );
+
+
+note("<object> and <embed> tags");
+$orig_post = qq{<object width="640" height="385"><param name="movie" value="http://www.example.com/video"></param><param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.example.com/video" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="640" height="385"></embed></object>};
+$clean_post = qq{<object width="640" height="385"><param name="movie" value="http://www.example.com/video"></param><param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.example.com/video" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="640" height="385"></embed></object>};
+$clean->();
+is( $orig_post, $clean_post, "<object> and <embed> tags" );
+
+
+note("script tag");
+$orig_post = qq{<object><script>bar</script></object>};
+$clean_post = qq{<object></object>};
+$clean->();
+is( $orig_post, $clean_post, "<script> tag" );
+
+
+note("iframe tag");
+$orig_post = qq{<iframe src="http://example.com/randompage"></iframe>};
+$clean_post = qq{};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag" );
+
+
+my $id = "ABC123abc-_";
+note("trusted site: youtube");
+$orig_post = qq{<object width="640" height="385"><param name="movie" value="http://www.youtube.com/v/$id?fs=1&hl=en_US"></param><param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.youtube.com/v/$id?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="640" height="385"></embed></object>};
+$clean_post = qq{<object width="640" height="385"><param name="movie" value="http://www.youtube.com/v/$id?fs=1&hl=en_US"></param><param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.youtube.com/v/$id?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="640" height="385"></embed></object>};
+$clean->();
+is( $orig_post, $clean_post, "old-style embeds" );
+
+
+$orig_post = qq{<iframe src="http://www.youtube.com/"></iframe>};
+$clean_post = qq{};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: youtube (not an embed url)" );
+
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/123"></iframe>};
+$clean_post = qq{};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: youtube (invalid id)" );
+
+
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id"></iframe>};
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: www.youtube.com (iframe embed code)" );
+
+$orig_post = qq{<iframe src="http://youtube.com/embed/$id"></iframe>};
+$clean_post = qq{<iframe src="http://youtube.com/embed/$id"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: youtube.com (iframe embed code)" );
+
+$orig_post = qq{<iframe src="http://abc.youtube.com/embed/$id"></iframe>};
+$clean_post = qq{<iframe src="http://abc.youtube.com/embed/$id"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: abc.youtube.com (iframe embed code)" );
+
+$orig_post = qq{<iframe src="http://not-youtube.com/embed/$id"></iframe>};    # bare lookalike domain, no "www."
+$clean_post = qq{};    # untrusted host: the whole iframe is expected to be dropped
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: not-youtube.com" );
+
+$orig_post = qq{<iframe src="http://www.not-youtube.com/embed/$id"></iframe>};
+$clean_post = qq{};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: www.not-youtube.com" );
+
+$orig_post = qq{<iframe src="http://youtube.com/embed/$id"></iframe> <iframe src="http://www.not-youtube.com/embed/$id"></iframe>};
+$clean_post = qq{<iframe src="http://youtube.com/embed/$id"></iframe> };
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: youtube.com (iframe embed code)" );
+
+
+# HTML 4 says an iframe can contain fallback content
+# HTML 5 says an iframe contains no fallback content
+# this doesn't actually concern itself with either. We just want to make sure
+# that you can't sneak in malicious code by wrapping it in an iframe from a trusted domain
+# (iframe contents are treated as text nodes, not HTML tokens, so these aren't stripped, merely escaped)
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id"><iframe src="http://not-youtube.com/embed/$id"></iframe></iframe>};
+# inner iframe tag closes the iframe; outer tag is discarded
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id"><iframe src="http://not-youtube.com/embed/$id"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: nested trusted and untrusted" );
+
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id"><script type="text/javascript">alert("hi");</script></iframe>};
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id"><script type="text/javascript">alert("hi");</script></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: nested trusted with script tags" );
+
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id"><style type="text/css">alert(document["coo"+"kies"])</style></iframe>};
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id"><style type="text/css">alert(document["coo"+"kies"])</style></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: nested trusted with style tags" );
+
+
+# and also make sure we are cleaning the iframe parameters properly
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id" height="100" onload="alert('hi!');" width="200"></iframe>};
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id" height="100" width="200"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: trusted with malicious parameters" );
+
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id" height="100" style="javascript:alert('hi')" width="200"></iframe>};
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id" height="100" width="200"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: trusted with malicious parameters" );
+
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id" height="100" style="position: absolute;" width="200"></iframe>};
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id" height="100" width="200"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: trusted with malicious parameters" );
+
+
+# not sure if we need to do anything about this
+$orig_post = qq{<iframe src="http://www.youtube.com/embed/$id" width="1" height="1"></iframe>};
+$clean_post = qq{<iframe src="http://www.youtube.com/embed/$id" width="1" height="1"></iframe>};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: trying to make it invisible" );
+
+
+# what if it's malformed HTML?
+TODO: {
+ local $TODO = "Not sure how to handle this. The HTML parser treats iframe like it can't contain other tags, so anything up to a closing iframe tag is text. If it's self-closed or not closed, then everything up to the end is considered text. Curretly this means that all text after an unclosed iframe is wiped out and not saved to the db -- see LJ::parse_embed_module";
+ $orig_post = qq{<iframe src="http://www.youtube.com/embed/$id" />end};
+ $clean_post = qq{<iframe src="http://www.youtube.com/embed/$id"></iframe>end};
+ $clean->();
+ is( $orig_post, $clean_post, "<iframe> tag: self-closing trusted" );
+}
+
+$orig_post = qq{<iframe src="http://not-youtube.com/embed/$id" />end};
+$clean_post = qq{end};
+$clean->();
+is( $orig_post, $clean_post, "<iframe> tag: self-closing untrusted" );
+
+
--------------------------------------------------------------------------------
