fu: Close-up of Fu, bringing a scoop of water to her mouth (Default)
fu ([personal profile] fu) wrote in [site community profile] changelog2011-08-03 03:00 am

[dw-free] Video Embedding: YouTube vids w/ 'privacy-enhanced mode' enabled can't be embedded

[commit: http://hg.dwscoalition.org/dw-free/rev/84c26bd010e3]

http://bugs.dwscoalition.org/show_bug.cgi?id=3780

Refactor to parse out the URI into components using a module, rather than
using a regex on the whole URL, to make the conditionals more readable, and
to pave the way for further whitelisting.

Then add www.youtube-nocookie.com to the valid domains to embed from,
following the same rules as youtube.com

Patch by [personal profile] fu.

Files modified:
  • cgi-bin/DW/Hooks/EmbedWhitelist.pm
  • t/embed-whitelist.t
--------------------------------------------------------------------------------
diff -r 929a769363e8 -r 84c26bd010e3 cgi-bin/DW/Hooks/EmbedWhitelist.pm
--- a/cgi-bin/DW/Hooks/EmbedWhitelist.pm	Wed Aug 03 06:42:04 2011 +0800
+++ b/cgi-bin/DW/Hooks/EmbedWhitelist.pm	Wed Aug 03 10:24:41 2011 +0800
@@ -27,14 +27,39 @@
 
 use strict;
 use LJ::Hooks;
+use URI;
+
+sub match_subdomain {
+    my $want_domain = $_[0];
+    my $domain_from_uri = $_[1];
+
+    return $domain_from_uri =~ /^(?:[\w.-]*\.)?\Q$want_domain\E$/;
+}
+
+sub match_full_path {
+    my $want_path = $_[0];
+    my $path_from_uri = $_[1];
+
+    return $path_from_uri =~ /^$want_path$/;
+}
 
 LJ::Hooks::register_hook( 'allow_iframe_embeds', sub {
     my ( $embed_url, %opts ) = @_;
 
     return 0 unless $embed_url;
 
+    my $parsed_uri = URI->new( $embed_url );
+
+    my $uri_scheme = $parsed_uri->scheme;
+    return 0 unless $uri_scheme eq "http" || $uri_scheme eq "https";
+
+    my $uri_host = $parsed_uri->host;
+    my $uri_path = $parsed_uri->path;   # not including query
+
     ## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html)
-    return 1 if $embed_url =~ m!^https?://(?:[\w.-]*\.)?youtube\.com/embed/[-_a-zA-Z0-9]{11,}(?:\?.*)?$!;
+    if ( match_subdomain( "youtube.com", $uri_host ) || match_subdomain( "youtube-nocookie.com", $uri_host ) ) {
+        return 1 if match_full_path( qr!/embed/[-_a-zA-Z0-9]{11,}!, $uri_path );
+    }
 
     return 0;
 
diff -r 929a769363e8 -r 84c26bd010e3 t/embed-whitelist.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/t/embed-whitelist.t	Wed Aug 03 10:24:41 2011 +0800
@@ -0,0 +1,48 @@
+# -*-perl-*-
+use strict;
+
+use Test::More tests => 10;
+use lib "$ENV{LJHOME}/cgi-bin";
+require 'ljlib.pl';
+
+use DW::Hooks::EmbedWhitelist;
+
+sub test_good_url {
+    local $Test::Builder::Level = $Test::Builder::Level + 1;
+    my $url = $_[0];
+    my $msg = $_[1];
+    subtest "good embed url $url", sub {
+        ok( LJ::Hooks::run_hook( "allow_iframe_embeds", $url ), $msg );
+    }
+}
+
+sub test_bad_url {
+    local $Test::Builder::Level = $Test::Builder::Level + 1;
+    my $url = $_[0];
+    my $msg = $_[1];
+    subtest "bad embed url $url", sub {
+        ok( ! LJ::Hooks::run_hook( "allow_iframe_embeds", $url ), $msg );
+    }
+}
+
+note( "testing various schemas" );
+{
+    test_bad_url( "", "data schema" );
+
+    test_good_url( "http://www.youtube.com/embed/123457890abc", "known good (assumed good for this test)" );
+    test_bad_url( "data://www.youtube.com/embed/123457890abc", "looks good, but has a bad schema" );
+}
+
+note( "youtube" );
+{
+    test_good_url( "http://www.youtube.com/embed/x1xx2xxxxxX", "normal youtube url" );
+    test_good_url( "https://www.youtube.com/embed/x1xx2xxxxxX", "https youtube url" );
+    test_good_url( "http://www.youtube-nocookie.com/embed/x1xx2xxxxxX", "privacy-enhanced youtube url" );
+    test_good_url( "https://www.youtube-nocookie.com/embed/x1xx2xxxxxX", "https privacy-enhanced youtube url" );
+
+    # with arguments
+    test_good_url( "http://www.youtube.com/embed/x1xx2xxxxxX?somearg=1&otherarg=2", "with arguments" );
+
+    test_bad_url( "http://www.youtube.com/notreallyembed/x1xx2xxxxxX", "wrong path");
+    test_bad_url( "http://www.youtube.com/embed/x1xx2xxxxxX/butnotreally", "wrong path");
+}
--------------------------------------------------------------------------------