Message info
 
To:mediawiki-cvs@lists.wikimedia.org From:dantman@svn.wikimedia.org Subject:[MediaWiki-CVS] SVN: [111891] trunk/phase3 Date:Sun, 19 Feb 2012 21:43:37 +0000
 

https://www.mediawiki.org/wiki/Special:Code/MediaWiki/111891

Revision: 111891
Author: dantman
Date: 2012-02-19 21:43:37 +0000 (Sun, 19 Feb 2012)
Log Message:
-----------
Handle one part of bug 32545 while improving MediaWiki's support for Microdata in content by adding support for the <data>, <time>, <meta>, and <link> elements. The latter two are only permitted when Microdata is enabled, and for security are only allowed to be actual elements when they have a strict set of attributes set.

Modified Paths:
--------------
trunk/phase3/RELEASE-NOTES-1.20
trunk/phase3/includes/Sanitizer.php
trunk/phase3/tests/parser/parserTests.txt

Modified: trunk/phase3/RELEASE-NOTES-1.20
===================================================================
--- trunk/phase3/RELEASE-NOTES-1.20 2012-02-19 21:07:55 UTC (rev 111890)
+++ trunk/phase3/RELEASE-NOTES-1.20 2012-02-19 21:43:37 UTC (rev 111891)
@@ -22,6 +22,8 @@
* (bug 34475) Add support for IP/CIDR notation to tablesorter
* (bug 27619) Remove preference option to display broken links as link?
* (bug 15404) Add support for sorting fractions in jquery.tablesorter
+* The <data>, <time>, <meta>, and <link> elements are allowed within WikiText for use
+ with Microdata.

=== Bug fixes in 1.20 ===
* (bug 30245) Use the correct way to construct a log page title.

Modified: trunk/phase3/includes/Sanitizer.php
===================================================================
--- trunk/phase3/includes/Sanitizer.php 2012-02-19 21:07:55 UTC (rev 111890)
+++ trunk/phase3/includes/Sanitizer.php 2012-02-19 21:43:37 UTC (rev 111891)
@@ -364,7 +364,7 @@
* @return string
*/
static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) {
- global $wgUseTidy;
+ global $wgUseTidy, $wgHtml5, $wgAllowMicrodataAttributes;

static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags,
$htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised;
@@ -381,12 +381,19 @@
'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn',
'kbd', 'samp'
);
+ if ( $wgHtml5 ) {
+ $htmlpairsStatic = array_merge( $htmlpairsStatic, array( 'data', 'time' ) );
+ }
$htmlsingle = array(
'br', 'hr', 'li', 'dt', 'dd'
);
$htmlsingleonly = array( # Elements that cannot have close tags
'br', 'hr'
);
+ if ( $wgHtml5 && $wgAllowMicrodataAttributes ) {
+ $htmlsingle[] = $htmlsingleonly[] = 'meta';
+ $htmlsingle[] = $htmlsingleonly[] = 'link';
+ }
$htmlnest = array( # Tags that can be nested--??
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
'dl', 'font', 'big', 'small', 'sub', 'sup', 'span'
@@ -528,6 +535,10 @@
call_user_func_array( $processCallback, array( &$params, $args ) );
}

+ if ( !Sanitizer::validateTag( $params, $t ) ) {
+ $badtag = true;
+ }
+
# Strip non-approved attributes from the tag
$newparams = Sanitizer::fixTagAttributes( $params, $t );
}
@@ -709,6 +720,37 @@
}

/**
+ * Takes attribute names and values for a tag and the tag name and
+ * validates that the tag is allowed to be present.
+ * This DOES NOT validate the attributes, nor does it validate the
+ * tags themselves. This method only handles the special circumstances
+ * where we may want to allow a tag within content but ONLY when it has
+ * specific attributes set.
+ *
+ * @param $
+ */
+ static function validateTag( $params, $element ) {
+ $params = Sanitizer::decodeTagAttributes( $params );
+
+ if ( $element == 'meta' || $element == 'link' ) {
+ if ( !isset( $params['itemprop'] ) ) {
+ // <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content
+ return false;
+ }
+ if ( $element == 'meta' && !isset( $params['content'] ) ) {
+ // <meta> must have a content="" for the itemprop
+ return false;
+ }
+ if ( $element == 'link' && !isset( $params['href'] ) ) {
+ // <link> must have an associated href=""
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
* Take an array of attribute names and values and normalize or discard
* illegal values for the given element type.
*
@@ -809,7 +851,7 @@
unset( $out['itemid'] );
unset( $out['itemref'] );
}
- # TODO: Strip itemprop if we aren't descendants of an itemscope.
+ # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref.
}
return $out;
}
@@ -1483,7 +1525,7 @@

# Numbers refer to sections in HTML 4.01 standard describing the element.
# See: http://www.w3.org/TR/html4/
- $whitelist = array (
+ $whitelist = array(
# 7.5.4
'div' => $block,
'center' => $common, # deprecated
@@ -1611,7 +1653,24 @@
# 'title' may not be 100% valid here; it's XHTML
# http://www.w3.org/TR/REC-MathML/
'math' => array( 'class', 'style', 'id', 'title' ),
+ );
+
+ if ( $wgHtml5 ) {
+ # HTML5 elements, defined by:
+ # http://www.whatwg.org/specs/web-apps/current-work/multipage/
+ $whitelist += array(
+ 'data' => array_merge( $common, array( 'value' ) ),
+ 'time' => array_merge( $common, array( 'datetime' ) ),
+
+ // meta and link are only present when Microdata is allowed anyways
+ // so we don't bother adding another condition here
+ // meta and link are only valid for use as Microdata so we do not
+ // allow the common attributes here.
+ 'meta' => array( 'itemprop', 'content' ),
+ 'link' => array( 'itemprop', 'href' ),
);
+ }
+
return $whitelist;
}


Modified: trunk/phase3/tests/parser/parserTests.txt
===================================================================
--- trunk/phase3/tests/parser/parserTests.txt 2012-02-19 21:07:55 UTC (rev 111890)
+++ trunk/phase3/tests/parser/parserTests.txt 2012-02-19 21:43:37 UTC (rev 111891)
@@ -5420,6 +5420,26 @@
!! end

!! test
+Sanitizer: Validating that <meta> and <link> work, but only for Microdata
+!! input
+<div itemscope>
+ <meta itemprop="hello" content="world">
+ <meta http-equiv="refresh" content="5">
+ <link itemprop="hello" href="{{SERVER}}">
+ <link rel="stylesheet" href="{{SERVER}}">
+</div>
+!! result
+<div itemscope="itemscope">
+<p> <meta itemprop="hello" content="world" />
+ <meta http-equiv="refresh" content="5">
+</p>
+ <link itemprop="hello" href="http://Britney-Spears" />
+ <link rel="stylesheet" href="<a rel="nofollow" class="external free" href="http://Britney-Spears">http://Britney-Spears</a>">
+</div>
+
+!! end
+
+!! test
Language converter: output gets cut off unexpectedly (bug 5757)
!! options
language=zh


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs