Skip to content
6 changes: 3 additions & 3 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1468,8 +1468,8 @@ public function serialize_token(): string {

/*
* The HTML parser strips a leading newline immediately after the start
* tag of TEXTAREA, PRE, and LISTING elements. When serializing, prepend
* a leading newline to ensure the semantic HTML content is preserved.
* tag of TEXTAREA, PRE, and LISTING elements in HTML content. When serializing,
* prepend a leading newline to ensure the semantic HTML content is preserved.
*
* For example, `<pre>\n\nX</pre>` must not become `<pre>\nX</pre>` because its content
* has changed. However, `<pre>X</pre>` and `<pre>\nX</pre>` are _equivalent_.
Expand All @@ -1488,7 +1488,7 @@ public function serialize_token(): string {
*
* @see https://html.spec.whatwg.org/multipage/parsing.html
*/
if ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) {
if ( $in_html && ( 'TEXTAREA' === $tag_name || 'PRE' === $tag_name || 'LISTING' === $tag_name ) ) {
$html .= "\n";
}

Expand Down
49 changes: 37 additions & 12 deletions tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php
Original file line number Diff line number Diff line change
Expand Up @@ -463,21 +463,30 @@ public static function data_provider_serialize_doctype() {
}

/**
* Ensures that leading newlines in PRE, LISTING, and TEXTAREA elements are normalized
* according to their parsing namespace, and that normalization is idempotent in these cases.
*
* The input is normalized once and compared against the expected output, then the result
* is normalized a second time and compared against the same expected output.
*
* NOTE(review): this span appears to interleave the pre-change and post-change lines of a
* diff. Each `assertEqualHTML( $expected, ... )` call looks like the superseded revision of
* the `assertSame( $expected, ... )` call that follows it. Confirm which revision is intended.
*
* @ticket 64607
*
* @dataProvider data_provider_normalize_special_leading_newline_cases
*
* @param string $input HTML input containing PRE, LISTING, or TEXTAREA elements, with or without a leading newline.
* @param string $expected Expected exact output after normalization.
*/
public function test_normalize_special_leading_newline_handling( string $input, string $expected ) {
// First pass: normalize the raw input.
$normalized = WP_HTML_Processor::normalize( $input );
$this->assertEqualHTML( $expected, $normalized );

/*
* Byte equality pins normalize()'s serialized form; HTML equality verifies
* semantic equivalence. This distinction matters because HTML parsing ignores
* one leading LF after PRE, LISTING, and TEXTAREA start tags.
*/
$this->assertSame( $expected, $normalized );
$this->assertEqualHTML( $input, $normalized );

// Second pass: normalizing the already-normalized output must yield the same result.
$normalized_twice = WP_HTML_Processor::normalize( $normalized );
$this->assertEqualHTML( $expected, $normalized_twice );
$this->assertSame( $expected, $normalized_twice );
$this->assertEqualHTML( $normalized, $normalized_twice );
}

/**
Expand Down Expand Up @@ -653,50 +662,66 @@ public static function data_provider_normalized_fuzzer_cases_that_should_be_idem
/**
* Data provider.
*
* Supplies named datasets for the special leading-newline normalization test. Each dataset
* is keyed by a description and lists the HTML input first, followed by the expected
* normalized output.
*
* NOTE(review): this span appears to interleave the pre-change and post-change lines of a
* diff. It contains two function headers, and several datasets list three strings instead
* of two (presumably the old and the new expected value side by side). Confirm which
* revision is intended before relying on this block.
*
* @return array<string, array{string, string}>
*/
public static function data_provider_normalize_special_leading_newline_cases() {
public static function data_provider_normalize_special_leading_newline_cases(): array {
return array(
// PRE cases.
'Leading newline in PRE' => array(
"<pre>\nline 1\nline 2</pre>",
"<pre>line 1\nline 2</pre>",
"<pre>\nline 1\nline 2</pre>",
),
'Double leading newline in PRE' => array(
"<pre>\n\nline 2\nline 3</pre>",
"<pre>\n\nline 2\nline 3</pre>",
),
'Multiple text nodes inside PRE' => array(
"<pre>\nline 1<!--comment--> still line 1</pre>",
'<pre>line 1<!--comment--> still line 1</pre>',
"<pre>\nline 1<!--comment--> still line 1</pre>",
),
'Multiple text nodes inside PRE with leading newlines' => array(
"<pre>\n\nline 2<!--comment--> still line 2</pre>",
"<pre>\n\nline 2<!--comment--> still line 2</pre>",
),
// LISTING cases, mirroring the PRE cases above.
'Leading newline in LISTING' => array(
"<listing>\nline 1\nline 2</listing>",
"<listing>line 1\nline 2</listing>",
"<listing>\nline 1\nline 2</listing>",
),
'Double leading newline in LISTING' => array(
"<listing>\n\nline 2\nline 3</listing>",
"<listing>\n\nline 2\nline 3</listing>",
),
'Multiple text nodes inside LISTING' => array(
"<listing>\nline 1<!--comment--> still line 1</listing>",
'<listing>line 1<!--comment--> still line 1</listing>',
"<listing>\nline 1<!--comment--> still line 1</listing>",
),
'Multiple text nodes inside LISTING with leading newlines' => array(
"<listing>\n\nline 2<!--comment--> still line 2</listing>",
"<listing>\n\nline 2<!--comment--> still line 2</listing>",
),
// TEXTAREA cases.
'Leading newline in TEXTAREA' => array(
"<textarea>\nline 1\nline 2</textarea>",
"<textarea>line 1\nline 2</textarea>",
"<textarea>\nline 1\nline 2</textarea>",
),
'Double leading newline in TEXTAREA' => array(
"<textarea>\n\nline 2\nline 3</textarea>",
"<textarea>\n\nline 2\nline 3</textarea>",
),
// TEXTAREA nested in MathML or SVG: the expected output is identical to the input.
'Foreign MathML TEXTAREA does not ignore leading newlines' => array(
'<math><textarea>X</textarea></math>',
'<math><textarea>X</textarea></math>',
),
'Foreign MathML TEXTAREA preserves leading newline' => array(
"<math><textarea>\nX</textarea></math>",
"<math><textarea>\nX</textarea></math>",
),
'Foreign SVG TEXTAREA does not ignore leading newlines' => array(
'<svg><textarea>X</textarea></svg>',
'<svg><textarea>X</textarea></svg>',
),
'Foreign SVG TEXTAREA preserves leading newline' => array(
"<svg><textarea>\nX</textarea></svg>",
"<svg><textarea>\nX</textarea></svg>",
),
);
}
}
Loading