* <style> and <table> elements and their contents.
* 2. Selects all <p> elements.
* 3. Iterates over those paragraphs, strips out all HTML tags and trims white-space around.
* 4. Then the first non-empty paragraph is picked as the result.
*
* @param string $text
* @return string|null Text of the first non-empty paragraph, or null if there is none.
*/
public function derive( string $text ): ?string {
	// Step 1: drop <style> and <table> elements together with their contents,
	// so CSS rules and tabular data are never picked up as the description.
	$cleaned = $text;
	foreach ( [ 'style', 'table' ] as $tag ) {
		$name = preg_quote( $tag, '%' );
		// Matches one balanced <tag>...</tag> element:
		//  - (?R) recurses so nested elements of the same name are consumed as a unit;
		//  - the negative lookahead stops the inner scan at the next opening or
		//    closing tag of the same name;
		//  - possessive quantifiers (*+) keep backtracking bounded on large input.
		$pattern = "%<{$name}\\b[^>]*+>(?:(?R)|[^<]*+(?:(?!</?{$name}\\b)<[^<]*+)*+)*+</{$name}>%i";
		$stripped = preg_replace( $pattern, '', $cleaned );
		// preg_replace() returns null on a PCRE error (e.g. backtrack/recursion
		// limit). Keep the text we already have rather than losing everything.
		if ( $stripped !== null ) {
			$cleaned = $stripped;
		}
	}

	// Step 2: collect every <p> element (with or without attributes).
	// \b prevents matching other tags that merely start with "p", such as <pre>.
	$matches = [];
	if ( !preg_match_all( '#<p\b[^>]*>.*?</p>#is', $cleaned, $matches ) ) {
		// No paragraph found (0) or the match itself failed (false).
		return null;
	}

	// Steps 3-4: reduce each paragraph to plain text and return the first one
	// that still has content.
	foreach ( $matches[0] as $paragraphHtml ) {
		$plain = trim( strip_tags( $paragraphHtml ) );
		// Strict comparison: a paragraph consisting of "0" is valid content,
		// whereas a truthiness check would silently skip it.
		if ( $plain !== '' ) {
			return $plain;
		}
	}

	return null;
}
}