1

我一直在努力解决这个问题。我知道要查看很多代码,但是我不知道问题出在哪里,并且似乎无法缩小范围。我会赏金的。

我写了这个类来解析 bbcodes。它主要使用 strtok() ,除非您将两个标签彼此相邻放置,否则该类效果很好,而且我一生都无法弄清楚原因。

例如[b] [i]test1[/i] [/b]导致<strong> <em>test1</em> </strong>. 却[b][i]test1[/i][/b]导致<strong>i]test1/b]</strong>. 最后一个</strong>标签只在那里,因为解析器会自动关闭它在字符串中找不到结束标签的标签。它以某种方式完全错过了[i]and[/b]标签。

这是该类以及它用于设置各种 bbcode 的一个子类。子类基本上只是一个没有行为的数据结构。

<?php
    // beware images can contain any url/any get request. beware of csrf
    class Lev_TextProcessor_Extension_BbCode {

        protected $elements = array();
        protected $openTags = array();

        public function __construct() {
            $this->elements['b'] = new Lev_TextProcessor_Extension_BbCode_Element('<strong>', '</strong>');
            $this->elements['i'] = new Lev_TextProcessor_Extension_BbCode_Element('<em>', '</em>');
            $this->elements['u'] = new Lev_TextProcessor_Extension_BbCode_Element('<span style="text-decoration: underline;">', '</span>');
            $this->elements['s'] = new Lev_TextProcessor_Extension_BbCode_Element('<span style="text-decoration: line-through;">', '</span>');
            $this->elements['size'] = new Lev_TextProcessor_Extension_BbCode_Element('<span style="font-size: ', '</span>', 'px;">');
            $this->elements['color'] = new Lev_TextProcessor_Extension_BbCode_Element('<span style="color: ', '</span>', ';">');
            $this->elements['center'] = new Lev_TextProcessor_Extension_BbCode_Element('<div style="text-align: center;">', '</div>', '', true, true, false);
            $this->elements['url'] = new Lev_TextProcessor_Extension_BbCode_Element('<a href="', '</a>', '">');
            $this->elements['email'] = new Lev_TextProcessor_Extension_BbCode_Element('<a href="mailto:', '</a>', '">');
            $this->elements['img'] = new Lev_TextProcessor_Extension_BbCode_Element('<img src="', '" alt="" />', '', false, false, true);
            $this->elements['youtube'] = new Lev_TextProcessor_Extension_BbCode_Element('<object width="400" height="325"><param name="movie" value="http://www.youtube.com/v/{param}"></param><embed src="http://www.youtube.com/v/', '" type="application/x-shockwave-flash" width="400" height="325"></embed></object>', '', false, false, false);
            $this->elements['code'] = new Lev_TextProcessor_Extension_BbCode_Element('<pre><code>', '</code></pre>', '', true, false, false);
        }

        public function processText($input) {
            // pre processing
            $input = htmlspecialchars($input, ENT_NOQUOTES);
            $input = nl2br($input);
            $input = str_replace(array("\n", "\r"), '', $input);
            // start main processing
            $output = '';
            $allow_child_tags = true;
            $allow_child_quotes = true;

            $string_segment = strtok($input, '[');

            do {
                // check content for quotes
                if ($allow_child_quotes === false) {
                    if (strpos($string_segment, '"') === false) {
                        $output .= $string_segment;
                    }
                } else {
                    // add content to output
                    $output .= $string_segment;
                }

                $tag_contents = strtok(']');

                if (strpos($tag_contents, '/') === 0) {
                    // closing tag
                    $tag = substr($tag_contents, 1);
                    if (isset($this->elements[$tag]) === true && array_search($tag, $this->openTags) !== false) {
                        // tag found
                        do {
                            // close tags till matching tag found
                            $last_open_tag = array_pop($this->openTags);
                            $output .= $this->elements[$last_open_tag]->htmlAfter;
                        } while ($last_open_tag !== $tag);
                        $allow_child_tags = true;
                        $allow_child_quotes = true;
                    }
                } else {
                    // opening tag
                    // separate tag name from argument if there is one
                    $equal_pos = strpos($tag_contents, '=');
                    if ($equal_pos === false) {
                        $tag_name = $tag_contents;
                    } else {
                        $tag_name = substr($tag_contents, 0, $equal_pos);
                        $tag_argument = substr($tag_contents, $equal_pos + 1);
                    }
                    if (isset($this->elements[$tag_name]) === true) {
                        // tag found
                        if (($this->elements[$tag_name]->allowParentTags === true || count($this->openTags) === 0) && $allow_child_tags === true) {
                            // add tag to open tag list and set flags
                            $this->openTags[] = $tag_name;
                            $allow_child_tags = $this->elements[$tag_name]->allowChildTags;
                            $allow_child_quotes = $this->elements[$tag_name]->allowChildQuotes;
                            $output .= $this->elements[$tag_name]->htmlBefore;
                            // if argument exists
                            if ($equal_pos !== false) {
                                if (strpos($tag_argument, '"') === false) {
                                    $output .= $tag_argument;
                                }
                                $output .= $this->elements[$tag_name]->htmlCenter;
                            }
                        }
                    }
                }

                $string_segment = strtok('[');
            } while ($string_segment !== false);
            // close left over tags
            while ($tag = array_pop($this->openTags)) {
                $output .= $this->elements[$tag]->htmlAfter;
            }
            return $output;
        }
    }
?>

<?php

    class Lev_TextProcessor_Extension_BbCode_Element {

        public $htmlBefore;
        public $htmlAfter;
        public $htmlCenter;
        public $allowChildQuotes;
        public $allowChildTags;
        public $allowParentTags;

        public function __construct($html_before, $html_after, $html_center = '', $allow_child_quotes = true, $allow_child_tags = true, $allow_parent_tags = true) {
            if ($allow_child_quotes === false && $allow_child_tags === true) throw new Lev_TextProcessor_Exception('You may not allow child tags if you do not allow child quotes.');
            $this->htmlBefore = $html_before;
            $this->htmlAfter = $html_after;
            $this->htmlCenter = $html_center;
            $this->allowChildQuotes = $allow_child_quotes;
            $this->allowChildTags = $allow_child_tags;
            $this->allowParentTags = $allow_parent_tags;
        }
    }
?>

编辑

通过创建以下用于标记化的类来修复。

<?php

    // unlike PHP's strtok() function, this class will not skip over empty tokens.
    class Lev_TextProcessor_Tokenizer {

        protected $string;

        public function __construct($string) {
            $this->string = $string;
        }

        public function getToken($token) {
            $segment_length = strcspn($this->string, $token);
            $token = substr($this->string, 0, $segment_length);
            $this->string = substr($this->string, $segment_length + 1);
            return $token;
        }
    }
?>
4

1 回答 1

0

Although I don't think this is really the solution it seems this is the only way I'm going to get my point across.

It could be something with the way strtok() works that to get the results you want.

Although not perfect I was able to obtain results close to what you were expecting with this:

 <?
 $data1 = strtok('[b][i]test1[/i][/b]','[');
 $data2 = strtok(']');
 $data3 = strtok('[');
 $data4 = strtok(']');
 $data5 = strtok('[');
 $data6 = strtok(']');
 var_dump($data1, $data2,$data3, $data4, $data5, $data6);
 /*
  OUTPUT
    string(2) "b]"
    string(1) "i"
    string(5) "test1"
    string(2) "/i"
    string(3) "/b]"
    bool(false)
 * /
 ?>

As I said it isn't perfect but maybe seeing this will help you on your way to handling this solution. I personally have never handled BBCode with this type parsing instead using preg_match().

于 2011-10-11T19:19:05.830 回答