TalkPHP
 
 
Account Login
Latest Articles
» The basic usage of PHPTAL, a XML/XHTML template library for PHP
» Vulnerable methods and the areas they are commonly trusted in.
» Simple way to protect a form from bot
» The Basics On: How Session Stealing Works
» How to keep your forms from double posting data
IRC Channel
IRC Speech Bubble Join the friendly bunch on IRC...
(#TalkPHP on Freenode)

...Also available via a web interface.

See this thread for information on the TalkPHP Free Hugs Initiative™. Subject to availability.
Associates
Associates
CSS Tutorials
Reply
 
LinkBack Thread Tools Search this Thread Display Modes
Old 10-23-2008, 09:11 PM   #1 (permalink)
FSX
The Wanderer
 
FSX's Avatar
 
Join Date: Oct 2008
Posts: 15
Thanks: 0
FSX is on a distinguished road
Default Wiki syntax parser

I'm not sure if I'm posting in the right forum. I'm new here. ^_^

I'm trying to make a parser that parses everything that is described here (except the gadgets): WikiSyntax - support - Google Code - The reference to the wiki syntax for Google Code projects. I've been using some code from the bbcode parser from FluxBB, the textile parser and the markdown parser.

I've got most of it working, except the lists and quotes. Tables are on the 'to do' list.
  • Lists aren't nesting very well.
  • There is a weird bug it the 'parse list' part. If a list is the last thing on a page the last item of the list isn't being parsed.
  • Quotes can't be nested.

I've been trying to get it working, but I can't. If someone can help me with this I would really appreciate it. :)

Here's the parser code:
Code:
<?php

class WikiSyntaxParser
{
    protected $text, $debug, $image_extensions=array('png', 'jpg', 'gif');

    protected $escape_chars = array('\{', '\}', '\*', '\_', '\`', '\^', '\,,', '\~~', '\[', '\]', '\(', '\)');
    protected $escape_chars_entities = array('{', '}', '*', '_', ''', '^', ',,', '~~', '[', ']', '(', ')');

    protected $hashes = array('hash'=>array(), 'html'=>array());
    
    // Tags
    protected $tags = array(
        'hr' => '<hr />'
        );

    // Function execution order
    protected $functions = array(
        'make_safe',
        'hash_code_blocks',
        'escape_chars',
        'hash_links',
        'hash_urls',
        'parse_headers',
        'parse_typeface',
        'parse_divider',
        'parse_lists',
        'parse_quotes',
        'handle_whitespaces',
        'replace_hashes'
        );

    public function __construct()
    {

    }

    public function parse($text)
    {
        // Set input to class var
        $this->text = $text;

        // Execute functions
        foreach ($this->functions as $function)
            $this->text = $this->{$function}($this->text);

        // Add paragraph tag around text, but make sure there are no empty paragraphs
        $this->text = preg_replace('#<br />\s*?<br />((\s*<br />)*)#is', "</p>$1<p>", $this->text);
        $this->text = str_replace('<p><br />', '<p>', $this->text);
        $this->text = str_replace('<p></p>', '', '<p>'.$this->text.'</p>');

        return $this->text;
    }

    /* --- Prepare and cleanup functions
    ------------------------------------------------- */

    protected function make_safe($text)
    {
        return preg_replace('/^[ ]+$/m', '', self::html_encode(self::linebreaks($text)));
    }

    protected function escape_chars($text)
    {
        return str_replace($this->escape_chars, $this->escape_chars_entities, $text);
    }

    protected function handle_whitespaces($text)
    {
        return str_replace(array("\n", "\t", '  ', '  '), array('<br />', '&nbsp; &nbsp; ', '&nbsp; ', ' &nbsp;'), $text);
    }

    protected function replace_hashes($text)
    {
        if (count($this->hashes['hash']) > 0)
            return str_replace($this->hashes['hash'], $this->hashes['html'], $text);
        else
            return $text;
    }

    /* --- Hash functions
    ------------------------------------------------- */

    protected function hash_code_blocks($text)
    {
        $pattern = '/(\{{3})(.*?)(\}{3})/ims';
        return preg_replace_callback($pattern, array(&$this, 'cb_hash_code_blocks'), $text);
    }

    // Makes hashes of links
    protected function hash_links($text)
    {
        $pattern = '/\[(\((rel|class|id)\=(.*?)\))*((http|https|ftp|irc):\/\/[a-zA-Z0-9._%-?&\/:\#]+)[ ](.*?)\]/i';
        return preg_replace_callback($pattern, array(&$this, 'cb_hash_links'), $text);
    }

    // Makes hashes of urls
    protected function hash_urls($text)
    {
        $pattern = '/(\((rel|class|id)\=(.*?)\))*((http|https|ftp|irc):\/\/[a-zA-Z0-9._%-?&\/:\#]+)/i';
        return preg_replace_callback($pattern, array(&$this, 'cb_hash_urls'), $text);
    }

    /* --- Parse functions
    ------------------------------------------------- */

    protected function parse_headers($text)
    {
        return preg_replace_callback('/^(\={1,6})[ ]*(.*?)[ ]*\=*\n+/m', array(&$this, 'cb_parse_headers'), $text);
    }

    protected function parse_typeface($text)
    {
        // Patterns
        $patterns = array(
            '/\*(.*?)\*/i',
            '/_(.*?)_/i',
            '/`(.*?)`/i',
            '/\^(.*?)\^/i',
            '/,,(.*?),,/i',
            '/~~(.*?)~~/i',
            );

        // Replacements
        $replacements = array(
            '<strong>$1</strong>',
            '<em>$1</em>',
            '<code>$1</code>',
            '<sup>$1</sup>',
            '<sub>$1</sub>',
            '<span style="text-decoration: line-through;">$1</span>',
            );

        return preg_replace($patterns, $replacements, $text);
    }

    protected function parse_divider($text)
    {
        return preg_replace('/^(\-{4,})\n+/m', '</p>'.$this->tags['hr'].'<p>', $text);
    }

    protected function parse_quotes($text)
    {
        return preg_replace_callback('/([ ]{4,}|[\t]{1,})(.*?).+\n(.+\n)*\n*\n+/m', array(&$this, 'cb_parse_quotes'), $text); // ~_~
    }

    protected function parse_lists($text)
    {
        return preg_replace_callback('/([ ]{1})([*#]+)[ ](.*?)(.+\n)*/m', array(&$this, 'cb_parse_lists'), $text);
    }
    
    /* --- Callback functions
    ------------------------------------------------- */

    protected function cb_parse_headers($matches)
    {
        $level = strlen($matches[1]);
        return '</p><h'.$level.'>'.$matches[2].'</h'.$level.'><p>';
    }

    protected function cb_hash_code_blocks($matches)
    {
        $code_block_hash = md5($matches[0].mt_rand());
        $attribute = NULL;

        $this->hashes['hash'][] = $code_block_hash;
        $this->hashes['html'][] = '</p><pre><code>'.trim(str_replace("\t", '&nbsp; &nbsp; ', $matches[2]), "\n").'</code></pre><p>';

        return $code_block_hash;
    }

    protected function cb_hash_links($matches)
    {
        $link_hash = md5($matches[0].mt_rand());
        $attribute = NULL;

        $this->hashes['hash'][] = $link_hash;

        if (!empty($matches[2]))
            $attribute = ' '.$matches[2].'="'.$matches[3].'"';

        if (!in_array(self::get_ext($matches[6]), $this->image_extensions))
            $this->hashes['html'][] = '<a href="'.$matches[4].'" title="'.$matches[6].'"'.$attribute.'>'.$matches[6].'</a>';
        else
            $this->hashes['html'][] = '<a href="'.$matches[4].'"'.$attribute.'><img src="'.$matches[6].'" alt="" /></a>';

        return $link_hash;
    }

    protected function cb_hash_urls($matches)
    {
        $url_hash = md5($matches[0].mt_rand());
        $attribute = NULL;

        $this->hashes['hash'][] = $url_hash;

        if (!empty($matches[2]))
            $attribute = ' '.$matches[2].'="'.$matches[3].'"';

        if (!in_array(self::get_ext($matches[4]), $this->image_extensions))
            $this->hashes['html'][] = '<a href="'.$matches[4].'"'.$attribute.'>'.$matches[4].'</a>';
        else
            $this->hashes['html'][] = '<img src="'.$matches[4].'" alt=""'.$attribute.' />';

        return $url_hash;
    }

    protected function cb_parse_quotes($matches)
    {
        $temp_lines = explode("\n", $matches[0]);

        foreach ($temp_lines as $line)
            $lines[] = trim($line);

        return '</p><blockquote><p>'.implode("\n", $lines).'</p></blockquote><p>';
    }
    
    // NOTICE: Nesting lists is currently not possible, because it's breaking my head. O_O
    protected function cb_parse_lists($matches)
    {
        $items = explode("\n", trim($matches[0], "\t\n"));
        $count_items = count($items);

        for ($x=0;$x < $count_items;$x++)
        {
            // Items
            $cur_item = $items[$x];
            $cur_item_trim = trim($cur_item, "* #");
            $next_item = ($x+1) < $count_items ? $items[$x+1] : false;
            
            if (preg_match('/([ ]*)([*#]+)[ ](.*?)/i', $cur_item, $m))
            {
                $cur_item_len = strlen($m[1]);

                $next_item_len = 0;
                $list_el = $m[2] == '*' ? 'ul' : 'ol';

                if (preg_match('/([ ]*)([*#]+)[ ](.*?)/i', $next_item, $nm))
                    $next_item_len = strlen($nm[1]);
                    
                if (!isset($lists[$cur_item_len]))
                {
                    $lists[$cur_item_len] = $cur_item_len;
                    $line = '<'.$list_el.'><li>'.$cur_item_trim;
                }
                else
                {
                    $line = '<li>'.$cur_item_trim;
                }
                
                if($next_item_len <= $cur_item_len)
                    $line .= '</li>';
                
                foreach (array_reverse($lists) as $k => $v)
                {
                    $result[] = $v;
                
                    if($v > $next_item_len) 
                    {
                        $line .= '</'.$list_el.'>';
                        
                        if($v > 1)
                            $line .= '</li>';
                            
                        unset($lists[$v]);
                    }
                }
            }
            
            $output[] = $line;
        }

        return '</p>'.implode($output).'<p>';
        // return '</p>'.implode($output).'<pre>'.print_r($result, true).'</pre><p>';
        // return '</p><pre>'.self::html_encode(print_r($output, true)).'</pre><pre>'.print_r($result, true).'</pre><p>';
    }

    /* --- String manipulation functions
    ------------------------------------------------- */

    // Encode html
    static protected function html_encode($str)
    {
        return htmlspecialchars(trim($str), ENT_QUOTES, 'UTF-8');
    }

    // Convert linebreakes to unix linebreaks
    static protected function linebreaks($str)
    {
        return str_replace(array("\r\n", "\r"), "\n", $str);
    }

    // Get extension from image
    static protected function get_ext($filepath)
    {
        $explode = explode('.', $filepath);

        return $explode[count($explode)-1];
    }
}

?>
How to use it:
Code:
$text = '*Bold* _italic_ ~~strikethrough~~';

require 'parser.php';
$parser = new WikiSyntaxParser;

echo $parser->parse($text);
__________________
61924
FSX is offline  
Reply With Quote
Old 10-24-2008, 05:06 PM   #2 (permalink)
FSX
The Wanderer
 
FSX's Avatar
 
Join Date: Oct 2008
Posts: 15
Thanks: 0
FSX is on a distinguished road
Default

C'mon people. Help me. T_T
__________________
61924
FSX is offline  
Reply With Quote
Reply



Currently Active Users Viewing This Thread: 1 (0 members and 1 guests)
 
Thread Tools Search this Thread
Search this Thread:

Advanced Search
Display Modes

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

vB code is On
Smilies are On
[IMG] code is On
HTML code is Off
Trackbacks are On
Pingbacks are On
Refbacks are On


All times are GMT. The time now is 04:21 PM.

 
     

Powered by vBulletin® Version 3.6.8
Copyright ©2000 - 2013, Jelsoft Enterprises Ltd.
Search Engine Optimization by vBSEO 3.1.0
Inactive Reminders By Icora Web Design