
// Source-viewer residue, not part of the program: "165 lines", "5.2 KiB", "Raw Normal View History".

/**
 * A very basic parser / serializer web service.
 */
var express = require('express'),
html5 = require('html5');
var mp = '../modules/parser/';
var ParserPipelineFactory = require(mp + 'mediawiki.parser.js').ParserPipelineFactory,
ParserEnv = require(mp + 'mediawiki.parser.environment.js').MWParserEnvironment,
WikitextSerializer = require(mp + 'mediawiki.WikitextSerializer.js').WikitextSerializer;
/*
 * Parser environment shared by every pipeline and serializer created in
 * this file. Note that the route handlers also mutate env.pageName per
 * request.
 */
var env = new ParserEnv( {
    // fetch templates from enwiki for now..
    // NOTE(review): the value is empty here; presumably it has to name the
    // wiki's script endpoint for fetchTemplates to be useful -- confirm.
    wgScript: '',
    // stay within the 'proxied' content, so that we can click around
    wgScriptPath: '',
    wgScriptExtension: '.php',
    // XXX: add options for this!
    wgUploadPath: '',
    fetchTemplates: true,
    // enable/disable debug output using this switch
    debug: false,
    trace: false,
    maxDepth: 40
} );

// Factory used by the handlers to build a fresh pipeline per request.
var parserPipelineFactory = new ParserPipelineFactory( env );

var app = express.createServer();

// The POST routes read req.body.content; req.body is only populated when a
// body-parsing middleware is installed before the routes are registered.
app.use( express.bodyParser() );
/**
 * Landing page: a short usage note plus links to an example article and
 * to the two manual test forms.
 *
 * NOTE(review): the "Parsoid project" link has an empty href, so it points
 * back at the current page -- fill in the project URL.
 */
app.get('/', function(req, res){
    // Declare the payload as HTML, like the other HTML-producing routes do.
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    res.write('<body><strong>Welcome to the alpha test web service for the ' +
        '<a href="">Parsoid project</a>.</strong>');
    res.write( '<p>Usage: <ul><li>GET /title for the DOM');
    res.write('<li>POST a DOM as parameter "content" to /title for the wikitext</ul>');
    res.write('You can browse the English Wikipedia, for example at '+
        '<strong><a href="/Main_Page">Main Page</a></strong>. ');
    res.end('There are also forms for experiments: <strong><a href="/_wikitext/">WikiText -&gt; HTML DOM</a></strong> and <strong><a href="/_html/">HTML DOM -&gt; WikiText</a></strong>.');
});
/**
 * Escape a string so it can be embedded in HTML element content or in a
 * quoted attribute value without being interpreted as markup.
 *
 * '&' is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} s Raw text.
 * @returns {string} Text with &, <, >, " and ' replaced by entities.
 */
var htmlSpecialChars = function ( s ) {
    return s.replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#039;');
};
/**
 * Write a self-posting form with a single "content" textarea to the
 * response. The response is not ended here; that is left to the caller.
 *
 * @param {Object} res Response object; only its write() method is used.
 * @param {string} [content] Text to pre-fill the textarea with. It is
 *   HTML-escaped before being written; when omitted or empty the textarea
 *   is left blank.
 */
var textarea = function ( res, content ) {
    res.write('<form method=POST><textarea name="content" cols=90 rows=9>');
    res.write( content ? htmlSpecialChars( content ) : '' );
    res.write('</textarea><br><input type="submit"></form>');
};
/**
 * Form-based HTML DOM -> wikitext interface for manual testing
 */
// GET /_html/<title>: serve an empty form into which an HTML DOM can be pasted.
app.get(/\/_html\/(.*)/, function(req, res){
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    res.write( "Your HTML DOM:" );
    textarea( res );
    // Finish the response; otherwise the client keeps waiting.
    res.end();
});

// POST /_html/<title>: serialize the submitted HTML to wikitext, show the
// escaped wikitext in a <pre>, then show the form again pre-filled with the
// submitted HTML.
app.post(/\/_html\/(.*)/, function(req, res){
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    // Treat a missing body or field as empty input instead of throwing.
    var content = ( req.body && req.body.content ) || '';
    var p = new html5.Parser();
    p.parse( '<html><body>' + content + '</body></html>' );
    res.write('<pre style="background-color: #efefef">');
    new WikitextSerializer({env: env}).serializeDOM(
        // The submitted markup was wrapped in <html><body> above; pass the
        // <body> element (second child of <html>, after <head>).
        // NOTE(review): assumes html5.Parser exposes the parsed tree as
        // p.tree.document and that serializeDOM invokes the callback
        // synchronously -- confirm against both libraries.
        p.tree.document.childNodes[0].childNodes[1],
        function( c ) {
            res.write( htmlSpecialChars( c ) );
        } );
    res.write('</pre>');
    res.write( "<hr>Your HTML DOM:" );
    textarea( res, content );
    res.end();
});
/**
 * Form-based wikitext -> HTML DOM interface for manual testing
 */
// GET /_wikitext/<title>: serve an empty form into which wikitext can be pasted.
app.get(/\/_wikitext\/(.*)/, function(req, res){
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    res.write( "Your wikitext:" );
    textarea( res );
    // Finish the response; otherwise the client keeps waiting.
    res.end();
});

// POST /_wikitext/<title>: run the submitted wikitext through a full parser
// pipeline, show the resulting HTML, then show the form again pre-filled
// with the submitted wikitext.
app.post(/\/_wikitext\/(.*)/, function(req, res){
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    // Treat a missing body or field as empty input instead of throwing.
    var content = ( req.body && req.body.content ) || '';
    var parser = parserPipelineFactory.makePipeline( 'text/x-mediawiki/full' );
    parser.on('document', function ( document ) {
        // NOTE(review): assumes the pipeline emits a DOM document with a
        // body element -- confirm against the pipeline implementation.
        res.write( document.body.innerHTML );
        res.write( "<hr>Your wikitext:" );
        textarea( res, content );
        res.end();
    });
    try {
        console.log('starting parsing of ' + req.params[0]);
        // FIXME: This does not handle includes or templates correctly
        parser.process( content );
    } catch (e) {
        console.log( e );
        // write()/end() need a string, not an Error object; the text is
        // escaped because the response is served as HTML.
        res.end( htmlSpecialChars( String( e ) ) );
    }
});
/**
 * Regular article parsing
 */
// GET /<title>: parse the named page by transcluding it ('{{:title}}') and
// respond with the resulting HTML.
app.get(/\/(.*)/, function(req, res){
    env.pageName = req.params[0];
    if ( env.pageName === 'favicon.ico' ) {
        res.end( 'no favicon yet..');
        // Stop here; without this the favicon request would go on to be
        // parsed as an article and write to an already finished response.
        return;
    }
    var parser = parserPipelineFactory.makePipeline( 'text/x-mediawiki/full' );
    parser.on('document', function ( document ) {
        // NOTE(review): assumes the pipeline emits a DOM document with a
        // body element -- confirm against the pipeline implementation.
        res.end( document.body.innerHTML );
    });
    try {
        res.setHeader('Content-Type', 'text/html; charset=UTF-8');
        console.log('starting parsing of ' + req.params[0]);
        // FIXME: This does not handle includes or templates correctly
        parser.process('{{:' + req.params[0] + '}}' );
    } catch (e) {
        console.log( e );
        // end() needs a string, not an Error object; the text is escaped
        // because the response is served as HTML. A GET request carries no
        // form content, so no form is echoed back here.
        res.end( htmlSpecialChars( String( e ) ) );
    }
});
/**
 * Regular article serialization using POST
 *
 * Expects the HTML DOM in the "content" body parameter and responds with
 * the serialized wikitext as text/x-mediawiki.
 */
app.post(/\/(.*)/, function(req, res){
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/x-mediawiki; charset=UTF-8');
    var p = new html5.Parser();
    // Treat a missing body or field as empty input instead of throwing.
    p.parse( ( req.body && req.body.content ) || '' );
    new WikitextSerializer({env: env}).serializeDOM(
        // Pass the <body> element (second child of <html>, after <head>).
        // NOTE(review): assumes html5.Parser exposes the parsed tree as
        // p.tree.document and that serializeDOM invokes the callback
        // synchronously -- confirm against both libraries.
        p.tree.document.childNodes[0].childNodes[1],
        res.write.bind( res ) );
    res.end();
});

module.exports = app;