// 2012-05-23 10:35:00 +00:00
/**
 * A very basic parser / serializer web service.
 */
var express = require ( 'express' ) ,
html5 = require ( 'html5' ) ;
var mp = '../modules/parser/' ;
var ParserPipelineFactory = require ( mp + 'mediawiki.parser.js' ) . ParserPipelineFactory ,
ParserEnv = require ( mp + 'mediawiki.parser.environment.js' ) . MWParserEnvironment ,
WikitextSerializer = require ( mp + 'mediawiki.WikitextSerializer.js' ) . WikitextSerializer ;
// Settings handed to the parser environment that the request handlers use.
var parserEnvOptions = {
    // templates are pulled from the English Wikipedia for the time being
    wgScript: 'http://en.wikipedia.org/w',
    // left empty so that links stay inside the 'proxied' content and can be
    // followed by clicking around
    wgScriptPath: '', // previously: http://en.wikipedia.org/wiki
    wgScriptExtension: '.php',
    // XXX: this should become an option
    wgUploadPath: 'http://upload.wikimedia.org/wikipedia/commons',
    fetchTemplates: true,
    // switch for debug output
    debug: false,
    trace: false,
    maxDepth: 40
};
var env = new ParserEnv(parserEnvOptions);
// A single factory bound to the environment; the parsing routes request a
// new 'text/x-mediawiki/full' pipeline from it for each request.
var parserPipelineFactory = new ParserPipelineFactory(env);

// The Express application, with body parsing installed ahead of the routes
// (the POST handlers read req.body.content).
var app = express.createServer();
var bodyParsingMiddleware = express.bodyParser();
app.use(bodyParsingMiddleware);
/**
 * Landing page: a short usage summary plus links to an example article and
 * to the two manual-testing forms.
 */
app.get('/', function (req, res) {
    // Declare the payload as HTML explicitly, as the other HTML-producing
    // routes in this file do.
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    res.write('<body><strong>Welcome to the alpha test web service for the ' +
        // The anchor is closed with </a>; an opening <a> here would leave the
        // link unterminated.
        '<a href="http://www.mediawiki.org/wiki/Parsoid">Parsoid project</a>.</strong>');
    res.write('<p>Usage: <ul><li>GET /title for the DOM');
    res.write('<li>POST a DOM as parameter "content" to /title for the wikitext</ul>');
    res.write('You can browse the English Wikipedia, for example at ' +
        '<strong><a href="/Main_Page">Main Page</a></strong>. ');
    res.end('There are also forms for experiments: <strong><a href="/_wikitext/">WikiText -> HTML DOM</a></strong> and <strong><a href="/_html/">HTML DOM -> WikiText</a></strong>.');
});
// 2012-05-23 15:50:35 +00:00
/**
 * Escape the characters that are significant in HTML text and attribute
 * values so that arbitrary text can be embedded in a page.
 *
 * @param {*} s Text to escape. null / undefined yield ''; any other
 *     non-string value is converted with String() first.
 * @returns {string} s with & < > " ' replaced by character references.
 */
var htmlSpecialChars = function (s) {
    if (s === null || s === undefined) {
        return '';
    }
    // '&' has to be handled first, otherwise the ampersands introduced by
    // the later replacements would be escaped a second time.
    return String(s)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#039;');
};
// 2012-05-23 10:35:00 +00:00
/**
 * Write a POST form consisting of one "content" textarea and a submit
 * button to the response. The response is not ended here.
 *
 * @param {Object} res Response to write to; only res.write() is called.
 * @param {string} [content] Text to pre-fill the textarea with. It is passed
 *     through htmlSpecialChars(); a falsy value produces an empty textarea.
 */
var textarea = function (res, content) {
    res.write('<form method=POST><textarea name="content" cols=90 rows=9>');
    // Escaping keeps markup in the submitted text from closing the textarea.
    res.write((content && htmlSpecialChars(content)) || '');
    res.write('</textarea><br><input type="submit"></form>');
};
/**
 * Form-based HTML DOM -> wikitext interface for manual testing
 */
// GET /_html/<title>: show an empty form into which an HTML DOM can be pasted.
app.get(/\/_html\/(.*)/, function (req, res) {
    // The captured path suffix becomes the page name on the module-level env.
    // NOTE(review): env is one object used by every handler, so overlapping
    // requests may overwrite each other's pageName -- confirm whether that matters.
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    res.write("Your HTML DOM:");
    textarea(res);
    res.end('');
});
// POST /_html/<title>: serialize the submitted HTML DOM to wikitext, print the
// (escaped) wikitext in a <pre>, then show the form again with the input.
app.post(/\/_html\/(.*)/, function (req, res) {
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');

    // A missing form field would otherwise be concatenated below as the
    // literal text "undefined".
    var submitted = req.body ? req.body.content : undefined;
    var content = (submitted === undefined || submitted === null) ?
        '' : String(submitted);

    var p = new html5.Parser();
    p.parse('<html><body>' + content + '</body></html>');
    res.write('<pre style="background-color: #efefef">');
    new WikitextSerializer({ env: env }).serializeDOM(
        // document -> first child -> second child; presumably <html> and then
        // <body> (with <head> at index 0) -- TODO confirm against the html5 tree
        p.tree.document.childNodes[0].childNodes[1],
        function (c) {
            // The serializer output is shown as text inside HTML, so escape it.
            res.write(htmlSpecialChars(c));
        });
    res.write('</pre>');
    res.write("<hr>Your HTML DOM:");
    textarea(res, content);
    res.end('');
});
/**
 * Form-based wikitext -> HTML DOM interface for manual testing
 */
// GET /_wikitext/<title>: show an empty form into which wikitext can be typed.
app.get(/\/_wikitext\/(.*)/, function (req, res) {
    // The captured path suffix becomes the page name on the module-level env.
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    res.write("Your wikitext:");
    textarea(res);
    res.end('');
});
// POST /_wikitext/<title>: run the submitted wikitext through a full parser
// pipeline, output the resulting body HTML, then show the form again.
app.post(/\/_wikitext\/(.*)/, function (req, res) {
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');

    // Treat a missing form field as empty input.
    var submitted = req.body ? req.body.content : undefined;
    var content = (submitted === undefined || submitted === null) ?
        '' : String(submitted);

    var parser = parserPipelineFactory.makePipeline('text/x-mediawiki/full');
    // The response is completed from the pipeline's 'document' event.
    parser.on('document', function (document) {
        res.write(document.body.innerHTML);
        res.write("<hr>Your wikitext:");
        textarea(res, content);
        res.end('');
    });
    try {
        console.log('starting parsing of ' + req.params[0]);
        // FIXME: This does not handle includes or templates correctly
        parser.process(content);
    } catch (e) {
        console.log(e);
        // res.write()/res.end() take a string or Buffer, not an Error object,
        // and the response has to be ended or the client keeps waiting.
        // The message is escaped because the payload is served as HTML.
        res.statusCode = 500;
        res.end(htmlSpecialChars(String(e)));
    }
});
/**
 * Regular article parsing
 */
// GET /<title>: parse the named page by transcluding it ('{{:<title>}}') and
// respond with the resulting body HTML.
app.get(/\/(.*)/, function (req, res) {
    env.pageName = req.params[0];
    // Browsers request this on their own; do not start a parse for it.
    if (env.pageName === 'favicon.ico') {
        res.end('no favicon yet..');
        return;
    }
    var parser = parserPipelineFactory.makePipeline('text/x-mediawiki/full');
    // The response is completed from the pipeline's 'document' event.
    parser.on('document', function (document) {
        res.end(document.body.innerHTML);
    });
    res.setHeader('Content-Type', 'text/html; charset=UTF-8');
    try {
        console.log('starting parsing of ' + req.params[0]);
        // FIXME: This does not handle includes or templates correctly
        parser.process('{{:' + req.params[0] + '}}');
    } catch (e) {
        console.log(e);
        // res.end() takes a string or Buffer, not an Error object. Nothing is
        // written after it: the response is finished at that point, and a GET
        // request carries no form content to echo back.
        res.statusCode = 500;
        res.end(htmlSpecialChars(String(e)));
    }
});
/**
 * Regular article serialization using POST
 */
// POST /<title>: parse the HTML submitted as "content" and stream its
// wikitext serialization back as text/x-mediawiki.
app.post(/\/(.*)/, function (req, res) {
    env.pageName = req.params[0];
    res.setHeader('Content-Type', 'text/x-mediawiki; charset=UTF-8');
    var p = new html5.Parser();
    p.parse(req.body.content);
    new WikitextSerializer({ env: env }).serializeDOM(
        // document -> first child -> second child; presumably <html> and then
        // <body> -- TODO confirm against the html5 tree
        p.tree.document.childNodes[0].childNodes[1],
        // bind() keeps `this` pointing at res when the serializer calls back
        res.write.bind(res));
    res.end('');
});
// Export the configured application object to whoever requires this module;
// no listen() call is visible in this part of the file.
module . exports = app ;