/** * A very basic parser / serializer web service. */ var express = require('express'), jsDiff = require('diff'), html5 = require('html5'); var mp = '../modules/parser/'; var ParserPipelineFactory = require(mp + 'mediawiki.parser.js').ParserPipelineFactory, ParserEnv = require(mp + 'mediawiki.parser.environment.js').MWParserEnvironment, WikitextSerializer = require(mp + 'mediawiki.WikitextSerializer.js').WikitextSerializer, TemplateRequest = require(mp + 'mediawiki.ApiRequest.js').TemplateRequest; var env = new ParserEnv( { // fetch templates from enwiki for now.. wgScript: 'http://en.wikipedia.org/w', // stay within the 'proxied' content, so that we can click around wgScriptPath: '/', //http://en.wikipedia.org/wiki', wgScriptExtension: '.php', // XXX: add options for this! wgUploadPath: 'http://upload.wikimedia.org/wikipedia/commons', fetchTemplates: true, // enable/disable debug output using this switch debug: false, trace: false, maxDepth: 40, interwikis: "en|de|fr|nl|it|pl|es|ru|ja|pt|zh|sv|vi|uk|ca|no|fi|cs|hu|ko|fa|id|tr|ro|ar|sk|eo|da|sr|lt|ms|eu|he|sl|bg|kk|vo|war|hr|hi|et|az|gl|simple|nn|la|th|el|new|roa-rup|oc|sh|ka|mk|tl|ht|pms|te|ta|be-x-old|ceb|br|be|lv|sq|jv|mg|cy|lb|mr|is|bs|yo|an|hy|fy|bpy|lmo|pnb|ml|sw|bn|io|af|gu|zh-yue|ne|nds|ku|ast|ur|scn|su|qu|diq|ba|tt|my|ga|cv|ia|nap|bat-smg|map-bms|wa|kn|als|am|bug|tg|gd|zh-min-nan|yi|vec|hif|sco|roa-tara|os|arz|nah|uz|sah|mn|sa|mzn|pam|hsb|mi|li|ky|si|co|gan|glk|ckb|bo|fo|bar|bcl|ilo|mrj|fiu-vro|nds-nl|tk|vls|se|gv|ps|rue|dv|nrm|pag|koi|pa|rm|km|kv|udm|csb|mhr|fur|mt|wuu|lij|ug|lad|pi|zea|sc|bh|zh-classical|nov|ksh|or|ang|kw|so|nv|xmf|stq|hak|ay|frp|frr|ext|szl|pcd|ie|gag|haw|xal|ln|rw|pdc|pfl|krc|crh|eml|ace|gn|to|ce|kl|arc|myv|dsb|vep|pap|bjn|as|tpi|lbe|wo|mdf|jbo|kab|av|sn|cbk-zam|ty|srn|kbd|lo|ab|lez|mwl|ltg|ig|na|kg|tet|za|kaa|nso|zu|rmy|cu|tn|chr|got|sm|bi|mo|bm|iu|chy|ik|pih|ss|sd|pnt|cdo|ee|ha|ti|bxr|om|ks|ts|ki|ve|sg|rn|dz|cr|lg|ak|tum|fj|st|tw|ch|ny|ff|xh|ng|ii|cho|mh|aa|kj|ho|mus|kr|hz" } ); var parserPipelineFactory = new ParserPipelineFactory( env ); //var parser = parserPipelineFactory.makePipeline( 'text/x-mediawiki/full' ); var app = express.createServer(); app.use(express.bodyParser()); app.get('/', function(req, res){ res.write('
Usage:
There are also some tools for experiments:
'); new WikitextSerializer({env: env}).serializeDOM( p.tree.document.childNodes[0].childNodes[1], function( c ) { res.write( htmlSpecialChars( c ) ); }); res.write(''); res.write( "
' + htmlSpecialChars( out ) + '
' + patch); }; var parse = function ( res, cb, src ) { var parser = parserPipelineFactory.makePipeline( 'text/x-mediawiki/full' ); parser.on('document', cb.bind( null, res, src ) ); try { res.setHeader('Content-Type', 'text/html; charset=UTF-8'); parser.process( src ); } catch (e) { console.log( e ); res.end( e ); } }; /** * Round-trip article testing */ app.get( new RegExp('/_rt/(?:(?:(?:' + env.interwikis + '):)?(' + env.interwikis + '):)?(.*)') , function(req, res){ env.pageName = req.params[1]; if ( req.params[0] ) { env.wgScriptPath = '/_rt/' + req.params[0] + ':'; env.wgScript = 'http://' + req.params[0] + '.wikipedia.org/w'; } else { // default to English Wikipedia env.wgScriptPath = '/_rt/'; env.wgScript = 'http://en.wikipedia.org/w'; } if ( env.pageName === 'favicon.ico' ) { res.end( 'no favicon yet..' ); return; } var target = env.resolveTitle( env.normalizeTitle( env.pageName ), '' ); console.log('starting parsing of ' + target); var tpr = new TemplateRequest( env, target ); tpr.once('src', parse.bind( null, res, roundTripDiff )); }); /** * Form-based round-tripping for manual testing */ app.get(/\/_rtform\/(.*)/, function(req, res){ env.pageName = req.params[0]; res.setHeader('Content-Type', 'text/html; charset=UTF-8'); res.write( "Your wikitext:" ); textarea( res ); res.end(''); }); app.post(/\/_rtform\/(.*)/, function(req, res){ env.pageName = req.params[0]; res.setHeader('Content-Type', 'text/html; charset=UTF-8'); var parser = parserPipelineFactory.makePipeline( 'text/x-mediawiki/full' ); parse( res, roundTripDiff, req.body.content); }); /** * Regular article parsing */ app.get(new RegExp( '/(?:(?:(?:' + env.interwikis + '):)?(' + env.interwikis + '):)?(.*)' ), function(req, res){ env.pageName = req.params[1]; if ( req.params[0] ) { env.wgScriptPath = '/' + req.params[0] + ':'; env.wgScript = 'http://' + req.params[0] + '.wikipedia.org/w'; } else { // default to English Wikipedia env.wgScriptPath = '/'; env.wgScript = 'http://en.wikipedia.org/w'; } if ( env.pageName === 'favicon.ico' ) { res.end( 'no favicon yet..'); return; } var target = env.resolveTitle( env.normalizeTitle( env.pageName ), '' ); console.log('starting parsing of ' + target); var tpr = new TemplateRequest( env, target ); tpr.once('src', parse.bind( null, res, function ( res, src, document ) { res.end(document.body.innerHTML); })); }); /** * Regular article serialization using POST */ app.post(/\/(.*)/, function(req, res){ env.pageName = req.params[0]; env.wgScriptPath = '/'; res.setHeader('Content-Type', 'text/x-mediawiki; charset=UTF-8'); var p = new html5.Parser(); p.parse( req.body.content ); new WikitextSerializer({env: env}).serializeDOM( p.tree.document.childNodes[0].childNodes[1], res.write.bind( res ) ); res.end(''); }); module.exports = app;