From 79f3b318d03d3f3d9844829f02e7448c946fbb9f Mon Sep 17 00:00:00 2001 From: Sam Smith Date: Tue, 25 Apr 2017 12:26:49 +0100 Subject: [PATCH] Track and discard duplicate enqueued events The eventLogging change listener is responsible for ensuring that the internal state of Page Previews matches its external state (that perceived by the user and UA). It does this by logging events with ext.eventLog.Schema#log. This makes it the perfect place to track and discard duplicate events enqueued by the Page Previews codebase observed in T161769. Make the change listener track events that it's logged by storing hashes of the dynamic parts of them in memory. If the eventLogging change listener sees the same event more than once, then it discards it and increments a counter in StatsD. This behaviour should be enabled for a matter of days as we should see whether the duplicate events are being enqueued by the Page Previews codebase immediately. Bug: T163198 Change-Id: I6a38a2619d777a76dd45eb7300079e1f07b07b12 --- resources/dist/index.js | Bin 122431 -> 124358 bytes resources/dist/index.js.map | Bin 156968 -> 159394 bytes src/changeListeners/eventLogging.js | 88 ++++++++-- src/index.js | 2 +- .../changeListeners/eventLogging.test.js | 153 ++++++++++++++---- 5 files changed, 201 insertions(+), 42 deletions(-) diff --git a/resources/dist/index.js b/resources/dist/index.js index f4ce82e6e1a3f8000579e088380f049d48e6d44c..f45e17e2129bf719451b0bc4b74f4be33fcdb47e 100644 GIT binary patch delta 1786 zcmah}OK%%h6qXU9Or#c6XtLxaNR6F1Gnts$O;e{yX)97e5-H&!!lTzS$DWJFbIsg4 zPOT_-$GT!vtZvz{K`1FgJPJDyVnO={*s(z@;0GXa?wv_UKn2Mf&%JZbcOKt&&bMEm z`0mFOpT4&@c!9|PC8z*Fs+dPDNTh!1i^|+ggDbeU`pS&UfQ2m)a}{*#5tyPMEJ;KN zlM-Y4t8S$_S9QzaIIz}iN>tEbl1qEUvN)N{E|61G#88CnNPide z7IWuX5U}`p;p4K>2&DzW&5fq&;8tt3TGgFj(Bao(-a(4H zOA+%X$@5Jo~;iFFuiM~odRjoGT*1#MZ`kFA-|jI0>#%2IC-n|Sgksry%ry>xbx?S z-xLZTra$l3($PDohaW#uIGH~9>5V1n1=wL_P%-n`nMSk%m1twMDJX9A66=!8c6Q4o z4Bf=zx9RWSkEYk}Ka~#eE=_0?%eb1iHukkRE#4$?jvM0Aw?7(t-B=K&xv@zu?G@bw zX69s5NB-OGcp_U_V(=!^>vhUG!Ccms$-g+9jS8cKhqT-vPnN;WM6OkwC3{%W5qJY` z(jei#Ov8D6Bhd&FO+@_)i{P^8`y`8^VbJG;M%V`*72_fLC1Au)p9YrnOaTc^Qr$6e zcDU6fS?8NW36dN4+JvBcO;XDjkMX>ySDvJuCq+|jvzXW+RUzu-X-h>6;!Q6Yu&9+S zMCQ29ovT(Vi+Wi&P8Qy_kDgZa{Fyl+@v*o~Q`-k*vQM1`-sy%$lH=npITN46+Tk9% zzOlASQFU=*NaDZx7>TZs*GU!(=1yPZX*g>O`Bc!vgMPraZ=18I2i5*Zn#9240V z=&1k6_u51FzN$CH5xxOK;*pl_KXiN;BIi8)_3pI^%WOHTLG$S^8(~V@ELCR<&Up1fs?Jy%J-LQ<+iPJVj&Q zk}6NkEXmAES4hh*QYg;PO`W`RtHR{Wp8}KD9}=9bvO{h9Gebt<$$2OAr;8tFbmx1y`2ui)?8eioSA6~8W7VJG!?2RC+~4%;^LgVZjb!*$&VRTCM#?b Q*!*`-bHw(3QAYC%0P`nRA^-pY diff --git a/resources/dist/index.js.map b/resources/dist/index.js.map index 183729eaababd82f454931fedf273cd9f464f372..e45d82106b35848c0b28a4964deb312a26c83545 100644 GIT binary patch delta 2109 zcma)7O>9(E6wYlZP^2jZ+FC>C87MN-PT%x(WTvH^%4;o9!%s~C6DiU=Gk0b#ee>S* z-8(H4LfVD8G~i3T#HDUrn1~WXqKT2MiP42yLSo#w(zqkWpL6g0Bo#0&-fQ2v_nh;c z^L^*c$KQ5*_^{*FiTx~EB^30^;%$8>L+ljBDWz|@MnGmKTAq?7Kr7Ma7dc!L(c81H5_VAry z=!~!9&2@A0`b_9_4TsCIUVMf^c)dTY!1nOBM0+0wopa)R)^WH=aBX%{o5JoRJtQ5j z&L0Tx?i&mt)*g;7?F#Rn8Vv7tbceTgJr{o3*YjR%08{V3cl+p1vrnz=-qkgW-21|P z`}2*{RFs$iSz;h$z`Y_=g??*8a%?2aC7e8WX~d>L-J&0GSt?sg9t`ORP6)=JBxU7z zBEbc^SUJC($d!Dr=qDu0l@j<}Nff-b#Nm3Bf4f=#%dbKMTGp;=^A_pZUI(iD<83|5 z;51=YJ;93}%LDf$E3!b}Pb_(bs+W`8_W)7YG&vS+))|8vOWu-93;`1^`i$hcpjnrJ z=j*c9xnLL|y#4#W17mh7WhYZ(DLW2{1e`Av1e1`Zf(uJ^-y!|iAOoY5z+tKp-Ex^% zl%+`+9_G-3?&TP8dDM_@_&`_{AG??3E|)d% z3oy6FJPGR80Wz=;T)$YvwlZxKaYMrqAjf)OopPzQQt$&1ewmpgAIKrK*C9O{)D=D! z>SK5))n$gEsp|ssR;#SqifO?-+6|O^wc_#|m8_l)xPZph5iQApA#7@*LC$wwE;O^| z$kXR0#_|F!vW4IjTjOk9Sf-XUSOY?}UTMZR)@2M*MYRMv5B4QsWvmf#t#JyM8}Za@ zf}FG$QR!&FT3y#)Z&pq7d2E62Q5Pej=XH5pL(ot7;Bjn6IyKSwKe{bxPmHTRNj`Z0 zG1;xYiIVN=(KOkiz61iQy|Znv>Zp=V_184HIwx`^R;F=~0nM#6^|M9;kp2quF8GMX zJ4-95x*Io<5iJ_s;jIU1V;kvLXCvedt8r&Z8voYU3ae=e8Gl}YC`1g&>VZuTWzAL0 zU`!TH$jZEK6*bdzLF3r!yA|5yI#jp)sRuKDEx&<^iW=_oJnPmV&m`q8%g2Bstu-8R z9Ef_0FrdJ01ju%4847f0>>cT&R(Wm(gJ?2Pr}ahEa-PzwQsAx>enGC&07)$J4UaLs zcP$~2Yjz!1f>*3BQkt+|I+jW%r*Y%zt|f4t)&GKJJ!zZQGi`*)D^&fnC?4}sh*Aa4N zC$`V%M|B<|ht*qYaxU~qm-^~E(x-knP4+aucB{&zwoWyWY0KP)!{p(Wt~k<2rC7V5 OXORW>|J)?QKmP-P%*(|9 delta 357 zcmZ2tr2u z-pK+XER)kzq$Y3RoIOt%4^*;{hS+<`1HR@jILZI`3gy?3OV`d>C^3%8CA4n@)Wcbf;01y zQx!5x)Qc5LGExKTAgEfHkr=}>Rf`p58 zK$@l>RAw|41xuDEW|m~;r7NW67bz6y=cZ2QR$)|{K6fo6$8-Y^M#brUYK#g~+nI!? z*)tkVms4jHnSR!T(O~)lbw(F)>zF(sEQu+}OjFQ+n6IFzP(58+o6%=_f+tYz1Z_s4 W>Fu73q1!)bGgicHFYjX1egXi4GIO8+ diff --git a/src/changeListeners/eventLogging.js b/src/changeListeners/eventLogging.js index 8f19d970a..861f2135c 100644 --- a/src/changeListeners/eventLogging.js +++ b/src/changeListeners/eventLogging.js @@ -1,26 +1,94 @@ var $ = jQuery; +/** + * Hashes the string using the 32-bit FNV-1a algorithm. + * + * @see http://isthe.com/chongo/tech/comp/fnv/#FNV-1a + * @see http://isthe.com/chongo/tech/comp/fnv/#FNV-param + * + * @param {String} string + * @return {Number} A 32-bit unsigned integer + */ +function fnv1a32( string ) { + /* eslint-disable no-bitwise */ + + var result = 2166136261, // Offset basis. + i = 0; + + for ( i = 0; i < string.length; ++i ) { + result ^= string.charCodeAt( i ); + result *= 16777619; // Prime. + } + + return result >>> 0; + /* eslint-enable no-bitwise */ +} + /** * Creates an instance of the event logging change listener. * - * When an event is enqueued to be logged it'll be logged using the schema. - * Since it's the responsibility of EventLogging (and the UA) to deliver - * logged events, the `EVENT_LOGGED` is immediately dispatched rather than - * waiting for some indicator of completion. + * When an event is enqueued it'll be logged using the schema. Since it's the + * responsibility of Event Logging (and the UA) to deliver logged events, + * `EVENT_LOGGED` is immediately dispatched rather than waiting for some + * indicator of completion. + * + * This change listener also stores hashes of all enqueued events. If a + * duplicate event is queued - there's a hash collision - then the + * `PagePreviews.EventLogging.DuplicateEvent` counter is incremented via [the + * "StatsD timers and counters" analytics event protocol][0]. + * + * See the following for additional context: + * + * * https://phabricator.wikimedia.org/T161769 + * * https://phabricator.wikimedia.org/T163198 + * + * [0]: https://github.com/wikimedia/mediawiki-extensions-WikimediaEvents/blob/master/modules/ext.wikimediaEvents.statsd.js * * @param {Object} boundActions * @param {mw.eventLog.Schema} schema + * @param {ext.popups.EventTracker} track * @return {ext.popups.ChangeListener} */ -module.exports = function ( boundActions, schema ) { +module.exports = function ( boundActions, schema, track ) { + var tokenToSeenMap = {}, + hashToSeenMap = {}; + return function ( _, state ) { var eventLogging = state.eventLogging, - event = eventLogging.event; + event = eventLogging.event, + token, + key; - if ( event ) { - schema.log( $.extend( true, {}, eventLogging.baseData, event ) ); - - boundActions.eventLogged(); + if ( !event ) { + return; } + + token = event.linkInteractionToken; + + if ( tokenToSeenMap[ token ] === true ) { + track( 'counter.PagePreviews.EventLogging.DuplicateToken', 1 ); + } + + tokenToSeenMap[ token ] = true; + + // Use 32-bit FNV-1a based on Ian Boyd's (incredibly detailed) analysis of + // several algorithms designed to quickly hash a string + // . + // + // ... + // + // It's also remarkably easy to implement!!1 + key = fnv1a32( JSON.stringify( event ) ).toString( 16 ); + + // Has the event been seen before? + if ( hashToSeenMap[ key ] === true ) { + track( 'counter.PagePreviews.EventLogging.DuplicateEvent', 1 ); + } else { + hashToSeenMap[ key ] = true; + + schema.log( $.extend( true, {}, eventLogging.baseData, event ) ); + } + + boundActions.eventLogged(); }; }; diff --git a/src/index.js b/src/index.js index 51245574c..4f5cfcb0c 100644 --- a/src/index.js +++ b/src/index.js @@ -142,7 +142,7 @@ mw.requestIdleCallback( function () { // Load EventLogging schema if possible... mw.loader.using( 'ext.eventLogging.Schema' ).done( function () { schema = createSchema( mw.config, window ); - registerChangeListener( store, changeListeners.eventLogging( boundActions, schema ) ); + registerChangeListener( store, changeListeners.eventLogging( boundActions, schema, statsvTracker ) ); } ); boundActions.boot( diff --git a/tests/node-qunit/changeListeners/eventLogging.test.js b/tests/node-qunit/changeListeners/eventLogging.test.js index 9fd224529..bc02b1cc0 100644 --- a/tests/node-qunit/changeListeners/eventLogging.test.js +++ b/tests/node-qunit/changeListeners/eventLogging.test.js @@ -10,13 +10,25 @@ QUnit.module( 'ext.popups/eventLogging', { log: this.sandbox.spy() }; + this.track = this.sandbox.spy(); + this.changeListener = eventLogging( this.boundActions, - this.schema + this.schema, + this.track ); } } ); +function createState( baseData, event ) { + return { + eventLogging: { + baseData: baseData, + event: event + } + }; +} + QUnit.test( 'it should log the queued event', function ( assert ) { var baseData, state; @@ -28,14 +40,9 @@ QUnit.test( 'it should log the queued event', function ( assert ) { baz: 'qux' }; - state = { - eventLogging: { - baseData: baseData, - event: { - action: 'pageLoaded' - } - } - }; + state = createState( baseData, { + action: 'pageLoaded' + } ); this.changeListener( undefined, state ); @@ -49,31 +56,115 @@ QUnit.test( 'it should log the queued event', function ( assert ) { ); } ); -QUnit.test( - 'it should call the eventLogged bound action creator', - function ( assert ) { - var state = { - eventLogging: { - baseData: {}, - event: undefined - } - }; +QUnit.test( 'it should call the eventLogged bound action creator', function ( assert ) { + var state = createState( {}, undefined ); - this.changeListener( undefined, state ); + this.changeListener( undefined, state ); - assert.notOk( - this.boundActions.eventLogged.called, - 'It shouldn\'t call the eventLogged bound action creator if there\'s no queued event.' - ); + assert.notOk( + this.boundActions.eventLogged.called, + 'It shouldn\'t call the eventLogged bound action creator if there\'s no queued event.' + ); - // --- + // --- - state.eventLogging.event = { - action: 'pageLoaded' - }; + state.eventLogging.event = { + action: 'pageLoaded' + }; - this.changeListener( undefined, state ); + this.changeListener( undefined, state ); - assert.ok( this.boundActions.eventLogged.called ); - } -); + assert.ok( this.boundActions.eventLogged.called ); +} ); + +QUnit.test( 'it should handle duplicate events', function ( assert ) { + var state, + nextState; + + state = nextState = createState( undefined, { + action: 'dwelledButAbandoned', + linkInteractionToken: '1234567890', + totalInteractionTime: 48 + } ); + + this.changeListener( undefined, state ); + this.changeListener( state, nextState ); + + assert.ok( this.track.calledTwice ); + assert.deepEqual( + this.track.getCall( 0 ).args, + [ + 'counter.PagePreviews.EventLogging.DuplicateToken', + 1 + ] + ); + assert.deepEqual( + this.track.getCall( 1 ).args, + [ + 'counter.PagePreviews.EventLogging.DuplicateEvent', + 1 + ], + 'It should increment the duplicate token and event counters.' + ); + + assert.notOk( + this.schema.log.calledTwice, + 'It shouldn\'t log the event.' + ); + + // --- + + nextState = createState( { + action: 'dwelledButAbandoned', + linkInteractionToken: '0987654321', + totalInteractionTime: 16 + } ); + + this.changeListener( state, nextState ); + + assert.notOk( + this.track.calledThrice, + 'The counter isn\'t incremented if the event isn\'t a duplicate' + ); +} ); + +QUnit.test( 'it should handle no event being logged', function ( assert ) { + var state; + + state = createState( undefined ); + + this.changeListener( undefined, state ); + this.changeListener( state, state ); + + assert.ok( this.track.notCalled ); +} ); + +QUnit.test( 'it should handle duplicate tokens', function ( assert ) { + var state, + nextState; + + state = createState( undefined, { + action: 'opened', + linkInteractionToken: '1234567890', + totalInteractionTime: 48 + } ); + + nextState = createState( undefined, { + action: 'dwelledButAbandoned', + linkInteractionToken: '1234567890', + totalInteractionTime: 96 + } ); + + this.changeListener( undefined, state ); + this.changeListener( state, nextState ); + + assert.ok( this.track.calledOnce ); + assert.deepEqual( + this.track.getCall( 0 ).args, + [ + 'counter.PagePreviews.EventLogging.DuplicateToken', + 1 + ], + 'It should increment the duplicate token counter.' + ); +} );