Track and discard duplicate enqueued events

The eventLogging change listener is responsible for ensuring that the
internal state of Page Previews matches its external state (that
perceived by the user and UA). It does this by logging events with
ext.eventLog.Schema#log. This makes it the perfect place to track and
discard duplicate events enqueued by the Page Previews codebase observed
in T161769.

Make the change listener track events that it's logged by storing hashes
of the dynamic parts of them in memory. If the eventLogging change
listener sees the same event more than once, then it discards it and
increments a counter in StatsD.

This behaviour should be enabled for a matter of days as we should see
whether the duplicate events are being enqueued by the Page Previews
codebase immediately.

Bug: T163198
Change-Id: I6a38a2619d777a76dd45eb7300079e1f07b07b12
This commit is contained in:
Sam Smith 2017-04-25 12:26:49 +01:00
parent 3d0899c0a0
commit 79f3b318d0
5 changed files with 201 additions and 42 deletions

Binary file not shown.

Binary file not shown.

View file

@ -1,26 +1,94 @@
var $ = jQuery;
/**
* Hashes the string using the 32-bit FNV-1a algorithm.
*
* @see http://isthe.com/chongo/tech/comp/fnv/#FNV-1a
* @see http://isthe.com/chongo/tech/comp/fnv/#FNV-param
*
* @param {String} string
* @return {Number} A 32-bit unsigned integer
*/
function fnv1a32( string ) {
/* eslint-disable no-bitwise */
var result = 2166136261, // Offset basis.
i = 0;
for ( i = 0; i < string.length; ++i ) {
result ^= string.charCodeAt( i );
result *= 16777619; // Prime.
}
return result >>> 0;
/* eslint-enable no-bitwise */
}
/**
* Creates an instance of the event logging change listener.
*
* When an event is enqueued to be logged it'll be logged using the schema.
* Since it's the responsibility of EventLogging (and the UA) to deliver
* logged events, the `EVENT_LOGGED` is immediately dispatched rather than
* waiting for some indicator of completion.
* When an event is enqueued it'll be logged using the schema. Since it's the
* responsibility of Event Logging (and the UA) to deliver logged events,
* `EVENT_LOGGED` is immediately dispatched rather than waiting for some
* indicator of completion.
*
* This change listener also stores hashes of all enqueued events. If a
* duplicate event is queued - there's a hash collision - then the
* `PagePreviews.EventLogging.DuplicateEvent` counter is incremented via [the
* "StatsD timers and counters" analytics event protocol][0].
*
* See the following for additional context:
*
* * https://phabricator.wikimedia.org/T161769
* * https://phabricator.wikimedia.org/T163198
*
* [0]: https://github.com/wikimedia/mediawiki-extensions-WikimediaEvents/blob/master/modules/ext.wikimediaEvents.statsd.js
*
* @param {Object} boundActions
* @param {mw.eventLog.Schema} schema
* @param {ext.popups.EventTracker} track
* @return {ext.popups.ChangeListener}
*/
module.exports = function ( boundActions, schema ) {
module.exports = function ( boundActions, schema, track ) {
var tokenToSeenMap = {},
hashToSeenMap = {};
return function ( _, state ) {
var eventLogging = state.eventLogging,
event = eventLogging.event;
event = eventLogging.event,
token,
key;
if ( event ) {
schema.log( $.extend( true, {}, eventLogging.baseData, event ) );
boundActions.eventLogged();
if ( !event ) {
return;
}
token = event.linkInteractionToken;
if ( tokenToSeenMap[ token ] === true ) {
track( 'counter.PagePreviews.EventLogging.DuplicateToken', 1 );
}
tokenToSeenMap[ token ] = true;
// Use 32-bit FNV-1a based on Ian Boyd's (incredibly detailed) analysis of
// several algorithms designed to quickly hash a string
// <https://softwareengineering.stackexchange.com/a/145633>.
//
// ...
//
// It's also remarkably easy to implement!!1
key = fnv1a32( JSON.stringify( event ) ).toString( 16 );
// Has the event been seen before?
if ( hashToSeenMap[ key ] === true ) {
track( 'counter.PagePreviews.EventLogging.DuplicateEvent', 1 );
} else {
hashToSeenMap[ key ] = true;
schema.log( $.extend( true, {}, eventLogging.baseData, event ) );
}
boundActions.eventLogged();
};
};

View file

@ -142,7 +142,7 @@ mw.requestIdleCallback( function () {
// Load EventLogging schema if possible...
mw.loader.using( 'ext.eventLogging.Schema' ).done( function () {
schema = createSchema( mw.config, window );
registerChangeListener( store, changeListeners.eventLogging( boundActions, schema ) );
registerChangeListener( store, changeListeners.eventLogging( boundActions, schema, statsvTracker ) );
} );
boundActions.boot(

View file

@ -10,13 +10,25 @@ QUnit.module( 'ext.popups/eventLogging', {
log: this.sandbox.spy()
};
this.track = this.sandbox.spy();
this.changeListener = eventLogging(
this.boundActions,
this.schema
this.schema,
this.track
);
}
} );
function createState( baseData, event ) {
return {
eventLogging: {
baseData: baseData,
event: event
}
};
}
QUnit.test( 'it should log the queued event', function ( assert ) {
var baseData,
state;
@ -28,14 +40,9 @@ QUnit.test( 'it should log the queued event', function ( assert ) {
baz: 'qux'
};
state = {
eventLogging: {
baseData: baseData,
event: {
action: 'pageLoaded'
}
}
};
state = createState( baseData, {
action: 'pageLoaded'
} );
this.changeListener( undefined, state );
@ -49,31 +56,115 @@ QUnit.test( 'it should log the queued event', function ( assert ) {
);
} );
QUnit.test(
'it should call the eventLogged bound action creator',
function ( assert ) {
var state = {
eventLogging: {
baseData: {},
event: undefined
}
};
QUnit.test( 'it should call the eventLogged bound action creator', function ( assert ) {
var state = createState( {}, undefined );
this.changeListener( undefined, state );
this.changeListener( undefined, state );
assert.notOk(
this.boundActions.eventLogged.called,
'It shouldn\'t call the eventLogged bound action creator if there\'s no queued event.'
);
assert.notOk(
this.boundActions.eventLogged.called,
'It shouldn\'t call the eventLogged bound action creator if there\'s no queued event.'
);
// ---
// ---
state.eventLogging.event = {
action: 'pageLoaded'
};
state.eventLogging.event = {
action: 'pageLoaded'
};
this.changeListener( undefined, state );
this.changeListener( undefined, state );
assert.ok( this.boundActions.eventLogged.called );
}
);
assert.ok( this.boundActions.eventLogged.called );
} );
QUnit.test( 'it should handle duplicate events', function ( assert ) {
var state,
nextState;
state = nextState = createState( undefined, {
action: 'dwelledButAbandoned',
linkInteractionToken: '1234567890',
totalInteractionTime: 48
} );
this.changeListener( undefined, state );
this.changeListener( state, nextState );
assert.ok( this.track.calledTwice );
assert.deepEqual(
this.track.getCall( 0 ).args,
[
'counter.PagePreviews.EventLogging.DuplicateToken',
1
]
);
assert.deepEqual(
this.track.getCall( 1 ).args,
[
'counter.PagePreviews.EventLogging.DuplicateEvent',
1
],
'It should increment the duplicate token and event counters.'
);
assert.notOk(
this.schema.log.calledTwice,
'It shouldn\'t log the event.'
);
// ---
nextState = createState( {
action: 'dwelledButAbandoned',
linkInteractionToken: '0987654321',
totalInteractionTime: 16
} );
this.changeListener( state, nextState );
assert.notOk(
this.track.calledThrice,
'The counter isn\'t incremented if the event isn\'t a duplicate'
);
} );
QUnit.test( 'it should handle no event being logged', function ( assert ) {
var state;
state = createState( undefined );
this.changeListener( undefined, state );
this.changeListener( state, state );
assert.ok( this.track.notCalled );
} );
QUnit.test( 'it should handle duplicate tokens', function ( assert ) {
var state,
nextState;
state = createState( undefined, {
action: 'opened',
linkInteractionToken: '1234567890',
totalInteractionTime: 48
} );
nextState = createState( undefined, {
action: 'dwelledButAbandoned',
linkInteractionToken: '1234567890',
totalInteractionTime: 96
} );
this.changeListener( undefined, state );
this.changeListener( state, nextState );
assert.ok( this.track.calledOnce );
assert.deepEqual(
this.track.getCall( 0 ).args,
[
'counter.PagePreviews.EventLogging.DuplicateToken',
1
],
'It should increment the duplicate token counter.'
);
} );