scripts/experiment-properties/distribution/gaussian_distribution_generator.js
export {generateDistribution};
const TRIAL_LIMIT = 250;
function generateDistribution(correlation, error, size, numsd, mean, sd){
// dynamicallyLoadScript(MATHJS_URL);
var coordinates = {x_values: [], y_values: []};
var overshootSize = size + 20; // Generating > size will guarantee we have
// a distribution of the speciied size later.
var done = true;
// Initialize the points and make sure the correlation is in an acceptable error range.
// Restart from scratch if adjusting for the error value is taking too long.
do {
// Reset coordinates:
coordinates = {x_values: [], y_values: []};
initializePoints(coordinates, correlation, overshootSize, numsd, mean, sd);
done = adjustPointsForError(coordinates, correlation, error, overshootSize, numsd, mean, sd);
} while(done == false);
// Transformation into range [0, 1]
transformPoints(coordinates, mean, sd);
readjustPoints(coordinates, correlation, error, overshootSize, numsd, mean, sd);
// Since this is code ported from the java codebase, coordinates are plotted
// so that origin as at the top left (this makes the distribution negative instead
// of positive). To force it to be positive, we flip the whole distribution
// across y axis, then add 1 to move it into 1st quadrant.
for (let i = 0; i<coordinates.y_values.length; i++) {
coordinates.y_values[i] = coordinates.y_values[i] * (-1) + 1;
}
let prepared_coordinates = prepare_coordinates(coordinates, size);
return prepared_coordinates;
}
/**
* Converts the coordinates into this format for d3:
* [ [x1, y1], [x2, y2] ... [xn, yn] ]
* And samples the distribution for the specified num_points.
*
* @param coordinates { {x_values: [], y_values: []} }
* num_points {integer}
* @return output_coordinates { [x1, y1], [x2, y2] ... }
*/
function prepare_coordinates(coordinates, num_points){
var array = [];
var reorganized_coordinates = [];
for (let i = 0; i < coordinates.x_values.length; i++){
array.push(coordinates.x_values[i]);
array.push(coordinates.y_values[i]);
reorganized_coordinates.push(array);
array = [];
}
var output_coordinates = sample_coordinates(reorganized_coordinates, num_points);
return output_coordinates;
}
/**
* Randomly picks x number of points from the distribution
* where x = num_points.
*
* @param coordinates { [x1, y1], [x2, y2] ... }
* num_points {integer}
* @return output_coordinates with size num_points { [x1, y1], [x2, y2] ... }
*/
function sample_coordinates(coordinates, num_points){
var output_coordinates = [];
for (let i = num_points; i > 0; i-- ) {
var random_coordinate = coordinates.splice(Math.floor(Math.random() * (i + 1)), 1)[0];
output_coordinates.push(random_coordinate);
}
return output_coordinates;
}
function initializePoints(coordinates, correlation, size, numsd, mean, sd){
var xVal;
var x2Val;
var yVal;
for (let i = 0; i < size; i++) {
do {
xVal = random_bm();
x2Val = random_bm();
// formula for generating gaussian distribution: y = p*x + sqrt(1-p^2)*(x2)
yVal = (correlation * xVal) + (Math.sqrt(1 - (correlation * correlation)) * x2Val);
} while (pointNotWithinRequiredStdDevs(xVal, yVal, numsd));
coordinates.x_values.push(xVal);
coordinates.y_values.push(yVal);
}
}
function adjustPointsForError(coordinates, correlation, error, size, numsd, mean, sd) {
var currTrial = 0;
// Try to correct the correlation value, up to a maximum of TRIAL_LIMIT trials.
while (correlationNotWithinError(coordinates, correlation, error) && (currTrial < TRIAL_LIMIT)) {
currTrial += 1;
coordinates.x_values.splice(size - 1, 1);
coordinates.y_values.splice(size - 1, 1);
var x;
var x2;
var y;
do {
x = random_bm();
x2 = random_bm();
y = (correlation * x) + (Math.sqrt(1 - (correlation * correlation)) * x2);
} while (pointNotWithinRequiredStdDevs(x, y, numsd));
coordinates.x_values.push(x);
coordinates.y_values.push(y);
}
return !(correlationNotWithinError(coordinates, correlation, error));
}
// Replaces the points that are outside of the [0,1] range.
function readjustPoints(coordinates, correlation, error, size, numsd, mean, sd) {
var temp_coordinates = {x_values: [], y_values: []};
var max_iterations = 500;
for (let i = 0; i<size; i++) {
if (pointNotWithinRequiredStdDevsLong(coordinates.x_values[i], coordinates.y_values[i], numsd, mean, sd)){
coordinates.x_values.splice(i, 1);
coordinates.y_values.splice(i, 1);
var x;
var x2;
var y;
var temp = 0;
while (temp < max_iterations){
x = random_bm();
x2 = random_bm();
y = (correlation * x) + (Math.sqrt(1 - (correlation * correlation)) * x2);
x = x*sd + mean;
y = y*sd + mean;
if (i > 0) {
temp_coordinates.x_values = coordinates.x_values.slice(0, i);
temp_coordinates.y_values = coordinates.y_values.slice(0, i);
temp_coordinates.x_values[i] = x;
temp_coordinates.y_values[i] = y;
}
if (!pointNotWithinRequiredStdDevsLong(x, y, numsd, mean, sd) && (i > 0) &&
!correlationNotWithinError(temp_coordinates, correlation, error)){
break;
}
temp++;
}
coordinates.x_values[i] = x;
coordinates.y_values[i] = y;
}
}
/*
If, after replacing the points, the correlation is outside of the error thresholds, regenerate the entire
distribution
*/
//if (correlationNotWithinError(coordinates, correlation, error)) {
//coordinates = {x_values: [], y_values: []};
//generateDistribution(correlation, error, size, numsd, mean, sd);
//}
}
function pointNotWithinRequiredStdDevs(x, y, numsd){
return pointNotWithinRequiredStdDevsLong(x, y, numsd, 0, 1);
}
function pointNotWithinRequiredStdDevsLong(x, y, numsd, mean, sd){
var leftThreshold = mean - numsd*sd;
var rightThreshold = mean + numsd*sd;
return (x < leftThreshold) || (x > rightThreshold) || (y < leftThreshold) || (y > rightThreshold);
}
function correlationNotWithinError(coordinates, correlation, error){
return Math.abs(correlation - getPearsonCorrelation(coordinates.x_values, coordinates.y_values)) > error;
}
function transformPoints(coordinates, mean, sd) {
/** Calculate the needed x- and y- scale/translation amount
* by using the formulas for:
* mean change: newMeanX = scaleX*meanX+translateX
* stdev change: newStdevX = scaleX*stdevX
*/
var scaleX = sd/math.std(coordinates.x_values);
var scaleY = sd/math.std(coordinates.y_values);
var translateX = mean - math.mean(coordinates.x_values)*scaleX;
var translateY = mean - math.mean(coordinates.y_values)*scaleY;
scalePoints(coordinates,scaleX,scaleY);
translatePoints(coordinates,translateX,translateY);
}
// Scales the points in a distribution with the given x and y scale values.
function scalePoints(coordinates, scaleX, scaleY) {
for (let i = 0; i<coordinates.x_values.length; i++) {
coordinates.x_values[i] = coordinates.x_values[i] * scaleX;
coordinates.y_values[i] = coordinates.y_values[i] * scaleY;
}
}
// Translates the points in a distribution with the given x and y translate values.
function translatePoints(coordinates, x, y) {
for (let i = 0; i<coordinates.x_values.length; i++) {
coordinates.x_values[i] = coordinates.x_values[i] + x;
coordinates.y_values[i] = 1 - (coordinates.y_values[i] + y);
}
}
// Returns Pearson Correlation Coefficient.
// SOURCE: https://memory.psych.mun.ca/tech/js/correlation.shtml
function getPearsonCorrelation(x, y) {
var shortestArrayLength = 0;
if(x.length == y.length) {
shortestArrayLength = x.length;
} else if(x.length > y.length) {
shortestArrayLength = y.length;
console.error('x has more items in it, the last ' + (x.length - shortestArrayLength) + ' item(s) will be ignored');
} else {
shortestArrayLength = x.length;
console.error('y has more items in it, the last ' + (y.length - shortestArrayLength) + ' item(s) will be ignored');
}
var xy = [];
var x2 = [];
var y2 = [];
for(var i=0; i<shortestArrayLength; i++) {
xy.push(x[i] * y[i]);
x2.push(x[i] * x[i]);
y2.push(y[i] * y[i]);
}
var sum_x = 0;
var sum_y = 0;
var sum_xy = 0;
var sum_x2 = 0;
var sum_y2 = 0;
for(var i=0; i< shortestArrayLength; i++) {
sum_x += x[i];
sum_y += y[i];
sum_xy += xy[i];
sum_x2 += x2[i];
sum_y2 += y2[i];
}
var step1 = (shortestArrayLength * sum_xy) - (sum_x * sum_y);
var step2 = (shortestArrayLength * sum_x2) - (sum_x * sum_x);
var step3 = (shortestArrayLength * sum_y2) - (sum_y * sum_y);
var step4 = Math.sqrt(step2 * step3);
var answer = step1 / step4;
return answer;
}
// SOURCE: https://stackoverflow.com/questions/25582882/javascript-math-random-normal-distribution-gaussian-bell-curve?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
function random_bm() {
var u = 0, v = 0;
while(u === 0) u = Math.random(); //Converting [0,1) to (0,1)
while(v === 0) v = Math.random();
return Math.sqrt( -2.0 * Math.log( u ) ) * Math.cos( 2.0 * Math.PI * v );
}
function dynamicallyLoadScript(url) {
var script = document.createElement("script"); // Make a script DOM node
script.src = url; // Set it's src to the provided URL
document.head.appendChild(script); // Add it to the end of the head section of the page (could change 'head' to 'body' to add it to the end of the body section instead)
}