import { aggregate } from "./aggregate.js";
import {
sum,
min,
max,
mode,
median,
mean,
variance,
deviation,
} from "d3-array";
// Local namespace so the d3-array reducers imported above can be called as
// `d3.<name>(...)` by the helpers further down.
const d3 = { sum, min, max, mode, median, mean, variance, deviation };
import { isarrayofobjects, isgeojson } from "./helpers/helpers.js";
/**
 * @function groupby
 * @summary This function allows you to group objects according to an identifier and to summarize their properties group by group. If the input dataset is a GeoJSON, the geometries of each group are merged using the aggregate function.
 * @param {object|array} data - A GeoJSON FeatureCollection or an array of objects
 * @param {object} options - Optional parameters
 * @param {string} [options.by] - Name of the property that holds the group identifier. If it is not set, `data` is returned as is. Items whose identifier is empty (blank string, `null`, `undefined`, `NaN` or infinite) are left out of the result.
 * @param {array} [options.keys] - Properties to be retained after regrouping. By default, all properties are kept.
 * @param {array} [options.operators] - Functions to be applied to each variable, in the same order as the retained properties. You can enter any function to be applied to an array. You can also enter operators directly: `"all"` (to retrieve all values), `"count"`, `"sum"`, `"min"`, `"max"`, `"median"`, `"mode"`, `"mean"`, `"first"`, `"last"`, `"variance"` and `"deviation"`. Missing entries default to `"all"`.
 * @param {boolean} [options.mutate = false] - Use `true` to update the input data. With false, you create a new object, but the input object remains the same.
 * @returns {object|array} - A GeoJSON FeatureCollection or an array of objects. (it depends on what you've set as `data`).
 * @example
 * geotoolbox.groupby(*a geojson or an array of objects*, {by: "region", keys: ["pop", "gdp", "gdppc"], operators:["sum", "sum", "mean"]})
 */
export function groupby(data, { by, keys, operators, mutate = false } = {}) {
  // Without a grouping property there is nothing to regroup.
  if (by === undefined) {
    return data;
  }

  if (isgeojson(data)) {
    // Work on a deep copy unless the caller asked for in-place modification.
    const collection = mutate ? data : JSON.parse(JSON.stringify(data));
    const groups = summarize(
      collection.features,
      // GeoJSON allows `properties: null`; read it as an empty record.
      (feature) => feature?.properties ?? {},
      { by, keys, operators }
    );
    collection.features = groups.map(({ id, values, members }) => ({
      type: "Feature",
      properties: values,
      geometry: aggregate({ features: members }, { id }).features[0].geometry,
    }));
    return collection;
  }

  if (isarrayofobjects(data)) {
    const rows = summarize(data, (d) => d, { by, keys, operators }).map(
      (group) => group.values
    );
    if (mutate) {
      // Replace the content of the caller's array while keeping its identity.
      data.splice(0, data.length, ...rows);
    }
    return rows;
  }

  // Unsupported input: hand it back untouched.
  return data;
}

// Splits `rows` into groups sharing the same `by` value and reduces every
// retained property of each group with its operator.
// `record` maps a row to the plain object that holds its properties.
// Returns one `{ id, values, members }` entry per group, ordered by first
// appearance of the identifier.
function summarize(rows, record, { by, keys, operators }) {
  const names =
    keys ||
    [...new Set(rows.flatMap((row) => Object.keys(record(row))))].filter(
      (name) => name !== by
    );
  const ops = operators || [];
  const reducers = new Map(
    names.map((name, i) => [
      name,
      // A missing operator falls back to "all" (keep every value).
      typeof ops[i] === "function" ? ops[i] : compute(ops[i] ?? "all"),
    ])
  );

  // Single pass bucketing. Map keys are compared exactly like the Set that
  // would deduplicate the identifiers, so 1 and "1" stay two separate groups
  // instead of being merged into each other by a loose comparison.
  const buckets = new Map();
  for (const row of rows) {
    const id = record(row)[by];
    const bucket = buckets.get(id);
    if (bucket) {
      bucket.push(row);
    } else {
      buckets.set(id, [row]);
    }
  }

  return removeempty([...buckets.keys()]).map((id) => {
    const members = buckets.get(id);
    const values = { [by]: id };
    for (const name of names) {
      values[name] = reducers.get(name)(members.map((row) => record(row)[name]));
    }
    return { id, values, members };
  });
}
// helpers
// Resolves an operator name to the function applied to the array of values
// collected for one property within one group.
//   - "all" returns the values untouched; any unrecognized name behaves the
//     same way instead of throwing.
//   - "count" returns the number of values, empty ones included.
//   - "first" / "last" return the first / last non-empty value.
//   - the statistical operators parse every value with parseFloat, drop what
//     removeempty rejects, and delegate to the d3-array function of that name.
function compute(op) {
  switch (op) {
    case "count":
      return (arr) => arr.length;
    case "first":
      return (arr) => removeempty(arr)[0];
    case "last":
      return (arr) => removeempty(arr).at(-1);
    case "sum":
    case "min":
    case "max":
    case "mode":
    case "median":
    case "mean":
    case "variance":
    case "deviation":
      // `d3` is looked up when the reducer runs, not when it is created.
      return (arr) => d3[op](removeempty(arr.map((d) => parseFloat(d))));
    case "all":
    default:
      return (arr) => arr;
  }
}
// Returns a copy of `x` without the values considered empty: blank strings,
// undefined, null, NaN and both infinities. Other falsy values such as 0 or
// false are kept.
function removeempty(x) {
  // Set#has compares like Array#includes, so NaN is matched as well.
  const blanks = new Set([
    " ",
    " ",
    "",
    undefined,
    null,
    NaN,
    Infinity,
    -Infinity,
  ]);
  const kept = [];
  for (const value of x) {
    if (!blanks.has(value)) {
      kept.push(value);
    }
  }
  return kept;
}