Provide a way to zoom a chart axis to inliers

This utility provides ways to zoom in a chart (more specifically its axes) on inlier data points. The principal use-case is to restore readability to charts that were distorted by outliers. The first (and maybe the last) chart to use this is the daily burnt BSQ chart under 'DAO -> Facts and Figures'.
2025-02-23 23:06:39 +01:00 · 2020-01-18 15:22:55 +02:00 · 2020-01-18 15:22:55 +02:00 · 0775aee6cb
commit 0775aee6cb
parent edec698573
1 changed files with 316 additions and 0 deletions
--- a/desktop/src/main/java/bisq/desktop/util/AxisInlierUtils.java
+++ b/desktop/src/main/java/bisq/desktop/util/AxisInlierUtils.java
@ -0,0 +1,316 @@
+/*
+ * This file is part of Bisq.
+ *
+ * Bisq is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or (at
+ * your option) any later version.
+ *
+ * Bisq is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with Bisq. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package bisq.desktop.util;
+
+import bisq.common.util.DoubleSummaryStatisticsWithStdDev;
+import bisq.common.util.Tuple2;
+
+import javafx.scene.chart.NumberAxis;
+import javafx.scene.chart.XYChart;
+
+import javafx.collections.ListChangeListener;
+
+import java.util.Collections;
+import java.util.DoubleSummaryStatistics;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class AxisInlierUtils {
+
+    /* Returns a ListChangeListener that is meant to be attached to an
+     * ObservableList. On event, it triggers a recalculation of a provided
+     * axis' range so as to zoom in on inliers.
+     */
+    public static ListChangeListener getListenerThatZoomsToInliers(
+            NumberAxis axis,
+            int maxNumberOfTicks,
+            double percentToTrim,
+            double howManyStdDevsConstituteOutlier
+    ) {
+        ListChangeListener listener =
+                change -> {
+                    boolean axisHasBeenInitialized = axis != null;
+                    if (axisHasBeenInitialized) {
+                        zoomToInliers(
+                                axis,
+                                change.getList(),
+                                maxNumberOfTicks,
+                                percentToTrim,
+                                howManyStdDevsConstituteOutlier
+                        );
+                    }
+                };
+        return listener;
+    }
+
+    /* Applies the inlier range to the axis bounds and sets an appropriate tick-unit.
+     * The methods describing the arguments passed here are `computeReferenceTickUnit`,
+     * `trim`, and `computeInlierThreshold`.
+     */
+    public static void zoomToInliers(
+            NumberAxis yAxis,
+            List<XYChart.Data<Number, Number>> xyValues,
+            int maxNumberOfTicks,
+            double percentToTrim,
+            double howManyStdDevsConstituteOutlier
+    ) {
+        List<Double> yValues = extractYValues(xyValues);
+
+        if (yValues.size() < 3) {
+            // with less than 3 elements, there is no meaningful inlier analysis
+            return;
+        }
+
+        Tuple2<Double, Double> inlierRange =
+                findInlierRange(yValues, percentToTrim, howManyStdDevsConstituteOutlier);
+
+        applyRange(yAxis, maxNumberOfTicks, inlierRange);
+    }
+
+    private static List<Double> extractYValues(List<XYChart.Data<Number, Number>> xyValues) {
+        return xyValues
+                .stream()
+                .map(xyData -> (double) xyData.getYValue())
+                .collect(Collectors.toList());
+    }
+
+    /* Finds the minimum and maximum inlier values. The returned values may be NaN.
+     * See `computeInlierThreshold` for the definition of inlier.
+     */
+    private static Tuple2<Double, Double> findInlierRange(
+            List<Double> yValues,
+            double percentToTrim,
+            double howManyStdDevsConstituteOutlier
+    ) {
+        Tuple2<Double, Double> inlierThreshold =
+                computeInlierThreshold(yValues, percentToTrim, howManyStdDevsConstituteOutlier);
+
+        DoubleSummaryStatistics inlierStatistics =
+                yValues
+                        .stream()
+                        .filter(y -> withinBounds(inlierThreshold, y))
+                        .mapToDouble(Double::doubleValue)
+                        .summaryStatistics();
+
+        var inlierMin = inlierStatistics.getMin();
+        var inlierMax = inlierStatistics.getMax();
+
+        return new Tuple2(inlierMin, inlierMax);
+    }
+
+    private static boolean withinBounds(Tuple2<Double, Double> bounds, double number) {
+        var lowerBound = bounds.first;
+        var upperBound = bounds.second;
+        return (lowerBound <= number) && (number <= upperBound);
+    }
+
+    /* Computes the lower and upper inlier thresholds. A point lying outside
+     * these thresholds is considered an outlier, and a point lying within
+     * is considered an inlier.
+     * The thresholds are found by trimming the dataset (see method `trim`),
+     * then adding or subtracting a multiple of its (trimmed) standard
+     * deviation from its (trimmed) mean.
+     */
+    private static Tuple2<Double, Double> computeInlierThreshold(
+            List<Double> numbers, double percentToTrim, double howManyStdDevsConstituteOutlier
+    ) {
+        if (howManyStdDevsConstituteOutlier <= 0) {
+            throw new IllegalArgumentException(
+                    "howManyStdDevsConstituteOutlier should be a positive number");
+        }
+
+        List<Double> trimmed = trim(percentToTrim, numbers);
+
+        DoubleSummaryStatisticsWithStdDev summaryStatistics =
+                trimmed.stream()
+                        .collect(
+                                DoubleSummaryStatisticsWithStdDev::new,
+                                DoubleSummaryStatisticsWithStdDev::accept,
+                                DoubleSummaryStatisticsWithStdDev::combine);
+
+        double mean = summaryStatistics.getAverage();
+        double stdDev = summaryStatistics.getStandardDeviation();
+
+        var inlierLowerThreshold = mean - (stdDev * howManyStdDevsConstituteOutlier);
+        var inlierUpperThreshold = mean + (stdDev * howManyStdDevsConstituteOutlier);
+
+        return new Tuple2(inlierLowerThreshold, inlierUpperThreshold);
+    }
+
+    /* Sorts the data and discards given percentage from the left and right sides each.
+     * E.g. 5% trim implies a total of 10% (2x 5%) of elements discarded.
+     * Used in calculating trimmed mean (and in turn trimmed standard deviation),
+     * which is more robust to outliers than a simple mean.
+     */
+    private static List<Double> trim(double percentToTrim, List<Double> numbers) {
+        var minPercentToTrim = 0;
+        var maxPercentToTrim = 50;
+        if (minPercentToTrim > percentToTrim || percentToTrim > maxPercentToTrim) {
+            throw new IllegalArgumentException(
+                    String.format(
+                            "The percentage of data points to trim must be in the range [%d,%d].",
+                            minPercentToTrim, maxPercentToTrim));
+        }
+
+        var totalPercentTrim = percentToTrim * 2;
+        if (totalPercentTrim == 0) {
+            return numbers;
+        }
+        if (totalPercentTrim == 100) {
+            return Collections.<Double>emptyList();
+        }
+
+        if (numbers.isEmpty()) {
+            return numbers;
+        }
+
+        var count = numbers.size();
+        int countToDropFromEachSide = (int) Math.round((count / 100d) * percentToTrim); // visada >= 0?
+        if (countToDropFromEachSide == 0) {
+            return numbers;
+        }
+
+        var sorted = numbers.stream().sorted();
+
+        var oneSideTrimmed = sorted.skip(countToDropFromEachSide);
+
+        // Here, having already trimmed the left-side, we are implicitly trimming
+        // the right-side by specifying a limit to the stream's length.
+        // An explicit right-side drop/trim/skip is not supported by the Stream API.
+        var countAfterTrim = count - (countToDropFromEachSide * 2); // visada > 0? ir <= count?
+        var bothSidesTrimmed = oneSideTrimmed.limit(countAfterTrim);
+
+        return bothSidesTrimmed.collect(Collectors.toList());
+    }
+
+    /* On the given axis, sets the provided lower and upper bounds, and
+     * computes an appropriate major tick unit (distance between major ticks in data-space).
+     * External computation of tick unit is necessary, because JavaFX doesn't support automatic
+     * tick unit computation when axis bounds are set manually.
+     */
+    private static void applyRange(NumberAxis axis, int maxNumberOfTicks, Tuple2<Double, Double> bounds) {
+        var boundsWidth = getBoundsWidth(bounds);
+        if (boundsWidth < 0) {
+            throw new IllegalArgumentException(
+                    "The lower bound must be a smaller number than the upper bound");
+        }
+        if (boundsWidth == 0 || boundsWidth == Double.NaN) {
+            // less than 2 unique data-points: recalculating axis range doesn't make sense
+            return;
+        }
+
+        axis.setAutoRanging(false);
+
+        var lowerBound = bounds.first;
+        var upperBound = bounds.second;
+
+        // If one of the ends of the range weren't zero,
+        // additional logic would be needed to make ticks "round".
+        // Of course, many, if not most, charts benefit from having 0 on the axis.
+        var shouldKeepZeroWithinRange = true;
+        if (shouldKeepZeroWithinRange) {
+            if (lowerBound > 0) {
+                lowerBound = 0d;
+            } else if (upperBound < 0) {
+                upperBound = 0d;
+            }
+        }
+
+        axis.setLowerBound(lowerBound);
+        axis.setUpperBound(upperBound);
+
+        var referenceTickUnit = computeReferenceTickUnit(maxNumberOfTicks, bounds);
+
+        var tickUnit = computeTickUnit(referenceTickUnit);
+
+        axis.setTickUnit(tickUnit);
+    }
+
+    /* Uses bounds and maximum number of major ticks to find a reference tick unit
+     * for the `computeTickUnit` method. The reference tick unit is later used as a
+     * starting point for tick unit's search.
+     * The rationale behind dividing the range/domain/width of an axis by maximum number
+     * of ticks is that it yields a good number of ticks, but they are not "well rounded",
+     * hence the next step of computing the actual tick unit.
+     * `maxNumberOfTicks` specifies how many subdivisions (major tick units) an axis
+     * should have at most. The final number of subdivisions, after `computeTickUnit`,
+     * usually will be lower, but never higher.
+     */
+    private static double computeReferenceTickUnit(int maxNumberOfTicks, Tuple2<Double, Double> bounds) {
+        if (maxNumberOfTicks <= 0) {
+            throw new IllegalArgumentException("maxNumberOfTicks must be a positive number");
+        }
+        var width = getBoundsWidth(bounds);
+        var referenceTickUnit = (double) width / maxNumberOfTicks;
+        return referenceTickUnit;
+    }
+
+    /* Extracted from cern.extjfx.chart.DefaultTickUnitSupplier (licensed Apache 2.0).
+     * Original description below; note that the `multipliers` vector is hardcoded in the method to the default value
+     * used in the source class:
+     *
+     * Computes tick unit using the following formula: tickUnit = M*10^E, where M is one of the multipliers specified in
+     * the constructor and E is an exponent of 10. Both M and E are selected so that the calculated unit is the smallest
+     * (closest to the zero) value that is greater than or equal to the reference tick unit.
+     *
+     * For example with multipliers [1, 2, 5], the method will give the following results:
+     *
+     * computeTickUnit(0.01) returns 0.01
+     * computeTickUnit(0.42) returns 0.5
+     * computeTickUnit(1.73) returns 2
+     * computeTickUnit(5)    returns 5
+     * computeTickUnit(27)   returns 50
+     *
+     * @param referenceTickUnit the reference tick unit, must be a positive number
+     */
+    private static double computeTickUnit(double referenceTickUnit) {
+        if (referenceTickUnit <= 0) {
+            throw new IllegalArgumentException("The reference tick unit must be a positive number");
+        }
+
+        // Default multipliers vector extracted from the source class.
+        double[] multipliers = {1d, 2.5, 5d};
+
+        int BASE = 10;
+        int exp = (int) Math.floor(Math.log10(referenceTickUnit));
+        double factor = referenceTickUnit / Math.pow(BASE, exp);
+
+        double multiplier = 0;
+        int lastIndex = multipliers.length - 1;
+        if (factor > multipliers[lastIndex]) {
+            exp++;
+            multiplier = multipliers[0];
+        } else {
+            for (int i = lastIndex; i >= 0; i--) {
+                if (factor <= multipliers[i]) {
+                    multiplier = multipliers[i];
+                } else {
+                    break;
+                }
+            }
+        }
+        return multiplier * Math.pow(BASE, exp);
+    }
+
+    private static double getBoundsWidth(Tuple2<Double, Double> bounds) {
+        var lowerBound = bounds.first;
+        var upperBound = bounds.second;
+        var width = Math.abs(upperBound - lowerBound);
+        return width;
+    }
+}