文章/答案/技术大牛

发布

社区首页 >问答首页 >dc.js系列图表-填充缺失数据时图表速度太慢

问dc.js系列图表-填充缺失数据时图表速度太慢
EN

Stack Overflow用户

提问于 2020-03-10 03:38:59

回答 2查看 88关注 0票数 1

我想创建一个多时间线图表作为系列图表。

我阅读了 Stack Overflow 上关于填充缺失数据的问题：“dc.js lineChart - fill missing dates and show zero where no data”。

我实现了那个问题中的代码，它在单线图上工作得很好。对于系列图表，我需要对其做一些调整。调整后可以工作，但是性能很糟糕。

以下是我们使用的示例数据：

// Sample records used by the question. Each record has a store name
// (`description`), a "lat lon" string (`location`) and a numeric
// `timeReported` value.
// NOTE(review): `timeReported` looks like an epoch timestamp in milliseconds,
// and the chart code reads `d.timeReportedDate`, which these records do not
// carry - presumably it is derived from `timeReported` before the crossfilter
// is built; confirm.
let data = [
  {description: "Walmart", location: "40.216403 -74.541296", timeReported: 1581710670184},
  {description: "Target", location: "38.271996 -84.032575", timeReported: 1583524065011},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1583524065011},
  {description: "7-11", location: "34.925349 -78.463977", timeReported: 1583524065011},
  {description: "WaWa", location: "35.716208 -77.741230", timeReported: 1583524065013},
  {description: "7-11", location: "41.258950 -83.888060", timeReported: 1583524065013},
  {description: "Shell", location: "37.879694 -79.836127", timeReported: 1583524065011},
  {description: "Dominos", location: "35.890273 -80.700329", timeReported: 1583524065395},
  {description: "Dominos", location: "39.268777 -78.743366", timeReported: 1583524065397},
  {description: "Walgreens", location: "35.490215 -81.773863", timeReported: 1583524065399},
  {description: "7-11", location: "37.974797 -81.393449", timeReported: 1583524065506},
  {description: "Wendys", location: "40.859685 -76.963065", timeReported: 1583524065521},
  {description: "CVS", location: "38.517910 -78.251419", timeReported: 1583524065553},
  {description: "CVS", location: "35.947033 -81.616061", timeReported: 1583524142169},
  {description: "Shell", location: "39.566535 -77.992499", timeReported: 1583524142176},
  {description: "Target", location: "37.832142 -88.003151", timeReported: 1583524142170},
  {description: "Wendys", location: "40.245397 -80.061998", timeReported: 1583524142223},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1583524142223},
  {description: "Macys", location: "36.631458 -77.803286", timeReported: 1583524142213},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1583524142251},
  {description: "7-11", location: "41.138285 -83.298142", timeReported: 1583524142249},
  {description: "Wendys", location: "34.940485 -77.230388", timeReported: 1583524142249},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1583524142296},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1583524142293},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1583524142310},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1583524142338},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1582058735883},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1582058735883},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1582058735883},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1582058735883},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1582058735883},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1582058735883},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1582058735883},
  {description: "Kohls", location: "40.373533 -101.057470", timeReported: 1582838559493},
];

以下是示例代码。顺便说一句，下面代码中的 curTimeInterval 只是某个 d3 时间间隔（time interval）的别名，可以由用户选择（d3.timeHour、d3.timeDay、d3.timeWeek、d3.timeMonth）。

// Builds a crossfilter over `data`, derives the padded date range of the
// records, and configures a dc.js series chart that draws one line per
// `description`, using a zero-filled wrapper around the composite group.
// NOTE(review): `cf`, `dateDim` and `reportedGroup` are assigned without a
// declaration in this snippet - presumably declared elsewhere; confirm.
cf = crossfilter(data);

// One bucket per time interval: the dimension key is the record's date passed
// through curTimeInterval.
// NOTE(review): the sample records expose `timeReported`, not
// `timeReportedDate` - presumably the Date field is added before this runs;
// confirm.
dateDim = cf.dimension((d) => {
  return curTimeInterval(d.timeReportedDate);
});
// Summing 1 per record makes each bucket's value the number of records in it.
reportedGroup = dateDim.group().reduceSum((d) => 1);


// Smallest and largest bucket keys across all records...
let minDate = d3.min(reportedGroup.all(), (kv) => {
  return kv.key;
});
let maxDate = d3.max(reportedGroup.all(), (kv) => {
  return kv.key;
});
// ...widened by two intervals on each side for the x domain.
minDate = curTimeInterval.offset(minDate, -2);
maxDate = curTimeInterval.offset(maxDate, 2);

// Composite key [description, time bucket]: the series chart splits it back
// into a series (index 0) and an x value (index 1) via the accessors below.
const runDimension = cf.dimension((d) => {
  return [d.description, curTimeInterval(d.timeReportedDate)];
});


const runGroup = runDimension.group();

// Fills the missing data in the group
const filledSeries = fill_composite_intervals(runGroup, curTimeInterval);

const seriesChart = new dc.SeriesChart('#series');
seriesChart
  .width(768)
  .height(480)
  // Each series is rendered by its own line chart with a cardinal curve.
  .chart(function(c) {
    return new dc.LineChart(c).curve(d3.curveCardinal);
  })
  .x(d3.scaleTime().domain([minDate, maxDate]))
  .xUnits(curTimeInterval.range)
  .brushOn(false)
  .clipPadding(10)
  .elasticY(true)
  .dimension(runDimension)
  // The chart reads the zero-filled wrapper, not runGroup directly.
  .group(filledSeries)
  .mouseZoomable(true)
  .seriesAccessor((d) => {
    return d.key[0];
  })
  .keyAccessor((d) => {
    return d.key[1];
  })
  .valueAccessor((d) => {
    return d.value;
  })
  .legend(dc.legend().x(350).y(350).itemHeight(13).gap(5).horizontal(1).legendWidth(140).itemWidth(70))
  // The chain continues on whatever yAxis() returns, so tickValues is applied
  // to that object rather than to the chart (presumably the y axis; see the
  // dc.js docs).
  // NOTE(review): `min` and `max` are not defined anywhere in this snippet -
  // presumably the smallest/largest group value; confirm.
  .yAxis()
  .tickValues(d3.range(min > 0 ? min - 1 : min, max + 1));

// Adds 40 to the chart's left margin.
seriesChart.margins().left += 40;


/**
 * Wrap a group whose keys are `[category, Date]` pairs in a "fake group" whose
 * `all()` also reports a zero for every interval tick on which a category has
 * no entry.
 *
 * Declared as a function (rather than assigned to an undeclared name) so that
 * it is hoisted and can be called from code that appears earlier in the script.
 *
 * @param {{all: function(): Array<{key: [*, Date], value: number}>}} group
 *   Source group; its `all()` result is never modified.
 * @param {{range: function(Date, Date): Date[]}} interval
 *   Time interval exposing `range(start, stop)` (for example a d3 time
 *   interval).
 * @returns {{all: function(): Array<{key: [*, Date], value: number}>}}
 *   Object whose `all()` returns the entries grouped by category (in order of
 *   first appearance by time), each category sorted by time with zeros filled
 *   in. An empty source yields an empty array.
 */
function fill_composite_intervals(group, interval) {
  return {
    all: function() {
      const retVal = [];
      // Work on a copy: sorting the array returned by group.all() in place
      // would reorder the group's own result for every other consumer.
      const allArray = group.all().slice();
      if (!allArray.length) {
        return retVal;
      }
      allArray.sort((a, b) => a.key[1].getTime() - b.key[1].getTime());

      // Every tick between the earliest and the latest data point. With d3
      // intervals the stop date itself is excluded from the range, so entries
      // at or after the last tick are appended verbatim further down.
      const target = interval.range(
        allArray[0].key[1],
        allArray[allArray.length - 1].key[1]);

      // Split the time-sorted entries into one list per category.
      const allMap = new Map();
      allArray.forEach(({key: [cat, time], value}) => {
        let innerArray = allMap.get(cat);
        if (!innerArray) {
          innerArray = [];
          allMap.set(cat, innerArray);
        }
        innerArray.push({key: new Date(time), value});
      });

      // Merge each category with the tick list: data points are kept, ticks
      // that have no data point become zeros.
      allMap.forEach((orig, cat) => {
        let oi = 0;
        let ti = 0;
        while (oi < orig.length && ti < target.length) {
          if (orig[oi].key <= target[ti]) {
            retVal.push({key: [cat, orig[oi].key], value: orig[oi].value});
            // Consume the tick only when the data point sits exactly on it.
            if (orig[oi].key.getTime() === target[ti].getTime()) {
              ++ti;
            }
            ++oi;
          } else {
            retVal.push({key: [cat, target[ti]], value: 0});
            ++ti;
          }
        }
        // Data points left after the ticks ran out.
        for (; oi < orig.length; ++oi) {
          retVal.push({key: [cat, orig[oi].key], value: orig[oi].value});
        }
        // Ticks left after the data ran out.
        for (; ti < target.length; ++ti) {
          retVal.push({key: [cat, target[ti]], value: 0});
        }
      });

      return retVal;
    }
  };
}

d3.js

dc.js

crossfilter

回答 2

Stack Overflow用户

发布于 2020-03-15 22:18:17

我从创建a fiddle开始，它说明了这个问题。这里有趣的是一个选择菜单，它显示了哪些时间间隔适合图表的数据和缩放级别(域)。

它不适合显示超过宽度/2的点(因为它们不会被渲染)，也不适合显示少于两个点，所以“不合适的”选项是灰色斜体的：

它使用一个对象将间隔名称映射到相应d3间隔中的毫秒数：

// Length in milliseconds associated with each d3 time interval, keyed by the
// interval's property name on the `d3` object and listed from finest to
// coarsest. Month and year are fixed approximations (a 365-day year and one
// twelfth of it), not calendar-exact lengths.
const intervals = {
  timeSecond: 1000,
  timeMinute: 60 * 1000,
  timeHour: 60 * 60 * 1000,
  timeDay: 24 * 60 * 60 * 1000,
  timeWeek: 7 * 24 * 60 * 60 * 1000,
  timeMonth: (365 * 24 * 60 * 60 * 1000) / 12,
  timeYear: 365 * 24 * 60 * 60 * 1000,
};

allowed_intervals确定第一个和最后一个适当的间隔：

/**
 * Determine the finest and the coarsest time interval that are appropriate for
 * showing `dateDomain` on `chart`.
 *
 * The finest interval is the first entry of `intervals` that would yield fewer
 * than `chart.width() / 2` points over the domain. The coarsest interval is the
 * last entry for which the two ends of the domain fall into different buckets,
 * i.e. the chart would still show at least two points.
 *
 * @param {{width: function(): number}} chart - chart whose pixel width limits
 *   the number of points.
 * @param {Object<string, number>} intervals - interval name -> length in
 *   milliseconds, ordered from finest to coarsest; every name must also be a
 *   flooring function on the global `d3` object.
 * @param {[Date, Date]} dateDomain - start and end of the visible domain.
 * @returns {[string, string]} names of the finest and coarsest allowed
 *   intervals.
 * @throws {Error} 'date range too long' when even the coarsest interval yields
 *   too many points; 'date range too short' when both ends of the domain fall
 *   into the same bucket of every interval.
 */
function allowed_intervals(chart, intervals, dateDomain) {
  const [start, end] = dateDomain;
  const span = end.getTime() - start.getTime();
  const maxPoints = chart.width() / 2;
  const entries = Object.entries(intervals);

  const first = entries.find(([, ms]) => span / ms < maxPoints);
  if (!first) {
    throw new Error('date range too long');
  }

  // Search from the coarsest interval down for one that separates the ends.
  const last = [...entries].reverse().find(
    ([iname]) => d3[iname](start).getTime() !== d3[iname](end).getTime());
  if (!last) {
    // Previously this fell through to `last[0]` and failed with an opaque
    // TypeError; report the actual problem instead.
    throw new Error('date range too short');
  }
  return [first[0], last[0]];
}

所以这一切都很好。这个例子打印了结果数据，我们可以看到，如果我们用d3.timeMinute填充示例数据，它会从原始的15个数据点中产生332482个数据点。这显然是太多的数据，特别是对于一个简单的例子。

这是一个用于查找合适的 d3 时间间隔的、还算可行的算法。然而，一旦启用缩放，它就会失效：例如，我们可以放大到只有一个小时的范围，此时 timeMinute 是合适的；但如果对全部数据都使用这个间隔，产生的点就会太多，图表会慢到几乎停止。

所以我开始思考如何让它更有效率。我们实际上不需要填补每个缺失的时间间隔。我们真正需要的是确保在数据从非零变为零时捕获下降沿，以及当数据从零变为非零时捕获上升沿。在这些情况下，我们只需要在输入数据中添加零。

这是一个新版本的fill_composite_intervals，它使用上升沿和下降沿，只添加显示这些边缘所需的数量的零：

/**
 * Fake group that adds zeros only where a category's line has to drop to, or
 * rise from, zero, instead of one zero per empty interval.
 *
 * @param group    group with keys [category, time] and numeric values
 * @param interval d3 time interval (uses `range` and `offset`)
 * @returns object whose `all()` returns the same kind of entries, grouped by
 *          category, with the edge zeros inserted
 */
function fill_composite_intervals(group, interval) {
  function all() {
    // never reorder the array owned by the source group
    const rows = group.all().slice();
    if (rows.length === 0) {
      return [];
    }
    rows.sort((a, b) => a.key[1].getTime() - b.key[1].getTime());

    // every tick from one interval before the first data point up to (with d3
    // intervals, excluding) two intervals after the last one
    const firstTime = rows[0].key[1];
    const lastTime = rows[rows.length - 1].key[1];
    const ticks = interval.range(
      interval.offset(firstTime, -1),
      interval.offset(lastTime, 2));

    // bucket the time-sorted rows by category, copying each date
    const perCategory = new Map();
    for (const {key: [cat, time], value} of rows) {
      if (!perCategory.has(cat)) {
        perCategory.set(cat, []);
      }
      perCategory.get(cat).push({key: new Date(time), value});
    }

    const filled = [];
    for (const [cat, points] of perCategory) {
      let p = 0;                  // next data point of this category
      let t = 0;                  // next tick
      let zeroJustPushed = false; // previous step emitted a falling-edge zero
      let zeroPending = false;    // an empty tick was skipped since that zero

      while (p < points.length && t < ticks.length) {
        const point = points[p];
        const tick = ticks[t];
        if (point.key <= tick) {
          if (zeroPending) {
            // rising edge: put a zero on the tick right before this point
            filled.push({key: [cat, ticks[t - 1]], value: 0});
            zeroPending = false;
          }
          filled.push({key: [cat, point.key], value: point.value});
          p += 1;
          if (point.key.getTime() === tick.getTime()) {
            t += 1;
          }
          zeroJustPushed = false;
        } else {
          if (zeroJustPushed) {
            zeroPending = true;
          } else {
            // falling edge: first empty tick after a value (or at the start)
            filled.push({key: [cat, tick], value: 0});
            zeroJustPushed = true;
          }
          t += 1;
        }
      }

      // data points beyond the last tick are passed through unchanged
      for (; p < points.length; p += 1) {
        filled.push({key: [cat, points[p].key], value: points[p].value});
      }
      // a single closing zero when ticks remain
      if (t < ticks.length) {
        filled.push({key: [cat, ticks[t]], value: 0});
      }
    }
    return filled;
  }

  return {all};
}

有关说明，请参阅代码中的注释。它产生的数据量只与输入数据成比例，例如使用 timeMinute 时，15 个输入点只产生 67 个点，而不是 30 多万个。

有趣的是，我发现当零值较少时，d3.curveCardinal 会产生奇怪的伪影（artifacts）。直觉上，我认为如果跳过一些点，这条线就会获得过多的“动量”。所以我改用了 d3.curveMonotoneX。我认为无论如何，这都是更合适的选择。

  .curve(d3.curveMonotoneX)

我还在开头和结尾填充了interval.range，这样数据的开头和结尾都是零，这更吸引人。

当您选择d3.timeSecond时，这个示例仍然很慢(它仍然遍历300+K点)，但是它似乎执行得很好，直到timeMinute，它似乎捕捉到了这些数据的分辨率。

进一步可能的改进：

添加更多的前导零和尾随零，使曲线保持一致/对称
不再计算后又丢弃大量的点；而是只使用 interval.offset 以及前一个/后一个数据点来检测上升沿和下降沿（有难度！）

Example fiddle。

票数 1

Stack Overflow用户

发布于 2020-03-21 09:30:23

由于我之前的回答在使用小时间间隔时仍然太慢，所以我重写了循环的核心。

与在开始和结束之间遍历整个日期范围相比，只查看数据并检测在最后一个数据点和这个数据点之间是否应该添加一个或两个零要快得多，也简单得多。

fill_composite_intervals的核心现在看起来像

  // Earliest and latest time key in the data, each passed through `interval`.
  // Only `begin` is used in the lines shown here.
  // NOTE(review): `allArray`, `allMap` and `interval` come from the enclosing
  // function, which is not part of this fragment.
  const [begin, end] = d3.extent(allArray, ({key}) => key[1]).map(interval);

  // Walk each category, looking only at its own data points: zeros are added
  // around a point when the gap since the previous point calls for them,
  // instead of visiting every interval of the overall date range.
  allMap.forEach((value, key, map) => {
    // copy the dates so the source entries are not shared with the result
    const orig = value.map(({key, value}) => ({key: new Date(key), value}));

    const result = [];
    if (orig.length) {
      // Start two intervals before the first bucket so that the first data
      // point of the category is preceded by at least one zero.
      let last = interval.offset(begin, -2);
      for(let oi = 0; oi < orig.length; ++oi) {
        // gap between the previous point and this one, as reported by
        // interval.count (see the d3-time docs for its exact boundary rules)
        const count = interval.count(last, orig[oi].key);
        // count 0 or 1: nothing to insert between the two points
        if(count === 0 || count === 1) ;
        else {
          // falling edge: zero one interval after the previous point
          result.push({key: interval.offset(last, 1), value: 0});
          // rising edge: for a larger gap, a second zero one interval before
          // this point
          if(count > 2)
            result.push({key: interval.offset(orig[oi].key, -1), value: 0});
        }
        result.push(orig[oi]);
        last = orig[oi].key;
      }
      // one trailing zero after the category's last data point
      result.push({key: interval.offset(orig[orig.length-1].key, 1), value: 0});
    }
    // replace the category's entry with the padded list
    map.set(key, result);
  });

Faster fiddle。

更新:更平滑、对称的曲线

第一条和最后一条曲线变形，因为它们缺少样条线上的控制点，无法使边处的坡度为0。

我们可以在开头和结尾再加一个零。

下面是用于多时间线图表的、既快速又平滑的伪分组（fake group）。

/**
 * Fake group for multi-series time charts: per category, surrounds runs of
 * data points with zeros so every line starts from, drops to and ends on zero,
 * without visiting the empty intervals in between.
 *
 * @param group    group with keys [category, time] and numeric values
 * @param interval d3 time interval (uses `offset` and `count`)
 * @returns object whose `all()` returns entries of the same shape, grouped by
 *          category and sorted by time within each category
 */
function fill_composite_intervals(group, interval) {
  // Time-sorted copy of the group's entries, split into one list per category.
  const splitByCategory = () => {
    const entries = group.all().slice();
    entries.sort((a, b) => a.key[1].getTime() - b.key[1].getTime());
    const series = new Map();
    for (const {key: [cat, time], value} of entries) {
      const points = series.get(cat);
      if (points) {
        points.push({key: new Date(time), value});
      } else {
        series.set(cat, [{key: new Date(time), value}]);
      }
    }
    return series;
  };

  // Append one category's points to `out`, with zeros inserted around gaps.
  const padSeries = (cat, points, out) => {
    const zeroAt = (time) => ({key: [cat, time], value: 0});

    // Pretend the previous point was three intervals before the first one, so
    // the first point is handled like any other point that follows a long gap.
    let previous = interval.offset(points[0].key, -3);
    for (const point of points) {
      const gap = interval.count(previous, point.key);
      if (gap !== 0 && gap !== 1) {
        // zero right after the previous point...
        out.push(zeroAt(interval.offset(previous, 1)));
        if (gap > 2) {
          // ...and, for longer gaps, another one right before this point
          out.push(zeroAt(interval.offset(point.key, -1)));
        }
      }
      out.push({key: [cat, point.key], value: point.value});
      previous = point.key;
    }

    // two trailing zeros after the final point
    const tail = points[points.length - 1].key;
    out.push(zeroAt(interval.offset(tail, 1)));
    out.push(zeroAt(interval.offset(tail, 2)));
  };

  return {
    all: function() {
      const out = [];
      for (const [cat, points] of splitByCategory()) {
        padSeries(cat, points, out);
      }
      return out;
    }
  };
}

Smoother fiddle。

票数 1

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/60607301

复制

相似问题

问dc.js系列图表-填充缺失数据时图表速度太慢
EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问dc.js系列图表-填充缺失数据时图表速度太慢EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问dc.js系列图表-填充缺失数据时图表速度太慢
EN