React Scheduler: Scheduler 源码分析

TL;DR#

全文就是在画下面这个图

说明#

Scheduler 作为 react 内任务调度的核心是源码阅读绕不开的点（时间切片这一概念其实就存在于该包内），不过幸运的是，该模块是独立的一个包（可脱离 react 内的一些概念，因此逻辑十分干净），并且在打包后也只有小 700 行左右，因此阅读难度并不算大。

在 Scheduler 内存在两个概念，这里提一下

优先级：Scheduler 内生成的 task 会根据优先级决定执行顺序 (任务的过期时间)，因此可以做到让高优先级的任务尽快的被执行。
时间切片：JS 线程与 GUI 线程互斥，因此如果 JS 任务执行时间过长会阻塞页面的渲染从而造成卡顿，因此引入时间切片（通常为 5ms）这一概念来限制任务的执行时间，一旦超时就打断当前执行的任务，将主线程让给 GUI 线程避免卡顿。

接下来本文将从 unstable_scheduleCallback 这一函数开始对源码进行分析，该函数可以被视为 Scheduler 的入口函数。

unstable_scheduleCallback#

Scheduler 通过暴露该函数给外界以提供在 Scheduler 内注册 task 的能力。阅读该函数需要关注以下几点

Scheduler 会根据传入的优先级（priorityLevel）生成具备不同过期时间的 task 对象。
Scheduler 内部存在两个数组 timerQueue、taskQueue 用以管理 task，前者管理 delay > 0 的 task（并不急于更新的 task）、后者管理 delay <= 0 的 task（急于更新的 task）。

/**
 * Registers `callback` as a new task and returns the task object.
 *
 * A task whose start time lies in the future (a positive numeric
 * `options.delay` was given) is stored in `timerQueue`, ordered by start
 * time. Every other task goes straight into `taskQueue`, ordered by
 * expiration time.
 *
 * @param {*} priorityLevel One of the *Priority constants. It selects the
 *   timeout that is added to the start time; unrecognized values get the
 *   normal-priority timeout.
 * @param {Function} callback Function stored on the task.
 * @param {{delay?: number}} [options] Optional settings; only `delay` is read.
 * @returns {Object} The newly created task.
 */
function unstable_scheduleCallback(priorityLevel, callback, options) {
  // Sampled once: both the start time and the delayed/ready decision below
  // are derived from this value.
  var currentTime = exports.unstable_now();

  // The caller may postpone the task through a positive numeric delay.
  var requestedDelay =
    typeof options === 'object' && options !== null ? options.delay : undefined;
  var startTime =
    typeof requestedDelay === 'number' && requestedDelay > 0
      ? currentTime + requestedDelay
      : currentTime;

  // Each priority maps to its own timeout. The immediate timeout is negative,
  // so such a task counts as expired from the moment it is registered.
  var timeout;
  if (priorityLevel === ImmediatePriority) {
    timeout = IMMEDIATE_PRIORITY_TIMEOUT; // -1
  } else if (priorityLevel === UserBlockingPriority) {
    timeout = USER_BLOCKING_PRIORITY_TIMEOUT; // 250
  } else if (priorityLevel === IdlePriority) {
    timeout = IDLE_PRIORITY_TIMEOUT; // 1073741823
  } else if (priorityLevel === LowPriority) {
    timeout = LOW_PRIORITY_TIMEOUT; // 10000
  } else {
    // NormalPriority and anything unrecognized.
    timeout = NORMAL_PRIORITY_TIMEOUT; // 5000
  }

  var expirationTime = startTime + timeout;
  var newTask = {
    id: taskIdCounter++,
    callback, // the function registered for this task
    priorityLevel,
    startTime,
    expirationTime,
    sortIndex: -1, // ordering key inside the queues; assigned below
  };

  var isDelayed = startTime > currentTime;

  if (!isDelayed) {
    // No delay: the task is ready, so it is ordered by expiration time.
    newTask.sortIndex = expirationTime;
    push(taskQueue, newTask);

    // Request a flush unless one is already requested or currently running.
    if (!(isHostCallbackScheduled || isPerformingWork)) {
      isHostCallbackScheduled = true;
      requestHostCallback(flushWork);
    }

    return newTask;
  }

  // Delayed task: it waits in timerQueue, ordered by start time.
  newTask.sortIndex = startTime;
  push(timerQueue, newTask);

  // Only when nothing is ready to run and this task is now the earliest timer
  // does the host timeout have to be (re)armed for it.
  var isEarliestPending =
    peek(taskQueue) === null && newTask === peek(timerQueue);

  if (isEarliestPending) {
    if (isHostTimeoutScheduled) {
      // Replace the pending host timeout with one for this task.
      cancelHostTimeout();
    } else {
      isHostTimeoutScheduled = true;
    }

    // handleTimeout is invoked once the remaining delay has elapsed.
    requestHostTimeout(handleTimeout, startTime - currentTime);
  }

  return newTask;
}

至此，根据上述逻辑我们可以绘制出下图

接下来稍微看下 handleTimeout 里面具体做了什么

/**
 * Host-timeout handler.
 *
 * First lets advanceTimers re-sort timerQueue / taskQueue for `currentTime`.
 * Then, unless a flush has already been requested:
 *  - if taskQueue holds a task, it requests a flush (the same entry point
 *    unstable_scheduleCallback uses);
 *  - otherwise, if timerQueue still holds a task, it re-arms the host timeout
 *    so this handler runs again when that task reaches its start time.
 *
 * @param {number} currentTime Timestamp at which the timeout fired.
 */
function handleTimeout(currentTime) {
  isHostTimeoutScheduled = false;
  advanceTimers(currentTime);

  // A flush is already pending; it will take care of the queues.
  if (isHostCallbackScheduled) {
    return;
  }

  // Something is ready to run: start scheduling.
  if (peek(taskQueue) !== null) {
    isHostCallbackScheduled = true;
    requestHostCallback(flushWork);
    return;
  }

  // Nothing is ready yet. Wait for the earliest delayed task, if there is one.
  var nextTimer = peek(timerQueue);
  if (nextTimer !== null) {
    requestHostTimeout(handleTimeout, nextTimer.startTime - currentTime);
  }
}

/**
 * Drains the front of timerQueue for the given time.
 *
 * Cancelled timers (callback set to null, which is how
 * unstable_cancelCallback marks a task) are discarded. Timers whose start
 * time has been reached are moved into taskQueue, re-keyed by expiration
 * time. The walk stops at the first timer that is still pending.
 *
 * @param {number} currentTime Timestamp the timers are compared against.
 */
function advanceTimers(currentTime) {
  for (var head = peek(timerQueue); head !== null; head = peek(timerQueue)) {
    if (head.callback === null) {
      // Cancelled: drop it and look at the next one.
      pop(timerQueue);
      continue;
    }

    var hasFired = head.startTime <= currentTime;
    if (!hasFired) {
      // The earliest timer is still pending, so every later one is as well.
      return;
    }

    // Start time reached: transfer to the task queue.
    pop(timerQueue);
    head.sortIndex = head.expirationTime;
    push(taskQueue, head);
  }
}

入口相关的部分便分析结束，从上面的代码中不难注意到 requestHostCallback (flushWork) 这一个代码片段，而它也将成为我们分析调度行为的一个入口。

requestHostCallback#

requestHostCallback 可以说是 Scheduler 内触发调度行为的一个入口，因此对它的解析也是分析 Scheduler 的重点之一。接下来我们来看一下它的实现。

/**
 * Stores `callback` as the pending host callback and, if the message loop is
 * not already running, starts it.
 *
 * In this walkthrough the only caller passes flushWork, so
 * scheduledHostCallback ends up referring to flushWork.
 *
 * @param {Function} callback Function to run on the next scheduled turn.
 */
function requestHostCallback(callback) {
  scheduledHostCallback = callback;

  // The loop is already running and will read scheduledHostCallback itself.
  if (isMessageLoopRunning) {
    return;
  }

  isMessageLoopRunning = true;
  schedulePerformWorkUntilDeadline(); // the key call: requests the next turn
}

上面代码段内不难看出 schedulePerformWorkUntilDeadline 是其关键函数，然而其声明区分了三种场景

NodeJs 环境，面向服务端渲染 —— setImmediate
浏览器环境，面向 web 应用 —— MessageChannel
兼容环境 —— setTimeout

不过本文只关注分析浏览器环境，下面是其声明。

// Browser variant of the scheduling trigger. Messages are posted on port2,
// and port1's message handler is performWorkUntilDeadline.
var channel = new MessageChannel();
var port = channel.port2;
channel.port1.onmessage = performWorkUntilDeadline;

// Posting a message (the payload is unused) requests one asynchronous run of
// performWorkUntilDeadline.
schedulePerformWorkUntilDeadline = function () {
  port.postMessage(null);
};

MessageChannel 请自行看 MDN，这里只强调一点：MessageChannel 触发的异步任务类型为 MacroTask，因此在该任务执行结束后，浏览器通常都有机会进行一次 render。

由上述代码可知，每次调用 schedulePerformWorkUntilDeadline 都会触发 performWorkUntilDeadline，那么接下来看看这个函数里面是什么。

/**
 * One turn of the message loop.
 *
 * Runs the pending host callback (flushWork, as installed by
 * requestHostCallback) and then either requests another turn or stops the
 * loop, depending on whether the callback reported remaining work.
 */
var performWorkUntilDeadline = function () {
  if (scheduledHostCallback === null) {
    // Nothing was requested: the loop stops here.
    isMessageLoopRunning = false;
    return;
  }

  var turnStart = exports.unstable_now();
  // Module-level start time of this turn; shouldYieldToHost measures from it.
  startTime = turnStart;

  // Starts out true so that a throwing callback still gets another turn
  // scheduled before the error propagates.
  var moreWorkPending = true;

  try {
    // The callback is always told it has time remaining (first argument).
    // Its return value says whether tasks are still waiting in taskQueue.
    moreWorkPending = scheduledHostCallback(true, turnStart);
  } finally {
    if (!moreWorkPending) {
      isMessageLoopRunning = false;
      scheduledHostCallback = null;
    } else {
      // Work remains: schedule the next message event at the end of this one.
      schedulePerformWorkUntilDeadline();
    }
  }
};

Scheduler 的示意图在此时其实已经将大致框架画出来了。

接下来需要补全 performWorkUntilDeadline 的内容，在接下来的分析过程中，我们很快就会讲到时间切片相关的内容

flushWork 与 workLoop#

performWorkUntilDeadline 内会调用 scheduledHostCallback，而 scheduledHostCallback 不过是 flushWork 的别名。（见 requestHostCallback）
但 flushWork 内其实也只需要关注 workLoop 就行了，在 workLoop 内会涉及到时间切片与中断恢复这两个核心概念

/**
 * Wrapper around workLoop: sets up scheduler state, runs the loop and
 * restores state afterwards.
 *
 * @param {boolean} hasTimeRemaining Forwarded to workLoop
 *   (performWorkUntilDeadline always passes true).
 * @param {number} initialTime Timestamp taken when performWorkUntilDeadline
 *   started; forwarded to workLoop.
 * @returns {boolean} Whatever workLoop returns (whether work remains).
 */
function flushWork(hasTimeRemaining, initialTime) {
  isHostCallbackScheduled = false;

  if (isHostTimeoutScheduled) {
    // A host timeout was scheduled but is no longer needed: taskQueue is
    // being drained right now, so the pending timer is cancelled.
    isHostTimeoutScheduled = false;
    cancelHostTimeout();
  }

  isPerformingWork = true;
  var priorityBeforeFlush = currentPriorityLevel;

  try {
    if (!enableProfiling) {
      // No catch in the production code path.
      return workLoop(hasTimeRemaining, initialTime);
    }

    try {
      // Calling workLoop is the whole point of flushWork.
      return workLoop(hasTimeRemaining, initialTime);
    } catch (error) {
      // Profiling only: record which task was running when the error
      // happened, then let the error propagate unchanged.
      if (currentTask !== null) {
        var erroredAt = exports.unstable_now();
        markTaskErrored(currentTask, erroredAt);
        currentTask.isQueued = false;
      }

      throw error;
    }
  } finally {
    // Runs on both normal return and throw.
    currentTask = null;
    currentPriorityLevel = priorityBeforeFlush;
    isPerformingWork = false;
  }
}

/**
 * Walks taskQueue and runs the callback of each task until the queue is
 * empty or the loop has to yield.
 *
 * @param {boolean} hasTimeRemaining When false, any unexpired task stops the
 *   loop immediately.
 * @param {number} initialTime Timestamp used until the first callback has
 *   run; afterwards the time is re-read after every callback.
 * @returns {boolean} true when a task is still waiting in taskQueue (the
 *   caller uses this as `hasMoreWork`), false when taskQueue was drained.
 */
function workLoop(hasTimeRemaining, initialTime) {
  var now = initialTime;

  // Move timers that have reached their start time into taskQueue first.
  advanceTimers(now);

  for (
    currentTask = peek(taskQueue);
    currentTask !== null && !enableSchedulerDebugging;
    currentTask = peek(taskQueue)
  ) {
    // The loop carries on while the task has already expired OR the time
    // slice is not used up yet. It stops only for an unexpired task once the
    // slice is exhausted (shouldYieldToHost holds the time-slice logic).
    var isUnexpired = currentTask.expirationTime > now;
    if (isUnexpired && (!hasTimeRemaining || shouldYieldToHost())) {
      break;
    }

    var taskCallback = currentTask.callback;

    // The callback is either a function (a live task) or null (a cancelled
    // task). Cancelled tasks have nothing to run, so they are just removed.
    if (typeof taskCallback !== 'function') {
      pop(taskQueue);
      continue;
    }

    currentTask.callback = null;
    currentPriorityLevel = currentTask.priorityLevel;

    // The callback receives whether its task had already expired. A callback
    // that could not finish (for example because the time slice ran out)
    // returns a function; that function resumes the interrupted work later.
    var continuation = taskCallback(currentTask.expirationTime <= now);
    now = exports.unstable_now();

    if (typeof continuation === 'function') {
      // Unfinished: keep the task queued with the continuation as its
      // callback, so the next pass picks the work up again.
      currentTask.callback = continuation;
    } else if (currentTask === peek(taskQueue)) {
      // Finished: remove the task, provided it is still at the front.
      pop(taskQueue);
    }

    // The callback took time; more timers may be due now.
    advanceTimers(now);
  }

  // Reached after a normal loop exit and after a `break` alike. A task that
  // is still current means the loop was interrupted and work remains.
  if (currentTask !== null) {
    return true;
  }

  // taskQueue is drained. If a delayed task is still waiting, arm the host
  // timeout for its start time.
  var nextTimer = peek(timerQueue);
  if (nextTimer !== null) {
    requestHostTimeout(handleTimeout, nextTimer.startTime - now);
  }

  return false;
}

经过上方的分析，可以画出 performWorkUntilDeadline 内的大致操作

接下来我们来看看这个时间切片是个什么东西。。

/**
 * Decides whether the running work should hand the main thread back to the
 * host.
 *
 * Measures the time since `startTime` (set by performWorkUntilDeadline at the
 * start of each turn) and compares it with `frameInterval`, the time slice.
 * `frameInterval` is defined outside this snippet; the article annotates it
 * as 5 ms by default.
 *
 * @returns {boolean} false while the time slice is not used up. Otherwise
 *   true, or the result of `isInputPending` when input-pending checks are
 *   enabled, that function is available and the thread has not been blocked
 *   for `maxInterval` yet.
 */
function shouldYieldToHost() {
  const blockedFor = getCurrentTime() - startTime;

  // Blocked for less than one slice: plenty of time left, do not yield.
  if (blockedFor < frameInterval) {
    return false;
  }

  // From here on the thread has been blocked for a noticeable amount of time.
  // Painting and user input are the main things to make room for. Without
  // input-pending checks, or with a paint pending (signaled by
  // `requestPaint`), yield right away.
  if (!enableIsInputPending || needsPaint) {
    return true;
  }

  if (blockedFor < continuousInputInterval) {
    // Not blocked for long: yield only for pending discrete input (e.g.
    // click). Pending continuous input (e.g. mouseover) is acceptable.
    // Without `isInputPending`, yield.
    return isInputPending !== null ? isInputPending() : true;
  }

  if (blockedFor < maxInterval) {
    // Blocked longer: yield for pending discrete or continuous input.
    // Without `isInputPending`, yield.
    return isInputPending !== null ? isInputPending(continuousOptions) : true;
  }

  // Blocked for a long time. Even without pending input there may be other
  // scheduled work we do not know about, such as a network event. Yield.
  return true;
}

时间切片看着这名字牛逼哄哄，其实就是 Scheduler 每次连续占用主线程的时间上限（默认 5ms；如果按帧率来设置，就是一帧的耗时）。然而为什么要设计这么个东西，其实道理也很简单：如果渲染进程的主线程一直被 JS 线程占用，GUI 线程无法介入，那么页面便会一直不刷新从而帧数下降，让用户感到卡顿。因此一旦执行任务的耗时超过了时间切片，就需要中断任务、让出主线程，从而让浏览器有机会刷新页面。