概念
Android 8.0 中纳入了一个功能,当该功能注意到核心系统组件陷入崩溃循环僵局时,就会派出“救援程序”。然后救援程序会通过一系列操作来上报相关情况,以期恢复设备。最后的解决方法是,Rescue Party 使设备重新启动并进入恢复模式,然后提示用户恢复出厂设置。
救援程序启动策略
在出现以下情况时,救援程序会收到有关启动和崩溃事件的信息,然后即会启动:
- system_server 在 5 分钟内重启 5 次以上。
- 永久性系统应用在 30 秒内崩溃 5 次以上。
当检测到上述某种情况时,救援程序会将其上报给下一救援级别、处理与该级别相关联的任务,并让设备继续运行,看看能否恢复。清除或重置内容的程度随级别而增加。最高级别会提示用户将设备恢复出厂设置。
无需特别的硬件支持即可为救援程序提供支持。实现后,设备的恢复系统必须响应 –prompt_and_wipe_data 命令,且设备必须先提供一种方法,让用户确认用户数据是否有任何损坏,然后再继续运行。此外,恢复系统还应为用户提供有关尝试再次启动设备的选项。
调试验证
当设备具有有效的 USB 数据连接时,系统会停止所有救援事件,因为这是一个较强的信号,表示有人正在调试设备。
如需停止此类抑制行为,请运行以下命令:
adb shell setprop persist.sys.enable_rescue 1
如需触发低级 system_server 崩溃循环,请运行以下命令:
adb shell setprop debug.crash_system 1
如需触发中级 SystemUI 崩溃循环,请运行以下命令:
adb shell setprop debug.crash_sysui 1
这两个崩溃循环都会启动救援逻辑。所有的救援操作也都会记录到存储在 /data/system/uiderrors.txt 中的永久性 PackageManager 日志中,以供日后进行检查和调试。此外,“软件包警告消息”部分下的 bug 报告中也会包含这些永久性的日志。
Android 10 源码
SystemServer.java
private void startBootstrapServices() {
// Now that we have the bare essentials of the OS up and running, take
// note that we just booted, which might send out a rescue party if
// we're stuck in a runtime restart loop.
RescueParty.noteBoot(mSystemContext);
}
AppErrors.java
void crashApplicationInner(ProcessRecord r, ApplicationErrorReport.CrashInfo crashInfo,
int callingPid, int callingUid) {
if (r.isPersistent() || isApexModule) {
// If a persistent app or apex module is stuck in a crash loop, the device isn't
// very usable, so we want to consider sending out a rescue party.
RescueParty.noteAppCrash(mContext, r.uid);
}
}
RescueParty.java
public class RescueParty {
@VisibleForTesting
static final String PROP_ENABLE_RESCUE = "persist.sys.enable_rescue";
@VisibleForTesting
static final int TRIGGER_COUNT = 5;
@VisibleForTesting
static final String PROP_RESCUE_LEVEL = "sys.rescue_level";
@VisibleForTesting
static final int LEVEL_NONE = 0;
@VisibleForTesting
static final int LEVEL_RESET_SETTINGS_UNTRUSTED_DEFAULTS = 1;
@VisibleForTesting
static final int LEVEL_RESET_SETTINGS_UNTRUSTED_CHANGES = 2;
@VisibleForTesting
static final int LEVEL_RESET_SETTINGS_TRUSTED_DEFAULTS = 3;
@VisibleForTesting
static final int LEVEL_FACTORY_RESET = 4;
@VisibleForTesting
static final String PROP_RESCUE_BOOT_COUNT = "sys.rescue_boot_count";
/**
* The boot trigger window size must always be greater than Watchdog's deadlock timeout
* {@link Watchdog#DEFAULT_TIMEOUT}.
*/
@VisibleForTesting
static final long BOOT_TRIGGER_WINDOW_MILLIS = 600 * DateUtils.SECOND_IN_MILLIS;
@VisibleForTesting
static final long PERSISTENT_APP_CRASH_TRIGGER_WINDOW_MILLIS = 30 * DateUtils.SECOND_IN_MILLIS;
@VisibleForTesting
static final String TAG = "RescueParty";
private static final String PROP_DISABLE_RESCUE = "persist.sys.disable_rescue";
private static final String PROP_RESCUE_BOOT_START = "sys.rescue_boot_start";
private static final String PROP_VIRTUAL_DEVICE = "ro.hardware.virtual_device";
/** Threshold for boot loops */
private static final Threshold sBoot = new BootThreshold();
/** Threshold for app crash loops */
private static SparseArray<Threshold> sApps = new SparseArray<>();
private static boolean isDisabled() {
// Check if we're explicitly enabled for testing
if (SystemProperties.getBoolean(PROP_ENABLE_RESCUE, false)) {
return false;
}
// We're disabled on all engineering devices
if (Build.IS_ENG) {
Slog.v(TAG, "Disabled because of eng build");
return true;
}
// We're disabled on userdebug devices connected over USB, since that's
// a decent signal that someone is actively trying to debug the device,
// or that it's in a lab environment.
if (Build.IS_USERDEBUG && isUsbActive()) {
Slog.v(TAG, "Disabled because of active USB connection");
return true;
}
// One last-ditch check
if (SystemProperties.getBoolean(PROP_DISABLE_RESCUE, false)) {
Slog.v(TAG, "Disabled because of manual property");
return true;
}
return false;
}
/**
* Take note of a boot event. If we notice too many of these events
* happening in rapid succession, we'll send out a rescue party.
*/
public static void noteBoot(Context context) {
if (isDisabled()) return;
if (sBoot.incrementAndTest()) {
sBoot.reset();
incrementRescueLevel(sBoot.uid);
executeRescueLevel(context);
}
}
}