Generalized-Core-Counter 3.20
Particle-based generalized core counter firmware
Loading...
Searching...
No Matches
State_Error.cpp
Go to the documentation of this file.
2#include "Config.h"
3#include "Cloud.h"
4#include "LocalTimeRK.h"
5#include "MyPersistentData.h"
6#include "PublishQueuePosixRK.h"
7#include "SensorManager.h"
8#include "device_pinout.h"
9#include "SensorDefinitions.h"
10#include "AB1805_RK.h"
11
12// NOTE:
13// This file was split from StateHandlers.cpp as a mechanical refactor.
14// No behavioral changes were made.
15
16// Decide what corrective action to take for the current alert.
17//
18// Uses the current alert code and resetCount to choose between:
19// - 0: No action (return to IDLE and try again later)
20// - 2: Soft reset via System.reset()
21// - 3: Hard recovery using AB1805 deep power down
22//
23// The mapping is intentionally conservative to avoid thrashing:
24// - Out-of-memory (14): up to 3 soft resets, then stop resetting.
25// - Modem/disconnect failure (15) and connect timeout (31):
26// a couple of soft resets, then a hard power-cycle, then stop.
27// - Sleep failures (16): soft reset, then hard power-cycle, then stop.
28static int resolveErrorAction() {
29 int8_t alert = current.get_alertCode();
30 uint8_t resets = sysStatus.get_resetCount();
31
32 if (alert <= 0) {
33 return 0;
34 }
35
36 switch (alert) {
37 case 14: // out-of-memory
38 if (resets >= 3) {
39 Log.info("OOM alert but reset count=%u; suppressing further resets", resets);
40 return 0;
41 }
42 return 2; // soft reset
43
44 case 15: // modem or disconnect failure
45 case 31: // failed to connect to cloud
46 case 44: // prolonged offline (>3 hours during open hours)
47 if (resets >= 4) {
48 Log.info("Connectivity alert %d with reset count=%u; suppressing further resets", alert, resets);
49 return 0;
50 }
51 if (resets >= 2) {
52 return 3; // escalate to hard power-cycle after a few soft resets
53 }
54 return 2; // start with soft reset
55
56 case 40: { // repeated webhook failures
57 if (!Time.isValid()) {
58 Log.info("Alert 40 set but time is invalid - deferring corrective action");
59 return 0;
60 }
61
62 time_t lastHook = sysStatus.get_lastHookResponse();
63 if (lastHook == 0) {
64 Log.info("Alert 40 set but no recorded lastHookResponse - deferring corrective action");
65 return 0;
66 }
67
68 time_t now = Time.now();
69 if ((now - lastHook) > (3 * 3600L)) {
70 Log.info("Alert 40 - no successful webhook response for >3 hours, scheduling soft reset");
71 return 2; // soft reset to try to recover integration path
72 }
73
74 Log.info("Alert 40 active but webhook response is recent - no reset needed");
75 return 0;
76 }
77
78 case 16: { // repeated sleep failures (HIBERNATE / ULTRA_LOW_POWER)
79 // If both HIBERNATE and ULTRA_LOW_POWER are failing to honour
80 // long sleep requests, treat this as a platform-level fault.
81 // Start with a soft reset, then escalate to an AB1805 deep
82 // power-down, and finally stop resetting to avoid thrash.
83 if (resets >= 4) {
84 Log.info("Alert 16 with reset count=%u; suppressing further resets", resets);
85 return 0;
86 }
87 if (resets >= 1) {
88 return 3; // after first reset, try a hard power-cycle
89 }
90 return 2; // start with a soft reset
91 }
92
93 default:
94 // Unknown or less critical alert: don't take drastic action here.
95 return 0;
96 }
97}
98
99// ERROR_STATE: Error supervisor: decide recovery action
101 static unsigned long resetTimer = 0;
102 static int resolution = 0;
103
104 if (state != oldState) {
106
107 // Safety: regardless of recovery choice, do not leave radio/modem powered
108 // while we sit in ERROR_STATE waiting for reset.
110
111 // In LOW_POWER or DISCONNECTED modes, avoid reset loops for connectivity/sleep alerts.
112 if (sysStatus.get_operatingMode() != CONNECTED) {
113 int8_t alert = current.get_alertCode();
114 if (alert == 15 || alert == 16 || alert == 31) {
115 Log.warn("Low-power mode: clearing alert %d to avoid reset loop", alert);
116 current.set_alertCode(0);
117 current.set_lastAlertTime(0);
118 resolution = 0;
119 } else {
120 resolution = resolveErrorAction();
121 }
122 } else {
123 resolution = resolveErrorAction();
124 }
125 Log.info("Entering ERROR_STATE with alert=%d, resetCount=%u, resolution=%d",
126 current.get_alertCode(), sysStatus.get_resetCount(), resolution);
127 resetTimer = millis();
128 }
129
130 switch (resolution) {
131 case 0:
132 // No automatic recovery; return to IDLE so the normal state machine
133 // can continue and we rely on future hourly reports to surface the
134 // issue.
136 break;
137
138 case 2:
139 // Soft reset after a short delay to allow any queued publishes to
140 // flush.
141 if (millis() - resetTimer > resetWait) {
142 Log.info("Executing soft reset from ERROR_STATE");
143 System.reset();
144 }
145 break;
146
147 case 3:
148 // Hard recovery using AB1805 deep power down after delay. This fully
149 // power-cycles the device and modem but is limited by resolveErrorAction
150 // to avoid thrashing.
151 if (millis() - resetTimer > resetWait) {
152 Log.info("Executing deep power down from ERROR_STATE (alert=%d)", current.get_alertCode());
153 ab1805.deepPowerDown();
154 }
155 break;
156
157 default:
158 // Should not happen, but don't get stuck here.
160 break;
161 }
162}
Cloud Configuration Management - Particle Ledger integration for device configuration.
Global compile-time configuration options and enums.
const unsigned long resetWait
void publishStateTransition()
Persistent Data Storage Structures - EEPROM/Retained Memory Management.
#define sysStatus
#define current
@ CONNECTED
Singleton wrapper around ISensor implementations.
void requestFullDisconnectAndRadioOff()
void handleErrorState()
@ IDLE_STATE
AB1805 ab1805
Pinout definitions for the carrier board and sensors.