Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,22 @@ Both `WiFiDriverDemo` and `WiFiDriverTxDemo` honour:
DEBUG (produces ~7 MB per 15 s — has filled `/tmp` mid-capture and adds
0.5-0.8 s to init even with stderr discarded). `DEVOURER_USB_QUIET` is
accepted as a no-op for backwards compatibility.
- `DEVOURER_THERMAL_POLL_MS=N` — emit periodic `<devourer-thermal>` lines from
the chip thermal meter (RF[A][0x42][15:10]) paired with the EFUSE baseline:
`raw` (0..63 thermal units, ~1.5-2 °C each, not absolute °C), `baseline`,
`delta = raw − baseline`, and a coarse `status` bucket (cool/warm/hot/critical,
keyed off delta — the meter has no calibrated °C, so this is deliberately
bucketed rather than a fake temperature). Works on every Jaguar chip; read-only (does not
alter TX-power tracking). 0/unset = disabled. In `WiFiDriverDemo` (RX) this
spawns a background poller at the given cadence; in `WiFiDriverTxDemo` it is
read inline on the TX thread (no extra USB contention) every `N/2` TX frames
(e.g. `N=500` → roughly every 250 frames).
Jaguar-1 has no hard thermal TX shutdown — a rising `delta` is the early
warning that the PA is heating and TX power is being backed off. NB: on the
8814 the EFUSE baseline is read at the 8812 offset, so the absolute `delta`
may be off there; the raw trend is still valid.
- `DEVOURER_THERMAL_WARN_DELTA=N` — thermal-units-above-baseline threshold at
which a one-shot `warn` fires (default `15`); re-arms once the chip cools
back below it.

`WiFiDriverTxDemo` additionally honours radiotap-encoding knobs that
patch the beacon's MCS info field (or, with `_VHT=1`, replace it with a
Expand Down
40 changes: 40 additions & 0 deletions demo/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,19 @@ static const uint32_t g_qd_poll_ms = []() -> uint32_t {
return e ? static_cast<uint32_t>(std::strtoul(e, nullptr, 0)) : 0u;
}();

/* DEVOURER_THERMAL_POLL_MS=N — cadence, in milliseconds, of the periodic
 * `<devourer-thermal>` snapshot of the chip thermal meter
 * (RF[A][0x42][15:10]). Unset or 0 leaves the feature disabled. The value is
 * parsed with base auto-detection, so decimal, 0x-hex and 0-octal all work.
 * The companion knob DEVOURER_THERMAL_WARN_DELTA sets the warn threshold
 * (thermal units above the EFUSE baseline; default 15). */
static const uint32_t g_thermal_poll_ms = []() -> uint32_t {
  const char *raw = std::getenv("DEVOURER_THERMAL_POLL_MS");
  if (raw == nullptr) {
    return 0u;
  }
  const unsigned long parsed = std::strtoul(raw, nullptr, 0);
  return static_cast<uint32_t>(parsed);
}();
/* Parse a DEVOURER_THERMAL_WARN_DELTA value (decimal, optionally signed).
 * Returns the default of 15 when `text` is null, contains no leading number
 * (e.g. "" or "abc"), or yields a value that does not survive narrowing from
 * long to int. Trailing characters after the number are ignored, matching the
 * previous atoi-based behaviour for inputs such as "20abc".
 *
 * Why not atoi: atoi returns 0 for garbage, which would silently set the
 * threshold to 0 and fire the warning at baseline temperature, and its
 * behaviour on out-of-range input is undefined. */
static int parse_thermal_warn_delta(const char *text) {
  constexpr int kDefaultWarnDelta = 15;
  if (text == nullptr) {
    return kDefaultWarnDelta;
  }
  char *end = nullptr;
  const long parsed = std::strtol(text, &end, 10);
  if (end == text) {
    return kDefaultWarnDelta; /* no digits consumed */
  }
  const int narrowed = static_cast<int>(parsed);
  if (static_cast<long>(narrowed) != parsed) {
    return kDefaultWarnDelta; /* does not fit in int */
  }
  return narrowed;
}

/* Warn threshold for the thermal poller, in thermal units above the EFUSE
 * baseline. Taken from DEVOURER_THERMAL_WARN_DELTA; defaults to 15. */
static const int g_thermal_warn_delta = []() -> int {
  return parse_thermal_warn_delta(std::getenv("DEVOURER_THERMAL_WARN_DELTA"));
}();

/* DEVOURER_RX_DUMP_CSI=hex,hex,... (or "0x1a,0x20,0x40"): F2 research
* spike. On each canonical-SA RX frame (first N frames), read BB
* dbgport 0x8FC at each selector and emit
Expand Down Expand Up @@ -386,6 +399,33 @@ int main() {
}
});
}
std::atomic<bool> therm_emitter_stop{false};
std::thread therm_emitter;
if (g_thermal_poll_ms > 0) {
logger->info("DEVOURER_THERMAL_POLL_MS={} warn_delta={} — starting thermal "
"poller", g_thermal_poll_ms, g_thermal_warn_delta);
rtlDevice->start_thermal_poller(g_thermal_poll_ms, g_thermal_warn_delta);
therm_emitter = std::thread([&therm_emitter_stop]() {
while (!therm_emitter_stop.load()) {
if (g_rtl_device != nullptr) {
auto t = g_rtl_device->get_thermal_snapshot();
if (t.valid) {
printf("<devourer-thermal>raw=%u baseline=%u delta=%+d status=%s\n",
t.raw, t.baseline, t.delta, ThermalBucket(t));
} else {
printf("<devourer-thermal>raw=%u baseline=none status=%s\n",
t.raw, ThermalBucket(t));
}
fflush(stdout);
}
for (uint32_t slept = 0;
slept < g_thermal_poll_ms && !therm_emitter_stop.load();
slept += 50) {
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}
}
});
}
/* Default channel 36 (5 GHz) for the 8812 reference. Override with
* DEVOURER_CHANNEL=N env var (e.g. DEVOURER_CHANNEL=6 for busy 2.4 GHz). */
int channel = 36;
Expand Down
19 changes: 19 additions & 0 deletions src/RadioManagementModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,25 @@ void RadioManagementModule::TickPwrTrack() {
_pwrTrk.TickThermalMeter(current_band_type, _currentChannel);
}

ThermalStatus RadioManagementModule::ReadThermalStatus() {
  /* Sample the live thermal meter from RF path A, register 0x42, mask 0xfc00
   * (bits [15:10]). phy_query_rf_reg applies the mask and shifts the field
   * down, so the low six bits of the result are the raw reading. This is the
   * register the 8812A power-track loop samples; reading it here has no
   * chip-type gate and performs no BB-swing write, so the probe is usable on
   * every Jaguar member. */
  const uint32_t meter = phy_query_rf_reg(RfPath::RF_PATH_A, 0x42, 0xfc00u);

  ThermalStatus status;
  status.raw = static_cast<uint8_t>(meter & 0x3F);
  status.baseline = _eepromManager->GetEepromThermalMeter();

  /* 0xFF marks "no EFUSE baseline"; without one the delta is meaningless and
   * is reported as 0 with valid == false. */
  status.valid = (status.baseline != 0xFF);
  status.delta = status.valid ? static_cast<int>(status.raw) -
                                    static_cast<int>(status.baseline)
                              : 0;
  return status;
}

void RadioManagementModule::hw_var_rcr_config(uint32_t rcr) {
_device.rtw_write32(REG_RCR, rcr);
}
Expand Down
36 changes: 36 additions & 0 deletions src/RadioManagementModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,36 @@ enum MGN_RATE {
MGN_UNKNOWN
};

/* One read-only sample of the chip thermal meter.
 *   raw      - live RF[A][0x42][15:10] reading, 0..63 Realtek "thermal
 *              units" (roughly 1.5-2 C each, NOT absolute degrees).
 *   baseline - EFUSE factory-calibrated reading; 0xFF means autoload failed
 *              or no baseline is stored.
 *   delta    - raw - baseline (signed). Positive means the chip is running
 *              hotter than at calibration.
 *   valid    - false when no EFUSE baseline is available; only `raw` is
 *              meaningful in that case. */
struct ThermalStatus {
  uint8_t raw{0};
  uint8_t baseline{0xFF};
  int delta{0};
  bool valid{false};
};

/* Map a thermal sample to a coarse health label. The meter is not a
 * calibrated °C sensor (Realtek publishes no °C transfer function for the AU
 * family; the value is an RF/PA-bias tracking index), so the
 * delta-from-baseline is bucketed instead of being turned into a fake
 * temperature — the same stance the rtl88x2eu driver takes. Thresholds are in
 * thermal units above the EFUSE baseline:
 *   delta < 8 -> "cool", < 15 -> "warm", < 25 -> "hot", otherwise "critical".
 * "hot" starts at the default DEVOURER_THERMAL_WARN_DELTA of 15. A sample
 * without a baseline (valid == false) yields "unknown". */
inline const char *ThermalBucket(const ThermalStatus &s) {
  if (!s.valid) {
    return "unknown";
  }
  const int above_baseline = s.delta;
  return above_baseline < 8    ? "cool"
         : above_baseline < 15 ? "warm"
         : above_baseline < 25 ? "hot"
                               : "critical";
}

class RadioManagementModule {
RtlUsbAdapter _device;
std::shared_ptr<EepromManager> _eepromManager;
Expand Down Expand Up @@ -170,6 +200,12 @@ class RadioManagementModule {
* callback `odm_txpowertracking_callback_thermal_meter` and writes
* the resulting BB-swing index to 0xc1c[31:21] / 0xe1c[31:21]. */
void TickPwrTrack();
/* Read the chip thermal meter (RF[A][0x42][15:10]) and pair it with the
* EFUSE baseline. Read-only — does NOT touch the TX-power-tracking
* BB-swing registers (that correction lives in TickPwrTrack). Works on
* every Jaguar member: path-A RF reads succeed on 8812/8811/8814/8821
* (on the 8814 only paths C/D are write-only). */
ThermalStatus ReadThermalStatus();
/* Run a full I/Q calibration. Mirrors upstream
* `phy_iq_calibrate_8812a` triggered from the channel-set callback
* when `_needIQK` is asserted. Takes ~50-100 ms per invocation. */
Expand Down
73 changes: 73 additions & 0 deletions src/RtlJaguarDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,10 @@ RtlJaguarDevice::~RtlJaguarDevice() {
if (_qd_thread.joinable()) {
_qd_thread.join();
}
_therm_stop.store(true);
if (_therm_thread.joinable()) {
_therm_thread.join();
}
}

void RtlJaguarDevice::start_queue_depth_poller(uint32_t interval_ms) {
Expand Down Expand Up @@ -409,6 +413,75 @@ std::array<uint32_t, 5> RtlJaguarDevice::get_queue_depth() const {
return out;
}

/* Pack a ThermalStatus into one 32-bit word so it can live in a single
 * atomic: bit 0 = valid, bits [15:8] = raw, bits [23:16] = baseline,
 * bits [31:24] = delta as a two's-complement byte. The delta is clamped to
 * the int8 range [-128, 127] before packing. */
static uint32_t pack_thermal(const ThermalStatus &s) {
  int clamped = s.delta;
  if (clamped > 127) {
    clamped = 127;
  } else if (clamped < -128) {
    clamped = -128;
  }
  const uint8_t delta_byte = static_cast<uint8_t>(static_cast<int8_t>(clamped));

  uint32_t word = s.valid ? 1u : 0u;
  word |= static_cast<uint32_t>(s.raw) << 8;
  word |= static_cast<uint32_t>(s.baseline) << 16;
  word |= static_cast<uint32_t>(delta_byte) << 24;
  return word;
}

/* Inverse of the 32-bit snapshot packing: bit 0 = valid, bits [15:8] = raw,
 * bits [23:16] = baseline, bits [31:24] = delta (reinterpreted as a signed
 * byte, so the recovered delta lies in [-128, 127]). */
static ThermalStatus unpack_thermal(uint32_t v) {
  const auto byte_at = [v](unsigned shift) -> uint8_t {
    return static_cast<uint8_t>((v >> shift) & 0xFFu);
  };

  ThermalStatus out;
  out.valid = (v & 1u) == 1u;
  out.raw = byte_at(8);
  out.baseline = byte_at(16);
  out.delta = static_cast<int8_t>(byte_at(24));
  return out;
}

/* Synchronous thermal read: forwards to the radio-management module, which
 * performs the RF register read on the calling thread. */
ThermalStatus RtlJaguarDevice::GetThermalStatus() {
  ThermalStatus status = _radioManagement->ReadThermalStatus();
  return status;
}

/* Return the most recent sample stored by the background thermal poller.
 * Performs no device I/O: it only loads and decodes the packed atomic. The
 * delta in a snapshot is limited to the int8 range by the packing. */
ThermalStatus RtlJaguarDevice::get_thermal_snapshot() const {
  const uint32_t packed = _therm_snap.load(std::memory_order_relaxed);
  return unpack_thermal(packed);
}

void RtlJaguarDevice::start_thermal_poller(uint32_t interval_ms,
int warn_delta) {
if (interval_ms == 0) return;
if (_therm_thread.joinable()) {
_logger->warn("thermal poller already running");
return;
}
_therm_thread = std::thread([this, interval_ms, warn_delta]() {
bool warned = false;
bool baseline_note = false;
while (!_therm_stop.load()) {
ThermalStatus s = _radioManagement->ReadThermalStatus();
_therm_snap.store(pack_thermal(s), std::memory_order_relaxed);
if (!s.valid) {
if (!baseline_note) {
_logger->info(
"thermal: no EFUSE baseline (0xFF) — reporting raw only "
"(raw={})",
unsigned(s.raw));
baseline_note = true;
}
} else if (s.delta >= warn_delta) {
if (!warned) {
_logger->warn(
"thermal: chip running hot ({}) — raw={} baseline={} delta=+{} "
"(>= {}); TX power tracking backing off, sustained TX may "
"degrade the PA",
ThermalBucket(s), unsigned(s.raw), unsigned(s.baseline), s.delta,
warn_delta);
warned = true;
}
} else {
warned = false; /* re-arm once it cools back under the threshold */
}
/* Sleep in short slices so destruction doesn't block for a full
* interval after _therm_stop is set. */
for (uint32_t slept = 0; slept < interval_ms && !_therm_stop.load();
slept += 50) {
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}
}
});
}

uint32_t RtlJaguarDevice::read_bb_dbgport(uint32_t selector) {
if (!_bb_dbgport) {
_bb_dbgport = std::make_unique<devourer::BbDbgportReader>(_device, _logger);
Expand Down
27 changes: 27 additions & 0 deletions src/RtlJaguarDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,26 @@ class RtlJaguarDevice {
void start_queue_depth_poller(uint32_t interval_ms);
std::array<uint32_t, 5> get_queue_depth() const;

/* Read the chip thermal meter (RF[A][0x42][15:10]) paired with the EFUSE
* baseline. Read-only — leaves the TX-power-tracking BB-swing registers
* untouched. Works on every Jaguar member. Safe to call from the thread
* that owns the device (e.g. inline in a TX loop) — no USB contention.
* See ThermalStatus in RadioManagementModule.h for field semantics. */
ThermalStatus GetThermalStatus();

/* Spawn a background thread that samples the thermal meter every
* interval_ms and stores a snapshot (queryable via get_thermal_snapshot).
* Emits a logger->warn when delta >= warn_delta. 0 interval = disabled.
* Intended for the RX demo, whose Init() blocks the main thread.
*
* CONCURRENCY: an RF read is a multi-step BB register sequence over the
* shared libusb handle. Background phydm-style polling has wedged the chip
* before (ch100 second-channel-set), so this poller is opt-in and should
* use a conservative cadence (>= 1 s). A TX loop on the owning thread
* should prefer the synchronous GetThermalStatus() instead. */
void start_thermal_poller(uint32_t interval_ms, int warn_delta);
ThermalStatus get_thermal_snapshot() const;

/* F2 research helper: read a u32 from the BB debug port at `selector`,
* with save/restore around register 0x8FC. Lazy-constructs the reader
* on first call. Returns 0 if the chip wedged on a prior call. See
Expand All @@ -82,6 +102,13 @@ class RtlJaguarDevice {
std::thread _qd_thread;
std::atomic<bool> _qd_stop{false};

std::thread _therm_thread;
std::atomic<bool> _therm_stop{false};
/* Packed last thermal snapshot: bit 0 = valid, [15:8] = raw,
* [23:16] = baseline, [31:24] = signed delta (clamped to int8); bits [7:1]
* are unused. Stored as one atomic so a reader sees a consistent tuple
* without a mutex. */
std::atomic<uint32_t> _therm_snap{0};

std::unique_ptr<devourer::BbDbgportReader> _bb_dbgport;
};

Expand Down
89 changes: 89 additions & 0 deletions tests/thermal_hwcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env bash
# Hardware smoke-test for the thermal monitor probe.
#
# Runs WiFiDriverTxDemo against each plugged Jaguar adapter for a few seconds
# with DEVOURER_THERMAL_POLL_MS enabled, and prints the <devourer-thermal>
# lines it emits. Read-only w.r.t. the probe — this just confirms the thermal
# meter reads back a live, plausible value per chip.
#
# Usage: sudo tests/thermal_hwcheck.sh
# Treat any reference to an unset variable as an error.
set -u

# The demo binary is expected in <repo>/build, resolved relative to this
# script's own location so the script can be launched from any directory.
BUILD_DIR="$(cd "$(dirname "$0")/.." && pwd)/build"
TXDEMO="$BUILD_DIR/WiFiDriverTxDemo"
# Seconds each adapter is left running before the demo is stopped.
RUN_SECS=6
# Value exported as DEVOURER_THERMAL_POLL_MS for each run.
POLL_MS=500 # ~ every 250 TX frames inline
# Value exported as DEVOURER_THERMAL_WARN_DELTA for each run.
WARN_DELTA=15

# PID of the demo currently running in the background ("" when none).
CHILD_PID=""

# Stop whatever demo is still running: ask the tracked child to exit with
# SIGINT, give it 0.3 s, then SIGKILL it, and finally sweep up any stray demo
# process by name. Runs from the EXIT trap, so it covers normal completion,
# errors and signals alike.
cleanup() {
  if [[ -n "$CHILD_PID" ]] && kill -0 "$CHILD_PID" 2>/dev/null; then
    kill -INT "$CHILD_PID" 2>/dev/null
    sleep 0.3
    kill -KILL "$CHILD_PID" 2>/dev/null
  fi
  # Backstop: reap any stray demo by exact comm name. Linux truncates the
  # process name (comm) to 15 characters and pkill -x matches against that
  # truncated name, so the full 16-character "WiFiDriverTxDemo" would never
  # match — compare against its first 15 characters instead.
  local comm="WiFiDriverTxDemo"
  pkill -KILL -x "${comm:0:15}" 2>/dev/null
}

# cleanup runs once, on exit. INT/TERM must terminate the script (which then
# triggers the EXIT trap); a signal handler that merely returns would let the
# script carry on with the next adapter after Ctrl-C.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Bail out early when the TX demo has not been built (or is not executable).
[[ -x "$TXDEMO" ]] || {
  echo "ERROR: $TXDEMO not built — run: cmake --build build -j" >&2
  exit 1
}

# USB product ID -> human label. ("pid" in this script means the USB product
# ID handed to the demo as DEVOURER_PID, not a process ID.) These entries run
# with run_one's default vendor ID.
declare -A CHIPS=(
[0x8812]="RTL8812AU"
[0x8813]="RTL8814AU"
)
# 8821AU is OEM-rebadged on the T2U Plus (2357:0120) — needs VID override.
# VID_OVERRIDE maps a product ID to the vendor ID to use for it; CHIPS_OEM
# holds the label for the same product ID, so the two tables share keys.
declare -A VID_OVERRIDE=(
[0x0120]="0x2357"
)
declare -A CHIPS_OEM=(
[0x0120]="RTL8821AU (T2U Plus)"
)

# run_one USB_PID LABEL [USB_VID]
#
# Run the TX demo against one adapter for RUN_SECS seconds with the thermal
# probe enabled, then print the thermal-related lines it logged.
#   USB_PID  USB product ID, exported to the demo as DEVOURER_PID
#   LABEL    human-readable chip name for the banner
#   USB_VID  USB vendor ID, exported as DEVOURER_VID (default 0x0bda)
# The demo's stdout and stderr go to a temporary log that is removed at the
# end. CHILD_PID is set while the demo runs so cleanup can stop it.
run_one() {
  local usb_pid="$1"
  local chip_label="$2"
  local usb_vid="${3:-0x0bda}"
  local rule="==================================================================="

  echo
  echo "$rule"
  echo " $chip_label (VID=$usb_vid PID=$usb_pid) — ${RUN_SECS}s"
  echo "$rule"

  local capture
  capture="$(mktemp)"

  DEVOURER_VID="$usb_vid" DEVOURER_PID="$usb_pid" \
    DEVOURER_THERMAL_POLL_MS="$POLL_MS" \
    DEVOURER_THERMAL_WARN_DELTA="$WARN_DELTA" \
    "$TXDEMO" >"$capture" 2>&1 &
  CHILD_PID=$!

  # Let the demo run, then stop it: SIGINT first, SIGKILL 0.3 s later.
  sleep "$RUN_SECS"
  if kill -0 "$CHILD_PID" 2>/dev/null; then
    kill -INT "$CHILD_PID" 2>/dev/null
    sleep 0.3
    kill -KILL "$CHILD_PID" 2>/dev/null
  fi
  wait "$CHILD_PID" 2>/dev/null
  CHILD_PID=""

  echo "--- thermal monitor lines ---"
  grep -E "<devourer-thermal>|thermal:|ThermalMeter|thermal monitor on" "$capture" | head -20

  local line_count
  line_count="$(grep -c "<devourer-thermal>" "$capture")"
  echo "--- ($line_count <devourer-thermal> lines total) ---"

  # Nothing emitted: show the end of the log so the failure can be diagnosed.
  if (( line_count == 0 )); then
    echo " no thermal lines — tail of log for context:"
    tail -15 "$capture" | sed 's/^/ /'
  fi

  rm -f "$capture"
}

# Adapters that use run_one's default vendor ID.
for usb_pid in "${!CHIPS[@]}"; do
  run_one "$usb_pid" "${CHIPS[$usb_pid]}"
done

# OEM-rebadged adapters: same probe, with the vendor ID overridden.
for usb_pid in "${!CHIPS_OEM[@]}"; do
  run_one "$usb_pid" "${CHIPS_OEM[$usb_pid]}" "${VID_OVERRIDE[$usb_pid]}"
done

echo
echo "done."
Loading
Loading