/* * Kitty: WiFi RKY audio receiver -> USB Audio Class 2 microphone. * * WiFi RX path copied verbatim from lucy/receiver (dual-path: ISR hook + * promisc callback). Output changed from I2S DMA to ring buffer -> UAC2 mic. * * Incoming audio is 24-bit packed (from bubbles TX). Unpacked to 16-bit * for USB output: top 2 bytes of each 3-byte sample. * * Test tone: 1kHz sine injected into ring buffer when no WiFi for 500ms. */ #include #include #include "freertos/FreeRTOS.h" #include "freertos/task.h" #include "esp_system.h" #include "esp_wifi.h" #include "esp_timer.h" #include "esp_log.h" #include "nvs_flash.h" #include "esp_netif.h" #include "esp_event.h" #include "soc/lldesc.h" #include "esp_private/wifi_os_adapter.h" #include "xtensa/xtensa_api.h" #include "esp_cpu.h" #include "ring_buf.h" #include "usb_audio.h" #define TAG "kitty" #define CHANNEL 1 #define SAMPLE_RATE 48000 #define FRAMES_PER_PKT 48 #define NUM_CHANNELS 2 #define PACKED_BYTES 3 /* 48 stereo frames * 2ch * 3 bytes = 288 bytes packed audio per packet */ #define AUDIO_PAYLOAD_SIZE (FRAMES_PER_PKT * NUM_CHANNELS * PACKED_BYTES) /* 48 stereo frames * 2ch * 2 bytes = 192 bytes after 16-bit truncation */ #define AUDIO_16BIT_SIZE (FRAMES_PER_PKT * NUM_CHANNELS * 2) /* Test tone: 1kHz sine at -12dBFS, injected when no WiFi for >500ms */ #define TONE_FREQ 1000 #define TONE_AMPLITUDE 0x1000 #define TONE_TIMEOUT_US 500000 /* WiFi MAC RX DMA registers (ESP32-S3) */ #define REG_RX_DSCR_LAST (*(volatile uint32_t *)0x60033090) /* DMA SRAM address range on S3 */ #define DMA_SRAM_BASE 0x3FC88000 #define DMA_SRAM_END 0x3FD00000 #define DMA_ADDR_MASK 0x000FFFFF #define DMA_ADDR_OR 0x3FC00000 typedef struct __attribute__((packed)) { uint32_t frame_seq; uint8_t sub_index; uint8_t pkt_type; uint16_t payload_len; uint16_t burst_width; } rky_hdr_t; #define MAC_HDR_LEN 24 #define ACTION_HDR_LEN 4 #define RKY_HDR_LEN sizeof(rky_hdr_t) #define FRAME_OVERHEAD (MAC_HDR_LEN + ACTION_HDR_LEN + RKY_HDR_LEN) static ring_buf_t s_ring; static DRAM_ATTR uint8_t s_paired_mac[6]; static volatile bool s_paired; static uint32_t s_stat_isr; static uint32_t s_stat_prom; static uint32_t s_stat_dedup; static uint32_t s_stat_gaps; static uint32_t s_stat_overruns; static uint32_t s_stat_isrs; static volatile uint32_t s_last_seq; static volatile int64_t s_last_rx_time; /* ---------- 24-bit to 16-bit unpack + ring write ---------- */ static inline void IRAM_ATTR unpack_24_to_16_ring(const uint8_t *packed, uint16_t packed_len) { uint32_t n_samples = packed_len / PACKED_BYTES; uint8_t tmp[AUDIO_16BIT_SIZE]; if (n_samples > FRAMES_PER_PKT * NUM_CHANNELS) n_samples = FRAMES_PER_PKT * NUM_CHANNELS; uint32_t j = 0; for (uint32_t i = 0; i < n_samples; i++) { /* packed[j]=MSB packed[j+1]=mid packed[j+2]=LSB -> 16-bit LE = {mid, MSB} */ tmp[i * 2] = packed[j + 1]; tmp[i * 2 + 1] = packed[j]; j += PACKED_BYTES; } size_t written = ring_buf_write(&s_ring, tmp, n_samples * 2); if (written < n_samples * 2) s_stat_overruns++; s_last_rx_time = esp_timer_get_time(); } /* ---------- shared ingest ---------- */ static inline void IRAM_ATTR ingest_audio(const uint8_t *audio, uint16_t plen, uint32_t seq, const uint8_t *sa) { /* first-seen pairing */ if (!s_paired) { for (int i = 0; i < 6; i++) s_paired_mac[i] = sa[i]; s_paired = true; } else { if (sa[0] != s_paired_mac[0] || sa[1] != s_paired_mac[1] || sa[2] != s_paired_mac[2] || sa[3] != s_paired_mac[3] || sa[4] != s_paired_mac[4] || sa[5] != s_paired_mac[5]) return; } /* sequence dedup: both paths call ingest, skip if already seen */ if (seq == s_last_seq && s_last_seq != 0) { s_stat_dedup++; return; } /* gap detection */ if (s_last_seq != 0 && seq != s_last_seq + 1) s_stat_gaps++; s_last_seq = seq; /* unpack 24-bit packed to 16-bit and write to ring buffer */ unpack_24_to_16_ring(audio, plen); } /* ---------- ISR hook ---------- */ static DRAM_ATTR xt_handler s_orig_isr; static DRAM_ATTR void *s_orig_arg; static DRAM_ATTR int s_orig_intr_num = -1; static DRAM_ATTR uint32_t s_isr_last_raw; static void IRAM_ATTR wifi_isr_hook(void *arg) { s_stat_isrs++; uint32_t raw = REG_RX_DSCR_LAST; if (raw != s_isr_last_raw) { s_isr_last_raw = raw; uint32_t addr = (raw & DMA_ADDR_MASK) | DMA_ADDR_OR; if (addr >= DMA_SRAM_BASE && addr < DMA_SRAM_END) { lldesc_t *desc = (lldesc_t *)addr; if (desc->owner == 0 && desc->length > FRAME_OVERHEAD) { const uint8_t *buf = (const uint8_t *)desc->buf; if (buf[0] == 0xD0 && buf[24] == 127 && buf[25] == 0x52 && buf[26] == 0x4B && buf[27] == 0x59) { rky_hdr_t *hdr = (rky_hdr_t *)(buf + MAC_HDR_LEN + ACTION_HDR_LEN); uint16_t plen = hdr->payload_len; uint32_t seq = hdr->frame_seq; const uint8_t *audio = buf + FRAME_OVERHEAD; if (plen > desc->length - FRAME_OVERHEAD) plen = desc->length - FRAME_OVERHEAD; if (plen > 0) { ingest_audio(audio, plen, seq, buf + 10); s_stat_isr++; } } } } } /* chain to blob's original ISR */ if (s_orig_isr) s_orig_isr(s_orig_arg); } /* patched _set_isr: intercept when blob installs wDev_ProcessFiq */ static void IRAM_ATTR patched_set_isr(int32_t n, void *f, void *arg) { if (s_orig_intr_num < 0 && f != NULL) { s_orig_isr = (xt_handler)f; s_orig_arg = arg; s_orig_intr_num = n; xt_set_interrupt_handler(n, wifi_isr_hook, arg); ESP_EARLY_LOGI(TAG, "captured WiFi ISR: intr=%d handler=%p arg=%p", (int)n, f, arg); } else { xt_set_interrupt_handler(n, (xt_handler)f, arg); } } /* ---------- promiscuous callback (fallback path) ---------- */ static void IRAM_ATTR promisc_cb(void *buf, wifi_promiscuous_pkt_type_t type) { wifi_promiscuous_pkt_t *pkt = (wifi_promiscuous_pkt_t *)buf; uint8_t *frame = pkt->payload; int len = pkt->rx_ctrl.sig_len - 4; if (len < (int)FRAME_OVERHEAD) return; if ((frame[0] & 0xFC) != 0xD0) return; if (frame[24] != 127) return; if (frame[25] != 0x52 || frame[26] != 0x4B || frame[27] != 0x59) return; rky_hdr_t *hdr = (rky_hdr_t *)(frame + MAC_HDR_LEN + ACTION_HDR_LEN); uint16_t plen = hdr->payload_len; uint32_t seq = hdr->frame_seq; uint8_t *audio = frame + FRAME_OVERHEAD; int avail = len - (int)FRAME_OVERHEAD; if (plen > (uint16_t)avail) plen = (uint16_t)avail; if (plen == 0) return; ingest_audio(audio, plen, seq, frame + 10); s_stat_prom++; } /* ---------- WiFi init ---------- */ static void wifi_init(void) { ESP_ERROR_CHECK(nvs_flash_init()); ESP_ERROR_CHECK(esp_netif_init()); ESP_ERROR_CHECK(esp_event_loop_create_default()); /* patch _set_isr before esp_wifi_init so we capture the blob's ISR install */ g_wifi_osi_funcs._set_isr = patched_set_isr; wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT(); ESP_ERROR_CHECK(esp_wifi_init(&cfg)); ESP_ERROR_CHECK(esp_wifi_set_storage(WIFI_STORAGE_RAM)); ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA)); ESP_ERROR_CHECK(esp_wifi_start()); ESP_ERROR_CHECK(esp_wifi_set_channel(CHANNEL, WIFI_SECOND_CHAN_NONE)); wifi_promiscuous_filter_t filt = { .filter_mask = WIFI_PROMIS_FILTER_MASK_ALL, }; ESP_ERROR_CHECK(esp_wifi_set_promiscuous_filter(&filt)); ESP_ERROR_CHECK(esp_wifi_set_promiscuous_rx_cb(promisc_cb)); ESP_ERROR_CHECK(esp_wifi_set_promiscuous(true)); } /* ---------- test tone ---------- */ static int16_t s_sine_table[FRAMES_PER_PKT]; static void tone_table_init(void) { for (int i = 0; i < FRAMES_PER_PKT; i++) s_sine_table[i] = (int16_t)(TONE_AMPLITUDE * sinf(2.0f * M_PI * TONE_FREQ * i / SAMPLE_RATE)); } static void tone_task(void *arg) { TickType_t xLastWake = xTaskGetTickCount(); uint32_t phase = 0; bool was_toning = false; while (1) { vTaskDelayUntil(&xLastWake, pdMS_TO_TICKS(1)); int64_t now = esp_timer_get_time(); if ((now - s_last_rx_time) < TONE_TIMEOUT_US) { if (was_toning) { ESP_LOGI(TAG, "tone: WiFi data resumed, stopping tone"); was_toning = false; } continue; } if (!was_toning) { ESP_LOGW(TAG, "tone: no WiFi for %dms, injecting 1kHz test tone", (int)(TONE_TIMEOUT_US / 1000)); was_toning = true; phase = 0; } int16_t buf[FRAMES_PER_PKT * NUM_CHANNELS]; for (int i = 0; i < FRAMES_PER_PKT; i++) { int16_t s = s_sine_table[(phase + i) % FRAMES_PER_PKT]; buf[i * 2] = s; buf[i * 2 + 1] = s; } phase = (phase + FRAMES_PER_PKT) % FRAMES_PER_PKT; ring_buf_write(&s_ring, (const uint8_t *)buf, sizeof(buf)); } } /* ---------- stats ---------- */ static void stats_task(void *arg) { int64_t last = esp_timer_get_time(); while (1) { vTaskDelay(pdMS_TO_TICKS(5000)); int64_t now = esp_timer_get_time(); float elapsed = (now - last) / 1e6f; uint32_t fill = ring_buf_fill(&s_ring); uint32_t total = s_stat_isr + s_stat_prom; bool toning = (now - s_last_rx_time) >= TONE_TIMEOUT_US; ESP_LOGI(TAG, "isr=%lu prom=%lu dedup=%lu | gaps=%lu over=%lu | ring=%lu/%d | isrs=%lu pps=%.0f%s", (unsigned long)s_stat_isr, (unsigned long)s_stat_prom, (unsigned long)s_stat_dedup, (unsigned long)s_stat_gaps, (unsigned long)s_stat_overruns, (unsigned long)fill, RING_BUF_SIZE, (unsigned long)s_stat_isrs, total / elapsed, toning ? " [TONE]" : ""); s_stat_isr = 0; s_stat_prom = 0; s_stat_dedup = 0; s_stat_gaps = 0; s_stat_overruns = 0; s_stat_isrs = 0; last = now; } } /* ---------- main ---------- */ void app_main(void) { tone_table_init(); ring_buf_init(&s_ring); usb_audio_init(&s_ring); wifi_init(); ESP_LOGI(TAG, "rx: wifi ch=%d -> UAC2 mic 48kHz/16/stereo, isr_hook=%s, ring=%d bytes", CHANNEL, s_orig_intr_num >= 0 ? "active" : "inactive", RING_BUF_SIZE); xTaskCreatePinnedToCore(stats_task, "stats", 4096, NULL, 5, NULL, 0); xTaskCreatePinnedToCore(tone_task, "tone", 4096, NULL, 6, NULL, 1); }