esp-idf: Memory leak: queue not freed in esp_mesh_stop() (IDFGH-4525)

Environment

  • Development Kit: none
  • Kit version (for WroverKit/PicoKit/DevKitC): none
  • Module or chip used: ESP32-WROOM-32U
  • IDF version (run git describe --tags to find it): release/4.2
  • Build System: CMake
  • Compiler version (run xtensa-esp32-elf-gcc --version to find it): xtensa-esp32-elf-gcc.exe (crosstool-NG esp-2020r3) 8.4.0
  • Operating System: Windows
  • (Windows only) environment type: PowerShell
  • Using an IDE?: Yes - VS Code with ESP-IDF extension
  • Power Supply: Battery

Problem Description

There appears to be a queue created in esp_mesh_start() that is not freed by esp_mesh_stop(). In my application I periodically start and stop ESP-MESH to conserve battery, and therefore run esp_mesh_stop() and esp_mesh_start() repeatedly. However, on every cycle I lose 92 bytes of memory, which appears to be a queue created in esp_mesh_start() that is never freed (see the trace below).

Expected Behavior

esp_mesh_stop() should free all allocated memory from esp_mesh_start().

Actual Behavior

A queue is orphaned on each cycle of esp_mesh_stop()/esp_mesh_start().

Steps to reproduce

  1. Repeatedly run esp_mesh_start() and then esp_mesh_stop(). (In my case, I use the functions mesh_enable()/mesh_disable() in the code below).

Code to reproduce this issue

esp_err_t mesh_enable(void) {
  esp_err_t err = ESP_OK;
  if (!mesh_on) {
    if (!mesh_initialized) {
      ESP_LOGW(MESH_TAG, "Initializing mesh network");
    } else {
      ESP_LOGW(MESH_TAG, "Enabling mesh network");
    }
    if (!mesh_initialized) {
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_netif_init());
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_event_loop_create_default());NULL));");
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_netif_create_default_wifi_mesh_netifs(&netif_sta, NULL));
      wifi_init_config_t config = WIFI_INIT_CONFIG_DEFAULT();
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_init(&config));&ip_event_handler, NULL));");
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP, &ip_event_handler, NULL));
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_set_storage(WIFI_STORAGE_RAM));
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_set_ps(WIFI_PS_NONE));
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_set_mode(WIFI_MODE_APSTA));
      if (mesh_settings.lr_mode == 1) {
        ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_set_protocol(WIFI_IF_AP, WIFI_PROTOCOL_LR));
        ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_set_protocol(WIFI_IF_STA, WIFI_PROTOCOL_LR));
      }
    }
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_start());
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_init());
    ESP_ERROR_CHECK_WITHOUT_ABORT(setup_root());    if (!mesh_initialized) {
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_event_handler_register(MESH_EVENT, ESP_EVENT_ANY_ID, &mesh_event_handler, NULL));
    }
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_topology(MESH_TOPO_CHAIN));
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_max_layer(mesh_settings.max_layer));
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_vote_percentage(1));
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_xon_qsize(64));
    if (mesh_settings.pwr_save) {
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_enable_ps());
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_ap_assoc_expire(60));
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_announce_interval(5000, 30000));
    } else {
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_disable_ps());
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_ap_assoc_expire(10));
    }
    mesh_cfg_t cfg = MESH_INIT_CONFIG_DEFAULT();
    memcpy((uint8_t *)&cfg.mesh_id, mesh_settings.id, 6);
    cfg.channel = mesh_settings.channel;
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_ap_authmode(mesh_settings.auth_mode));
    cfg.mesh_ap.max_connection = mesh_settings.max_conn;
    memcpy((uint8_t *)&cfg.mesh_ap.password, mesh_settings.ap_pass, strlen(mesh_settings.ap_pass));
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_config(&cfg));
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_start());
    mesh_on = true;
    if (!mesh_initialized) {
      ESP_ERROR_CHECK_WITHOUT_ABORT(esp_read_mac(mesh_self_addr.addr, 0));
    }
    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_set_self_organized(true, false));
    ESP_LOGW(MESH_TAG, "Mesh starts successfully for " MACSTR ", max. layer:%d heap:%d, %s<%d>%s, ps:%d\n", MAC2STR(mesh_self_addr.addr), esp_mesh_get_max_layer(), esp_get_minimum_free_heap_size(), esp_mesh_is_root_fixed() ? "root fixed" : "root not fixed", esp_mesh_get_topology(), esp_mesh_get_topology() ? "(chain)" : "(tree)", esp_mesh_is_ps_enabled());
  } else {
    ESP_LOGW(MESH_TAG, "Mesh network already enabled!");
  }
  return err;
}

/*
 * Tear down the mesh network and stop Wi-Fi.
 *
 * When the mesh is not currently on, an error is logged and nothing else
 * happens. Otherwise the mesh is disconnected, flushed, stopped and
 * de-initialized, the p2p communication is stopped, Wi-Fi is stopped and
 * `mesh_on` is cleared. Each step's result goes through
 * ESP_ERROR_CHECK_WITHOUT_ABORT and is not propagated.
 *
 * Always returns ESP_OK.
 */
esp_err_t mesh_disable(void) {
  /* Guard: nothing to tear down if the mesh was never enabled. */
  if (!mesh_on) {
    ESP_LOGE(MESH_TAG, "Mesh network already disabled!");
    return ESP_OK;
  }

  ESP_LOGW(MESH_TAG, "Disabling mesh network, heap: %u", esp_get_free_heap_size());
  is_mesh_connected = false;
  vTaskDelay(100 / portTICK_PERIOD_MS);

  /* Only non-root nodes stop the Wi-Fi scan. */
  if (!esp_mesh_is_root()) {
    esp_wifi_scan_stop();
  }

  /* Disconnect and flush before stopping and de-initializing the mesh. */
  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_disconnect());
  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_flush_scan_result());
  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_flush_upstream_packets());
  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_stop());
  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_deinit());
  vTaskDelay(100 / portTICK_PERIOD_MS);

  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_mesh_comm_p2p_stop());
  ESP_ERROR_CHECK_WITHOUT_ABORT(esp_wifi_stop());
  mesh_on = false;

  return ESP_OK;
}
/*
 * Reproduction driver: runs 200 enable/disable cycles of the mesh with heap
 * leak tracing active around each cycle, dumping the trace after every cycle.
 */
void app_main()
{
  uint8_t cycle = 0;
  while (cycle < 200) {
    /* Record allocations made during this cycle that are not freed. */
    heap_trace_start(HEAP_TRACE_LEAKS);

    mesh_enable();
    vTaskDelay(12000 / portTICK_PERIOD_MS);

    mesh_disable();
    vTaskDelay(3000 / portTICK_PERIOD_MS);

    heap_trace_stop();
    heap_trace_dump();

    cycle++;
  }
}

Debug Logs

For some reason, when using heap tracing stack depth 2, this queue is described as occupying 92 bytes every time, but with stack depth 4 it shows as 88, maybe it is not counting a pointer somewhere? I am losing 92 bytes every cycle, and the only things consistently in the trace are this and about 6 LWIP mallocs, however these are freed asynchronously.

88 bytes (@ 0x3ffe19b8) allocated CPU 0 ccount 0xa0555294 caller 0x4008f00e:0x4008f27b:0x400eece8:0x40112c00
0x4008f00e: xQueueGenericCreate at C:/Users/prebe/esp-idf/components/freertos/queue.c:391

0x4008f27b: xQueueCreateMutex at C:/Users/prebe/esp-idf/components/freertos/queue.c:498

0x400eece8: mutex_create_wrapper at C:/Users/prebe/esp-idf/components/esp_wifi/esp32/esp_adapter.c:301

0x40112c00: esp_mesh_start at ??:?

Other items if possible

sdkconfig: https://gist.github.com/MisterScience5/d52fba06f87e5ca2e5cfa7a82f58b809

About this issue

  • Original URL
  • State: closed
  • Created 3 years ago
  • Comments: 17 (2 by maintainers)

Most upvoted comments

The root cause has been found; we will backport the fix to v4.4 ASAP.

@pedrominatel @MisterScience5 @Satierfa @VitorBFreitas We have reproduced this issue locally, and will find the root cause ASAP.

Thanks for reporting and letting us know, we will look into it.