ESPHome  2025.2.0
audio_resampler.cpp
Go to the documentation of this file.
1 #include "audio_resampler.h"
2 
3 #ifdef USE_ESP32
4 
5 #include "esphome/core/hal.h"
6 
7 namespace esphome {
8 namespace audio {
9 
// Timeout, in milliseconds, used for each transfer to the sink and from the source, and as the delay while the
// output is paused.
static constexpr uint32_t READ_WRITE_TIMEOUT_MS = 20;
11 
12 AudioResampler::AudioResampler(size_t input_buffer_size, size_t output_buffer_size)
13  : input_buffer_size_(input_buffer_size), output_buffer_size_(output_buffer_size) {
15  this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(output_buffer_size);
16 }
17 
18 esp_err_t AudioResampler::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
19  if (this->input_transfer_buffer_ != nullptr) {
20  this->input_transfer_buffer_->set_source(input_ring_buffer);
21  return ESP_OK;
22  }
23  return ESP_ERR_NO_MEM;
24 }
25 
26 esp_err_t AudioResampler::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
27  if (this->output_transfer_buffer_ != nullptr) {
28  this->output_transfer_buffer_->set_sink(output_ring_buffer);
29  return ESP_OK;
30  }
31  return ESP_ERR_NO_MEM;
32 }
33 
34 #ifdef USE_SPEAKER
36  if (this->output_transfer_buffer_ != nullptr) {
37  this->output_transfer_buffer_->set_sink(speaker);
38  return ESP_OK;
39  }
40  return ESP_ERR_NO_MEM;
41 }
42 #endif
43 
44 esp_err_t AudioResampler::start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info,
45  uint16_t number_of_taps, uint16_t number_of_filters) {
46  this->input_stream_info_ = input_stream_info;
47  this->output_stream_info_ = output_stream_info;
48 
49  if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
50  return ESP_ERR_NO_MEM;
51  }
52 
53  if ((input_stream_info.get_bits_per_sample() > 32) || (output_stream_info.get_bits_per_sample() > 32) ||
54  (input_stream_info_.get_channels() != output_stream_info.get_channels())) {
55  return ESP_ERR_NOT_SUPPORTED;
56  }
57 
58  if ((input_stream_info.get_sample_rate() != output_stream_info.get_sample_rate()) ||
59  (input_stream_info.get_bits_per_sample() != output_stream_info.get_bits_per_sample())) {
60  this->resampler_ = make_unique<esp_audio_libs::resampler::Resampler>(
61  input_stream_info.bytes_to_samples(this->input_buffer_size_),
62  output_stream_info.bytes_to_samples(this->output_buffer_size_));
63 
64  // Use cascaded biquad filters when downsampling to avoid aliasing
65  bool use_pre_filter = output_stream_info.get_sample_rate() < input_stream_info.get_sample_rate();
66 
67  esp_audio_libs::resampler::ResamplerConfiguration resample_config = {
68  .source_sample_rate = static_cast<float>(input_stream_info.get_sample_rate()),
69  .target_sample_rate = static_cast<float>(output_stream_info.get_sample_rate()),
70  .source_bits_per_sample = input_stream_info.get_bits_per_sample(),
71  .target_bits_per_sample = output_stream_info.get_bits_per_sample(),
72  .channels = input_stream_info_.get_channels(),
73  .use_pre_or_post_filter = use_pre_filter,
74  .subsample_interpolate = false, // Doubles the CPU load. Using more filters is a better alternative
75  .number_of_taps = number_of_taps,
76  .number_of_filters = number_of_filters,
77  };
78 
79  if (!this->resampler_->initialize(resample_config)) {
80  // Failed to allocate the resampler's internal buffers
81  return ESP_ERR_NO_MEM;
82  }
83  }
84 
85  return ESP_OK;
86 }
87 
88 AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_differential) {
89  if (stop_gracefully) {
90  if (!this->input_transfer_buffer_->has_buffered_data() && (this->output_transfer_buffer_->available() == 0)) {
92  }
93  }
94 
95  if (!this->pause_output_) {
96  // Move audio data to the sink
97  this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
98  } else {
99  // If paused, block to avoid wasting CPU resources
100  delay(READ_WRITE_TIMEOUT_MS);
101  }
102 
103  this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
104 
105  if (this->input_transfer_buffer_->available() == 0) {
106  // No samples available to process
108  }
109 
110  const size_t bytes_free = this->output_transfer_buffer_->free();
111  const uint32_t frames_free = this->output_stream_info_.bytes_to_frames(bytes_free);
112 
113  const size_t bytes_available = this->input_transfer_buffer_->available();
114  const uint32_t frames_available = this->input_stream_info_.bytes_to_frames(bytes_available);
115 
118  esp_audio_libs::resampler::ResamplerResults results =
119  this->resampler_->resample(this->input_transfer_buffer_->get_buffer_start(),
120  this->output_transfer_buffer_->get_buffer_end(), frames_available, frames_free, -3);
121 
122  this->input_transfer_buffer_->decrease_buffer_length(this->input_stream_info_.frames_to_bytes(results.frames_used));
123  this->output_transfer_buffer_->increase_buffer_length(
124  this->output_stream_info_.frames_to_bytes(results.frames_generated));
125 
126  // Resampling causes slight differences in the durations used versus generated. Computes the difference in
127  // millisconds. The callback function passing the played audio duration uses the difference to convert from output
128  // duration to input duration.
129  this->accumulated_frames_used_ += results.frames_used;
130  this->accumulated_frames_generated_ += results.frames_generated;
131 
132  const int32_t used_ms =
134  const int32_t generated_ms =
136 
137  *ms_differential = used_ms - generated_ms;
138 
139  } else {
140  // No resampling required, copy samples directly to the output transfer buffer
141  *ms_differential = 0;
142 
143  const size_t bytes_to_transfer = std::min(this->output_stream_info_.frames_to_bytes(frames_free),
144  this->input_stream_info_.frames_to_bytes(frames_available));
145 
146  std::memcpy((void *) this->output_transfer_buffer_->get_buffer_end(),
147  (void *) this->input_transfer_buffer_->get_buffer_start(), bytes_to_transfer);
148 
149  this->input_transfer_buffer_->decrease_buffer_length(bytes_to_transfer);
150  this->output_transfer_buffer_->increase_buffer_length(bytes_to_transfer);
151  }
152 
154 }
155 
156 } // namespace audio
157 } // namespace esphome
158 
159 #endif
uint8_t get_channels() const
Definition: audio.h:29
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
uint8_t get_bits_per_sample() const
Definition: audio.h:28
std::unique_ptr< esp_audio_libs::resampler::Resampler > resampler_
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for audio data.
uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const
Computes the duration, in milliseconds, the given amount of frames represents.
Definition: audio.cpp:26
std::unique_ptr< AudioSourceTransferBuffer > input_transfer_buffer_
AudioResampler(size_t input_buffer_size, size_t output_buffer_size)
Allocates the input and output transfer buffers.
uint32_t bytes_to_samples(size_t bytes) const
Convert bytes to samples.
Definition: audio.h:48
size_t frames_to_bytes(uint32_t frames) const
Converts frames to bytes.
Definition: audio.h:53
AudioResamplerState resample(bool stop_gracefully, int32_t *ms_differential)
Resamples audio from the ring buffer source and writes to the sink.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
uint32_t get_sample_rate() const
Definition: audio.h:30
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for resampled audio.
Implementation of SPI Controller mode.
Definition: a01nyub.cpp:7
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
uint32_t bytes_to_frames(size_t bytes) const
Convert bytes to frames.
Definition: audio.h:43
esp_err_t start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info, uint16_t number_of_taps, uint16_t number_of_filters)
Sets up the class to resample.
void IRAM_ATTR HOT delay(uint32_t ms)
Definition: core.cpp:26