ESPHome  2025.2.0
audio_decoder.cpp
Go to the documentation of this file.
1 #include "audio_decoder.h"
2 
3 #ifdef USE_ESP32
4 
5 #include "esphome/core/hal.h"
6 
7 namespace esphome {
8 namespace audio {
9 
// Timing limits (milliseconds) used by AudioDecoder::decode().
static const uint32_t DECODING_TIMEOUT_MS = 50;    // The decode function will yield after this duration
static const uint32_t READ_WRITE_TIMEOUT_MS = 20;  // Timeout for transferring audio data

// Limit that AudioDecoder::decode() compares potentially_failed_count_ against before decoding more data.
static const uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;
14 
15 AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size) {
17  this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(output_buffer_size);
18 }
19 
21 #ifdef USE_AUDIO_MP3_SUPPORT
22  if (this->audio_file_type_ == AudioFileType::MP3) {
23  esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
24  }
25 #endif
26 }
27 
28 esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
29  if (this->input_transfer_buffer_ != nullptr) {
30  this->input_transfer_buffer_->set_source(input_ring_buffer);
31  return ESP_OK;
32  }
33  return ESP_ERR_NO_MEM;
34 }
35 
36 esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
37  if (this->output_transfer_buffer_ != nullptr) {
38  this->output_transfer_buffer_->set_sink(output_ring_buffer);
39  return ESP_OK;
40  }
41  return ESP_ERR_NO_MEM;
42 }
43 
44 #ifdef USE_SPEAKER
// Speaker overload of add_sink(): hands `speaker` to the output transfer buffer as its sink.
// Returns ESP_OK on success, or ESP_ERR_NO_MEM if the output transfer buffer is null.
// NOTE(review): listing line 45 (the function signature declaring `speaker`) is missing from this
// extract, as are the stray listing numbers' original indentation; restore the signature from the
// upstream file — the parameter type cannot be determined from what is visible here.
46  if (this->output_transfer_buffer_ != nullptr) {
47  this->output_transfer_buffer_->set_sink(speaker);
48  return ESP_OK;
49  }
50  return ESP_ERR_NO_MEM;
51 }
52 #endif
53 
54 esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
55  if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
56  return ESP_ERR_NO_MEM;
57  }
58 
59  this->audio_file_type_ = audio_file_type;
60 
61  this->potentially_failed_count_ = 0;
62  this->end_of_file_ = false;
63 
64  switch (this->audio_file_type_) {
65 #ifdef USE_AUDIO_FLAC_SUPPORT
67  this->flac_decoder_ = make_unique<esp_audio_libs::flac::FLACDecoder>();
68  this->free_buffer_required_ =
69  this->output_transfer_buffer_->capacity(); // We'll revise this after reading the header
70  break;
71 #endif
72 #ifdef USE_AUDIO_MP3_SUPPORT
73  case AudioFileType::MP3:
74  this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
75  this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels
76  break;
77 #endif
78  case AudioFileType::WAV:
79  this->wav_decoder_ = make_unique<esp_audio_libs::wav_decoder::WAVDecoder>();
80  this->wav_decoder_->reset();
81  this->free_buffer_required_ = 1024;
82  break;
84  default:
85  return ESP_ERR_NOT_SUPPORTED;
86  break;
87  }
88 
89  return ESP_OK;
90 }
91 
// Decodes audio from the ring buffer source and writes to the sink.
//
// Visible flow:
//  1. When stop_gracefully is set and the output transfer buffer is empty, check end_of_file_ and whether
//     the input transfer buffer still has buffered data.
//  2. Compare potentially_failed_count_ against MAX_POTENTIALLY_FAILED_COUNT.
//  3. Loop while `state` is MORE_TO_PROCESS: flush decoded audio to the sink (or delay while paused),
//     pull new data from the source, then dispatch to the decoder matching audio_file_type_.
//
// NOTE(review): this extract is missing listing lines 97, 102, 110, 112, 115, 136, 178-179, 183 and 188.
// Several branches below are therefore empty and `state` is never declared; presumably those lines hold
// the return statements and the declaration. Restore them from the upstream file before relying on this.
92 AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
93  if (stop_gracefully) {
94  if (this->output_transfer_buffer_->available() == 0) {
95  if (this->end_of_file_) {
96  // The file decoder indicates it reached the end of file
// NOTE(review): listing line 97 missing here (presumably a return).
98  }
99 
100  if (!this->input_transfer_buffer_->has_buffered_data()) {
101  // If all the internal buffers are empty, the decoding is done
// NOTE(review): listing line 102 missing here (presumably a return).
103  }
104  }
105  }
106 
107  if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
108  if (stop_gracefully) {
109  // No more new data is going to come in, so decoding is done
// NOTE(review): listing line 110 missing here (presumably a return).
111  }
// NOTE(review): listing line 112 missing here (presumably a return for the non-graceful case).
113  }
114 
// NOTE(review): listing line 115 missing here; `state` used by the loop below is presumably declared there.
116 
// Timestamp used to bound how long a single call may keep decoding (see DECODING_TIMEOUT_MS below).
117  uint32_t decoding_start = millis();
118 
119  while (state == FileDecoderState::MORE_TO_PROCESS) {
120  // Transfer decoded out
121  if (!this->pause_output_) {
122  size_t bytes_written = this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
// Playback position is only tracked once the stream format is known.
123  if (this->audio_stream_info_.has_value()) {
124  this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
// accumulated_frames_written_ is passed by pointer; presumably the callee leaves the sub-millisecond
// remainder in it — confirm against AudioStreamInfo.
125  this->playback_ms_ +=
126  this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
127  }
128  } else {
129  // If paused, block to avoid wasting CPU resources
130  delay(READ_WRITE_TIMEOUT_MS);
131  }
132 
133  // Verify there is enough space to store more decoded audio and that the function hasn't been running too long
134  if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||
135  (millis() - decoding_start > DECODING_TIMEOUT_MS)) {
// NOTE(review): listing line 136 missing here (presumably a return that yields to the caller).
137  }
138 
139  // Decode more audio
140 
141  size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
142 
143  if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {
144  // Failed to decode in last attempt and there is no new data
145 
146  if (this->input_transfer_buffer_->free() == 0) {
147  // The input buffer is full. Since it previously failed on the exact same data, we can never recover
148  state = FileDecoderState::FAILED;
149  } else {
150  // Attempt to get more data next time
151  state = FileDecoderState::IDLE;
152  }
153  } else if (this->input_transfer_buffer_->available() == 0) {
154  // No data to decode, attempt to get more data next time
155  state = FileDecoderState::IDLE;
156  } else {
// Dispatch to the decoder for the file type chosen in start(); unsupported types leave the loop as IDLE.
157  switch (this->audio_file_type_) {
158 #ifdef USE_AUDIO_FLAC_SUPPORT
159  case AudioFileType::FLAC:
160  state = this->decode_flac_();
161  break;
162 #endif
163 #ifdef USE_AUDIO_MP3_SUPPORT
164  case AudioFileType::MP3:
165  state = this->decode_mp3_();
166  break;
167 #endif
168  case AudioFileType::WAV:
169  state = this->decode_wav_();
170  break;
171  case AudioFileType::NONE:
172  default:
173  state = FileDecoderState::IDLE;
174  break;
175  }
176  }
177 
// NOTE(review): listing lines 178-179 missing here; the `} else if` chain below has no opening `if` in
// this extract (presumably the branch that increments potentially_failed_count_).
180  } else if (state == FileDecoderState::END_OF_FILE) {
181  this->end_of_file_ = true;
182  } else if (state == FileDecoderState::FAILED) {
// NOTE(review): listing line 183 missing here (presumably a return).
184  } else if (state == FileDecoderState::MORE_TO_PROCESS) {
// A successful decode clears the failure streak.
185  this->potentially_failed_count_ = 0;
186  }
187  }
// NOTE(review): listing line 188 missing here (presumably the final return).
189 }
190 
191 #ifdef USE_AUDIO_FLAC_SUPPORT
// FLAC decoding step (the page's index lists this as `FileDecoderState decode_flac_()`).
//
// Header phase (audio_stream_info_ not yet set): reads the FLAC header from the input transfer buffer,
// consumes the header bytes, sets free_buffer_required_ from the decoder's output buffer size (growing the
// output transfer buffer if its capacity is too small) and records the stream info.
// Frame phase: decodes one frame into the free end of the output transfer buffer, consumes the input bytes
// the decoder used and extends the output buffer by the decoded samples.
//
// NOTE(review): this extract is missing listing lines 192 (the signature), 199, 204, 215, 223, 233, 241,
// 249 and 252 — presumably the return statements of each branch. Restore them from the upstream file; the
// returned FileDecoderState values cannot be determined from what is visible here.
193  if (!this->audio_stream_info_.has_value()) {
194  // Header hasn't been read
195  auto result = this->flac_decoder_->read_header(this->input_transfer_buffer_->get_buffer_start(),
196  this->input_transfer_buffer_->available());
197 
198  if (result == esp_audio_libs::flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
// NOTE(review): listing line 199 missing here (presumably a return).
200  }
201 
202  if (result != esp_audio_libs::flac::FLAC_DECODER_SUCCESS) {
203  // Couldn't read FLAC header
// NOTE(review): listing line 204 missing here (presumably a return).
205  }
206 
// Drop the header bytes the decoder reports it has consumed.
207  size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
208  this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
209 
// Revise the placeholder set in start() now that the header has been read.
210  this->free_buffer_required_ = flac_decoder_->get_output_buffer_size_bytes();
211  if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
212  // Output buffer is not big enough
213  if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
214  // Couldn't reallocate output buffer
// NOTE(review): listing line 215 missing here (presumably a return).
216  }
217  }
218 
219  this->audio_stream_info_ =
220  audio::AudioStreamInfo(this->flac_decoder_->get_sample_depth(), this->flac_decoder_->get_num_channels(),
221  this->flac_decoder_->get_sample_rate());
222 
// NOTE(review): listing line 223 missing here (presumably a return ending the header phase).
224  }
225 
226  uint32_t output_samples = 0;
// Decoded samples are written directly after the data already held in the output transfer buffer.
227  auto result = this->flac_decoder_->decode_frame(
228  this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available(),
229  reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end()), &output_samples);
230 
231  if (result == esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
232  // Not an issue, just needs more data that we'll get next time.
// NOTE(review): listing line 233 missing here (presumably a return).
234  }
235 
236  size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
237  this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
238 
// Relies on the numeric ordering of the decoder's result codes; presumably every value above
// FLAC_DECODER_ERROR_OUT_OF_DATA is a decode error — confirm against the FLAC decoder header.
239  if (result > esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
240  // Corrupted frame, don't retry with current buffer content, wait for new sync
// NOTE(review): listing line 241 missing here (presumably a return).
242  }
243 
244  // We have successfully decoded some input data and have new output data
245  this->output_transfer_buffer_->increase_buffer_length(
246  this->audio_stream_info_.value().samples_to_bytes(output_samples));
247 
248  if (result == esp_audio_libs::flac::FLAC_DECODER_NO_MORE_FRAMES) {
// NOTE(review): listing line 249 missing here (presumably a return).
250  }
251 
// NOTE(review): listing line 252 missing here (presumably the final return).
253 }
254 #endif
255 
256 #ifdef USE_AUDIO_MP3_SUPPORT
// MP3 decoding step (the page's index lists this as `FileDecoderState decode_mp3_()`).
//
// Searches the input transfer buffer for the next sync word, discarding the whole buffer if none is found.
// Otherwise skips to the sync word, decodes with MP3Decode into the free end of the output transfer buffer
// and consumes the input bytes the decoder used. On success the output buffer is extended by the decoded
// samples and the stream info is recorded the first time a frame is decoded.
//
// NOTE(review): this extract is missing listing lines 257 (the signature), 266, 285, 289 and 306 —
// presumably the return statements of each branch. Restore them from the upstream file; the returned
// FileDecoderState values cannot be determined from what is visible here.
258  // Look for the next sync word
259  int buffer_length = (int) this->input_transfer_buffer_->available();
260  int32_t offset =
261  esp_audio_libs::helix_decoder::MP3FindSyncWord(this->input_transfer_buffer_->get_buffer_start(), buffer_length);
262 
263  if (offset < 0) {
264  // New data may have the sync word
265  this->input_transfer_buffer_->decrease_buffer_length(buffer_length);
// NOTE(review): listing line 266 missing here (presumably a return).
267  }
268 
269  // Advance read pointer to match the offset for the syncword
270  this->input_transfer_buffer_->decrease_buffer_length(offset);
271  uint8_t *buffer_start = this->input_transfer_buffer_->get_buffer_start();
272 
273  buffer_length = (int) this->input_transfer_buffer_->available();
// buffer_start and buffer_length are passed by pointer; the code below treats buffer_length as the
// number of bytes left unconsumed after the call.
274  int err = esp_audio_libs::helix_decoder::MP3Decode(this->mp3_decoder_, &buffer_start, &buffer_length,
275  (int16_t *) this->output_transfer_buffer_->get_buffer_end(), 0);
276 
// Consume exactly the bytes the decoder used, whether or not it reported an error.
277  size_t consumed = this->input_transfer_buffer_->available() - buffer_length;
278  this->input_transfer_buffer_->decrease_buffer_length(consumed);
279 
280  if (err) {
281  switch (err) {
282  case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
283  // Intentional fallthrough
284  case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
// NOTE(review): listing line 285 missing here (presumably a return for these two errors).
286  break;
287  default:
288  // Most errors are recoverable by moving on to the next frame, so mark as potentially failed
// NOTE(review): listing line 289 missing here (presumably a return).
290  break;
291  }
292  } else {
293  esp_audio_libs::helix_decoder::MP3FrameInfo mp3_frame_info;
294  esp_audio_libs::helix_decoder::MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
295  if (mp3_frame_info.outputSamps > 0) {
296  int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
297  this->output_transfer_buffer_->increase_buffer_length(mp3_frame_info.outputSamps * bytes_per_sample);
298 
// Stream info is taken from the first successfully decoded frame only.
299  if (!this->audio_stream_info_.has_value()) {
300  this->audio_stream_info_ =
301  audio::AudioStreamInfo(mp3_frame_info.bitsPerSample, mp3_frame_info.nChans, mp3_frame_info.samprate);
302  }
303  }
304  }
305 
// NOTE(review): listing line 306 missing here (presumably the final return).
307 }
308 #endif
309 
// WAV decoding step (the page's index lists this as `FileDecoderState decode_wav_()`).
//
// Header phase (audio_stream_info_ not yet set): runs the WAV header decoder over the buffered input; once
// it reports WAV_DECODER_SUCCESS_IN_DATA the header bytes are consumed and the remaining chunk size is
// stored in wav_bytes_left_ (wav_has_known_end_ is true only when that size is non-zero).
// Data phase: copies as many bytes as are available, limited by the bytes left in the chunk (when known)
// and by the free space in the output transfer buffer, straight from input to output.
//
// NOTE(review): this extract is missing listing lines 310 (the signature), 320, 325, 328, 330 and 355 —
// presumably the start of the stream-info assignment and the return statements. Restore them from the
// upstream file; most returned FileDecoderState values cannot be determined from what is visible here.
311  if (!this->audio_stream_info_.has_value()) {
312  // Header hasn't been processed
313 
314  esp_audio_libs::wav_decoder::WAVDecoderResult result = this->wav_decoder_->decode_header(
315  this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available());
316 
317  if (result == esp_audio_libs::wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
318  this->input_transfer_buffer_->decrease_buffer_length(this->wav_decoder_->bytes_processed());
319 
// NOTE(review): listing line 320 missing here; the next line is the tail of a statement begun there,
// presumably the assignment of audio_stream_info_.
321  this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(), this->wav_decoder_->sample_rate());
322 
323  this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
// A chunk size of zero is treated as "length unknown": data is then copied without a byte limit.
324  this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
// NOTE(review): listing line 325 missing here (presumably a return).
326  } else if (result == esp_audio_libs::wav_decoder::WAV_DECODER_WARNING_INCOMPLETE_DATA) {
327  // Available data didn't have the full header
// NOTE(review): listing line 328 missing here (presumably a return).
329  } else {
// NOTE(review): listing line 330 missing here (presumably a return for any other header result).
331  }
332  } else {
333  if (!this->wav_has_known_end_ || (this->wav_bytes_left_ > 0)) {
334  size_t bytes_to_copy = this->input_transfer_buffer_->available();
335 
336  if (this->wav_has_known_end_) {
337  bytes_to_copy = std::min(bytes_to_copy, this->wav_bytes_left_);
338  }
339 
340  bytes_to_copy = std::min(bytes_to_copy, this->output_transfer_buffer_->free());
341 
342  if (bytes_to_copy > 0) {
// The audio data is copied unchanged from the input buffer to the output buffer.
343  std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_transfer_buffer_->get_buffer_start(),
344  bytes_to_copy);
345  this->input_transfer_buffer_->decrease_buffer_length(bytes_to_copy);
346  this->output_transfer_buffer_->increase_buffer_length(bytes_to_copy);
347  if (this->wav_has_known_end_) {
348  this->wav_bytes_left_ -= bytes_to_copy;
349  }
350  }
351  return FileDecoderState::IDLE;
352  }
353  }
354 
// NOTE(review): listing line 355 missing here (presumably the final return, reached once a known-length
// chunk has been fully copied or after the header branches above).
356 }
357 
358 } // namespace audio
359 } // namespace esphome
360 
361 #endif
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
std::unique_ptr< esp_audio_libs::flac::FLACDecoder > flac_decoder_
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
uint32_t IRAM_ATTR HOT millis()
Definition: core.cpp:25
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_
FileDecoderState decode_wav_()
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
~AudioDecoder()
Deallocates the MP3 decoder (the flac and wav decoders are deallocated automatically) ...
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the input and output transfer buffers.
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
std::unique_ptr< AudioSourceTransferBuffer > input_transfer_buffer_
std::unique_ptr< esp_audio_libs::wav_decoder::WAVDecoder > wav_decoder_
FileDecoderState decode_flac_()
Implementation of SPI Controller mode.
Definition: a01nyub.cpp:7
FileDecoderState decode_mp3_()
bool state
Definition: fan.h:34
void IRAM_ATTR HOT delay(uint32_t ms)
Definition: core.cpp:26
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
optional< AudioStreamInfo > audio_stream_info_