ESPHome: /opt/build/esphome/esphome/components/audio/audio_decoder.cpp Source File

 #include "audio_decoder.h"

 #ifdef USE_ESP32

 #include "esphome/core/hal.h"

 namespace esphome {
 namespace audio {

 // Longest time, in milliseconds, that one decode() call keeps looping before it yields to the caller
 static constexpr uint32_t DECODING_TIMEOUT_MS = 50;
 // Longest time, in milliseconds, that a single transfer to the sink or from the source may wait
 static constexpr uint32_t READ_WRITE_TIMEOUT_MS = 20;

 // decode() gives up once the count of potentially failed decode attempts exceeds this value; the count
 // is cleared every time a decode attempt reports that there is more to process
 static constexpr uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;

 // Creates the two transfer buffers the decoder works with: one holding encoded input and one holding
 // decoded output. The other methods compare these pointers against nullptr before relying on them, so a
 // failed creation is reported later (by start(), add_source() and add_sink()) rather than here.
 AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
     : input_transfer_buffer_(AudioSourceTransferBuffer::create(input_buffer_size)),
       output_transfer_buffer_(AudioSinkTransferBuffer::create(output_buffer_size)) {}

 // Releases the MP3 decoder handle. The handle is only created by start() for MP3 files, so the file type
 // is used to decide whether there is anything to free.
 AudioDecoder::~AudioDecoder() {
 #ifdef USE_AUDIO_MP3_SUPPORT
   const bool mp3_decoder_in_use = (this->audio_file_type_ == AudioFileType::MP3);
   if (mp3_decoder_in_use) {
     esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
   }
 #endif
 }

 // Attaches the ring buffer that supplies encoded file data to the input transfer buffer.
 // Returns ESP_OK on success, or ESP_ERR_NO_MEM when the input transfer buffer does not exist.
 esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
   if (this->input_transfer_buffer_ == nullptr) {
     // Nothing to attach the source to
     return ESP_ERR_NO_MEM;
   }

   this->input_transfer_buffer_->set_source(input_ring_buffer);
   return ESP_OK;
 }

 // Attaches the ring buffer that receives decoded audio to the output transfer buffer.
 // Returns ESP_OK on success, or ESP_ERR_NO_MEM when the output transfer buffer does not exist.
 esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
   if (this->output_transfer_buffer_ == nullptr) {
     // Nothing to attach the sink to
     return ESP_ERR_NO_MEM;
   }

   this->output_transfer_buffer_->set_sink(output_ring_buffer);
   return ESP_OK;
 }

 #ifdef USE_SPEAKER
 // Attaches a speaker as the destination for decoded audio.
 // Returns ESP_OK on success, or ESP_ERR_NO_MEM when the output transfer buffer does not exist.
 esp_err_t AudioDecoder::add_sink(speaker::Speaker *speaker) {
   if (this->output_transfer_buffer_ == nullptr) {
     // Nothing to attach the sink to
     return ESP_ERR_NO_MEM;
   }

   this->output_transfer_buffer_->set_sink(speaker);
   return ESP_OK;
 }
 #endif

 // Prepares the decoder for a file of the given type.
 //
 // Resets the failure counter and end-of-file flag, creates the format-specific decoder and records how
 // many free bytes the output transfer buffer must have before decode() attempts to decode more audio.
 //
 // Returns:
 //   ESP_OK                 the decoder is ready
 //   ESP_ERR_NO_MEM         a transfer buffer is missing, the MP3 decoder could not be created, or the
 //                          output transfer buffer could not be grown to the required size
 //   ESP_ERR_NOT_SUPPORTED  the file type is NONE or has no decoder compiled in
 esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
   if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
     return ESP_ERR_NO_MEM;
   }

 #ifdef USE_AUDIO_MP3_SUPPORT
   // An earlier start() for an MP3 file left a decoder handle behind. Free it before the file type is
   // overwritten; the destructor only frees the handle while the type is still MP3, so it would otherwise leak.
   if (this->audio_file_type_ == AudioFileType::MP3) {
     esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
     this->mp3_decoder_ = nullptr;
   }
 #endif

   this->audio_file_type_ = audio_file_type;

   this->potentially_failed_count_ = 0;
   this->end_of_file_ = false;

   switch (this->audio_file_type_) {
 #ifdef USE_AUDIO_FLAC_SUPPORT
     case AudioFileType::FLAC:
       this->flac_decoder_ = make_unique<esp_audio_libs::flac::FLACDecoder>();
       this->free_buffer_required_ =
           this->output_transfer_buffer_->capacity();  // We'll revise this after reading the header
       break;
 #endif
 #ifdef USE_AUDIO_MP3_SUPPORT
     case AudioFileType::MP3:
       this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
       if (this->mp3_decoder_ == nullptr) {
         // No decoder was created. Don't leave the type as MP3 with nothing behind it.
         this->audio_file_type_ = AudioFileType::NONE;
         return ESP_ERR_NO_MEM;
       }
       this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2;  // samples * size per sample * channels
       break;
 #endif
     case AudioFileType::WAV:
       this->wav_decoder_ = make_unique<esp_audio_libs::wav_decoder::WAVDecoder>();
       this->wav_decoder_->reset();
       this->free_buffer_required_ = 1024;
       break;
     case AudioFileType::NONE:
     default:
       return ESP_ERR_NOT_SUPPORTED;
   }

   // decode() will not decode anything while the output buffer has fewer than free_buffer_required_ bytes
   // free. If the whole buffer is smaller than that, decoding could never start, so grow it here the same
   // way decode_flac_() does after reading the header.
   if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
     if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
       return ESP_ERR_NO_MEM;
     }
   }

   return ESP_OK;
 }

 // Runs the decode loop: moves decoded audio to the sink, pulls encoded data from the source and hands it
 // to the decoder for the current file type.
 //
 // stop_gracefully: when true, the caller signals that no more new data is going to come in. The function
 //                  then reports FINISHED once the output buffer has nothing available and either the file
 //                  decoder reached the end of file or the input buffer has no buffered data left.
 //
 // Returns:
 //   FINISHED  see stop_gracefully above; also returned when the potentially-failed limit is exceeded
 //             while stop_gracefully is true
 //   FAILED    the file decoder reported a failure, or the potentially-failed limit is exceeded while
 //             stop_gracefully is false
 //   DECODING  in every other case (not enough free output space, time budget used up, or the loop ended
 //             because the file decoder had nothing more to process for now)
 AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
   if (stop_gracefully) {
     if (this->output_transfer_buffer_->available() == 0) {
       if (this->end_of_file_) {
         // The file decoder indicates it reached the end of file
         return AudioDecoderState::FINISHED;
       }

       if (!this->input_transfer_buffer_->has_buffered_data()) {
         // If all the internal buffers are empty, the decoding is done
         return AudioDecoderState::FINISHED;
       }
     }
   }

   // Too many decode attempts in a row were flagged as potentially failed
   if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
     if (stop_gracefully) {
       // No more new data is going to come in, so decoding is done
       return AudioDecoderState::FINISHED;
     }
     return AudioDecoderState::FAILED;
   }

   FileDecoderState state = FileDecoderState::MORE_TO_PROCESS;

   // Reference point for the DECODING_TIMEOUT_MS budget checked inside the loop
   uint32_t decoding_start = millis();

   // Keep going only while the file decoder reports that it has more to process
   while (state == FileDecoderState::MORE_TO_PROCESS) {
     // Transfer decoded out
     if (!this->pause_output_) {
       size_t bytes_written = this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
       if (this->audio_stream_info_.has_value()) {
         // Track playback time: add the newly written frames to the accumulator, then convert to
         // milliseconds. The accumulator is passed by pointer to the conversion helper.
         this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
         this->playback_ms_ +=
             this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
       }
     } else {
       // If paused, block to avoid wasting CPU resources
       delay(READ_WRITE_TIMEOUT_MS);
     }

     // Verify there is enough space to store more decoded audio and that the function hasn't been running too long
     if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||
         (millis() - decoding_start > DECODING_TIMEOUT_MS)) {
       return AudioDecoderState::DECODING;
     }

     // Decode more audio

     size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));

     if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {
       // Failed to decode in last attempt and there is no new data

       if (this->input_transfer_buffer_->free() == 0) {
         // The input buffer is full. Since it previously failed on the exact same data, we can never recover
         state = FileDecoderState::FAILED;
       } else {
         // Attempt to get more data next time
         state = FileDecoderState::IDLE;
       }
     } else if (this->input_transfer_buffer_->available() == 0) {
       // No data to decode, attempt to get more data next time
       state = FileDecoderState::IDLE;
     } else {
       // There is input data to work on: hand it to the decoder matching the file type set by start()
       switch (this->audio_file_type_) {
 #ifdef USE_AUDIO_FLAC_SUPPORT
         case AudioFileType::FLAC:
           state = this->decode_flac_();
           break;
 #endif
 #ifdef USE_AUDIO_MP3_SUPPORT
         case AudioFileType::MP3:
           state = this->decode_mp3_();
           break;
 #endif
         case AudioFileType::WAV:
           state = this->decode_wav_();
           break;
         case AudioFileType::NONE:
         default:
           state = FileDecoderState::IDLE;
           break;
       }
     }

     // Bookkeeping for the result of this pass. Only MORE_TO_PROCESS keeps the loop running; it also
     // clears the potentially-failed counter. IDLE changes nothing and simply ends the loop.
     if (state == FileDecoderState::POTENTIALLY_FAILED) {
       ++this->potentially_failed_count_;
     } else if (state == FileDecoderState::END_OF_FILE) {
       this->end_of_file_ = true;
     } else if (state == FileDecoderState::FAILED) {
       return AudioDecoderState::FAILED;
     } else if (state == FileDecoderState::MORE_TO_PROCESS) {
       this->potentially_failed_count_ = 0;
     }
   }
   return AudioDecoderState::DECODING;
 }

 #ifdef USE_AUDIO_FLAC_SUPPORT
 // Performs one FLAC step on the data currently in the input transfer buffer.
 //
 // Until the stream info is known, the step reads the FLAC header, sizes the output buffer and records
 // the stream info. Afterwards each step decodes one frame directly into the output transfer buffer.
 FileDecoderState AudioDecoder::decode_flac_() {
   namespace flac = esp_audio_libs::flac;

   if (this->audio_stream_info_.has_value()) {
     // Header already handled: decode a frame into the free end of the output buffer
     uint32_t decoded_samples = 0;
     const auto frame_result = this->flac_decoder_->decode_frame(
         this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available(),
         reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end()), &decoded_samples);

     if (frame_result == flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
       // The frame is incomplete; keep the input untouched and retry once more data has arrived
       return FileDecoderState::POTENTIALLY_FAILED;
     }

     // Drop whatever the decoder consumed from the input buffer
     const size_t frame_bytes = this->flac_decoder_->get_bytes_index();
     this->input_transfer_buffer_->decrease_buffer_length(frame_bytes);

     if (frame_result > flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
       // The frame is corrupted; its bytes are already discarded, so the next attempt starts on new data
       return FileDecoderState::POTENTIALLY_FAILED;
     }

     // Publish the newly decoded audio in the output buffer
     this->output_transfer_buffer_->increase_buffer_length(
         this->audio_stream_info_.value().samples_to_bytes(decoded_samples));

     return (frame_result == flac::FLAC_DECODER_NO_MORE_FRAMES) ? FileDecoderState::END_OF_FILE
                                                                : FileDecoderState::MORE_TO_PROCESS;
   }

   // Stream info is still unknown, so the header has to be read first
   const auto header_result = this->flac_decoder_->read_header(this->input_transfer_buffer_->get_buffer_start(),
                                                               this->input_transfer_buffer_->available());

   if (header_result == flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
     // The header is not fully available yet
     return FileDecoderState::POTENTIALLY_FAILED;
   }

   if (header_result != flac::FLAC_DECODER_SUCCESS) {
     // The header could not be read
     return FileDecoderState::FAILED;
   }

   const size_t header_bytes = this->flac_decoder_->get_bytes_index();
   this->input_transfer_buffer_->decrease_buffer_length(header_bytes);

   // The header tells how much output space the decoder needs; grow the output buffer if it is too small
   this->free_buffer_required_ = this->flac_decoder_->get_output_buffer_size_bytes();
   const bool output_too_small = this->output_transfer_buffer_->capacity() < this->free_buffer_required_;
   if (output_too_small && !this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
     // The output buffer could not be enlarged
     return FileDecoderState::FAILED;
   }

   this->audio_stream_info_ =
       audio::AudioStreamInfo(this->flac_decoder_->get_sample_depth(), this->flac_decoder_->get_num_channels(),
                              this->flac_decoder_->get_sample_rate());

   return FileDecoderState::MORE_TO_PROCESS;
 }
 #endif

 #ifdef USE_AUDIO_MP3_SUPPORT
 // Performs one MP3 step: locates the next sync word in the input transfer buffer, decodes the frame that
 // starts there into the output transfer buffer and records the stream info from the first decoded frame.
 FileDecoderState AudioDecoder::decode_mp3_() {
   namespace helix = esp_audio_libs::helix_decoder;

   // Search the buffered input for the start of a frame
   int bytes_available = static_cast<int>(this->input_transfer_buffer_->available());
   const int32_t sync_offset =
       helix::MP3FindSyncWord(this->input_transfer_buffer_->get_buffer_start(), bytes_available);

   if (sync_offset < 0) {
     // No sync word in the buffered data; discard it since new data may have the sync word
     this->input_transfer_buffer_->decrease_buffer_length(bytes_available);
     return FileDecoderState::POTENTIALLY_FAILED;
   }

   // Skip everything in front of the sync word
   this->input_transfer_buffer_->decrease_buffer_length(sync_offset);
   uint8_t *read_position = this->input_transfer_buffer_->get_buffer_start();

   bytes_available = static_cast<int>(this->input_transfer_buffer_->available());
   const int decode_error =
       helix::MP3Decode(this->mp3_decoder_, &read_position, &bytes_available,
                        reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end()), 0);

   // The decoder updates the remaining byte count; the difference is what it consumed
   const size_t bytes_used = this->input_transfer_buffer_->available() - bytes_available;
   this->input_transfer_buffer_->decrease_buffer_length(bytes_used);

   if (decode_error != 0) {
     // Out-of-memory and null-pointer errors are fatal. Most other errors are recoverable by moving on
     // to the next frame, so they are only marked as potentially failed.
     const bool unrecoverable =
         (decode_error == helix::ERR_MP3_OUT_OF_MEMORY) || (decode_error == helix::ERR_MP3_NULL_POINTER);
     return unrecoverable ? FileDecoderState::FAILED : FileDecoderState::POTENTIALLY_FAILED;
   }

   helix::MP3FrameInfo frame_info;
   helix::MP3GetLastFrameInfo(this->mp3_decoder_, &frame_info);
   if (frame_info.outputSamps <= 0) {
     // The frame produced no audio
     return FileDecoderState::MORE_TO_PROCESS;
   }

   // Publish the decoded samples in the output buffer
   const int sample_bytes = frame_info.bitsPerSample / 8;
   this->output_transfer_buffer_->increase_buffer_length(frame_info.outputSamps * sample_bytes);

   if (!this->audio_stream_info_.has_value()) {
     // First decoded frame: remember the stream parameters
     this->audio_stream_info_ =
         audio::AudioStreamInfo(frame_info.bitsPerSample, frame_info.nChans, frame_info.samprate);
   }

   return FileDecoderState::MORE_TO_PROCESS;
 }
 #endif

 // Performs one WAV step on the data currently in the input transfer buffer.
 //
 // Until the stream info is known, the step parses the WAV header. Afterwards it copies audio bytes
 // unchanged from the input transfer buffer to the output transfer buffer, limited by the number of
 // bytes left in the data chunk when that number is known.
 FileDecoderState AudioDecoder::decode_wav_() {
   if (this->audio_stream_info_.has_value()) {
     // Header already handled. A known chunk length that has been used up means the file is done.
     const bool chunk_exhausted = this->wav_has_known_end_ && (this->wav_bytes_left_ == 0);
     if (chunk_exhausted) {
       return FileDecoderState::END_OF_FILE;
     }

     // Copy as much as is buffered, without passing the chunk end or overfilling the output buffer
     size_t copy_size = this->input_transfer_buffer_->available();
     if (this->wav_has_known_end_) {
       copy_size = std::min(copy_size, this->wav_bytes_left_);
     }
     copy_size = std::min(copy_size, this->output_transfer_buffer_->free());

     if (copy_size > 0) {
       std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_transfer_buffer_->get_buffer_start(),
                   copy_size);
       this->input_transfer_buffer_->decrease_buffer_length(copy_size);
       this->output_transfer_buffer_->increase_buffer_length(copy_size);
       if (this->wav_has_known_end_) {
         this->wav_bytes_left_ -= copy_size;
       }
     }
     return FileDecoderState::IDLE;
   }

   // Stream info is still unknown, so the header has to be processed first
   const esp_audio_libs::wav_decoder::WAVDecoderResult header_result = this->wav_decoder_->decode_header(
       this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available());

   if (header_result == esp_audio_libs::wav_decoder::WAV_DECODER_WARNING_INCOMPLETE_DATA) {
     // The buffered data did not contain the full header
     return FileDecoderState::POTENTIALLY_FAILED;
   }
   if (header_result != esp_audio_libs::wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
     return FileDecoderState::FAILED;
   }

   // Header parsed and the decoder is now positioned in the audio data
   this->input_transfer_buffer_->decrease_buffer_length(this->wav_decoder_->bytes_processed());

   this->audio_stream_info_ = audio::AudioStreamInfo(
       this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(), this->wav_decoder_->sample_rate());

   // A chunk length of zero is treated as "end unknown"
   this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
   this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
   return FileDecoderState::MORE_TO_PROCESS;
 }

 }  // namespace audio
 }  // namespace esphome

 #endif
esphome::audio::AudioDecoder::audio_file_type_
AudioFileType audio_file_type_
Definition: audio_decoder.h:117

esphome::audio::FileDecoderState::FAILED

esphome::speaker::Speaker
Definition: speaker.h:29

esphome::audio::FileDecoderState::MORE_TO_PROCESS

esphome::audio::AudioDecoder::add_source
esp_err_t add_source(std::weak_ptr< RingBuffer > &input_ring_buffer)
Adds a source ring buffer for raw file data.
Definition: audio_decoder.cpp:28

esphome::audio::AudioSinkTransferBuffer::create
static std::unique_ptr< AudioSinkTransferBuffer > create(size_t buffer_size)
Creates a new sink transfer buffer.
Definition: audio_transfer_buffer.cpp:12

esphome::audio::AudioFileType::FLAC

esphome::audio::AudioDecoder::flac_decoder_
std::unique_ptr< esp_audio_libs::flac::FLACDecoder > flac_decoder_
Definition: audio_decoder.h:106

esphome::audio::AudioDecoderState::FAILED

esphome::audio::AudioDecoder::wav_has_known_end_
bool wav_has_known_end_
Definition: audio_decoder.h:125

esphome::audio::AudioFileType::NONE

hal.h

esphome::audio::FileDecoderState
FileDecoderState
Definition: audio_decoder.h:37

esphome::audio::AudioDecoder::start
esp_err_t start(AudioFileType audio_file_type)
Sets up decoding the file.
Definition: audio_decoder.cpp:54

esphome::millis
uint32_t IRAM_ATTR HOT millis()
Definition: core.cpp:25

esphome::audio::AudioDecoder::mp3_decoder_
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_
Definition: audio_decoder.h:110

esphome::audio::AudioStreamInfo
Definition: audio.h:11

esphome::audio::AudioDecoder::decode_wav_
FileDecoderState decode_wav_()
Definition: audio_decoder.cpp:310

esphome::audio::AudioDecoder::add_sink
esp_err_t add_sink(std::weak_ptr< RingBuffer > &output_ring_buffer)
Adds a sink ring buffer for decoded audio.
Definition: audio_decoder.cpp:36

esphome::audio::AudioDecoderState::DECODING

esphome::audio::FileDecoderState::END_OF_FILE

esphome::audio::AudioDecoder::potentially_failed_count_
uint32_t potentially_failed_count_
Definition: audio_decoder.h:123

esphome::audio::AudioDecoder::wav_bytes_left_
size_t wav_bytes_left_
Definition: audio_decoder.h:121

esphome::audio::AudioDecoder::~AudioDecoder
~AudioDecoder()
Deallocates the MP3 decoder (the flac and wav decoders are deallocated automatically).
Definition: audio_decoder.cpp:20

esphome::audio::AudioDecoder::output_transfer_buffer_
std::unique_ptr< AudioSinkTransferBuffer > output_transfer_buffer_
Definition: audio_decoder.h:115

esphome::audio::AudioDecoder::AudioDecoder
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size)
Allocates the input and output transfer buffers.
Definition: audio_decoder.cpp:15

esphome::audio::AudioSourceTransferBuffer::create
static std::unique_ptr< AudioSourceTransferBuffer > create(size_t buffer_size)
Creates a new source transfer buffer.
Definition: audio_transfer_buffer.cpp:22

esphome::audio::AudioDecoder::input_transfer_buffer_
std::unique_ptr< AudioSourceTransferBuffer > input_transfer_buffer_
Definition: audio_decoder.h:114

esphome::audio::FileDecoderState::IDLE

esphome::audio::AudioDecoder::end_of_file_
bool end_of_file_
Definition: audio_decoder.h:124

esphome::audio::AudioDecoder::wav_decoder_
std::unique_ptr< esp_audio_libs::wav_decoder::WAVDecoder > wav_decoder_
Definition: audio_decoder.h:103

esphome::audio::AudioFileType
AudioFileType
Definition: audio.h:108

esphome::audio::AudioDecoder::decode_flac_
FileDecoderState decode_flac_()
Definition: audio_decoder.cpp:192

esphome::audio::AudioFileType::MP3

esphome::audio::AudioDecoder::pause_output_
bool pause_output_
Definition: audio_decoder.h:127

esphome::audio::AudioDecoderState::FINISHED

esphome::audio::AudioDecoder::free_buffer_required_
size_t free_buffer_required_
Definition: audio_decoder.h:120

esphome
Implementation of SPI Controller mode.
Definition: a01nyub.cpp:7

esphome::audio::AudioFileType::WAV

audio_decoder.h

esphome::audio::AudioDecoderState
AudioDecoderState
Definition: audio_decoder.h:30

esphome::audio::AudioDecoder::decode_mp3_
FileDecoderState decode_mp3_()
Definition: audio_decoder.cpp:257

esphome::audio::AudioDecoder::playback_ms_
uint32_t playback_ms_
Definition: audio_decoder.h:130

esphome::audio::FileDecoderState::POTENTIALLY_FAILED

state
bool state
Definition: fan.h:34

esphome::audio::AudioDecoder::accumulated_frames_written_
uint32_t accumulated_frames_written_
Definition: audio_decoder.h:129

esphome::delay
void IRAM_ATTR HOT delay(uint32_t ms)
Definition: core.cpp:26

esphome::audio::AudioDecoder::decode
AudioDecoderState decode(bool stop_gracefully)
Decodes audio from the ring buffer source and writes to the sink.
Definition: audio_decoder.cpp:92

esphome::audio::AudioDecoder::audio_stream_info_
optional< AudioStreamInfo > audio_stream_info_
Definition: audio_decoder.h:118