From aeaabc5ed38dcf94687955ba25cc257a86e0cf57 Mon Sep 17 00:00:00 2001
From: B3N30 <benediktthomas@gmail.com>
Date: Wed, 19 Dec 2018 17:12:57 +0100
Subject: [PATCH] audio_core: dsp_hle: add Media Foundation decoder...

* appveyor: switch to Media Foundation API
* Travis CI MinGW build needs an update with the container image
---
 CMakeLists.txt                           |   2 +
 appveyor.yml                             |   4 +-
 src/audio_core/CMakeLists.txt            |  12 +-
 src/audio_core/hle/adts.h                |  31 ++
 src/audio_core/hle/adts_reader.c         |  49 +++
 src/audio_core/hle/hle.cpp               |  10 +-
 src/audio_core/hle/wmf_decoder.cpp       | 254 ++++++++++++++++
 src/audio_core/hle/wmf_decoder.h         |  22 ++
 src/audio_core/hle/wmf_decoder_utils.cpp | 366 +++++++++++++++++++++++
 src/audio_core/hle/wmf_decoder_utils.h   |  48 +++
 src/tests/CMakeLists.txt                 |   3 +-
 src/tests/audio_core/audio_fixures.h     |   5 +
 src/tests/audio_core/decoder_tests.cpp   |  50 ++++
 13 files changed, 840 insertions(+), 16 deletions(-)
 create mode 100644 src/audio_core/hle/adts.h
 create mode 100644 src/audio_core/hle/adts_reader.c
 create mode 100644 src/audio_core/hle/wmf_decoder.cpp
 create mode 100644 src/audio_core/hle/wmf_decoder.h
 create mode 100644 src/audio_core/hle/wmf_decoder_utils.cpp
 create mode 100644 src/audio_core/hle/wmf_decoder_utils.h
 create mode 100644 src/tests/audio_core/audio_fixures.h
 create mode 100644 src/tests/audio_core/decoder_tests.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c4a7228b..b9101b114 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,6 +28,8 @@ option(ENABLE_SCRIPTING "Enables scripting support" OFF)
 
 CMAKE_DEPENDENT_OPTION(CITRA_USE_BUNDLED_FFMPEG "Download bundled FFmpeg binaries" ON "MSVC" OFF)
 
+CMAKE_DEPENDENT_OPTION(ENABLE_MF "Use Media Foundation decoder" ON "WIN32;NOT ENABLE_FFMPEG" OFF)
+
 if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.git/hooks/pre-commit)
     message(STATUS "Copying pre-commit hook")
     file(COPY hooks/pre-commit
diff --git a/appveyor.yml b/appveyor.yml
index 3c8646a88..0e1ee94c4 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -43,9 +43,9 @@ before_build:
         $COMPAT = if ($env:ENABLE_COMPATIBILITY_REPORTING -eq $null) {0} else {$env:ENABLE_COMPATIBILITY_REPORTING}
         if ($env:BUILD_TYPE -eq 'msvc') {
           # redirect stderr and change the exit code to prevent powershell from cancelling the build if cmake prints a warning
-          cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DCITRA_USE_BUNDLED_QT=1 -DCITRA_USE_BUNDLED_SDL2=1 -DCITRA_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON  -DENABLE_FFMPEG=ON .. 2>&1 && exit 0'
+          cmd /C 'cmake -G "Visual Studio 15 2017 Win64" -DCITRA_USE_BUNDLED_QT=1 -DCITRA_USE_BUNDLED_SDL2=1 -DCITRA_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON  -DENABLE_MF=ON .. 2>&1 && exit 0'
         } else {
-          C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DCMAKE_BUILD_TYPE=Release -DENABLE_QT_TRANSLATION=ON -DCITRA_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON  -DENABLE_FFMPEG=ON .. 2>&1"
+          C:\msys64\usr\bin\bash.exe -lc "cmake -G 'MSYS Makefiles' -DCMAKE_BUILD_TYPE=Release -DENABLE_QT_TRANSLATION=ON -DCITRA_ENABLE_COMPATIBILITY_REPORTING=${COMPAT} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON  -DENABLE_MF=ON .. 2>&1"
         }
   - cd ..
 
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 2d0e2ebd0..4adba0452 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -27,7 +27,7 @@ add_library(audio_core STATIC
 
     $<$<BOOL:${SDL2_FOUND}>:sdl2_sink.cpp sdl2_sink.h>
     $<$<BOOL:${ENABLE_CUBEB}>:cubeb_sink.cpp cubeb_sink.h>
-    $<$<BOOL:${FFMPEG_FOUND}>:hle/aac_decoder.cpp hle/aac_decoder.h hle/ffmpeg_dl.cpp hle/ffmpeg_dl.h>
+    $<$<BOOL:${ENABLE_MF}>:hle/wmf_decoder.cpp hle/wmf_decoder.h hle/wmf_decoder_utils.cpp hle/wmf_decoder_utils.h hle/adts_reader.c>
 )
 
 create_target_directory_groups(audio_core)
@@ -35,13 +35,9 @@ create_target_directory_groups(audio_core)
 target_link_libraries(audio_core PUBLIC common core)
 target_link_libraries(audio_core PRIVATE SoundTouch)
 
-if(FFMPEG_FOUND)
-    if(UNIX)
-        target_link_libraries(audio_core PRIVATE FFmpeg::avcodec)
-    else()
-        target_include_directories(audio_core PRIVATE ${FFMPEG_DIR}/include)
-    endif()
-    target_compile_definitions(audio_core PRIVATE HAVE_FFMPEG)
+if(ENABLE_MF)
+    target_link_libraries(audio_core PRIVATE mf mfplat mfuuid) # plain names: CMake adds the toolchain's prefix/suffix
+    target_compile_definitions(audio_core PUBLIC HAVE_MF)
 endif()
 
 if(SDL2_FOUND)
diff --git a/src/audio_core/hle/adts.h b/src/audio_core/hle/adts.h
new file mode 100644
index 000000000..cba952a22
--- /dev/null
+++ b/src/audio_core/hle/adts.h
@@ -0,0 +1,31 @@
+#pragma once
+#ifndef ADTS_ADT
+#define ADTS_ADT
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+struct ADTSData {
+    bool MPEG2;             // bit 3 of header byte 1, as extracted by parse_adts()
+    uint8_t profile;        // 2-bit profile field of header byte 2, plus one
+    uint8_t channels;       // channel count looked up from channel_idx
+    uint8_t channel_idx;    // raw 3-bit channel field spanning header bytes 2 and 3
+    uint8_t framecount;     // low two bits of header byte 6, plus one
+    uint8_t samplerate_idx; // raw 4-bit sample-rate field of header byte 2
+    uint32_t length;        // 13-bit length field gathered from header bytes 3..5
+    uint32_t samplerate;    // sample rate in Hz looked up from samplerate_idx
+};
+
+typedef struct ADTSData ADTSData;
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+uint32_t parse_adts(char* buffer, struct ADTSData* out);
+// last two bytes of MF AAC decoder user data
+uint16_t mf_get_aac_tag(struct ADTSData input);
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+#endif // ADTS_ADT
diff --git a/src/audio_core/hle/adts_reader.c b/src/audio_core/hle/adts_reader.c
new file mode 100644
index 000000000..7be57d4fc
--- /dev/null
+++ b/src/audio_core/hle/adts_reader.c
@@ -0,0 +1,49 @@
+
+#include "adts.h"
+
+const uint32_t freq_table[16] = {96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
+                                 16000, 12000, 11025, 8000,  7350,  0,     0,     0};
+const short channel_table[8] = {0, 1, 2, 3, 4, 5, 6, 8};
+
+// Parses the ADTS header stored in buffer[0..6] into `out` (at least 7 bytes must be readable).
+// Returns the 13-bit frame length field, or 0 when the 0xfff sync word is missing.
+uint32_t parse_adts(char* buffer, struct ADTSData* out) {
+    // plain char may be signed: read the bytes as unsigned so shifts never sign-extend
+    const uint8_t* bytes = (const uint8_t*)buffer;
+    uint32_t tmp = 0;
+
+    // sync word 0xfff
+    tmp = ((uint32_t)bytes[0] << 8) | (bytes[1] & 0xf0);
+    if ((tmp & 0xffff) != 0xfff0)
+        return 0;
+    out->MPEG2 = (bytes[1] >> 3) & 0x1;
+    // bit 17 to 18
+    out->profile = (bytes[2] >> 6) + 1;
+    // bit 19 to 22
+    tmp = (bytes[2] >> 2) & 0xf;
+    out->samplerate_idx = tmp;
+    out->samplerate = (tmp > 15) ? 0 : freq_table[tmp];
+    // bit 24 to 26
+    tmp = ((bytes[2] & 0x1) << 2) | ((bytes[3] >> 6) & 0x3);
+    out->channel_idx = tmp;
+    out->channels = (tmp > 7) ? 0 : channel_table[tmp];
+    // bit 55 to 56
+    out->framecount = (bytes[6] & 0x3) + 1;
+    // bit 31 to 43
+    tmp = (bytes[3] & 0x3) << 11;
+    tmp |= (bytes[4] << 3) & 0x7f8;
+    tmp |= (bytes[5] >> 5) & 0x7;
+    out->length = tmp;
+    return tmp;
+}
+
+// last two bytes of MF AAC decoder user data
+uint16_t mf_get_aac_tag(struct ADTSData input) {
+    // each header field is shifted to its position and truncated to 16 bits
+    const uint16_t profile_bits = input.profile << 11;
+    const uint16_t samplerate_bits = input.samplerate_idx << 7;
+    const uint16_t channel_bits = input.channel_idx << 3;
+
+    // the three fields are combined with a bitwise OR
+    return profile_bits | samplerate_bits | channel_bits;
+}
diff --git a/src/audio_core/hle/hle.cpp b/src/audio_core/hle/hle.cpp
index fca606218..f204d5041 100644
--- a/src/audio_core/hle/hle.cpp
+++ b/src/audio_core/hle/hle.cpp
@@ -3,8 +3,8 @@
 // Refer to the license.txt file included.
 
 #include "audio_core/audio_types.h"
-#ifdef HAVE_FFMPEG
-#include "audio_core/hle/aac_decoder.h"
+#ifdef HAVE_MF
+#include "audio_core/hle/wmf_decoder.h"
 #endif
 #include "audio_core/hle/common.h"
 #include "audio_core/hle/decoder.h"
@@ -82,12 +82,12 @@ DspHle::Impl::Impl(DspHle& parent_, Memory::MemorySystem& memory) : parent(paren
         source.SetMemory(memory);
     }
 
-#ifdef HAVE_FFMPEG
-    decoder = std::make_unique<HLE::AACDecoder>(memory);
+#ifdef HAVE_MF
+    decoder = std::make_unique<HLE::WMFDecoder>(memory);
 #else
     LOG_WARNING(Audio_DSP, "FFmpeg missing, this could lead to missing audio");
     decoder = std::make_unique<HLE::NullDecoder>();
-#endif // HAVE_FFMPEG
+#endif // HAVE_MF
 
     Core::Timing& timing = Core::System::GetInstance().CoreTiming();
     tick_event =
diff --git a/src/audio_core/hle/wmf_decoder.cpp b/src/audio_core/hle/wmf_decoder.cpp
new file mode 100644
index 000000000..f612b8983
--- /dev/null
+++ b/src/audio_core/hle/wmf_decoder.cpp
@@ -0,0 +1,254 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "audio_core/hle/wmf_decoder.h"
+#include "audio_core/hle/wmf_decoder_utils.h"
+
+namespace AudioCore::HLE {
+
+class WMFDecoder::Impl {
+public:
+    explicit Impl(Memory::MemorySystem& memory);
+    ~Impl();
+    std::optional<BinaryResponse> ProcessRequest(const BinaryRequest& request);
+
+private:
+    std::optional<BinaryResponse> Initalize(const BinaryRequest& request);
+
+    void Clear();
+
+    std::optional<BinaryResponse> Decode(const BinaryRequest& request);
+
+    int DecodingLoop(ADTSData adts_header, std::array<std::vector<u8>, 2>& out_streams);
+
+    bool initalized = false; // set by Initalize() once the transform exists, reset by Clear()
+    bool selected = false;   // set by Decode() once the media types were chosen
+
+    Memory::MemorySystem& memory;
+
+    IMFTransform* transform = NULL; // the decoder MFT; NULL until Initalize() succeeds
+    DWORD in_stream_id = 0;         // stream IDs reported by the MFT (0 when it assigns none)
+    DWORD out_stream_id = 0;
+};
+
+WMFDecoder::Impl::Impl(Memory::MemorySystem& memory) : memory(memory) {
+    mf_coinit(); // starts Media Foundation; the returned status is not checked here
+}
+
+WMFDecoder::Impl::~Impl() { Clear(); } // flush and release the MFT if one is still alive
+
+std::optional<BinaryResponse> WMFDecoder::Impl::ProcessRequest(const BinaryRequest& request) {
+    // requests for any codec other than AAC are rejected without a response
+    if (request.codec != DecoderCodec::AAC) {
+        LOG_ERROR(Audio_DSP, "Got unknown codec {}", static_cast<u16>(request.codec));
+        return {};
+    }
+
+    switch (request.cmd) {
+    case DecoderCommand::Init:
+        LOG_INFO(Audio_DSP, "AACDecoder initializing");
+        return Initalize(request);
+    case DecoderCommand::Decode:
+        return Decode(request);
+    case DecoderCommand::Unknown: {
+        // echo the request back with unknown1 cleared
+        BinaryResponse echo;
+        std::memcpy(&echo, &request, sizeof(echo));
+        echo.unknown1 = 0x0;
+        return echo;
+    }
+    default:
+        LOG_ERROR(Audio_DSP, "Got unknown binary request: {}", static_cast<u16>(request.cmd));
+        return {};
+    }
+}
+
+std::optional<BinaryResponse> WMFDecoder::Impl::Initalize(const BinaryRequest& request) {
+    if (initalized)
+        Clear();
+
+    // the reply mirrors the request with unknown1 zeroed, whether or not init succeeds
+    BinaryResponse reply;
+    std::memcpy(&reply, &request, sizeof(reply));
+    reply.unknown1 = 0x0;
+
+    if (mf_decoder_init(&transform) != 0) {
+        LOG_CRITICAL(Audio_DSP, "Can't init decoder");
+        return reply;
+    }
+
+    const HRESULT hr = transform->GetStreamIDs(1, &in_stream_id, 1, &out_stream_id);
+    if (hr == E_NOTIMPL) {
+        // if not implemented, it means this MFT does not assign stream ID for you
+        in_stream_id = 0;
+        out_stream_id = 0;
+    } else if (FAILED(hr)) {
+        ReportError("Decoder failed to initialize the stream ID", hr);
+        SafeRelease(&transform);
+        return reply;
+    }
+
+    initalized = true;
+    return reply;
+}
+
+void WMFDecoder::Impl::Clear() {
+    if (initalized) { // a transform only exists after a successful Initalize()
+        mf_flush(&transform);
+        mf_deinit(&transform);
+    }
+    initalized = false;
+    selected = false;
+}
+
+// Pulls the pending output samples from the MFT, reads them as interleaved f32 and appends
+// them to `out_streams` as little-endian s16, one stream per channel.
+// Returns 0 when receive_sample() reported success, -1 on failure or after a stream change
+// (the decoder is then reset through Clear()), otherwise the receive_sample() status.
+int WMFDecoder::Impl::DecodingLoop(ADTSData adts_header,
+                                   std::array<std::vector<u8>, 2>& out_streams) {
+    // only layouts that fit out_streams are converted; anything else is still drained from
+    // the MFT but dropped, so we never index out of bounds or spin on a zero channel count
+    const bool layout_ok = adts_header.channels >= 1 && adts_header.channels <= out_streams.size();
+    const std::size_t frame_size = adts_header.channels * sizeof(f32);
+
+    while (true) {
+        IMFSample* output = NULL;
+        char* output_buffer = NULL;
+        DWORD output_len = 0;
+        const int output_status = receive_sample(transform, out_stream_id, &output);
+
+        // 0 -> okay; 3 -> okay but more data available (buffer too small)
+        if ((output_status == 0 || output_status == 3) && layout_ok && output != NULL &&
+            copy_sample_to_buffer(output, (void**)&output_buffer, &output_len) == 0) {
+            // the following was taken from ffmpeg version of the decoder
+            f32 val_f32;
+            for (std::size_t i = 0; i + frame_size <= output_len;) {
+                for (std::size_t channel = 0; channel < adts_header.channels; channel++) {
+                    std::memcpy(&val_f32, output_buffer + i, sizeof(val_f32));
+                    s16 val = static_cast<s16>(0x7FFF * val_f32);
+                    out_streams[channel].push_back(val & 0xFF);
+                    out_streams[channel].push_back(val >> 8);
+                    i += sizeof(val_f32);
+                }
+            }
+            free(output_buffer);
+        }
+        SafeRelease(&output); // the sample belongs to us once receive_sample() hands it over
+
+        // in case of "ok" only, just return quickly
+        if (output_status == 0)
+            return 0;
+        // for status = 3, try again with new buffer
+        if (output_status == 3)
+            continue;
+        // for status = 2, reset MF
+        if (output_status == 2) {
+            Clear();
+            return -1;
+        }
+        return output_status; // return on other status
+    }
+}
+
+// Decodes the ADTS frame at request.src_addr and copies the PCM of each channel to dst_addr_ch*.
+std::optional<BinaryResponse> WMFDecoder::Impl::Decode(const BinaryRequest& request) {
+    BinaryResponse response;
+    response.codec = request.codec;
+    response.cmd = request.cmd;
+    response.size = request.size;
+    response.num_channels = 2;
+    response.num_samples = 1024;
+
+    if (!initalized) {
+        LOG_DEBUG(Audio_DSP, "Decoder not initalized");
+        // This is a hack to continue games that are not compiled with the aac codec
+        return response;
+    }
+
+    if (request.src_addr < Memory::FCRAM_PADDR ||
+        request.src_addr + request.size > Memory::FCRAM_PADDR + Memory::FCRAM_SIZE) {
+        LOG_ERROR(Audio_DSP, "Got out of bounds src_addr {:08x}", request.src_addr);
+        return {};
+    }
+    u8* data = memory.GetFCRAMPointer(request.src_addr - Memory::FCRAM_PADDR);
+
+    std::array<std::vector<u8>, 2> out_streams;
+    ADTSData adts_header;
+    // 14-byte MF_MT_USER_DATA blob; kept on the stack so no return path can leak it
+    char aac_tag_storage[14] = {};
+    char* aac_tag = aac_tag_storage;
+
+    if (detect_mediatype((char*)data, request.size, &adts_header, &aac_tag) != 0) {
+        LOG_ERROR(Audio_DSP, "Unable to deduce decoding parameters from ADTS stream");
+        return response;
+    }
+
+    if (!selected) {
+        LOG_DEBUG(Audio_DSP, "New ADTS stream: channels = {}, sample rate = {}",
+                  adts_header.channels, adts_header.samplerate);
+        select_input_mediatype(transform, in_stream_id, adts_header, (UINT8*)aac_tag, 14);
+        select_output_mediatype(transform, out_stream_id);
+        send_sample(transform, in_stream_id, NULL);
+        // cache the result from detect_mediatype and call select_*_mediatype only once
+        // This could increase performance very slightly
+        transform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, 0);
+        selected = true;
+    }
+
+    IMFSample* sample = create_sample((void*)data, request.size, 1, 0);
+    if (sample == NULL) // nothing to feed the MFT with
+        return response;
+    sample->SetUINT32(MFSampleExtension_CleanPoint, 1);
+
+    bool decoded = false;
+    while (!decoded) {
+        const int input_status = send_sample(transform, in_stream_id, sample);
+        // NOTICE: you are required to check the output even if you already knew/guessed
+        // MFT didn't accept the input sample
+        decoded = DecodingLoop(adts_header, out_streams) >= 0;
+        // retry only when the MFT refused the input and is still alive: DecodingLoop()
+        // resets the decoder (transform becomes NULL) when the stream format changes
+        if (!decoded && (input_status != 1 || !initalized))
+            break;
+    }
+    SafeRelease(&sample); // drop our reference on every path out of the loop
+    if (!decoded)
+        return response;
+
+    if (out_streams[0].size() != 0) {
+        if (request.dst_addr_ch0 < Memory::FCRAM_PADDR ||
+            request.dst_addr_ch0 + out_streams[0].size() >
+                Memory::FCRAM_PADDR + Memory::FCRAM_SIZE) {
+            LOG_ERROR(Audio_DSP, "Got out of bounds dst_addr_ch0 {:08x}", request.dst_addr_ch0);
+            return {};
+        }
+        std::memcpy(memory.GetFCRAMPointer(request.dst_addr_ch0 - Memory::FCRAM_PADDR),
+                    out_streams[0].data(), out_streams[0].size());
+    }
+
+    if (out_streams[1].size() != 0) {
+        if (request.dst_addr_ch1 < Memory::FCRAM_PADDR ||
+            request.dst_addr_ch1 + out_streams[1].size() >
+                Memory::FCRAM_PADDR + Memory::FCRAM_SIZE) {
+            LOG_ERROR(Audio_DSP, "Got out of bounds dst_addr_ch1 {:08x}", request.dst_addr_ch1);
+            return {};
+        }
+        std::memcpy(memory.GetFCRAMPointer(request.dst_addr_ch1 - Memory::FCRAM_PADDR),
+                    out_streams[1].data(), out_streams[1].size());
+    }
+
+    response.num_channels = adts_header.channels;
+    return response;
+}
+
+WMFDecoder::WMFDecoder(Memory::MemorySystem& memory) : impl(std::make_unique<Impl>(memory)) {}
+
+WMFDecoder::~WMFDecoder() = default;
+
+std::optional<BinaryResponse> WMFDecoder::ProcessRequest(const BinaryRequest& request) {
+    return impl->ProcessRequest(request);
+}
+
+} // namespace AudioCore::HLE
diff --git a/src/audio_core/hle/wmf_decoder.h b/src/audio_core/hle/wmf_decoder.h
new file mode 100644
index 000000000..34e223740
--- /dev/null
+++ b/src/audio_core/hle/wmf_decoder.h
@@ -0,0 +1,22 @@
+// Copyright 2018 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "audio_core/hle/decoder.h"
+
+namespace AudioCore::HLE {
+
+class WMFDecoder final : public DecoderBase {
+public:
+    explicit WMFDecoder(Memory::MemorySystem& memory);
+    ~WMFDecoder() override;
+    std::optional<BinaryResponse> ProcessRequest(const BinaryRequest& request) override;
+
+private:
+    class Impl;
+    std::unique_ptr<Impl> impl;
+};
+
+} // namespace AudioCore::HLE
diff --git a/src/audio_core/hle/wmf_decoder_utils.cpp b/src/audio_core/hle/wmf_decoder_utils.cpp
new file mode 100644
index 000000000..58b00505b
--- /dev/null
+++ b/src/audio_core/hle/wmf_decoder_utils.cpp
@@ -0,0 +1,366 @@
+#include "common/logging/log.h"
+#include "wmf_decoder_utils.h"
+
+// utility functions
+void ReportError(std::string msg, HRESULT hr) {
+    if (SUCCEEDED(hr))
+        return;
+    LPSTR err = NULL; // stays NULL when the system has no text for this HRESULT
+    FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                       FORMAT_MESSAGE_IGNORE_INSERTS,
+                   NULL, hr,
+                   // hardcode to use en_US because if any user had problems with this
+                   // we can help them w/o translating anything
+                   MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), (LPSTR)&err, 0, NULL);
+    if (err != NULL) {
+        LOG_CRITICAL(Audio_DSP, "{}: {}", msg, err);
+        LocalFree(err); // FORMAT_MESSAGE_ALLOCATE_BUFFER makes the caller own the text
+    }
+    LOG_CRITICAL(Audio_DSP, "{}: {:08x}", msg, hr);
+}
+
+// Starts the Media Foundation platform.
+// Returns 0 on success and -1 (after logging the HRESULT) on failure.
+int mf_coinit() {
+    // lite startup is faster and all what we need is included
+    const HRESULT startup = MFStartup(MF_VERSION, MFSTARTUP_LITE);
+
+    if (startup == S_OK) {
+        LOG_INFO(Audio_DSP, "Media Foundation activated");
+        return 0;
+    }
+
+    // Do you know you can't initialize MF in test mode or safe mode?
+    ReportError("Failed to initialize Media Foundation", startup);
+    return -1;
+}
+
+// Stores the first audio decoder MFT that activates in `*transform`; returns 0, or -1 if none.
+int mf_decoder_init(IMFTransform** transform, GUID audio_format) {
+    HRESULT hr = S_OK;
+    MFT_REGISTER_TYPE_INFO reg = {0};
+    GUID category = MFT_CATEGORY_AUDIO_DECODER;
+    IMFActivate** activate = NULL; // initialized so the failure path never frees garbage
+    UINT32 num_activate = 0;
+
+    reg.guidMajorType = MFMediaType_Audio;
+    reg.guidSubtype = audio_format;
+    *transform = NULL;
+    hr = MFTEnumEx(category,
+                   MFT_ENUM_FLAG_SYNCMFT | MFT_ENUM_FLAG_LOCALMFT | MFT_ENUM_FLAG_SORTANDFILTER,
+                   &reg, NULL, &activate, &num_activate);
+    if (FAILED(hr) || num_activate < 1) {
+        ReportError("Failed to enumerate decoders", hr);
+        CoTaskMemFree(activate);
+        return -1;
+    }
+    LOG_INFO(Audio_DSP, "Windows(R) Media Foundation found {} suitable decoder(s)", num_activate);
+    for (unsigned int n = 0; n < num_activate; n++) {
+        if (*transform == NULL &&
+            FAILED(hr = activate[n]->ActivateObject(IID_IMFTransform, (void**)transform)))
+            *transform = NULL;
+        activate[n]->Release(); // every activation object is released, used or not
+    }
+    CoTaskMemFree(activate);
+    if (*transform == NULL) {
+        ReportError("Failed to initialize MFT", hr);
+        return -1;
+    }
+    return 0;
+}
+
+void mf_deinit(IMFTransform** transform) {
+    // no CoUninitialize(): this file never calls CoInitialize*, so it would be unbalanced
+    MFShutdownObject(*transform);
+    SafeRelease(transform);
+}
+
+// Allocates an IMFSample holding one `len`-byte aligned buffer, filled from `data` if given.
+// Returns NULL on failure; the caller owns the returned sample and must Release() it.
+IMFSample* create_sample(void* data, DWORD len, DWORD alignment, LONGLONG duration) {
+    HRESULT hr = S_OK;
+    IMFMediaBuffer* buf = NULL;
+    IMFSample* sample = NULL;
+
+    hr = MFCreateSample(&sample);
+    if (FAILED(hr)) {
+        ReportError("Unable to allocate a sample", hr);
+        return NULL;
+    }
+    // Yes, the argument for alignment is the actual alignment - 1 (0 must not wrap around)
+    hr = MFCreateAlignedMemoryBuffer(len, alignment > 0 ? alignment - 1 : 0, &buf);
+    if (FAILED(hr)) {
+        ReportError("Unable to allocate a memory buffer for sample", hr);
+        SafeRelease(&sample); // do not leak the sample created above
+        return NULL;
+    }
+    if (data) {
+        BYTE* buffer;
+        // lock the MediaBuffer
+        // this is actually not a thread-safe lock
+        hr = buf->Lock(&buffer, NULL, NULL);
+        if (FAILED(hr)) {
+            SafeRelease(&sample);
+            SafeRelease(&buf);
+            return NULL;
+        }
+        memcpy(buffer, data, len);
+        buf->SetCurrentLength(len);
+        buf->Unlock();
+    }
+    sample->AddBuffer(buf);
+    sample->SetSampleDuration(duration);
+    SafeRelease(&buf);
+    return sample;
+}
+
+// Describes the incoming ADTS AAC stream (channels, sample rate and the `user_data` blob)
+// to the MFT's input stream. Returns 0 on success, -1 on failure.
+int select_input_mediatype(IMFTransform* transform, int in_stream_id, ADTSData adts,
+                           UINT8* user_data, UINT32 user_data_len, GUID audio_format) {
+    HRESULT hr = S_OK;
+    IMFMediaType* t = NULL;
+
+    hr = MFCreateMediaType(&t);
+    if (FAILED(hr)) {
+        ReportError("Unable to create an empty MediaType", hr);
+        return -1;
+    }
+
+    // basic definition
+    t->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
+    t->SetGUID(MF_MT_SUBTYPE, audio_format);
+
+    // see https://docs.microsoft.com/en-us/windows/desktop/medfound/aac-decoder#example-media-types
+    // and https://docs.microsoft.com/zh-cn/windows/desktop/api/mmreg/ns-mmreg-heaacwaveinfo_tag
+    // for the meaning of the attributes and of the user data byte array, e.g.
+    // { 0x01, 0x00, 0xfe, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0x11, 0x90 }
+
+    // payload type: 0: raw aac 1: adts 2: adif 3: latm/laos
+    t->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, 1);
+    t->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, adts.channels);
+    t->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, adts.samplerate);
+    // 0xfe = 254 = "unspecified"
+    t->SetUINT32(MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 254);
+    t->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, 1);
+    t->SetBlob(MF_MT_USER_DATA, user_data, user_data_len);
+    hr = transform->SetInputType(in_stream_id, t, 0);
+    // our reference is dropped whether or not the MFT accepted the type
+    SafeRelease(&t);
+    if (FAILED(hr)) {
+        ReportError("failed to select input types for MFT", hr);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Walks the MFT's available output types and selects the first one with 32 bits per
+// sample. Returns 0 when a type was set (and also when the list ends or enumeration is
+// not implemented), -1 on error. `audio_format` is currently unused.
+int select_output_mediatype(IMFTransform* transform, int out_stream_id, GUID audio_format) {
+    HRESULT hr = S_OK;
+    UINT32 tmp;
+    IMFMediaType* t = NULL;
+
+    // If you know what you need and what you are doing, you can specify the condition instead of
+    // searching but it's better to use search since MFT may or may not support your output
+    // parameters
+    for (DWORD i = 0;; i++) {
+        SafeRelease(&t); // drop the candidate fetched by the previous iteration
+        hr = transform->GetOutputAvailableType(out_stream_id, i, &t);
+        if (hr == MF_E_NO_MORE_TYPES || hr == E_NOTIMPL) {
+            // NOTE(review): nothing was selected here, yet success is reported as before
+            return 0;
+        }
+        if (FAILED(hr)) {
+            ReportError("failed to get output types for MFT", hr);
+            return -1;
+        }
+
+        hr = t->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &tmp);
+        // only a type with 32 bits per sample is acceptable
+        if (FAILED(hr) || tmp != 32)
+            continue;
+
+        hr = t->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, 1);
+        if (FAILED(hr)) {
+            ReportError("failed to set MF_MT_AUDIO_BLOCK_ALIGNMENT for MFT on output stream",
+                        hr);
+            SafeRelease(&t);
+            return -1;
+        }
+
+        // our reference is released whether or not the MFT accepted the type
+        hr = transform->SetOutputType(out_stream_id, t, 0);
+        SafeRelease(&t);
+        if (FAILED(hr)) {
+            ReportError("failed to select output types for MFT", hr);
+            return -1;
+        }
+        return 0;
+    }
+}
+
+// Parses the ADTS header in `buffer`, copies it to `output` and writes the 14-byte decoder
+// user data to `*aac_tag`. Returns 0 on success, -1 for short or unparsable input.
+int detect_mediatype(char* buffer, size_t len, ADTSData* output, char** aac_tag) {
+    // parse_adts() reads the first 7 bytes
+    if (len < 7)
+        return -1;
+
+    ADTSData header;
+    if (parse_adts(buffer, &header) == 0)
+        return -1;
+
+    // user data template; the last two bytes receive the tag derived from the header
+    UINT8 user_data[] = {0x01, 0x00, 0xfe, 00, 00, 00, 00, 00, 00, 00, 00, 00, 0x00, 0x00};
+    const uint16_t tag = mf_get_aac_tag(header);
+    user_data[12] |= (tag & 0xff00) >> 8;
+    user_data[13] |= (tag & 0x00ff);
+
+    memcpy(*aac_tag, user_data, 14);
+    memcpy(output, &header, sizeof(ADTSData));
+    return 0;
+}
+
+int mf_flush(IMFTransform** transform) {
+    // best effort: failures are only logged and 0 is returned regardless
+    IMFTransform* const mft = *transform;
+    HRESULT result = mft->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, 0);
+    if (FAILED(result))
+        ReportError("MFT: Flush command failed", result);
+
+    result = mft->ProcessMessage(MFT_MESSAGE_NOTIFY_END_OF_STREAM, 0);
+    if (FAILED(result))
+        ReportError("Failed to end streaming for MFT", result);
+    return 0;
+}
+
+int send_sample(IMFTransform* transform, DWORD in_stream_id, IMFSample* in_sample) {
+    // a NULL sample is a request to drain the MFT
+    if (in_sample == NULL) {
+        const HRESULT drain = transform->ProcessMessage(MFT_MESSAGE_COMMAND_DRAIN, 0);
+        // ffmpeg: Some MFTs (AC3) will send a frame after each drain command (???), so
+        // ffmpeg: this is required to make draining actually terminate.
+        if (FAILED(drain)) {
+            ReportError("MFT: Failed to drain when processing input", drain);
+        }
+        return 0;
+    }
+
+    const HRESULT hr = transform->ProcessInput(in_stream_id, in_sample, 0);
+    if (hr == MF_E_NOTACCEPTING) {
+        return 1; // try again
+    }
+    if (FAILED(hr)) {
+        ReportError("MFT: Failed to process input", hr);
+        return -1;
+    }
+    return 0;
+}
+
+// Fetches one decoded sample from the MFT's output stream into `*out_sample`.
+// return: 0: okay; 1: needs more sample; 2: needs reconfiguring; 3: more data available;
+// -1: any other failure.
+// Whenever `*out_sample` is non-NULL on return, the caller owns it and must Release() it;
+// it is always NULL for the statuses 1, 2 and -1.
+int receive_sample(IMFTransform* transform, DWORD out_stream_id, IMFSample** out_sample) {
+    HRESULT hr;
+    // zeroed: pEvents must be NULL on input and dwStatus is read after the call
+    MFT_OUTPUT_DATA_BUFFER out_buffers = {};
+    IMFSample* sample = NULL;
+    MFT_OUTPUT_STREAM_INFO out_info;
+    DWORD status = 0;
+    bool mft_create_sample = false;
+
+    if (!out_sample) {
+        ReportError("NULL pointer passed to receive_sample()", MF_E_SAMPLE_NOT_WRITABLE);
+        return -1;
+    }
+    *out_sample = NULL;
+
+    hr = transform->GetOutputStreamInfo(out_stream_id, &out_info);
+    if (FAILED(hr)) {
+        ReportError("MFT: Failed to get stream info", hr);
+        return -1;
+    }
+    mft_create_sample = (out_info.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES) ||
+                        (out_info.dwFlags & MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES);
+
+    if (!mft_create_sample) {
+        sample = create_sample(NULL, out_info.cbSize, out_info.cbAlignment);
+        if (!sample) {
+            ReportError("MFT: Unable to allocate memory for samples", hr);
+            return -1;
+        }
+    }
+
+    out_buffers.dwStreamID = out_stream_id;
+    out_buffers.pSample = sample;
+
+    hr = transform->ProcessOutput(0, 1, &out_buffers, &status);
+    SafeRelease(&out_buffers.pEvents); // any event collection handed back is ours to release
+
+    if (FAILED(hr)) {
+        // nothing was decoded: drop the sample (ours or the MFT's) before reporting why
+        SafeRelease(&out_buffers.pSample);
+        if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
+            // TODO: better handling try again and EOF cases using drain value
+            return 1;
+        }
+        if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
+            ReportError("MFT: stream format changed, re-configuration required", hr);
+            return 2;
+        }
+        ReportError("MFT: decoding failure", hr);
+        return -1;
+    }
+
+    *out_sample = out_buffers.pSample;
+
+    if (out_buffers.dwStatus & MFT_OUTPUT_DATA_BUFFER_INCOMPLETE) {
+        return 3;
+    }
+
+    // TODO: better handling try again and EOF cases using drain value
+    if (*out_sample == NULL) {
+        ReportError("MFT: decoding failure", hr);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Copies the sample's bytes into a malloc()ed block (`*output`, `*len`); returns 0 or -1.
+int copy_sample_to_buffer(IMFSample* sample, void** output, DWORD* len) {
+    IMFMediaBuffer* buffer = NULL;
+    HRESULT hr = S_OK;
+    BYTE* data;
+    hr = sample->GetTotalLength(len);
+    if (FAILED(hr)) {
+        ReportError("Failed to get the length of sample buffer", hr);
+        return -1;
+    }
+
+    hr = sample->ConvertToContiguousBuffer(&buffer);
+    if (FAILED(hr)) {
+        ReportError("Failed to get sample buffer", hr);
+        return -1;
+    }
+
+    hr = buffer->Lock(&data, NULL, NULL);
+    if (FAILED(hr)) {
+        ReportError("Failed to lock the buffer", hr);
+        SafeRelease(&buffer);
+        return -1;
+    }
+
+    *output = malloc(*len);
+    if (*output != NULL)
+        memcpy(*output, data, *len);
+    // if buffer unlock fails, then... whatever, we have already got data
+    buffer->Unlock();
+    SafeRelease(&buffer);
+    return *output != NULL ? 0 : -1; // a failed allocation leaves *output NULL
+}
diff --git a/src/audio_core/hle/wmf_decoder_utils.h b/src/audio_core/hle/wmf_decoder_utils.h
new file mode 100644
index 000000000..ac7e522d7
--- /dev/null
+++ b/src/audio_core/hle/wmf_decoder_utils.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#ifndef MF_DECODER
+#define MF_DECODER
+
+#define WINVER _WIN32_WINNT_WIN7
+
+#include <assert.h>
+#include <comdef.h>
+#include <mfapi.h>
+#include <mferror.h>
+#include <mfidl.h>
+#include <mftransform.h>
+#include <stdio.h>
+
+#include <iostream>
+#include <string>
+
+#include "adts.h"
+
+// utility functions: SafeRelease() releases *ppT when it is non-NULL and resets it to NULL
+template <class T>
+void SafeRelease(T** ppT) {
+    if (*ppT) {
+        (*ppT)->Release();
+        *ppT = NULL;
+    }
+}
+
+void ReportError(std::string msg, HRESULT hr);
+
+// exported functions
+int mf_coinit();
+int mf_decoder_init(IMFTransform** transform, GUID audio_format = MFAudioFormat_AAC);
+void mf_deinit(IMFTransform** transform);
+IMFSample* create_sample(void* data, DWORD len, DWORD alignment = 1, LONGLONG duration = 0);
+int select_input_mediatype(IMFTransform* transform, int in_stream_id, ADTSData adts,
+                           UINT8* user_data, UINT32 user_data_len,
+                           GUID audio_format = MFAudioFormat_AAC);
+int detect_mediatype(char* buffer, size_t len, ADTSData* output, char** aac_tag);
+int select_output_mediatype(IMFTransform* transform, int out_stream_id,
+                            GUID audio_format = MFAudioFormat_PCM);
+int mf_flush(IMFTransform** transform);
+int send_sample(IMFTransform* transform, DWORD in_stream_id, IMFSample* in_sample);
+int receive_sample(IMFTransform* transform, DWORD out_stream_id, IMFSample** out_sample);
+int copy_sample_to_buffer(IMFSample* sample, void** output, DWORD* len);
+
+#endif // MF_DECODER
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index f048714a7..0c9cc7e55 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -8,6 +8,7 @@ add_executable(tests
     core/hle/kernel/hle_ipc.cpp
     core/memory/memory.cpp
     core/memory/vm_manager.cpp
+    audio_core/decoder_tests.cpp
     tests.cpp
 )
 
@@ -20,7 +21,7 @@ endif()
 
 create_target_directory_groups(tests)
 
-target_link_libraries(tests PRIVATE common core video_core)
+target_link_libraries(tests PRIVATE common core video_core audio_core)
 target_link_libraries(tests PRIVATE ${PLATFORM_LIBRARIES} catch-single-include nihstro-headers Threads::Threads)
 
 add_test(NAME tests COMMAND tests)
diff --git a/src/tests/audio_core/audio_fixures.h b/src/tests/audio_core/audio_fixures.h
new file mode 100644
index 000000000..3035840a3
--- /dev/null
+++ b/src/tests/audio_core/audio_fixures.h
@@ -0,0 +1,5 @@
+const int fixure_buffer_size = 41;
+const unsigned char fixure_buffer[41] = {
+    0xff, 0xf1, 0x4c, 0x80, 0x05, 0x3f, 0xfc, 0x21, 0x1a, 0x4e, 0xb0, 0x00, 0x00, 0x00,
+    0x05, 0xfc, 0x4e, 0x1f, 0x08, 0x88, 0x00, 0x00, 0x00, 0xc4, 0x1a, 0x03, 0xfc, 0x9c,
+    0x3e, 0x1d, 0x08, 0x84, 0x03, 0xd8, 0x3f, 0xe4, 0xe1, 0x20, 0x00, 0x0b, 0x38};
diff --git a/src/tests/audio_core/decoder_tests.cpp b/src/tests/audio_core/decoder_tests.cpp
new file mode 100644
index 000000000..3a197f0dc
--- /dev/null
+++ b/src/tests/audio_core/decoder_tests.cpp
@@ -0,0 +1,50 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#ifdef HAVE_MF
+
+#include <catch2/catch.hpp>
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/hle/kernel/memory.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/shared_page.h"
+#include "core/memory.h"
+
+#include "audio_core/hle/decoder.h"
+#include "audio_core/hle/wmf_decoder.h"
+#include "audio_fixures.h"
+
+TEST_CASE("DSP HLE Audio Decoder", "[audio_core]") {
+    // HACK: see comments of member timing
+    Core::System::GetInstance().timing = std::make_unique<Core::Timing>();
+    Core::System::GetInstance().memory = std::make_unique<Memory::MemorySystem>();
+    Kernel::KernelSystem kernel(*Core::System::GetInstance().memory, 0);
+    SECTION("decoder should produce correct samples") {
+        auto process = kernel.CreateProcess(kernel.CreateCodeSet("", 0));
+        auto decoder =
+            std::make_unique<AudioCore::HLE::WMFDecoder>(*Core::System::GetInstance().memory);
+        AudioCore::HLE::BinaryRequest request;
+
+        request.codec = AudioCore::HLE::DecoderCodec::AAC;
+        request.cmd = AudioCore::HLE::DecoderCommand::Init;
+        // initialize decoder
+        std::optional<AudioCore::HLE::BinaryResponse> response = decoder->ProcessRequest(request);
+
+        request.cmd = AudioCore::HLE::DecoderCommand::Decode;
+        u8* fcram = Core::System::GetInstance().memory->GetFCRAMPointer(0);
+
+        memcpy(fcram, fixure_buffer, fixure_buffer_size);
+        request.src_addr = Memory::FCRAM_PADDR;
+        request.dst_addr_ch0 = Memory::FCRAM_PADDR + 1024;
+        request.dst_addr_ch1 = Memory::FCRAM_PADDR + 1048576; // 1 MB
+        request.size = fixure_buffer_size;
+
+        response = decoder->ProcessRequest(request);
+        REQUIRE(response.has_value()); // every in-bounds decode request must be answered
+        response = decoder->ProcessRequest(request);
+        REQUIRE(response.has_value());
+    }
+}
+
+#endif