123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412 |
- // Text2Speech.h : CText2Speech 的实现;
- #include "StdAfx.h"
- #include "Text2Speech.h"
- CText2Speech::CText2Speech(void)
- {
- m_cpVoice = NULL;
- m_cpOutAudio = NULL;
- //m_sError=_T("");
- }
- CText2Speech::~CText2Speech(void)
- {
- Destroy();
- }
- // 初始化函数Initialize首先初始化COM库,并调用CoCreateInstance方法初始化语音引擎。
- // 然后设置必须响应的引擎事件,并指定响应事件消息的窗口句柄。
- // 该窗口句柄是作为函数的参数传入的
- BOOL CText2Speech::Initialize(IN HWND hWnd)
- {
- if (FAILED(CoInitialize(NULL))) {
- //m_sError=_T("Error intialization COM");
- return FALSE;
- }
- HRESULT hr = m_cpVoice.CoCreateInstance(CLSID_SpVoice);
- if (FAILED(hr)) {
- //m_sError=_T("Error creating voice");
- return FALSE;
- }
- // Sets the type of events which will invoke a notification and become queued
- hr = m_cpVoice->SetInterest(SPFEI(SPEI_VISEME), SPFEI(SPEI_VISEME));
- if (FAILED(hr))
- {
- //m_sError=_T("Error creating interest...seriously");
- return FALSE;
- }
- if (::IsWindow(hWnd))
- {
- hr = m_cpVoice->SetNotifyWindowMessage(hWnd, WM_TTSEVENT, 0, 0);
- if (FAILED(hr))
- {
- //m_sError=_T("Error setting notification window");
- return FALSE;
- }
- }
- return TRUE;
- }
- // 释放函数则释放语音引擎接口和COM库,其代码如下:
- void CText2Speech::Destroy()
- {
- if (m_cpVoice)
- m_cpVoice.Release();
- CoUninitialize();
- }
- // 要朗读的文字必须位于宽字符串中;
- HRESULT CText2Speech::Speak(IN const TCHAR *pText, IN DWORD dwFlags /* = SPF_DEFAULT */)
- {
- #ifndef UNICODE
- size_t len = strlen(pText) + 1;
- wchar_t *WStr;
- WStr = (wchar_t*)malloc(len*sizeof(wchar_t));
- MultiByteToWideChar(CP_ACP, 0, pText, -1, WStr, len*sizeof(wchar_t));
- HRESULT hr = m_cpVoice->Speak(WStr, dwFlags, NULL);
- free(WStr);
- return hr;
- #else
- return m_cpVoice->Speak(pText, dwFlags, NULL);
- #endif
- }
- /*
- 语言函数的实现比较复杂。
- 由于IspVoice接口提供的语言函数,都只与抽象的语音语言接口ISpObjectToken相关,
- 而我们能看到的却是语音语言的描述,比如,通过控制面板的语音程序所能见到的就是语音语言的描述。
- 因此,笔者设计了直接对语音语言进行操作的语言函数,包括获取系统中已安装的语音语言数目,
- 设置指定的语音语言,获取指定的语音语言描述(包括当前设定的语音语言)。
- */
- ULONG CText2Speech::GetVoiceCount()
- {
- HRESULT hr = S_OK;
- CComPtr<ISpObjectToken> cpVoiceToken;
- CComPtr<IEnumSpObjectTokens> cpEnum;
- ULONG ulCount = -1;
- //Enumerate the available voices
- hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to enumerate voices");
- return -1;
- }
- //Get the number of voices
- hr = cpEnum->GetCount(&ulCount);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to get voice count");
- return -1;
- }
- return ulCount;
- }
- HRESULT CText2Speech::GetVoice(OUT WCHAR **ppszDescription, IN ULONG lIndex)
- {
- HRESULT hr = S_OK;
- CComPtr<ISpObjectToken> cpVoiceToken;
- CComPtr<IEnumSpObjectTokens> cpEnum;
- ULONG ulCount = 0;
- // lIndex==-1,获取当前默认的语声;
- if (lIndex == -1)
- {
- // current voice
- hr = m_cpVoice->GetVoice(&cpVoiceToken);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to get current voice");
- return hr;
- }
- SpGetDescription(cpVoiceToken, ppszDescription);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to get current voice description");
- return hr;
- }
- }
- else
- {
- // else other voices, we should enumerate the voice list first
- //Enumerate the available voices
- hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to enumerate voices");
- return hr;
- }
- //Get the number of voices
- hr = cpEnum->GetCount(&ulCount);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to voice count");
- return hr;
- }
- // range control
- ASSERT(lIndex >= 0);
- ASSERT(lIndex < ulCount);
- // Obtain specified voice id
- ULONG l = 0;
- while (SUCCEEDED(hr))
- {
- cpVoiceToken.Release();
- hr = cpEnum->Next(1, &cpVoiceToken, NULL);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to get voice token");
- return hr;
- }
- if (l == lIndex)
- {
- hr = SpGetDescription(cpVoiceToken, ppszDescription);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to get voice description");
- return hr;
- }
- break;
- }
- l++;
- }
- }
- return hr;
- }
- HRESULT CText2Speech::SetVoice(IN WCHAR **ppszDescription)
- {
- HRESULT hr = S_OK;
- CComPtr<ISpObjectToken> cpVoiceToken;
- CComPtr<IEnumSpObjectTokens> cpEnum;
- ULONG ulCount = 0;
- //Enumerate the available voices
- hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to enumerate voices");
- return hr;
- }
- //Get the number of voices
- hr = cpEnum->GetCount(&ulCount);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to voice count");
- return hr;
- }
- // Obtain specified voice id
- while (SUCCEEDED(hr) && ulCount--)
- {
- cpVoiceToken.Release();
- hr = cpEnum->Next(1, &cpVoiceToken, NULL);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to voice token");
- return hr;
- }
- WCHAR *pszDescription1;
- hr = SpGetDescription(cpVoiceToken, &pszDescription1);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to get voice description");
- return hr;
- }
- if (!wcsicmp(pszDescription1, *ppszDescription))
- {
- hr = m_cpVoice->SetVoice(cpVoiceToken);
- if (FAILED(hr))
- {
- //m_sError = _T("Error to set voice");
- return hr;
- }
- break;
- }
- }
- return hr;
- }
- HRESULT CText2Speech::SetVoice(IN const ULONG &uIndex)
- {
- HRESULT hr;
- CComPtr<ISpObjectToken> cpVoiceToken; // 用下面的函数获取当前正在使用的语音;
- CComPtr<IEnumSpObjectTokens> cpEnum;
- ULONG ulCount = 0;
- hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
- if (FAILED(hr)){
- return hr;
- }
- // Get the number of voices
- // 获取语音语言数量;
- hr = cpEnum->GetCount(&ulCount);
- if (FAILED(hr)) {
- return hr;
- }
- // 根据参数 nLang ,设置为所需要的语音语言;
- hr = cpEnum->Item(uIndex, &cpVoiceToken);
- if (SUCCEEDED(hr)){
- m_cpVoice->SetVoice(cpVoiceToken);
- }
- else
- {
- return hr;
- }
- return hr;
- }
- HRESULT CText2Speech::GetOutAudio(OUT SPSTREAMFORMAT &eFmt)
- {
- CComPtr<ISpStreamFormat> cpStream;
- HRESULT hr = m_cpVoice->GetOutputStream(&cpStream);
- //if (hr == S_OK)
- if (SUCCEEDED(hr))
- {
- CSpStreamFormat Fmt;
- HRESULT hStreamFmt = Fmt.AssignFormat(cpStream);
- if (SUCCEEDED(hStreamFmt))
- {
- eFmt = Fmt.ComputeFormatEnum();
- }
- else
- {
- return hStreamFmt;
- }
- }
- return hr;
- }
- HRESULT CText2Speech::SetOutAudio(IN const SPSTREAMFORMAT &eFmt)
- {
- HRESULT hr = S_FALSE;
- if (!m_cpOutAudio)
- {
- SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOOUT, &m_cpOutAudio); //创建接口
- }
- else
- {
- return hr;
- }
- //SPSF_CCITT_ALaw_8kHzMono 8Bit Stereo
- //SPSTREAMFORMAT eFmt = 21; //SPSF_22kHz 8Bit Stereo
- CSpStreamFormat Fmt;
- hr = Fmt.AssignFormat(eFmt);
- if (SUCCEEDED(hr))
- {
- hr = m_cpOutAudio->SetFormat(Fmt.FormatId(), Fmt.WaveFormatExPtr());
- }
- else
- {
- return hr;//hr = E_FAIL;
- }
- if (SUCCEEDED(hr)){
- m_cpVoice->SetOutput(m_cpOutAudio, FALSE);
- }
- return hr;
- }
- // 播放WAV,通过ISpStream接口实现;
- HRESULT CText2Speech::PlayWav(IN const TCHAR *szWavFileName)
- {
- CComPtr<ISpStream> cpWavStream;
- #ifndef UNICODE
- WCHAR szwWavFileName[MAX_PATH] = L"";
- USES_CONVERSION;
- wcscpy(szwWavFileName, T2W(szWavFileName));//从ANSI将WAV文件的名字转换成宽字符串
- //使用sphelper.h 提供的这个函数打开 wav 文件,并得到一个 IStream 指针
- HRESULT hr = SPBindToFile(szwWavFileName, SPFM_OPEN_READONLY, &cpWavStream);
- #else
- HRESULT hr = SPBindToFile(szWavFileName, SPFM_OPEN_READONLY, &cpWavStream);
- #endif
- if (SUCCEEDED(hr))
- {
- m_cpVoice->SpeakStream(cpWavStream, SPF_ASYNC, NULL);//播放WAV文件
- }
- return hr;
- }
- HRESULT CText2Speech::Save2Wav(IN const TCHAR *pText, IN const TCHAR *szWavFileName)
- {
- #ifndef UNICODE
- //TCHAR szFileName[256];//假设这里面保存着目标文件的路径
- USES_CONVERSION;
- WCHAR szWFileName[MAX_PATH];
- wcscpy(szWFileName, T2W(szWavFileName)); // ANSI转换成宽字符串;
- #endif
- //创建一个输出流,绑定到wav文件;
- CSpStreamFormat OriginalFmt;
- CComPtr<ISpStream> cpWavStream;
- CComPtr<ISpStreamFormat> cpOldStream;
- HRESULT hr = m_cpVoice->GetOutputStream(&cpOldStream);
- //if (hr == S_OK)
- // hr = OriginalFmt.AssignFormat(cpOldStream);
- //else hr = E_FAIL;
- if (FAILED(hr)) {
- return hr;
- }
- hr = OriginalFmt.AssignFormat(cpOldStream);
- // 使用sphelper.h中提供的函数创建 wav 文件;
- if (SUCCEEDED(hr))
- {
- // 设置WAV的编码格式???有何意义在这里???;
- // -- 因为语音卡要获取WAV文件时,需要指定的编码格式才能将文件读出来,详看http://hi.baidu.com/sunsee/item/a537343cdb83ad5b80f1a70fACM音频压缩管理器VC++源代码;
- //OriginalFmt.m_pCoMemWaveFormatEx->wBitsPerSample = 16;
- //OriginalFmt.m_pCoMemWaveFormatEx->nSamplesPerSec = 8000;
- OriginalFmt.m_pCoMemWaveFormatEx->wFormatTag = WAVE_FORMAT_PCM;
- #ifndef UNICODE
- hr = SPBindToFile(szWFileName, SPFM_CREATE_ALWAYS, &cpWavStream,
- &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr());
- #else
- hr = SPBindToFile(szWavFileName, SPFM_CREATE_ALWAYS, &cpWavStream,
- &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr());
- #endif
- }
- else
- {
- return hr;
- }
- if (SUCCEEDED(hr))
- {
- //设置声音的输出到 wav 文件,而不是 speakers;
- hr = m_cpVoice->SetOutput(cpWavStream, TRUE);
- }
- else
- {
- return hr;
- }
- // 开始朗读;
- hr = Speak(pText, SPF_ASYNC | SPF_IS_NOT_XML);
- // 等待朗读结束;
- m_cpVoice->WaitUntilDone(INFINITE);
- cpWavStream.Release();
- // 把输出重新定位到原来的流;
- m_cpVoice->SetOutput(cpOldStream, FALSE);
- return hr;
- }
|