// Text2Speech.h : CText2Speech 的实现; #include "StdAfx.h" #include "Text2Speech.h" CText2Speech::CText2Speech(void) { m_cpVoice = NULL; m_cpOutAudio = NULL; //m_sError=_T(""); } CText2Speech::~CText2Speech(void) { Destroy(); } // 初始化函数Initialize首先初始化COM库,并调用CoCreateInstance方法初始化语音引擎。 // 然后设置必须响应的引擎事件,并指定响应事件消息的窗口句柄。 // 该窗口句柄是作为函数的参数传入的 BOOL CText2Speech::Initialize(IN HWND hWnd) { if (FAILED(CoInitialize(NULL))) { //m_sError=_T("Error intialization COM"); return FALSE; } HRESULT hr = m_cpVoice.CoCreateInstance(CLSID_SpVoice); if (FAILED(hr)) { //m_sError=_T("Error creating voice"); return FALSE; } // Sets the type of events which will invoke a notification and become queued hr = m_cpVoice->SetInterest(SPFEI(SPEI_VISEME), SPFEI(SPEI_VISEME)); if (FAILED(hr)) { //m_sError=_T("Error creating interest...seriously"); return FALSE; } if (::IsWindow(hWnd)) { hr = m_cpVoice->SetNotifyWindowMessage(hWnd, WM_TTSEVENT, 0, 0); if (FAILED(hr)) { //m_sError=_T("Error setting notification window"); return FALSE; } } return TRUE; } // 释放函数则释放语音引擎接口和COM库,其代码如下: void CText2Speech::Destroy() { if (m_cpVoice) m_cpVoice.Release(); CoUninitialize(); } // 要朗读的文字必须位于宽字符串中; HRESULT CText2Speech::Speak(IN const TCHAR *pText, IN DWORD dwFlags /* = SPF_DEFAULT */) { #ifndef UNICODE size_t len = strlen(pText) + 1; wchar_t *WStr; WStr = (wchar_t*)malloc(len*sizeof(wchar_t)); MultiByteToWideChar(CP_ACP, 0, pText, -1, WStr, len*sizeof(wchar_t)); HRESULT hr = m_cpVoice->Speak(WStr, dwFlags, NULL); free(WStr); return hr; #else return m_cpVoice->Speak(pText, dwFlags, NULL); #endif } /* 语言函数的实现比较复杂。 由于IspVoice接口提供的语言函数,都只与抽象的语音语言接口ISpObjectToken相关, 而我们能看到的却是语音语言的描述,比如,通过控制面板的语音程序所能见到的就是语音语言的描述。 因此,笔者设计了直接对语音语言进行操作的语言函数,包括获取系统中已安装的语音语言数目, 设置指定的语音语言,获取指定的语音语言描述(包括当前设定的语音语言)。 */ ULONG CText2Speech::GetVoiceCount() { HRESULT hr = S_OK; CComPtr cpVoiceToken; CComPtr cpEnum; ULONG ulCount = -1; //Enumerate the available voices hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); if (FAILED(hr)) { //m_sError = _T("Error to enumerate voices"); return -1; } //Get the number of voices hr = cpEnum->GetCount(&ulCount); if (FAILED(hr)) { //m_sError = _T("Error to get voice count"); return -1; } return ulCount; } HRESULT CText2Speech::GetVoice(OUT WCHAR **ppszDescription, IN ULONG lIndex) { HRESULT hr = S_OK; CComPtr cpVoiceToken; CComPtr cpEnum; ULONG ulCount = 0; // lIndex==-1,获取当前默认的语声; if (lIndex == -1) { // current voice hr = m_cpVoice->GetVoice(&cpVoiceToken); if (FAILED(hr)) { //m_sError = _T("Error to get current voice"); return hr; } SpGetDescription(cpVoiceToken, ppszDescription); if (FAILED(hr)) { //m_sError = _T("Error to get current voice description"); return hr; } } else { // else other voices, we should enumerate the voice list first //Enumerate the available voices hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); if (FAILED(hr)) { //m_sError = _T("Error to enumerate voices"); return hr; } //Get the number of voices hr = cpEnum->GetCount(&ulCount); if (FAILED(hr)) { //m_sError = _T("Error to voice count"); return hr; } // range control ASSERT(lIndex >= 0); ASSERT(lIndex < ulCount); // Obtain specified voice id ULONG l = 0; while (SUCCEEDED(hr)) { cpVoiceToken.Release(); hr = cpEnum->Next(1, &cpVoiceToken, NULL); if (FAILED(hr)) { //m_sError = _T("Error to get voice token"); return hr; } if (l == lIndex) { hr = SpGetDescription(cpVoiceToken, ppszDescription); if (FAILED(hr)) { //m_sError = _T("Error to get voice description"); return hr; } break; } l++; } } return hr; } HRESULT CText2Speech::SetVoice(IN WCHAR **ppszDescription) { HRESULT hr = S_OK; CComPtr cpVoiceToken; CComPtr cpEnum; ULONG ulCount = 0; //Enumerate the available voices hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); if (FAILED(hr)) { //m_sError = _T("Error to enumerate voices"); return hr; } //Get the number of voices hr = cpEnum->GetCount(&ulCount); if (FAILED(hr)) { //m_sError = _T("Error to voice count"); return hr; } // Obtain specified voice id while (SUCCEEDED(hr) && ulCount--) { cpVoiceToken.Release(); hr = cpEnum->Next(1, &cpVoiceToken, NULL); if (FAILED(hr)) { //m_sError = _T("Error to voice token"); return hr; } WCHAR *pszDescription1; hr = SpGetDescription(cpVoiceToken, &pszDescription1); if (FAILED(hr)) { //m_sError = _T("Error to get voice description"); return hr; } if (!wcsicmp(pszDescription1, *ppszDescription)) { hr = m_cpVoice->SetVoice(cpVoiceToken); if (FAILED(hr)) { //m_sError = _T("Error to set voice"); return hr; } break; } } return hr; } HRESULT CText2Speech::SetVoice(IN const ULONG &uIndex) { HRESULT hr; CComPtr cpVoiceToken; // 用下面的函数获取当前正在使用的语音; CComPtr cpEnum; ULONG ulCount = 0; hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); if (FAILED(hr)){ return hr; } // Get the number of voices // 获取语音语言数量; hr = cpEnum->GetCount(&ulCount); if (FAILED(hr)) { return hr; } // 根据参数 nLang ,设置为所需要的语音语言; hr = cpEnum->Item(uIndex, &cpVoiceToken); if (SUCCEEDED(hr)){ m_cpVoice->SetVoice(cpVoiceToken); } else { return hr; } return hr; } HRESULT CText2Speech::GetOutAudio(OUT SPSTREAMFORMAT &eFmt) { CComPtr cpStream; HRESULT hr = m_cpVoice->GetOutputStream(&cpStream); //if (hr == S_OK) if (SUCCEEDED(hr)) { CSpStreamFormat Fmt; HRESULT hStreamFmt = Fmt.AssignFormat(cpStream); if (SUCCEEDED(hStreamFmt)) { eFmt = Fmt.ComputeFormatEnum(); } else { return hStreamFmt; } } return hr; } HRESULT CText2Speech::SetOutAudio(IN const SPSTREAMFORMAT &eFmt) { HRESULT hr = S_FALSE; if (!m_cpOutAudio) { SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOOUT, &m_cpOutAudio); //创建接口 } else { return hr; } //SPSF_CCITT_ALaw_8kHzMono 8Bit Stereo //SPSTREAMFORMAT eFmt = 21; //SPSF_22kHz 8Bit Stereo CSpStreamFormat Fmt; hr = Fmt.AssignFormat(eFmt); if (SUCCEEDED(hr)) { hr = m_cpOutAudio->SetFormat(Fmt.FormatId(), Fmt.WaveFormatExPtr()); } else { return hr;//hr = E_FAIL; } if (SUCCEEDED(hr)){ m_cpVoice->SetOutput(m_cpOutAudio, FALSE); } return hr; } // 播放WAV,通过ISpStream接口实现; HRESULT CText2Speech::PlayWav(IN const TCHAR *szWavFileName) { CComPtr cpWavStream; #ifndef UNICODE WCHAR szwWavFileName[MAX_PATH] = L""; USES_CONVERSION; wcscpy(szwWavFileName, T2W(szWavFileName));//从ANSI将WAV文件的名字转换成宽字符串 //使用sphelper.h 提供的这个函数打开 wav 文件,并得到一个 IStream 指针 HRESULT hr = SPBindToFile(szwWavFileName, SPFM_OPEN_READONLY, &cpWavStream); #else HRESULT hr = SPBindToFile(szWavFileName, SPFM_OPEN_READONLY, &cpWavStream); #endif if (SUCCEEDED(hr)) { m_cpVoice->SpeakStream(cpWavStream, SPF_ASYNC, NULL);//播放WAV文件 } return hr; } HRESULT CText2Speech::Save2Wav(IN const TCHAR *pText, IN const TCHAR *szWavFileName) { #ifndef UNICODE //TCHAR szFileName[256];//假设这里面保存着目标文件的路径 USES_CONVERSION; WCHAR szWFileName[MAX_PATH]; wcscpy(szWFileName, T2W(szWavFileName)); // ANSI转换成宽字符串; #endif //创建一个输出流,绑定到wav文件; CSpStreamFormat OriginalFmt; CComPtr cpWavStream; CComPtr cpOldStream; HRESULT hr = m_cpVoice->GetOutputStream(&cpOldStream); //if (hr == S_OK) // hr = OriginalFmt.AssignFormat(cpOldStream); //else hr = E_FAIL; if (FAILED(hr)) { return hr; } hr = OriginalFmt.AssignFormat(cpOldStream); // 使用sphelper.h中提供的函数创建 wav 文件; if (SUCCEEDED(hr)) { // 设置WAV的编码格式???有何意义在这里???; // -- 因为语音卡要获取WAV文件时,需要指定的编码格式才能将文件读出来,详看http://hi.baidu.com/sunsee/item/a537343cdb83ad5b80f1a70fACM音频压缩管理器VC++源代码; //OriginalFmt.m_pCoMemWaveFormatEx->wBitsPerSample = 16; //OriginalFmt.m_pCoMemWaveFormatEx->nSamplesPerSec = 8000; OriginalFmt.m_pCoMemWaveFormatEx->wFormatTag = WAVE_FORMAT_PCM; #ifndef UNICODE hr = SPBindToFile(szWFileName, SPFM_CREATE_ALWAYS, &cpWavStream, &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr()); #else hr = SPBindToFile(szWavFileName, SPFM_CREATE_ALWAYS, &cpWavStream, &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr()); #endif } else { return hr; } if (SUCCEEDED(hr)) { //设置声音的输出到 wav 文件,而不是 speakers; hr = m_cpVoice->SetOutput(cpWavStream, TRUE); } else { return hr; } // 开始朗读; hr = Speak(pText, SPF_ASYNC | SPF_IS_NOT_XML); // 等待朗读结束; m_cpVoice->WaitUntilDone(INFINITE); cpWavStream.Release(); // 把输出重新定位到原来的流; m_cpVoice->SetOutput(cpOldStream, FALSE); return hr; }