Text2Speech.cpp 9.3 KB


  1. // Text2Speech.cpp : implementation of CText2Speech.
  2. #include "StdAfx.h"
  3. #include "Text2Speech.h"
  4. CText2Speech::CText2Speech(void)
  5. {
  6. m_cpVoice = NULL;
  7. m_cpOutAudio = NULL;
  8. //m_sError=_T("");
  9. }
  10. CText2Speech::~CText2Speech(void)
  11. {
  12. Destroy();
  13. }
  14. // 初始化函数Initialize首先初始化COM库,并调用CoCreateInstance方法初始化语音引擎。
  15. // 然后设置必须响应的引擎事件,并指定响应事件消息的窗口句柄。
  16. // 该窗口句柄是作为函数的参数传入的
  17. BOOL CText2Speech::Initialize(IN HWND hWnd)
  18. {
  19. if (FAILED(CoInitialize(NULL))) {
  20. //m_sError=_T("Error intialization COM");
  21. return FALSE;
  22. }
  23. HRESULT hr = m_cpVoice.CoCreateInstance(CLSID_SpVoice);
  24. if (FAILED(hr)) {
  25. //m_sError=_T("Error creating voice");
  26. return FALSE;
  27. }
  28. // Sets the type of events which will invoke a notification and become queued
  29. hr = m_cpVoice->SetInterest(SPFEI(SPEI_VISEME), SPFEI(SPEI_VISEME));
  30. if (FAILED(hr))
  31. {
  32. //m_sError=_T("Error creating interest...seriously");
  33. return FALSE;
  34. }
  35. if (::IsWindow(hWnd))
  36. {
  37. hr = m_cpVoice->SetNotifyWindowMessage(hWnd, WM_TTSEVENT, 0, 0);
  38. if (FAILED(hr))
  39. {
  40. //m_sError=_T("Error setting notification window");
  41. return FALSE;
  42. }
  43. }
  44. return TRUE;
  45. }
  46. // 释放函数则释放语音引擎接口和COM库,其代码如下:
  47. void CText2Speech::Destroy()
  48. {
  49. if (m_cpVoice)
  50. m_cpVoice.Release();
  51. CoUninitialize();
  52. }
  53. // 要朗读的文字必须位于宽字符串中;
  54. HRESULT CText2Speech::Speak(IN const TCHAR *pText, IN DWORD dwFlags /* = SPF_DEFAULT */)
  55. {
  56. #ifndef UNICODE
  57. size_t len = strlen(pText) + 1;
  58. wchar_t *WStr;
  59. WStr = (wchar_t*)malloc(len*sizeof(wchar_t));
  60. MultiByteToWideChar(CP_ACP, 0, pText, -1, WStr, len*sizeof(wchar_t));
  61. HRESULT hr = m_cpVoice->Speak(WStr, dwFlags, NULL);
  62. free(WStr);
  63. return hr;
  64. #else
  65. return m_cpVoice->Speak(pText, dwFlags, NULL);
  66. #endif
  67. }
  68. /*
  69. 语言函数的实现比较复杂。
  70. 由于IspVoice接口提供的语言函数,都只与抽象的语音语言接口ISpObjectToken相关,
  71. 而我们能看到的却是语音语言的描述,比如,通过控制面板的语音程序所能见到的就是语音语言的描述。
  72. 因此,笔者设计了直接对语音语言进行操作的语言函数,包括获取系统中已安装的语音语言数目,
  73. 设置指定的语音语言,获取指定的语音语言描述(包括当前设定的语音语言)。
  74. */
  75. ULONG CText2Speech::GetVoiceCount()
  76. {
  77. HRESULT hr = S_OK;
  78. CComPtr<ISpObjectToken> cpVoiceToken;
  79. CComPtr<IEnumSpObjectTokens> cpEnum;
  80. ULONG ulCount = -1;
  81. //Enumerate the available voices
  82. hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
  83. if (FAILED(hr))
  84. {
  85. //m_sError = _T("Error to enumerate voices");
  86. return -1;
  87. }
  88. //Get the number of voices
  89. hr = cpEnum->GetCount(&ulCount);
  90. if (FAILED(hr))
  91. {
  92. //m_sError = _T("Error to get voice count");
  93. return -1;
  94. }
  95. return ulCount;
  96. }
  97. HRESULT CText2Speech::GetVoice(OUT WCHAR **ppszDescription, IN ULONG lIndex)
  98. {
  99. HRESULT hr = S_OK;
  100. CComPtr<ISpObjectToken> cpVoiceToken;
  101. CComPtr<IEnumSpObjectTokens> cpEnum;
  102. ULONG ulCount = 0;
  103. // lIndex==-1,获取当前默认的语声;
  104. if (lIndex == -1)
  105. {
  106. // current voice
  107. hr = m_cpVoice->GetVoice(&cpVoiceToken);
  108. if (FAILED(hr))
  109. {
  110. //m_sError = _T("Error to get current voice");
  111. return hr;
  112. }
  113. SpGetDescription(cpVoiceToken, ppszDescription);
  114. if (FAILED(hr))
  115. {
  116. //m_sError = _T("Error to get current voice description");
  117. return hr;
  118. }
  119. }
  120. else
  121. {
  122. // else other voices, we should enumerate the voice list first
  123. //Enumerate the available voices
  124. hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
  125. if (FAILED(hr))
  126. {
  127. //m_sError = _T("Error to enumerate voices");
  128. return hr;
  129. }
  130. //Get the number of voices
  131. hr = cpEnum->GetCount(&ulCount);
  132. if (FAILED(hr))
  133. {
  134. //m_sError = _T("Error to voice count");
  135. return hr;
  136. }
  137. // range control
  138. ASSERT(lIndex >= 0);
  139. ASSERT(lIndex < ulCount);
  140. // Obtain specified voice id
  141. ULONG l = 0;
  142. while (SUCCEEDED(hr))
  143. {
  144. cpVoiceToken.Release();
  145. hr = cpEnum->Next(1, &cpVoiceToken, NULL);
  146. if (FAILED(hr))
  147. {
  148. //m_sError = _T("Error to get voice token");
  149. return hr;
  150. }
  151. if (l == lIndex)
  152. {
  153. hr = SpGetDescription(cpVoiceToken, ppszDescription);
  154. if (FAILED(hr))
  155. {
  156. //m_sError = _T("Error to get voice description");
  157. return hr;
  158. }
  159. break;
  160. }
  161. l++;
  162. }
  163. }
  164. return hr;
  165. }
  166. HRESULT CText2Speech::SetVoice(IN WCHAR **ppszDescription)
  167. {
  168. HRESULT hr = S_OK;
  169. CComPtr<ISpObjectToken> cpVoiceToken;
  170. CComPtr<IEnumSpObjectTokens> cpEnum;
  171. ULONG ulCount = 0;
  172. //Enumerate the available voices
  173. hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
  174. if (FAILED(hr))
  175. {
  176. //m_sError = _T("Error to enumerate voices");
  177. return hr;
  178. }
  179. //Get the number of voices
  180. hr = cpEnum->GetCount(&ulCount);
  181. if (FAILED(hr))
  182. {
  183. //m_sError = _T("Error to voice count");
  184. return hr;
  185. }
  186. // Obtain specified voice id
  187. while (SUCCEEDED(hr) && ulCount--)
  188. {
  189. cpVoiceToken.Release();
  190. hr = cpEnum->Next(1, &cpVoiceToken, NULL);
  191. if (FAILED(hr))
  192. {
  193. //m_sError = _T("Error to voice token");
  194. return hr;
  195. }
  196. WCHAR *pszDescription1;
  197. hr = SpGetDescription(cpVoiceToken, &pszDescription1);
  198. if (FAILED(hr))
  199. {
  200. //m_sError = _T("Error to get voice description");
  201. return hr;
  202. }
  203. if (!wcsicmp(pszDescription1, *ppszDescription))
  204. {
  205. hr = m_cpVoice->SetVoice(cpVoiceToken);
  206. if (FAILED(hr))
  207. {
  208. //m_sError = _T("Error to set voice");
  209. return hr;
  210. }
  211. break;
  212. }
  213. }
  214. return hr;
  215. }
  216. HRESULT CText2Speech::SetVoice(IN const ULONG &uIndex)
  217. {
  218. HRESULT hr;
  219. CComPtr<ISpObjectToken> cpVoiceToken; // 用下面的函数获取当前正在使用的语音;
  220. CComPtr<IEnumSpObjectTokens> cpEnum;
  221. ULONG ulCount = 0;
  222. hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
  223. if (FAILED(hr)){
  224. return hr;
  225. }
  226. // Get the number of voices
  227. // 获取语音语言数量;
  228. hr = cpEnum->GetCount(&ulCount);
  229. if (FAILED(hr)) {
  230. return hr;
  231. }
  232. // 根据参数 nLang ,设置为所需要的语音语言;
  233. hr = cpEnum->Item(uIndex, &cpVoiceToken);
  234. if (SUCCEEDED(hr)){
  235. m_cpVoice->SetVoice(cpVoiceToken);
  236. }
  237. else
  238. {
  239. return hr;
  240. }
  241. return hr;
  242. }
  243. HRESULT CText2Speech::GetOutAudio(OUT SPSTREAMFORMAT &eFmt)
  244. {
  245. CComPtr<ISpStreamFormat> cpStream;
  246. HRESULT hr = m_cpVoice->GetOutputStream(&cpStream);
  247. //if (hr == S_OK)
  248. if (SUCCEEDED(hr))
  249. {
  250. CSpStreamFormat Fmt;
  251. HRESULT hStreamFmt = Fmt.AssignFormat(cpStream);
  252. if (SUCCEEDED(hStreamFmt))
  253. {
  254. eFmt = Fmt.ComputeFormatEnum();
  255. }
  256. else
  257. {
  258. return hStreamFmt;
  259. }
  260. }
  261. return hr;
  262. }
  263. HRESULT CText2Speech::SetOutAudio(IN const SPSTREAMFORMAT &eFmt)
  264. {
  265. HRESULT hr = S_FALSE;
  266. if (!m_cpOutAudio)
  267. {
  268. SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOOUT, &m_cpOutAudio); //创建接口
  269. }
  270. else
  271. {
  272. return hr;
  273. }
  274. //SPSF_CCITT_ALaw_8kHzMono 8Bit Stereo
  275. //SPSTREAMFORMAT eFmt = 21; //SPSF_22kHz 8Bit Stereo
  276. CSpStreamFormat Fmt;
  277. hr = Fmt.AssignFormat(eFmt);
  278. if (SUCCEEDED(hr))
  279. {
  280. hr = m_cpOutAudio->SetFormat(Fmt.FormatId(), Fmt.WaveFormatExPtr());
  281. }
  282. else
  283. {
  284. return hr;//hr = E_FAIL;
  285. }
  286. if (SUCCEEDED(hr)){
  287. m_cpVoice->SetOutput(m_cpOutAudio, FALSE);
  288. }
  289. return hr;
  290. }
  291. // 播放WAV,通过ISpStream接口实现;
  292. HRESULT CText2Speech::PlayWav(IN const TCHAR *szWavFileName)
  293. {
  294. CComPtr<ISpStream> cpWavStream;
  295. #ifndef UNICODE
  296. WCHAR szwWavFileName[MAX_PATH] = L"";
  297. USES_CONVERSION;
  298. wcscpy(szwWavFileName, T2W(szWavFileName));//从ANSI将WAV文件的名字转换成宽字符串
  299. //使用sphelper.h 提供的这个函数打开 wav 文件,并得到一个 IStream 指针
  300. HRESULT hr = SPBindToFile(szwWavFileName, SPFM_OPEN_READONLY, &cpWavStream);
  301. #else
  302. HRESULT hr = SPBindToFile(szWavFileName, SPFM_OPEN_READONLY, &cpWavStream);
  303. #endif
  304. if (SUCCEEDED(hr))
  305. {
  306. m_cpVoice->SpeakStream(cpWavStream, SPF_ASYNC, NULL);//播放WAV文件
  307. }
  308. return hr;
  309. }
  310. HRESULT CText2Speech::Save2Wav(IN const TCHAR *pText, IN const TCHAR *szWavFileName)
  311. {
  312. #ifndef UNICODE
  313. //TCHAR szFileName[256];//假设这里面保存着目标文件的路径
  314. USES_CONVERSION;
  315. WCHAR szWFileName[MAX_PATH];
  316. wcscpy(szWFileName, T2W(szWavFileName)); // ANSI转换成宽字符串;
  317. #endif
  318. //创建一个输出流,绑定到wav文件;
  319. CSpStreamFormat OriginalFmt;
  320. CComPtr<ISpStream> cpWavStream;
  321. CComPtr<ISpStreamFormat> cpOldStream;
  322. HRESULT hr = m_cpVoice->GetOutputStream(&cpOldStream);
  323. //if (hr == S_OK)
  324. // hr = OriginalFmt.AssignFormat(cpOldStream);
  325. //else hr = E_FAIL;
  326. if (FAILED(hr)) {
  327. return hr;
  328. }
  329. hr = OriginalFmt.AssignFormat(cpOldStream);
  330. // 使用sphelper.h中提供的函数创建 wav 文件;
  331. if (SUCCEEDED(hr))
  332. {
  333. // 设置WAV的编码格式???有何意义在这里???;
  334. // -- 因为语音卡要获取WAV文件时,需要指定的编码格式才能将文件读出来,详看http://hi.baidu.com/sunsee/item/a537343cdb83ad5b80f1a70fACM音频压缩管理器VC++源代码;
  335. //OriginalFmt.m_pCoMemWaveFormatEx->wBitsPerSample = 16;
  336. //OriginalFmt.m_pCoMemWaveFormatEx->nSamplesPerSec = 8000;
  337. OriginalFmt.m_pCoMemWaveFormatEx->wFormatTag = WAVE_FORMAT_PCM;
  338. #ifndef UNICODE
  339. hr = SPBindToFile(szWFileName, SPFM_CREATE_ALWAYS, &cpWavStream,
  340. &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr());
  341. #else
  342. hr = SPBindToFile(szWavFileName, SPFM_CREATE_ALWAYS, &cpWavStream,
  343. &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr());
  344. #endif
  345. }
  346. else
  347. {
  348. return hr;
  349. }
  350. if (SUCCEEDED(hr))
  351. {
  352. //设置声音的输出到 wav 文件,而不是 speakers;
  353. hr = m_cpVoice->SetOutput(cpWavStream, TRUE);
  354. }
  355. else
  356. {
  357. return hr;
  358. }
  359. // 开始朗读;
  360. hr = Speak(pText, SPF_ASYNC | SPF_IS_NOT_XML);
  361. // 等待朗读结束;
  362. m_cpVoice->WaitUntilDone(INFINITE);
  363. cpWavStream.Release();
  364. // 把输出重新定位到原来的流;
  365. m_cpVoice->SetOutput(cpOldStream, FALSE);
  366. return hr;
  367. }