Markup.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. // Markup.h: interface for the CMarkup class.
  2. //
  3. // Markup Release 11.5
  4. // Copyright (C) 2011 First Objective Software, Inc. All rights reserved
  5. // Go to www.firstobject.com for the latest CMarkup and EDOM documentation
  6. // Use in commercial applications requires written permission
  7. // This software is provided "as is", with no warranty.
  8. #if !defined(_MARKUP_H_INCLUDED_)
  9. #define _MARKUP_H_INCLUDED_
  10. #include <stdlib.h>
  11. #include <string.h> // memcpy, memset, strcmp...
  // Major build options
  // MARKUP_WCHAR wide char (2-byte UTF-16 on Windows, 4-byte UTF-32 on Linux and OS X)
  // MARKUP_MBCS ANSI/double-byte strings on Windows
  // MARKUP_STL (default except VC++) use STL strings instead of MFC strings
  // MARKUP_SAFESTR to use string _s functions in VC++ 2005 (_MSC_VER >= 1400)
  // MARKUP_WINCONV (default for VC++) for Windows API character conversion
  // MARKUP_ICONV (default for GNU) for character conversion on Linux, OS X and other platforms
  // MARKUP_STDCONV to use neither WINCONV nor ICONV; falls back to setlocale-based conversion for ANSI
  //
  // Platform: MARKUP_WINDOWS is set when either Win32 macro is present
  #if !defined(MARKUP_WINDOWS) && (defined(_WIN32) || defined(WIN32))
  #  define MARKUP_WINDOWS
  #endif

  // Compiler-specific setup
  #if defined(_MSC_VER) && _MSC_VER > 1000 // VC++
  #  pragma once
  #  ifndef MARKUP_SAFESTR // not using the VC++ safe string functions
  #    pragma warning(disable:4996) // VC++ 2005 deprecated function warnings
  #  endif
  #  if defined(MARKUP_STL) && _MSC_VER < 1400 // STL before VC++ 2005
  #    pragma warning(disable:4786) // std::string long names
  #  endif
  #else // not VC++
  #  ifndef MARKUP_STL // STL strings are always used outside VC++
  #    define MARKUP_STL
  #  endif
     // GNU compilers off Windows default to iconv unless a conversion option was chosen
  #  if defined(__GNUC__) && !defined(MARKUP_WINDOWS) && !defined(MARKUP_ICONV) && !defined(MARKUP_STDCONV) && !defined(MARKUP_WINCONV)
  #    define MARKUP_ICONV
  #  endif
  #endif // VC++ or not

  // Character model follows the usual UNICODE / MBCS project macros
  #if !defined(MARKUP_WCHAR) && (defined(_UNICODE) || defined(UNICODE))
  #  define MARKUP_WCHAR
  #endif
  #if !defined(MARKUP_MBCS) && (defined(_MBCS) || defined(MBCS))
  #  define MARKUP_MBCS
  #endif

  // Width of wchar_t in bytes: 4 when the compiler reports so, otherwise 2
  #ifndef MARKUP_SIZEOFWCHAR
  #  if (defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 4) || (defined(__WCHAR_MAX__) && __WCHAR_MAX__ > 0x10000)
  #    define MARKUP_SIZEOFWCHAR 4
  #  else
  #    define MARKUP_SIZEOFWCHAR 2
  #  endif
  #endif

  // With no conversion option selected by this point, use WINCONV
  #if !defined(MARKUP_WINCONV) && !defined(MARKUP_STDCONV) && !defined(MARKUP_ICONV)
  #  define MARKUP_WINCONV
  #endif

  // Default file block size; may be overridden before including this header
  #ifndef MARKUP_FILEBLOCKSIZE
  #  define MARKUP_FILEBLOCKSIZE 16384
  #endif
  // Text type and function defines (compiler and build-option dependent)
  //
  // MCD_CHAR / MCD_PCSZ name the character and constant-string types, and the
  // MCD_PSZ* macros map onto the matching wide or narrow C string functions.
  // The safe-string MCD_FOPEN variants are wrapped in do{}while(0) so that
  // "MCD_FOPEN(f,n,m);" is a single statement, also inside an unbraced if/else.
  //
  // Code page identifiers
  #define MCD_ACP 0
  #define MCD_UTF8 65001
  #define MCD_UTF16 1200
  #define MCD_UTF32 65005
  #if defined(MARKUP_WCHAR)
  #define MCD_CHAR wchar_t
  #define MCD_PCSZ const wchar_t*
  #define MCD_PSZLEN (int)wcslen
  #define MCD_PSZCHR wcschr
  #define MCD_PSZSTR wcsstr
  #define MCD_PSZTOL wcstol
  #if defined(MARKUP_SAFESTR) // VC++ safe strings
  // MCD_SSZ expands to "buffer, element count"; sz must be a true array, not a pointer
  #define MCD_SSZ(sz) sz,(sizeof(sz)/sizeof(MCD_CHAR))
  #define MCD_PSZCPY(sz,p) wcscpy_s(MCD_SSZ(sz),p)
  #define MCD_PSZNCPY(sz,p,n) wcsncpy_s(MCD_SSZ(sz),p,n)
  #define MCD_SPRINTF swprintf_s
  // f is set to NULL when the open fails
  #define MCD_FOPEN(f,n,m) do{if(_wfopen_s(&(f),n,m)!=0)(f)=NULL;}while(0)
  #else // not VC++ safe strings
  #if defined(__GNUC__) && ! defined(MARKUP_WINDOWS) // non-Windows GNUC
  // here MCD_SSZ also passes the element count
  #define MCD_SSZ(sz) sz,(sizeof(sz)/sizeof(MCD_CHAR))
  #else // not non-Windows GNUC
  #define MCD_SSZ(sz) sz
  #endif // not non-Windows GNUC
  #define MCD_PSZCPY wcscpy
  #define MCD_PSZNCPY wcsncpy
  #define MCD_SPRINTF swprintf
  #define MCD_FOPEN(f,n,m) f=_wfopen(n,m)
  #endif // not VC++ safe strings
  #define MCD_T(s) L ## s
  #if MARKUP_SIZEOFWCHAR == 4 // sizeof(wchar_t) == 4
  #define MCD_ENC MCD_T("UTF-32")
  #else // sizeof(wchar_t) == 2
  #define MCD_ENC MCD_T("UTF-16")
  #endif
  #define MCD_CLEN(p) 1
  #else // not MARKUP_WCHAR
  #define MCD_CHAR char
  #define MCD_PCSZ const char*
  #define MCD_PSZLEN (int)strlen
  #define MCD_PSZCHR strchr
  #define MCD_PSZSTR strstr
  #define MCD_PSZTOL strtol
  #if defined(MARKUP_SAFESTR) // VC++ safe strings
  // MCD_SSZ expands to "buffer, element count"; sz must be a true array, not a pointer
  #define MCD_SSZ(sz) sz,(sizeof(sz)/sizeof(MCD_CHAR))
  #define MCD_PSZCPY(sz,p) strcpy_s(MCD_SSZ(sz),p)
  #define MCD_PSZNCPY(sz,p,n) strncpy_s(MCD_SSZ(sz),p,n)
  #define MCD_SPRINTF sprintf_s
  // f is set to NULL when the open fails
  #define MCD_FOPEN(f,n,m) do{if(fopen_s(&(f),n,m)!=0)(f)=NULL;}while(0)
  #else // not VC++ safe strings
  #define MCD_SSZ(sz) sz
  #define MCD_PSZCPY strcpy
  #define MCD_PSZNCPY strncpy
  #define MCD_SPRINTF sprintf
  #define MCD_FOPEN(f,n,m) f=fopen(n,m)
  #endif // not VC++ safe strings
  #define MCD_T(s) s
  #if defined(MARKUP_MBCS) // MBCS/double byte
  #define MCD_ENC MCD_T("")
  // MCD_CLEN yields the byte length of the character at p
  #if defined(MARKUP_WINCONV)
  #define MCD_CLEN(p) (int)_mbclen((const unsigned char*)(p))
  #else // not WINCONV
  #define MCD_CLEN(p) (int)mblen(p,MB_CUR_MAX)
  #endif // not WINCONV
  #else // not MBCS/double byte
  #define MCD_ENC MCD_T("UTF-8")
  #define MCD_CLEN(p) 1
  #endif // not MBCS/double byte
  #endif // not MARKUP_WCHAR
  // NOTE(review): MCD_STRERROR expands to strerror(errno); the user of the macro
  // presumably has to include <errno.h> itself -- confirm against Markup.cpp
  #if !defined(_MSC_VER) || _MSC_VER < 1000 // not VC++
  #define MCD_STRERROR strerror(errno)
  #endif // not VC++
  136. // String type and function defines (compiler and build-option dependent)
  137. // Define MARKUP_STL to use STL strings
  138. //
  139. #if defined(MARKUP_STL) // STL
  140. #include <string>
  141. #if defined(MARKUP_WCHAR)
  142. #define MCD_STR std::wstring
  143. #else // not MARKUP_WCHAR
  144. #define MCD_STR std::string
  145. #endif // not MARKUP_WCHAR
  146. #define MCD_2PCSZ(s) s.c_str()
  147. #define MCD_STRLENGTH(s) (int)s.size()
  148. #define MCD_STRCLEAR(s) s.erase()
  149. #define MCD_STRCLEARSIZE(s) MCD_STR t; s.swap(t)
  150. #define MCD_STRISEMPTY(s) s.empty()
  151. #define MCD_STRMID(s,n,l) s.substr(n,l)
  152. #define MCD_STRASSIGN(s,p,n) s.assign(p,n)
  153. #define MCD_STRCAPACITY(s) (int)s.capacity()
  154. #define MCD_STRINSERTREPLACE(d,i,r,s) d.replace(i,r,s)
  155. #define MCD_GETBUFFER(s,n) new MCD_CHAR[n+1]; if ((int)s.capacity()<(int)n) s.reserve(n)
  156. #define MCD_RELEASEBUFFER(s,p,n) s.replace(0,s.size(),p,n); delete[]p
  157. #define MCD_BLDRESERVE(s,n) s.reserve(n)
  158. #define MCD_BLDCHECK(s,n,d) ;
  159. #define MCD_BLDRELEASE(s) ;
  160. #define MCD_BLDAPPENDN(s,p,n) s.append(p,n)
  161. #define MCD_BLDAPPEND(s,p) s.append(p)
  162. #define MCD_BLDAPPEND1(s,c) s+=(MCD_CHAR)(c)
  163. #define MCD_BLDLEN(s) (int)s.size()
  164. #define MCD_BLDTRUNC(s,n) s.resize(n)
  165. #else // not STL, i.e. MFC
  166. // afx.h provides CString, to avoid "WINVER not defined" #include stdafh.x in Markup.cpp
  167. #include <afx.h>
  168. #define MCD_STR CString
  169. #define MCD_2PCSZ(s) ((MCD_PCSZ)s)
  170. #define MCD_STRLENGTH(s) s.GetLength()
  171. #define MCD_STRCLEAR(s) s.Empty()
  172. #define MCD_STRCLEARSIZE(s) s=MCD_STR()
  173. #define MCD_STRISEMPTY(s) s.IsEmpty()
  174. #define MCD_STRMID(s,n,l) s.Mid(n,l)
  175. #define MCD_STRASSIGN(s,p,n) memcpy(s.GetBuffer(n),p,(n)*sizeof(MCD_CHAR));s.ReleaseBuffer(n);
  176. #define MCD_STRCAPACITY(s) (((CStringData*)((MCD_PCSZ)s)-1)->nAllocLength)
  177. #define MCD_GETBUFFER(s,n) s.GetBuffer(n)
  178. #define MCD_RELEASEBUFFER(s,p,n) s.ReleaseBuffer(n)
  179. #define MCD_BLDRESERVE(s,n) MCD_CHAR*pD=s.GetBuffer(n); int nL=0
  180. #define MCD_BLDCHECK(s,n,d) if(nL+(int)(d)>n){s.ReleaseBuffer(nL);n<<=2;pD=s.GetBuffer(n);}
  181. #define MCD_BLDRELEASE(s) s.ReleaseBuffer(nL)
  182. #define MCD_BLDAPPENDN(s,p,n) MCD_PSZNCPY(&pD[nL],p,n);nL+=n
  183. #define MCD_BLDAPPEND(s,p) MCD_PSZCPY(&pD[nL],p);nL+=MCD_PSZLEN(p)
  184. #define MCD_BLDAPPEND1(s,c) pD[nL++]=(MCD_CHAR)(c)
  185. #define MCD_BLDLEN(s) nL
  186. #define MCD_BLDTRUNC(s,n) nL=n
  187. #endif // not STL
  188. #define MCD_STRTOINT(s) MCD_PSZTOL(MCD_2PCSZ(s),NULL,10)
  189. // Allow function args to accept string objects as constant string pointers
  190. struct MCD_CSTR
  191. {
  192. MCD_CSTR() { pcsz = NULL; };
  193. MCD_CSTR(MCD_PCSZ p) { pcsz = p; };
  194. MCD_CSTR(const MCD_STR& s) { pcsz = MCD_2PCSZ(s); };
  195. operator MCD_PCSZ() const { return pcsz; };
  196. MCD_PCSZ pcsz;
  197. };
  // On Linux and OS X, filenames are not specified in wchar_t, so a wide-char
  // GNU build gets narrow filename types and a narrow fopen
  #if defined(__GNUC__) && defined(MARKUP_WCHAR)
  #  undef MCD_FOPEN
  #  define MCD_FOPEN(f,n,m) f=fopen(n,m)
  #  define MCD_T_FILENAME(s) s
  #  define MCD_PCSZ_FILENAME const char*
  // Narrow-string counterpart of MCD_CSTR; stores the pointer only, no copy is made
  struct MCD_CSTR_FILENAME
  {
      MCD_CSTR_FILENAME() : pcsz(NULL) {}
      MCD_CSTR_FILENAME(MCD_PCSZ_FILENAME p) : pcsz(p) {}
      MCD_CSTR_FILENAME(const std::string& s) : pcsz(s.c_str()) {}
      operator MCD_PCSZ_FILENAME() const { return pcsz; }
      MCD_PCSZ_FILENAME pcsz;
  };
  #else // not WCHAR GNUC: filenames use the same types as document text
  #  define MCD_CSTR_FILENAME MCD_CSTR
  #  define MCD_T_FILENAME MCD_T
  #  define MCD_PCSZ_FILENAME MCD_PCSZ
  #endif // not WCHAR GNUC
  // File seek/tell functions and the integer type that carries a file offset
  #if defined(__GNUC__) && !defined(MARKUP_WINDOWS) // non-Windows GNUC
  #  define MCD_INTFILEOFFSET off_t
  #  define MCD_FSEEK fseeko
  #  define MCD_FTELL ftello
  #elif defined(MARKUP_HUGEFILE) && defined(_MSC_VER) && _MSC_VER >= 1000 // VC++ HUGEFILE
  #  if _MSC_VER < 1400 // before VC++ 2005 the 64-bit functions are declared here
  extern "C" int __cdecl _fseeki64(FILE *, __int64, int);
  extern "C" __int64 __cdecl _ftelli64(FILE *);
  #  endif // before VC++ 2005
  #  define MCD_INTFILEOFFSET __int64
  #  define MCD_FSEEK _fseeki64
  #  define MCD_FTELL _ftelli64
  #else // neither of the above: plain C library calls with long offsets
  #  define MCD_INTFILEOFFSET long
  #  define MCD_FSEEK fseek
  #  define MCD_FTELL ftell
  #endif // file offset selection
  // End of line choices: none, return, newline, or CRLF.
  // An explicit MARKUP_EOL_* option is checked in the order NONE, RETURN,
  // NEWLINE, CRLF; with none of them set, Windows builds get CRLF and all
  // other builds get a newline.
  #if defined(MARKUP_EOL_NONE)
  #  define MCD_EOL MCD_T("")
  #elif defined(MARKUP_EOL_RETURN) // rare; only used on some old operating systems
  #  define MCD_EOL MCD_T("\r")
  #elif !defined(MARKUP_EOL_NEWLINE) && (defined(MARKUP_EOL_CRLF) || defined(MARKUP_WINDOWS)) // Windows standard
  #  define MCD_EOL MCD_T("\r\n")
  #else // Unix standard: requested explicitly, or not Windows and not otherwise specified
  #  define MCD_EOL MCD_T("\n")
  #endif // end of line selection
  #define MCD_EOLLEN (sizeof(MCD_EOL)/sizeof(MCD_CHAR)-1) // string length of MCD_EOL
  // Forward declarations of helper types that CMarkup uses only through
  // pointers and references; they are not defined in this header
  struct ElemPosTree;
  struct FilePos;
  struct NodePos;
  struct PathPos;
  struct SavedPosMapArray;
  struct TokenPos;
  254. class CMarkup
  255. {
  256. public:
  257. CMarkup() { x_InitMarkup(); SetDoc(NULL); };
  258. CMarkup(MCD_CSTR szDoc) { x_InitMarkup(); SetDoc(szDoc); };
  259. CMarkup(int nFlags) { x_InitMarkup(); SetDoc(NULL); m_nDocFlags = nFlags; };
  260. CMarkup(const CMarkup& markup) { x_InitMarkup(); *this = markup; };
  261. void operator=(const CMarkup& markup);
  262. ~CMarkup();
  263. // Navigate
  264. bool Load(MCD_CSTR_FILENAME szFileName);
  265. bool SetDoc(MCD_PCSZ pDoc);
  266. bool SetDoc(const MCD_STR& strDoc);
  267. bool IsWellFormed();
  268. bool FindElem(MCD_CSTR szName = NULL);
  269. bool FindChildElem(MCD_CSTR szName = NULL);
  270. bool IntoElem();
  271. bool OutOfElem();
  272. void ResetChildPos() { x_SetPos(m_iPosParent, m_iPos, 0); };
  273. void ResetMainPos() { x_SetPos(m_iPosParent, 0, 0); };
  274. void ResetPos() { x_SetPos(0, 0, 0); };
  275. MCD_STR GetTagName() const;
  276. MCD_STR GetChildTagName() const { return x_GetTagName(m_iPosChild); };
  277. MCD_STR GetData() { return x_GetData(m_iPos); };
  278. MCD_STR GetChildData() { return x_GetData(m_iPosChild); };
  279. MCD_STR GetElemContent() const { return x_GetElemContent(m_iPos); };
  280. MCD_STR GetAttrib(MCD_CSTR szAttrib) const { return x_GetAttrib(m_iPos, szAttrib); };
  281. MCD_STR GetChildAttrib(MCD_CSTR szAttrib) const { return x_GetAttrib(m_iPosChild, szAttrib); };
  282. bool GetNthAttrib(int n, MCD_STR& strAttrib, MCD_STR& strValue) const;
  283. MCD_STR GetAttribName(int n) const;
  284. int FindNode(int nType = 0);
  285. int GetNodeType() { return m_nNodeType; };
  286. bool SavePos(MCD_CSTR szPosName = MCD_T(""), int nMap = 0);
  287. bool RestorePos(MCD_CSTR szPosName = MCD_T(""), int nMap = 0);
  288. bool SetMapSize(int nSize, int nMap = 0);
  289. MCD_STR GetError() const;
  290. const MCD_STR& GetResult() const { return m_strResult; };
  291. int GetDocFlags() const { return m_nDocFlags; };
  292. void SetDocFlags(int nFlags) { m_nDocFlags = (nFlags & ~(MDF_READFILE | MDF_WRITEFILE | MDF_APPENDFILE)); };
  293. enum MarkupDocFlags
  294. {
  295. MDF_UTF16LEFILE = 1,
  296. MDF_UTF8PREAMBLE = 4,
  297. MDF_IGNORECASE = 8,
  298. MDF_READFILE = 16,
  299. MDF_WRITEFILE = 32,
  300. MDF_APPENDFILE = 64,
  301. MDF_UTF16BEFILE = 128,
  302. MDF_TRIMWHITESPACE = 256,
  303. MDF_COLLAPSEWHITESPACE = 512
  304. };
  305. enum MarkupNodeFlags
  306. {
  307. MNF_WITHCDATA = 0x01,
  308. MNF_WITHNOLINES = 0x02,
  309. MNF_WITHXHTMLSPACE = 0x04,
  310. MNF_WITHREFS = 0x08,
  311. MNF_WITHNOEND = 0x10,
  312. MNF_ESCAPEQUOTES = 0x100,
  313. MNF_NONENDED = 0x100000,
  314. MNF_ILLDATA = 0x200000
  315. };
  316. enum MarkupNodeType
  317. {
  318. MNT_ELEMENT = 1, // 0x0001
  319. MNT_TEXT = 2, // 0x0002
  320. MNT_WHITESPACE = 4, // 0x0004
  321. MNT_TEXT_AND_WHITESPACE = 6, // 0x0006
  322. MNT_CDATA_SECTION = 8, // 0x0008
  323. MNT_PROCESSING_INSTRUCTION = 16, // 0x0010
  324. MNT_COMMENT = 32, // 0x0020
  325. MNT_DOCUMENT_TYPE = 64, // 0x0040
  326. MNT_EXCLUDE_WHITESPACE = 123, // 0x007b
  327. MNT_LONE_END_TAG = 128, // 0x0080
  328. MNT_NODE_ERROR = 32768 // 0x8000
  329. };
  330. // Create
  331. bool Save(MCD_CSTR_FILENAME szFileName);
  332. const MCD_STR& GetDoc() const { return m_strDoc; };
  333. bool AddElem(MCD_CSTR szName, MCD_CSTR szData = NULL, int nFlags = 0) { return x_AddElem(szName, szData, nFlags); };
  334. bool InsertElem(MCD_CSTR szName, MCD_CSTR szData = NULL, int nFlags = 0) { return x_AddElem(szName, szData, nFlags | MNF_INSERT); };
  335. bool AddChildElem(MCD_CSTR szName, MCD_CSTR szData = NULL, int nFlags = 0) { return x_AddElem(szName, szData, nFlags | MNF_CHILD); };
  336. bool InsertChildElem(MCD_CSTR szName, MCD_CSTR szData = NULL, int nFlags = 0) { return x_AddElem(szName, szData, nFlags | MNF_INSERT | MNF_CHILD); };
  337. bool AddElem(MCD_CSTR szName, int nValue, int nFlags = 0) { return x_AddElem(szName, nValue, nFlags); };
  338. bool InsertElem(MCD_CSTR szName, int nValue, int nFlags = 0) { return x_AddElem(szName, nValue, nFlags | MNF_INSERT); };
  339. bool AddChildElem(MCD_CSTR szName, int nValue, int nFlags = 0) { return x_AddElem(szName, nValue, nFlags | MNF_CHILD); };
  340. bool InsertChildElem(MCD_CSTR szName, int nValue, int nFlags = 0) { return x_AddElem(szName, nValue, nFlags | MNF_INSERT | MNF_CHILD); };
  341. bool AddAttrib(MCD_CSTR szAttrib, MCD_CSTR szValue) { return x_SetAttrib(m_iPos, szAttrib, szValue); };
  342. bool AddChildAttrib(MCD_CSTR szAttrib, MCD_CSTR szValue) { return x_SetAttrib(m_iPosChild, szAttrib, szValue); };
  343. bool AddAttrib(MCD_CSTR szAttrib, int nValue) { return x_SetAttrib(m_iPos, szAttrib, nValue); };
  344. bool AddChildAttrib(MCD_CSTR szAttrib, int nValue) { return x_SetAttrib(m_iPosChild, szAttrib, nValue); };
  345. bool AddSubDoc(MCD_CSTR szSubDoc) { return x_AddSubDoc(szSubDoc, 0); };
  346. bool InsertSubDoc(MCD_CSTR szSubDoc) { return x_AddSubDoc(szSubDoc, MNF_INSERT); };
  347. MCD_STR GetSubDoc() { return x_GetSubDoc(m_iPos); };
  348. bool AddChildSubDoc(MCD_CSTR szSubDoc) { return x_AddSubDoc(szSubDoc, MNF_CHILD); };
  349. bool InsertChildSubDoc(MCD_CSTR szSubDoc) { return x_AddSubDoc(szSubDoc, MNF_CHILD | MNF_INSERT); };
  350. MCD_STR GetChildSubDoc() { return x_GetSubDoc(m_iPosChild); };
  351. bool AddNode(int nType, MCD_CSTR szText) { return x_AddNode(nType, szText, 0); };
  352. bool InsertNode(int nType, MCD_CSTR szText) { return x_AddNode(nType, szText, MNF_INSERT); };
  353. // Modify
  354. bool RemoveElem();
  355. bool RemoveChildElem();
  356. bool RemoveNode();
  357. bool SetAttrib(MCD_CSTR szAttrib, MCD_CSTR szValue, int nFlags = 0) { return x_SetAttrib(m_iPos, szAttrib, szValue, nFlags); };
  358. bool SetChildAttrib(MCD_CSTR szAttrib, MCD_CSTR szValue, int nFlags = 0) { return x_SetAttrib(m_iPosChild, szAttrib, szValue, nFlags); };
  359. bool SetAttrib(MCD_CSTR szAttrib, int nValue, int nFlags = 0) { return x_SetAttrib(m_iPos, szAttrib, nValue, nFlags); };
  360. bool SetChildAttrib(MCD_CSTR szAttrib, int nValue, int nFlags = 0) { return x_SetAttrib(m_iPosChild, szAttrib, nValue, nFlags); };
  361. bool SetData(MCD_CSTR szData, int nFlags = 0) { return x_SetData(m_iPos, szData, nFlags); };
  362. bool SetChildData(MCD_CSTR szData, int nFlags = 0) { return x_SetData(m_iPosChild, szData, nFlags); };
  363. bool SetData(int nValue) { return x_SetData(m_iPos, nValue); };
  364. bool SetChildData(int nValue) { return x_SetData(m_iPosChild, nValue); };
  365. bool SetElemContent(MCD_CSTR szContent) { return x_SetElemContent(szContent); };
  366. // Utility
  367. static bool ReadTextFile(MCD_CSTR_FILENAME szFileName, MCD_STR& strDoc, MCD_STR* pstrResult = NULL, int* pnDocFlags = NULL, MCD_STR* pstrEncoding = NULL);
  368. static bool WriteTextFile(MCD_CSTR_FILENAME szFileName, const MCD_STR& strDoc, MCD_STR* pstrResult = NULL, int* pnDocFlags = NULL, MCD_STR* pstrEncoding = NULL);
  369. static MCD_STR EscapeText(MCD_CSTR szText, int nFlags = 0);
  370. static MCD_STR UnescapeText(MCD_CSTR szText, int nTextLength = -1, int nFlags = 0);
  371. static int UTF16To8(char *pszUTF8, const unsigned short* pwszUTF16, int nUTF8Count);
  372. static int UTF8To16(unsigned short* pwszUTF16, const char* pszUTF8, int nUTF8Count);
  373. static MCD_STR UTF8ToA(MCD_CSTR pszUTF8, int* pnFailed = NULL);
  374. static MCD_STR AToUTF8(MCD_CSTR pszANSI);
  375. static void EncodeCharUTF8(int nUChar, char* pszUTF8, int& nUTF8Len);
  376. static int DecodeCharUTF8(const char*& pszUTF8, const char* pszUTF8End = NULL);
  377. static void EncodeCharUTF16(int nUChar, unsigned short* pwszUTF16, int& nUTF16Len);
  378. static int DecodeCharUTF16(const unsigned short*& pwszUTF16, const unsigned short* pszUTF16End = NULL);
  379. static bool DetectUTF8(const char* pText, int nTextLen, int* pnNonASCII = NULL, bool* bErrorAtEnd = NULL);
  380. static MCD_STR GetDeclaredEncoding(MCD_CSTR szDoc);
  381. static int GetEncodingCodePage(MCD_CSTR pszEncoding);
  382. protected:
  383. #if defined(_DEBUG)
  384. MCD_PCSZ m_pDebugCur;
  385. MCD_PCSZ m_pDebugPos;
  386. #endif // DEBUG
  387. MCD_STR m_strDoc;
  388. MCD_STR m_strResult;
  389. int m_iPosParent;
  390. int m_iPos;
  391. int m_iPosChild;
  392. int m_iPosFree;
  393. int m_iPosDeleted;
  394. int m_nNodeType;
  395. int m_nNodeOffset;
  396. int m_nNodeLength;
  397. int m_nDocFlags;
  398. FilePos* m_pFilePos;
  399. SavedPosMapArray* m_pSavedPosMaps;
  400. ElemPosTree* m_pElemPosTree;
  401. enum MarkupNodeFlagsInternal
  402. {
  403. MNF_INSERT = 0x002000,
  404. MNF_CHILD = 0x004000
  405. };
  406. #if defined(_DEBUG) // DEBUG
  407. void x_SetDebugState();
  408. #define MARKUP_SETDEBUGSTATE x_SetDebugState()
  409. #else // not DEBUG
  410. #define MARKUP_SETDEBUGSTATE
  411. #endif // not DEBUG
  412. void x_InitMarkup();
  413. void x_SetPos(int iPosParent, int iPos, int iPosChild);
  414. int x_GetFreePos();
  415. bool x_AllocElemPos(int nNewSize = 0);
  416. int x_GetParent(int i);
  417. bool x_ParseDoc();
  418. int x_ParseElem(int iPos, TokenPos& token);
  419. int x_FindElem(int iPosParent, int iPos, PathPos& path) const;
  420. MCD_STR x_GetPath(int iPos) const;
  421. MCD_STR x_GetTagName(int iPos) const;
  422. MCD_STR x_GetData(int iPos);
  423. MCD_STR x_GetAttrib(int iPos, MCD_PCSZ pAttrib) const;
  424. static MCD_STR x_EncodeCDATASection(MCD_PCSZ szData);
  425. bool x_AddElem(MCD_PCSZ pName, MCD_PCSZ pValue, int nFlags);
  426. bool x_AddElem(MCD_PCSZ pName, int nValue, int nFlags);
  427. MCD_STR x_GetSubDoc(int iPos);
  428. bool x_AddSubDoc(MCD_PCSZ pSubDoc, int nFlags);
  429. bool x_SetAttrib(int iPos, MCD_PCSZ pAttrib, MCD_PCSZ pValue, int nFlags = 0);
  430. bool x_SetAttrib(int iPos, MCD_PCSZ pAttrib, int nValue, int nFlags = 0);
  431. bool x_AddNode(int nNodeType, MCD_PCSZ pText, int nNodeFlags);
  432. void x_RemoveNode(int iPosParent, int& iPos, int& nNodeType, int& nNodeOffset, int& nNodeLength);
  433. static bool x_CreateNode(MCD_STR& strNode, int nNodeType, MCD_PCSZ pText);
  434. int x_InsertNew(int iPosParent, int& iPosRel, NodePos& node);
  435. void x_AdjustForNode(int iPosParent, int iPos, int nShift);
  436. void x_Adjust(int iPos, int nShift, bool bAfterPos = false);
  437. void x_LinkElem(int iPosParent, int iPosBefore, int iPos);
  438. int x_UnlinkElem(int iPos);
  439. int x_UnlinkPrevElem(int iPosParent, int iPosBefore, int iPos);
  440. int x_ReleaseSubDoc(int iPos);
  441. int x_ReleasePos(int iPos);
  442. void x_CheckSavedPos();
  443. bool x_SetData(int iPos, MCD_PCSZ szData, int nFlags);
  444. bool x_SetData(int iPos, int nValue);
  445. int x_RemoveElem(int iPos);
  446. MCD_STR x_GetElemContent(int iPos) const;
  447. bool x_SetElemContent(MCD_PCSZ szContent);
  448. void x_DocChange(int nLeft, int nReplace, const MCD_STR& strInsert);
  449. };
  450. #endif // !defined(_MARKUP_H_INCLUDED_)