Markup.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. // Markup.h: interface for the CMarkup class.
  2. //
  3. // Markup Release 11.5
  4. // Copyright (C) 2011 First Objective Software, Inc. All rights reserved
  5. // Go to www.firstobject.com for the latest CMarkup and EDOM documentation
  6. // Use in commercial applications requires written permission
  7. // This software is provided "as is", with no warranty.
  8. #if !defined(_MARKUP_H_INCLUDED_)
  9. #define _MARKUP_H_INCLUDED_
  10. #include <stdlib.h>
  11. #include <string.h> // memcpy, memset, strcmp...
  12. // Major build options
  13. // MARKUP_WCHAR wide char (2-byte UTF-16 on Windows, 4-byte UTF-32 on Linux and OS X)
  14. // MARKUP_MBCS ANSI/double-byte strings on Windows
  15. // MARKUP_STL (default except VC++) use STL strings instead of MFC strings
  16. // MARKUP_SAFESTR to use string _s functions in VC++ 2005 (_MSC_VER >= 1400)
  17. // MARKUP_WINCONV (default for VC++) for Windows API character conversion
  18. // MARKUP_ICONV (default for GNU) for character conversion on Linux and OS X and other platforms
  19. // MARKUP_STDCONV to use neither WINCONV or ICONV, falls back to setlocale based conversion for ANSI
  20. //
  // Platform detection: define MARKUP_WINDOWS on Win32 targets unless the
  // build has already defined it.
  #if ! defined(MARKUP_WINDOWS)
    #if defined(_WIN32) || defined(WIN32)
      #define MARKUP_WINDOWS
    #endif // WIN32 or _WIN32
  #endif // not MARKUP_WINDOWS

  // Compiler-specific defaults. _MSC_VER is tested with defined() first so the
  // comparison never relies on an undefined macro evaluating to 0 (-Wundef).
  #if defined(_MSC_VER) && _MSC_VER > 1000 // VC++
    #pragma once
    #if ! defined(MARKUP_SAFESTR) // not VC++ safe strings
      #pragma warning(disable:4996) // VC++ 2005 deprecated function warnings
    #endif // not VC++ safe strings
    #if defined(MARKUP_STL) && _MSC_VER < 1400 // STL pre VC++ 2005
      #pragma warning(disable:4786) // std::string long names
    #endif // STL pre VC++ 2005
  #else // not VC++
    // Every compiler other than VC++ uses STL strings.
    #if ! defined(MARKUP_STL)
      #define MARKUP_STL
    #endif // not STL
    // GNU compilers off Windows default to iconv when no converter was chosen.
    #if defined(__GNUC__) && ! defined(MARKUP_ICONV) && ! defined(MARKUP_STDCONV) && ! defined(MARKUP_WINCONV)
      #if ! defined(MARKUP_WINDOWS)
        #define MARKUP_ICONV
      #endif // not Windows
    #endif // GNUC and not ICONV not STDCONV not WINCONV
  #endif // not VC++

  // Map the conventional Windows character-set macros onto the MARKUP_ options.
  #if (defined(_UNICODE) || defined(UNICODE)) && ! defined(MARKUP_WCHAR)
    #define MARKUP_WCHAR
  #endif // (_UNICODE or UNICODE) and not MARKUP_WCHAR
  #if (defined(_MBCS) || defined(MBCS)) && ! defined(MARKUP_MBCS)
    #define MARKUP_MBCS
  #endif // (_MBCS or MBCS) and not MARKUP_MBCS

  // Size of wchar_t in bytes: 4 when the compiler reports a 4-byte or wide-range
  // wchar_t through its predefined macros, otherwise 2.
  #if ! defined(MARKUP_SIZEOFWCHAR)
    #if (defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 4) || (defined(__WCHAR_MAX__) && __WCHAR_MAX__ > 0x10000)
      #define MARKUP_SIZEOFWCHAR 4
    #else // sizeof(wchar_t) != 4
      #define MARKUP_SIZEOFWCHAR 2
    #endif // sizeof(wchar_t) != 4
  #endif // not MARKUP_SIZEOFWCHAR

  // Fall back to Windows API conversion when nothing above selected a converter.
  #if ! defined(MARKUP_WINCONV) && ! defined(MARKUP_STDCONV) && ! defined(MARKUP_ICONV)
    #define MARKUP_WINCONV
  #endif // not WINCONV not STDCONV not ICONV

  // Default file block size; may be overridden by defining it before inclusion.
  #if ! defined(MARKUP_FILEBLOCKSIZE)
    #define MARKUP_FILEBLOCKSIZE 16384
  #endif
  // Text type and function defines (compiler and build-option dependent)
  //
  // Numeric encoding identifiers.
  #define MCD_ACP 0
  #define MCD_UTF8 65001
  #define MCD_UTF16 1200
  #define MCD_UTF32 65005

  #if defined(MARKUP_WCHAR)
    // Wide-character build: MCD_CHAR is wchar_t and the wcs* functions are used.
    #define MCD_CHAR wchar_t
    #define MCD_PCSZ const wchar_t*
    #define MCD_PSZLEN (int)wcslen
    #define MCD_PSZCHR wcschr
    #define MCD_PSZSTR wcsstr
    #define MCD_PSZTOL wcstol
    #if defined(MARKUP_SAFESTR) // VC++ safe strings
      // MCD_SSZ expands to TWO arguments (buffer, element count); the count is
      // only meaningful when sz is a real array, not a pointer.
      #define MCD_SSZ(sz) sz,(sizeof(sz)/sizeof(MCD_CHAR))
      #define MCD_PSZCPY(sz,p) wcscpy_s(MCD_SSZ(sz),p)
      #define MCD_PSZNCPY(sz,p,n) wcsncpy_s(MCD_SSZ(sz),p,n)
      #define MCD_SPRINTF swprintf_s
      // Leaves f NULL when the open fails.
      #define MCD_FOPEN(f,n,m) {if(_wfopen_s(&f,n,m)!=0)f=NULL;}
    #else // not VC++ safe strings
      #if defined(__GNUC__) && ! defined(MARKUP_WINDOWS) // non-Windows GNUC
        #define MCD_SSZ(sz) sz,(sizeof(sz)/sizeof(MCD_CHAR))
      #else // not non-Windows GNUC
        #define MCD_SSZ(sz) sz
      #endif // not non-Windows GNUC
      #define MCD_PSZCPY wcscpy
      #define MCD_PSZNCPY wcsncpy
      #define MCD_SPRINTF swprintf
      #define MCD_FOPEN(f,n,m) f=_wfopen(n,m)
    #endif // not VC++ safe strings
    #define MCD_T(s) L ## s
    #if MARKUP_SIZEOFWCHAR == 4 // sizeof(wchar_t) == 4
      #define MCD_ENC MCD_T("UTF-32")
    #else // sizeof(wchar_t) == 2
      #define MCD_ENC MCD_T("UTF-16")
    #endif
    // One MCD_CHAR per character step in the wide build.
    #define MCD_CLEN(p) 1
  #else // not MARKUP_WCHAR
    // Narrow build: MCD_CHAR is char and the str* functions are used.
    #define MCD_CHAR char
    #define MCD_PCSZ const char*
    #define MCD_PSZLEN (int)strlen
    #define MCD_PSZCHR strchr
    #define MCD_PSZSTR strstr
    #define MCD_PSZTOL strtol
    #if defined(MARKUP_SAFESTR) // VC++ safe strings
      // MCD_SSZ expands to TWO arguments (buffer, element count); the count is
      // only meaningful when sz is a real array, not a pointer.
      #define MCD_SSZ(sz) sz,(sizeof(sz)/sizeof(MCD_CHAR))
      #define MCD_PSZCPY(sz,p) strcpy_s(MCD_SSZ(sz),p)
      #define MCD_PSZNCPY(sz,p,n) strncpy_s(MCD_SSZ(sz),p,n)
      #define MCD_SPRINTF sprintf_s
      // Leaves f NULL when the open fails.
      #define MCD_FOPEN(f,n,m) {if(fopen_s(&f,n,m)!=0)f=NULL;}
    #else // not VC++ safe strings
      #define MCD_SSZ(sz) sz
      #define MCD_PSZCPY strcpy
      #define MCD_PSZNCPY strncpy
      #define MCD_SPRINTF sprintf
      #define MCD_FOPEN(f,n,m) f=fopen(n,m)
    #endif // not VC++ safe strings
    #define MCD_T(s) s
    #if defined(MARKUP_MBCS) // MBCS/double byte
      #define MCD_ENC MCD_T("")
      // Byte length of the character at p. The argument is parenthesized so an
      // expression such as MCD_CLEN(a+b) is cast/passed as a whole.
      #if defined(MARKUP_WINCONV)
        #define MCD_CLEN(p) (int)_mbclen((const unsigned char*)(p))
      #else // not WINCONV
        #define MCD_CLEN(p) (int)mblen((p),MB_CUR_MAX)
      #endif // not WINCONV
    #else // not MBCS/double byte
      #define MCD_ENC MCD_T("UTF-8")
      #define MCD_CLEN(p) 1
    #endif // not MBCS/double byte
  #endif // not MARKUP_WCHAR

  // MCD_STRERROR is only provided outside VC++ (defined() guard avoids comparing
  // an undefined _MSC_VER). Users of the macro need strerror and errno in scope.
  #if ! defined(_MSC_VER) || _MSC_VER < 1000 // not VC++
    #define MCD_STRERROR strerror(errno)
  #endif // not VC++
  136. // String type and function defines (compiler and build-option dependent)
  137. // Define MARKUP_STL to use STL strings
  138. //
  139. #if defined(MARKUP_STL) // STL
  140. #include <string>
  141. #if defined(MARKUP_WCHAR)
  142. #define MCD_STR std::wstring
  143. #else // not MARKUP_WCHAR
  144. #define MCD_STR std::string
  145. #endif // not MARKUP_WCHAR
  146. #define MCD_2PCSZ(s) s.c_str()
  147. #define MCD_STRLENGTH(s) (int)s.size()
  148. #define MCD_STRCLEAR(s) s.erase()
  149. #define MCD_STRCLEARSIZE(s) MCD_STR t; s.swap(t)
  150. #define MCD_STRISEMPTY(s) s.empty()
  151. #define MCD_STRMID(s,n,l) s.substr(n,l)
  152. #define MCD_STRASSIGN(s,p,n) s.assign(p,n)
  153. #define MCD_STRCAPACITY(s) (int)s.capacity()
  154. #define MCD_STRINSERTREPLACE(d,i,r,s) d.replace(i,r,s)
  155. #define MCD_GETBUFFER(s,n) new MCD_CHAR[n+1]; if ((int)s.capacity()<(int)n) s.reserve(n)
  156. #define MCD_RELEASEBUFFER(s,p,n) s.replace(0,s.size(),p,n); delete[]p
  157. #define MCD_BLDRESERVE(s,n) s.reserve(n)
  158. #define MCD_BLDCHECK(s,n,d) ;
  159. #define MCD_BLDRELEASE(s) ;
  160. #define MCD_BLDAPPENDN(s,p,n) s.append(p,n)
  161. #define MCD_BLDAPPEND(s,p) s.append(p)
  162. #define MCD_BLDAPPEND1(s,c) s+=(MCD_CHAR)(c)
  163. #define MCD_BLDLEN(s) (int)s.size()
  164. #define MCD_BLDTRUNC(s,n) s.resize(n)
  165. #else // not STL, i.e. MFC
  166. // afx.h provides CString, to avoid "WINVER not defined" #include stdafh.x in Markup.cpp
  167. #include <afx.h>
  168. #define MCD_STR CString
  169. #define MCD_2PCSZ(s) ((MCD_PCSZ)s)
  170. #define MCD_STRLENGTH(s) s.GetLength()
  171. #define MCD_STRCLEAR(s) s.Empty()
  172. #define MCD_STRCLEARSIZE(s) s=MCD_STR()
  173. #define MCD_STRISEMPTY(s) s.IsEmpty()
  174. #define MCD_STRMID(s,n,l) s.Mid(n,l)
  175. #define MCD_STRASSIGN(s,p,n) memcpy(s.GetBuffer(n),p,(n)*sizeof(MCD_CHAR));s.ReleaseBuffer(n);
  176. #define MCD_STRCAPACITY(s) (((CStringData*)((MCD_PCSZ)s)-1)->nAllocLength)
  177. #define MCD_GETBUFFER(s,n) s.GetBuffer(n)
  178. #define MCD_RELEASEBUFFER(s,p,n) s.ReleaseBuffer(n)
  179. #define MCD_BLDRESERVE(s,n) MCD_CHAR*pD=s.GetBuffer(n); int nL=0
  180. #define MCD_BLDCHECK(s,n,d) if(nL+(int)(d)>n){s.ReleaseBuffer(nL);n<<=2;pD=s.GetBuffer(n);}
  181. #define MCD_BLDRELEASE(s) s.ReleaseBuffer(nL)
  182. #define MCD_BLDAPPENDN(s,p,n) MCD_PSZNCPY(&pD[nL],p,n);nL+=n
  183. #define MCD_BLDAPPEND(s,p) MCD_PSZCPY(&pD[nL],p);nL+=MCD_PSZLEN(p)
  184. #define MCD_BLDAPPEND1(s,c) pD[nL++]=(MCD_CHAR)(c)
  185. #define MCD_BLDLEN(s) nL
  186. #define MCD_BLDTRUNC(s,n) nL=n
  187. #endif // not STL
  188. #define MCD_STRTOINT(s) MCD_PSZTOL(MCD_2PCSZ(s),NULL,10)
  189. // Allow function args to accept string objects as constant string pointers
  190. struct MCD_CSTR
  191. {
  192. MCD_CSTR() { pcsz=NULL; };
  193. MCD_CSTR( MCD_PCSZ p ) { pcsz=p; };
  194. MCD_CSTR( const MCD_STR& s ) { pcsz = MCD_2PCSZ(s); };
  195. operator MCD_PCSZ() const { return pcsz; };
  196. MCD_PCSZ pcsz;
  197. };
  // On Linux and OS X, filenames are not specified in wchar_t
  #if defined(MARKUP_WCHAR) && defined(__GNUC__)
    // Wide-char document text but narrow (char) filenames: reopen files with
    // fopen and give filename arguments their own char-based wrapper type.
    #undef MCD_FOPEN
    #define MCD_FOPEN(f,n,m) f=fopen(n,m)
    #define MCD_T_FILENAME(s) s
    #define MCD_PCSZ_FILENAME const char*
    // Filename counterpart of MCD_CSTR; stores the pointer only, no copy.
    struct MCD_CSTR_FILENAME
    {
        MCD_CSTR_FILENAME() : pcsz(NULL) {}
        MCD_CSTR_FILENAME( MCD_PCSZ_FILENAME p ) : pcsz(p) {}
        MCD_CSTR_FILENAME( const std::string& s ) : pcsz(s.c_str()) {}
        operator MCD_PCSZ_FILENAME() const { return pcsz; }

        MCD_PCSZ_FILENAME pcsz;
    };
  #else // not WCHAR GNUC
    // Filenames use the same character type as document text.
    #define MCD_CSTR_FILENAME MCD_CSTR
    #define MCD_T_FILENAME MCD_T
    #define MCD_PCSZ_FILENAME MCD_PCSZ
  #endif // not WCHAR GNUC
  // File fseek, ftell and offset type
  // Selects the seek/tell functions and the integer type used for file offsets.
  // _MSC_VER is checked with defined() before it is compared, so the #elif does
  // not depend on an undefined macro evaluating to 0.
  #if defined(__GNUC__) && ! defined(MARKUP_WINDOWS) // non-Windows GNUC
    #define MCD_FSEEK fseeko
    #define MCD_FTELL ftello
    #define MCD_INTFILEOFFSET off_t
  #elif defined(_MSC_VER) && _MSC_VER >= 1000 && defined(MARKUP_HUGEFILE) // VC++ HUGEFILE
    #if _MSC_VER < 1400 // before VC++ 2005
      // Declared here because these compilers' headers do not declare them.
      // FILE must already be declared (e.g. via <stdio.h>) at this point.
      extern "C" int __cdecl _fseeki64(FILE *, __int64, int);
      extern "C" __int64 __cdecl _ftelli64(FILE *);
    #endif // before VC++ 2005
    #define MCD_FSEEK _fseeki64
    #define MCD_FTELL _ftelli64
    #define MCD_INTFILEOFFSET __int64
  #else // not non-Windows GNUC or VC++ HUGEFILE
    #define MCD_FSEEK fseek
    #define MCD_FTELL ftell
    #define MCD_INTFILEOFFSET long
  #endif // not non-Windows GNUC or VC++ HUGEFILE
  // End of line choices: none, return, newline, or CRLF
  // An explicit MARKUP_EOL_* option wins; otherwise Windows builds get CRLF and
  // everything else gets a newline.
  #if defined(MARKUP_EOL_NONE)
    #define MCD_EOL MCD_T("")
  #elif defined(MARKUP_EOL_RETURN) // rare; only used on some old operating systems
    #define MCD_EOL MCD_T("\r")
  #elif defined(MARKUP_EOL_NEWLINE) // Unix standard
    #define MCD_EOL MCD_T("\n")
  #elif defined(MARKUP_EOL_CRLF) || defined(MARKUP_WINDOWS) // Windows standard
    #define MCD_EOL MCD_T("\r\n")
  #else // not Windows and not otherwise specified
    #define MCD_EOL MCD_T("\n")
  #endif // not Windows and not otherwise specified
  // String length of MCD_EOL in characters (the terminator is subtracted).
  #define MCD_EOLLEN (sizeof(MCD_EOL)/sizeof(MCD_CHAR)-1)
  248. struct FilePos;
  249. struct TokenPos;
  250. struct NodePos;
  251. struct PathPos;
  252. struct SavedPosMapArray;
  253. struct ElemPosTree;
  254. class CMarkup
  255. {
  256. public:
  257. CMarkup() { x_InitMarkup(); SetDoc( NULL ); };
  258. CMarkup( MCD_CSTR szDoc ) { x_InitMarkup(); SetDoc( szDoc ); };
  259. CMarkup( int nFlags ) { x_InitMarkup(); SetDoc( NULL ); m_nDocFlags = nFlags; };
  260. CMarkup( const CMarkup& markup ) { x_InitMarkup(); *this = markup; };
  261. void operator=( const CMarkup& markup );
  262. ~CMarkup();
  263. // Navigate
  264. bool Load( MCD_CSTR_FILENAME szFileName );
  265. bool SetDoc( MCD_PCSZ pDoc );
  266. bool SetDoc( const MCD_STR& strDoc );
  267. bool IsWellFormed();
  268. bool FindElem( MCD_CSTR szName=NULL );
  269. // bool FindPrevElem( MCD_CSTR szName=NULL ); // undo;
  270. bool FindChildElem( MCD_CSTR szName=NULL );
  271. // bool FindPrevChildElem( MCD_CSTR szName=NULL ); // undo;
  272. bool IntoElem();
  273. bool OutOfElem();
  274. void ResetChildPos() { x_SetPos(m_iPosParent,m_iPos,0); };
  275. void ResetMainPos() { x_SetPos(m_iPosParent,0,0); };
  276. void ResetPos() { x_SetPos(0,0,0); };
  277. MCD_STR GetTagName() const;
  278. MCD_STR GetChildTagName() const { return x_GetTagName(m_iPosChild); };
  279. MCD_STR GetData() { return x_GetData(m_iPos); };
  280. MCD_STR GetChildData() { return x_GetData(m_iPosChild); };
  281. MCD_STR GetElemContent() const { return x_GetElemContent(m_iPos); };
  282. MCD_STR GetAttrib( MCD_CSTR szAttrib ) const { return x_GetAttrib(m_iPos,szAttrib); };
  283. MCD_STR GetChildAttrib( MCD_CSTR szAttrib ) const { return x_GetAttrib(m_iPosChild,szAttrib); };
  284. bool GetNthAttrib( int n, MCD_STR& strAttrib, MCD_STR& strValue ) const;
  285. MCD_STR GetAttribName( int n ) const;
  286. int FindNode( int nType=0 );
  287. int GetNodeType() { return m_nNodeType; };
  288. bool SavePos( MCD_CSTR szPosName=MCD_T(""), int nMap = 0 );
  289. bool RestorePos( MCD_CSTR szPosName=MCD_T(""), int nMap = 0 );
  290. bool SetMapSize( int nSize, int nMap = 0 );
  291. MCD_STR GetError() const;
  292. const MCD_STR& GetResult() const { return m_strResult; };
  293. int GetDocFlags() const { return m_nDocFlags; };
  294. void SetDocFlags( int nFlags ) { m_nDocFlags = (nFlags & ~(MDF_READFILE|MDF_WRITEFILE|MDF_APPENDFILE)); };
  295. enum MarkupDocFlags
  296. {
  297. MDF_UTF16LEFILE = 1,
  298. MDF_UTF8PREAMBLE = 4,
  299. MDF_IGNORECASE = 8,
  300. MDF_READFILE = 16,
  301. MDF_WRITEFILE = 32,
  302. MDF_APPENDFILE = 64,
  303. MDF_UTF16BEFILE = 128,
  304. MDF_TRIMWHITESPACE = 256,
  305. MDF_COLLAPSEWHITESPACE = 512
  306. };
  307. enum MarkupNodeFlags
  308. {
  309. MNF_WITHCDATA = 0x01,
  310. MNF_WITHNOLINES = 0x02,
  311. MNF_WITHXHTMLSPACE = 0x04,
  312. MNF_WITHREFS = 0x08,
  313. MNF_WITHNOEND = 0x10,
  314. MNF_ESCAPEQUOTES = 0x100,
  315. MNF_NONENDED = 0x100000,
  316. MNF_ILLDATA = 0x200000
  317. };
  318. enum MarkupNodeType
  319. {
  320. MNT_ELEMENT = 1, // 0x0001
  321. MNT_TEXT = 2, // 0x0002
  322. MNT_WHITESPACE = 4, // 0x0004
  323. MNT_TEXT_AND_WHITESPACE = 6, // 0x0006
  324. MNT_CDATA_SECTION = 8, // 0x0008
  325. MNT_PROCESSING_INSTRUCTION = 16, // 0x0010
  326. MNT_COMMENT = 32, // 0x0020
  327. MNT_DOCUMENT_TYPE = 64, // 0x0040
  328. MNT_EXCLUDE_WHITESPACE = 123, // 0x007b
  329. MNT_LONE_END_TAG = 128, // 0x0080
  330. MNT_NODE_ERROR = 32768 // 0x8000
  331. };
  332. // Create
  333. bool Save( MCD_CSTR_FILENAME szFileName );
  334. const MCD_STR& GetDoc() const { return m_strDoc; };
  335. bool AddElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags); };
  336. bool InsertElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT); };
  337. bool AddChildElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_CHILD); };
  338. bool InsertChildElem( MCD_CSTR szName, MCD_CSTR szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT|MNF_CHILD); };
  339. bool AddElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags); };
  340. bool InsertElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT); };
  341. bool AddChildElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_CHILD); };
  342. bool InsertChildElem( MCD_CSTR szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT|MNF_CHILD); };
  343. bool AddAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue ) { return x_SetAttrib(m_iPos,szAttrib,szValue); };
  344. bool AddChildAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue); };
  345. bool AddAttrib( MCD_CSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPos,szAttrib,nValue); };
  346. bool AddChildAttrib( MCD_CSTR szAttrib, int nValue ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue); };
  347. bool AddSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,0); };
  348. bool InsertSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_INSERT); };
  349. MCD_STR GetSubDoc() { return x_GetSubDoc(m_iPos); };
  350. bool AddChildSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD); };
  351. bool InsertChildSubDoc( MCD_CSTR szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD|MNF_INSERT); };
  352. MCD_STR GetChildSubDoc() { return x_GetSubDoc(m_iPosChild); };
  353. bool AddNode( int nType, MCD_CSTR szText ) { return x_AddNode(nType,szText,0); };
  354. bool InsertNode( int nType, MCD_CSTR szText ) { return x_AddNode(nType,szText,MNF_INSERT); };
  355. // Modify
  356. bool RemoveElem();
  357. bool RemoveChildElem();
  358. bool RemoveNode();
  359. bool SetAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue, int nFlags=0 ) { return x_SetAttrib(m_iPos,szAttrib,szValue,nFlags); };
  360. bool SetChildAttrib( MCD_CSTR szAttrib, MCD_CSTR szValue, int nFlags=0 ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue,nFlags); };
  361. bool SetAttrib( MCD_CSTR szAttrib, int nValue, int nFlags=0 ) { return x_SetAttrib(m_iPos,szAttrib,nValue,nFlags); };
  362. bool SetChildAttrib( MCD_CSTR szAttrib, int nValue, int nFlags=0 ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue,nFlags); };
  363. bool SetData( MCD_CSTR szData, int nFlags=0 ) { return x_SetData(m_iPos,szData,nFlags); };
  364. bool SetChildData( MCD_CSTR szData, int nFlags=0 ) { return x_SetData(m_iPosChild,szData,nFlags); };
  365. bool SetData( int nValue ) { return x_SetData(m_iPos,nValue); };
  366. bool SetChildData( int nValue ) { return x_SetData(m_iPosChild,nValue); };
  367. bool SetElemContent( MCD_CSTR szContent ) { return x_SetElemContent(szContent); };
  368. // Utility
  369. static bool ReadTextFile( MCD_CSTR_FILENAME szFileName, MCD_STR& strDoc, MCD_STR* pstrResult=NULL, int* pnDocFlags=NULL, MCD_STR* pstrEncoding=NULL );
  370. static bool WriteTextFile( MCD_CSTR_FILENAME szFileName, const MCD_STR& strDoc, MCD_STR* pstrResult=NULL, int* pnDocFlags=NULL, MCD_STR* pstrEncoding=NULL );
  371. static MCD_STR EscapeText( MCD_CSTR szText, int nFlags = 0 );
  372. static MCD_STR UnescapeText( MCD_CSTR szText, int nTextLength = -1, int nFlags = 0 );
  373. static int UTF16To8( char *pszUTF8, const unsigned short* pwszUTF16, int nUTF8Count );
  374. static int UTF8To16( unsigned short* pwszUTF16, const char* pszUTF8, int nUTF8Count );
  375. static MCD_STR UTF8ToA( MCD_CSTR pszUTF8, int* pnFailed = NULL );
  376. static MCD_STR AToUTF8( MCD_CSTR pszANSI );
  377. static void EncodeCharUTF8( int nUChar, char* pszUTF8, int& nUTF8Len );
  378. static int DecodeCharUTF8( const char*& pszUTF8, const char* pszUTF8End = NULL );
  379. static void EncodeCharUTF16( int nUChar, unsigned short* pwszUTF16, int& nUTF16Len );
  380. static int DecodeCharUTF16( const unsigned short*& pwszUTF16, const unsigned short* pszUTF16End = NULL );
  381. static bool DetectUTF8( const char* pText, int nTextLen, int* pnNonASCII = NULL, bool* bErrorAtEnd = NULL );
  382. static MCD_STR GetDeclaredEncoding( MCD_CSTR szDoc );
  383. static int GetEncodingCodePage( MCD_CSTR pszEncoding );
  384. protected:
  385. #if defined(_DEBUG)
  386. MCD_PCSZ m_pDebugCur;
  387. MCD_PCSZ m_pDebugPos;
  388. #endif // DEBUG
  389. MCD_STR m_strDoc;
  390. MCD_STR m_strResult;
  391. int m_iPosParent;
  392. int m_iPos;
  393. int m_iPosChild;
  394. int m_iPosFree;
  395. int m_iPosDeleted;
  396. int m_nNodeType;
  397. int m_nNodeOffset;
  398. int m_nNodeLength;
  399. int m_nDocFlags;
  400. FilePos* m_pFilePos;
  401. SavedPosMapArray* m_pSavedPosMaps;
  402. ElemPosTree* m_pElemPosTree;
  403. enum MarkupNodeFlagsInternal
  404. {
  405. MNF_INSERT = 0x002000,
  406. MNF_CHILD = 0x004000
  407. };
  408. #if defined(_DEBUG) // DEBUG
  409. void x_SetDebugState();
  410. #define MARKUP_SETDEBUGSTATE x_SetDebugState()
  411. #else // not DEBUG
  412. #define MARKUP_SETDEBUGSTATE
  413. #endif // not DEBUG
  414. void x_InitMarkup();
  415. void x_SetPos( int iPosParent, int iPos, int iPosChild );
  416. int x_GetFreePos();
  417. bool x_AllocElemPos( int nNewSize = 0 );
  418. int x_GetParent( int i );
  419. bool x_ParseDoc();
  420. int x_ParseElem( int iPos, TokenPos& token );
  421. int x_FindElem( int iPosParent, int iPos, PathPos& path ) const;
  422. MCD_STR x_GetPath( int iPos ) const;
  423. MCD_STR x_GetTagName( int iPos ) const;
  424. MCD_STR x_GetData( int iPos );
  425. MCD_STR x_GetAttrib( int iPos, MCD_PCSZ pAttrib ) const;
  426. static MCD_STR x_EncodeCDATASection( MCD_PCSZ szData );
  427. bool x_AddElem( MCD_PCSZ pName, MCD_PCSZ pValue, int nFlags );
  428. bool x_AddElem( MCD_PCSZ pName, int nValue, int nFlags );
  429. MCD_STR x_GetSubDoc( int iPos );
  430. bool x_AddSubDoc( MCD_PCSZ pSubDoc, int nFlags );
  431. bool x_SetAttrib( int iPos, MCD_PCSZ pAttrib, MCD_PCSZ pValue, int nFlags=0 );
  432. bool x_SetAttrib( int iPos, MCD_PCSZ pAttrib, int nValue, int nFlags=0 );
  433. bool x_AddNode( int nNodeType, MCD_PCSZ pText, int nNodeFlags );
  434. void x_RemoveNode( int iPosParent, int& iPos, int& nNodeType, int& nNodeOffset, int& nNodeLength );
  435. static bool x_CreateNode( MCD_STR& strNode, int nNodeType, MCD_PCSZ pText );
  436. int x_InsertNew( int iPosParent, int& iPosRel, NodePos& node );
  437. void x_AdjustForNode( int iPosParent, int iPos, int nShift );
  438. void x_Adjust( int iPos, int nShift, bool bAfterPos = false );
  439. void x_LinkElem( int iPosParent, int iPosBefore, int iPos );
  440. int x_UnlinkElem( int iPos );
  441. int x_UnlinkPrevElem( int iPosParent, int iPosBefore, int iPos );
  442. int x_ReleaseSubDoc( int iPos );
  443. int x_ReleasePos( int iPos );
  444. void x_CheckSavedPos();
  445. bool x_SetData( int iPos, MCD_PCSZ szData, int nFlags );
  446. bool x_SetData( int iPos, int nValue );
  447. int x_RemoveElem( int iPos );
  448. MCD_STR x_GetElemContent( int iPos ) const;
  449. bool x_SetElemContent( MCD_PCSZ szContent );
  450. void x_DocChange( int nLeft, int nReplace, const MCD_STR& strInsert );
  451. };
  452. #endif // !defined(_MARKUP_H_INCLUDED_)