// Markup.cpp: implementation of the CMarkup class. // // Markup Release 6.3 // Copyright (C) 1999-2002 First Objective Software, Inc. All rights reserved // Go to www.firstobject.com for the latest CMarkup and EDOM documentation // Use in commercial applications requires written permission // This software is provided "as is", with no warranty. #include "stdafx.h" #include "afxconv.h" #include "Markup.h" #ifdef _DEBUG #undef THIS_FILE static char THIS_FILE[]=__FILE__; #define new DEBUG_NEW #endif #ifdef _MBCS #pragma message( "Note: MBCS build (not UTF-8)" ) // For UTF-8, remove _MBCS from project settings C/C++ preprocessor definitions #endif // Defines for Windows CE #ifndef _tclen #define _tclen(p) 1 #define _tccpy(p1,p2) *(p1)=*(p2) #endif void CMarkup::operator=( const CMarkup& markup ) { m_iPosParent = markup.m_iPosParent; m_iPos = markup.m_iPos; m_iPosChild = markup.m_iPosChild; m_iPosFree = markup.m_iPosFree; m_nNodeType = markup.m_nNodeType; m_aPos.RemoveAll(); m_aPos.Append( markup.m_aPos ); m_csDoc = markup.m_csDoc; MARKUP_SETDEBUGSTATE; } bool CMarkup::SetDoc( LPCTSTR szDoc ) { // Reset indexes m_iPosFree = 1; ResetPos(); m_mapSavedPos.RemoveAll(); // Set document text if ( szDoc ) m_csDoc = szDoc; else m_csDoc.Empty(); // Starting size of position array: 1 element per 64 bytes of document // Tight fit when parsing small doc, only 0 to 2 reallocs when parsing large doc // Start at 8 when creating new document int nStartSize = m_csDoc.GetLength() / 64 + 8; if ( m_aPos.GetSize() < nStartSize ) m_aPos.SetSize( nStartSize ); // Parse document bool bWellFormed = false; if ( m_csDoc.GetLength() ) { m_aPos[0].Clear(); int iPos = x_ParseElem( 0 ); if ( iPos > 0 ) { m_aPos[0].iElemChild = iPos; bWellFormed = true; } } // Clear indexes if parse failed or empty document if ( ! bWellFormed ) { m_aPos[0].Clear(); m_iPosFree = 1; } ResetPos(); return bWellFormed; }; bool CMarkup::IsWellFormed() { if ( m_aPos.GetSize() && m_aPos[0].iElemChild ) return true; return false; } bool CMarkup::Load( LPCTSTR szFileName ) { CString csDoc; CFile file; if ( ! file.Open(szFileName,CFile::modeRead) ) return false; int nLength = file.GetLength(); #if defined(_UNICODE) // Allocate Buffer for UTF-8 file data unsigned char* pBuffer = new unsigned char[nLength + 1]; nLength = file.Read( pBuffer, nLength ); pBuffer[nLength] = '\0'; // Convert file from UTF-8 to Windows UNICODE (AKA UCS-2) int nWideLength = MultiByteToWideChar(CP_UTF8,0,(const char*)pBuffer,nLength,NULL,0); nLength = MultiByteToWideChar(CP_UTF8,0,(const char*)pBuffer,nLength, csDoc.GetBuffer(nWideLength),nWideLength); ASSERT( nLength == nWideLength ); delete [] pBuffer; #else nLength = file.Read( csDoc.GetBuffer(nLength), nLength ); #endif csDoc.ReleaseBuffer(nLength); file.Close(); return SetDoc( csDoc ); } bool CMarkup::Save( LPCTSTR szFileName ) { int nLength = m_csDoc.GetLength(); CFile file; if ( ! file.Open(szFileName,CFile::modeWrite|CFile::modeCreate) ) return false; #if defined( _UNICODE ) int nUTF8Len = WideCharToMultiByte(CP_UTF8,0,m_csDoc,nLength,NULL,0,NULL,NULL); char* pBuffer = new char[nUTF8Len+1]; nLength = WideCharToMultiByte(CP_UTF8,0,m_csDoc,nLength,pBuffer,nUTF8Len+1,NULL,NULL); file.Write( pBuffer, nLength ); delete pBuffer; #else file.Write( (LPCTSTR)m_csDoc, nLength ); #endif file.Close(); return true; } bool CMarkup::FindElem( LPCTSTR szName ) { // Change current position only if found // if ( m_aPos.GetSize() ) { int iPos = x_FindElem( m_iPosParent, m_iPos, szName ); if ( iPos ) { // Assign new position x_SetPos( m_aPos[iPos].iElemParent, iPos, 0 ); return true; } } return false; } bool CMarkup::FindChildElem( LPCTSTR szName ) { // Change current child position only if found // // Shorthand: call this with no current main position // means find child under root element if ( ! m_iPos ) FindElem(); int iPosChild = x_FindElem( m_iPos, m_iPosChild, szName ); if ( iPosChild ) { // Assign new position int iPos = m_aPos[iPosChild].iElemParent; x_SetPos( m_aPos[iPos].iElemParent, iPos, iPosChild ); return true; } return false; } CString CMarkup::GetTagName() const { // Return the tag name at the current main position CString csTagName; if ( m_iPos ) csTagName = x_GetTagName( m_iPos ); return csTagName; } bool CMarkup::IntoElem() { // If there is no child position and IntoElem is called it will succeed in release 6.3 // (A subsequent call to FindElem will find the first element) // The following short-hand behavior was never part of EDOM and was misleading // It would find a child element if there was no current child element position and go into it // It is removed in release 6.3, this change is NOT backwards compatible! // if ( ! m_iPosChild ) // FindChildElem(); if ( m_iPos && m_nNodeType == MNT_ELEMENT ) { x_SetPos( m_iPos, m_iPosChild, 0 ); return true; } return false; } bool CMarkup::OutOfElem() { // Go to parent element if ( m_iPosParent ) { x_SetPos( m_aPos[m_iPosParent].iElemParent, m_iPosParent, m_iPos ); return true; } return false; } CString CMarkup::GetAttrName( int n ) const { // Return nth Attrute name of main position if ( ! m_iPos || m_nNodeType != MNT_ELEMENT ) return _T(""); TokenPos token( m_csDoc ); token.nNext = m_aPos[m_iPos].nStartL + 1; for ( int nAttr=0; nAttr<=n; ++nAttr ) if ( ! x_FindAttr(token) ) return _T(""); // Return substring of document return x_GetToken( token ); } bool CMarkup::SavePos( LPCTSTR szPosName ) { // Save current element position in saved position map if ( szPosName ) { SavedPos savedpos; savedpos.iPosParent = m_iPosParent; savedpos.iPos = m_iPos; savedpos.iPosChild = m_iPosChild; m_mapSavedPos.SetAt( szPosName, savedpos ); return true; } return false; } bool CMarkup::RestorePos( LPCTSTR szPosName ) { // Restore element position if found in saved position map SavedPos savedpos; if ( szPosName && m_mapSavedPos.Lookup( szPosName, savedpos ) ) { x_SetPos( savedpos.iPosParent, savedpos.iPos, savedpos.iPosChild ); return true; } return false; } bool CMarkup::GetOffsets( int& nStart, int& nEnd ) const { // Return document offsets of current main position element // This is not part of EDOM but is used by the Markup project if ( m_iPos ) { nStart = m_aPos[m_iPos].nStartL; nEnd = m_aPos[m_iPos].nEndR; return true; } return false; } CString CMarkup::GetChildSubDoc() const { if ( m_iPosChild ) { int nL = m_aPos[m_iPosChild].nStartL; int nR = m_aPos[m_iPosChild].nEndR + 1; TokenPos token( m_csDoc ); token.nNext = nR; if ( ! x_FindToken(token) || m_csDoc[token.nL] == _T('<') ) nR = token.nL; return m_csDoc.Mid( nL, nR - nL ); } return _T(""); } bool CMarkup::RemoveElem() { // Remove current main position element if ( m_iPos && m_nNodeType == MNT_ELEMENT ) { int iPos = x_RemoveElem( m_iPos ); x_SetPos( m_iPosParent, iPos, 0 ); return true; } return false; } bool CMarkup::RemoveChildElem() { // Remove current child position element if ( m_iPosChild ) { int iPosChild = x_RemoveElem( m_iPosChild ); x_SetPos( m_iPosParent, m_iPos, iPosChild ); return true; } return false; } ////////////////////////////////////////////////////////////////////// // Private Methods ////////////////////////////////////////////////////////////////////// int CMarkup::x_GetFreePos() { // // This returns the index of the next unused ElemPos in the array // if ( m_iPosFree == m_aPos.GetSize() ) m_aPos.SetSize( m_iPosFree + m_iPosFree / 2 ); ++m_iPosFree; return m_iPosFree - 1; } int CMarkup::x_ReleasePos() { // // This decrements the index of the next unused ElemPos in the array // allowing the element index returned by GetFreePos() to be reused // --m_iPosFree; return 0; } int CMarkup::x_ParseError( LPCTSTR szError, LPCTSTR szName ) { if ( szName ) m_csError.Format( szError, szName ); else m_csError = szError; x_ReleasePos(); return -1; } int CMarkup::x_ParseElem( int iPosParent ) { // This is either called by SetDoc, x_AddSubDoc, or itself recursively // m_aPos[iPosParent].nEndL is where to start parsing for the child element // This returns the new position if a tag is found, otherwise zero // In all cases we need to get a new ElemPos, but release it if unused // int iPos = x_GetFreePos(); m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL; m_aPos[iPos].iElemParent = iPosParent; m_aPos[iPos].iElemChild = 0; m_aPos[iPos].iElemNext = 0; // Start Tag // A loop is used to ignore all remarks tags and special tags // i.e. , and // So any tag beginning with ? or ! is ignored // Loop past ignored tags TokenPos token( m_csDoc ); token.nNext = m_aPos[iPosParent].nEndL; CString csName; while ( csName.IsEmpty() ) { // Look for left angle bracket of start tag m_aPos[iPos].nStartL = token.nNext; if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nStartL, _T('<') ) ) return x_ParseError( _T("Element tag not found") ); // Set parent's End tag to start looking from here (or later) m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL; // Determine whether this is an element, or bypass other type of node token.nNext = m_aPos[iPos].nStartL + 1; if ( x_FindToken( token ) ) { if ( token.bIsString ) return x_ParseError( _T("Tag starts with quote") ); _TCHAR cFirstChar = m_csDoc[token.nL]; if ( cFirstChar == _T('?') || cFirstChar == _T('!') ) { token.nNext = m_aPos[iPos].nStartL; if ( ! x_ParseNode(token) ) return x_ParseError( _T("Invalid node") ); } else if ( cFirstChar != _T('/') ) { csName = x_GetToken( token ); // Look for end of tag if ( ! x_FindChar(token.szDoc, token.nNext, _T('>')) ) return x_ParseError( _T("End of tag not found") ); } else return x_ReleasePos(); // probably end tag of parent } else return x_ParseError( _T("Abrupt end within tag") ); } m_aPos[iPos].nStartR = token.nNext; // Is ending mark within start tag, i.e. empty element? if ( m_csDoc[m_aPos[iPos].nStartR-1] == _T('/') ) { // Empty element // Close tag left is set to ending mark, and right to open tag right m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1; m_aPos[iPos].nEndR = m_aPos[iPos].nStartR; } else // look for end tag { // Element probably has contents // Determine where to start looking for left angle bracket of end tag // This is done by recursively parsing the contents of this element int iInner, iInnerPrev = 0; m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1; while ( (iInner = x_ParseElem( iPos )) > 0 ) { // Set links to iInner if ( iInnerPrev ) m_aPos[iInnerPrev].iElemNext = iInner; else m_aPos[iPos].iElemChild = iInner; iInnerPrev = iInner; // Set offset to reflect child m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1; } if ( iInner == -1 ) return -1; // Look for left angle bracket of end tag if ( ! x_FindChar( token.szDoc, m_aPos[iPos].nEndL, _T('<') ) ) return x_ParseError( _T("End tag of %s element not found"), csName ); // Look through tokens of end tag token.nNext = m_aPos[iPos].nEndL + 1; int nTokenCount = 0; while ( x_FindToken( token ) ) { ++nTokenCount; if ( ! token.bIsString ) { // Is first token not an end slash mark? if ( nTokenCount == 1 && m_csDoc[token.nL] != _T('/') ) return x_ParseError( _T("Expecting end tag of element %s"), csName ); else if ( nTokenCount == 2 && ! token.Match(csName) ) return x_ParseError( _T("End tag does not correspond to %s"), csName ); // Else is it a right angle bracket? else if ( m_csDoc[token.nL] == _T('>') ) break; } } // Was a right angle bracket not found? if ( ! token.szDoc[token.nL] || nTokenCount < 2 ) return x_ParseError( _T("End tag not completed for element %s"), csName ); m_aPos[iPos].nEndR = token.nL; } // Successfully parsed element (and contained elements) return iPos; } bool CMarkup::x_FindChar( LPCTSTR szDoc, int& nChar, _TCHAR c ) { // static function LPCTSTR pChar = &szDoc[nChar]; while ( *pChar && *pChar != c ) pChar += _tclen( pChar ); nChar = pChar - szDoc; if ( ! *pChar ) return false; /* while ( szDoc[nChar] && szDoc[nChar] != c ) nChar += _tclen( &szDoc[nChar] ); if ( ! szDoc[nChar] ) return false; */ return true; } bool CMarkup::x_FindToken( CMarkup::TokenPos& token ) { // Starting at token.nNext, bypass whitespace and find the next token // returns true on success, members of token point to token // returns false on end of document, members point to end of document LPCTSTR szDoc = token.szDoc; int nChar = token.nNext; token.bIsString = false; // By-pass leading whitespace while ( szDoc[nChar] && _tcschr(_T(" \t\n\r"),szDoc[nChar]) ) ++nChar; if ( ! szDoc[nChar] ) { // No token was found before end of document token.nL = nChar; token.nR = nChar; token.nNext = nChar; return false; } // Is it an opening quote? _TCHAR cFirstChar = szDoc[nChar]; if ( cFirstChar == _T('\"') || cFirstChar == _T('\'') ) { token.bIsString = true; // Move past opening quote ++nChar; token.nL = nChar; // Look for closing quote x_FindChar( token.szDoc, nChar, cFirstChar ); // Set right to before closing quote token.nR = nChar - 1; // Set nChar past closing quote unless at end of document if ( szDoc[nChar] ) ++nChar; } else { // Go until special char or whitespace token.nL = nChar; while ( szDoc[nChar] && ! _tcschr(_T(" \t\n\r<>=\\/?!"),szDoc[nChar]) ) nChar += _tclen(&szDoc[nChar]); // Adjust end position if it is one special char if ( nChar == token.nL ) ++nChar; // it is a special char token.nR = nChar - 1; } // nNext points to one past last char of token token.nNext = nChar; return true; } CString CMarkup::x_GetToken( const CMarkup::TokenPos& token ) const { // The token contains indexes into the document identifying a small substring // Build the substring from those indexes and return it if ( token.nL > token.nR ) return _T(""); return m_csDoc.Mid( token.nL, token.nR - token.nL + ((token.nR comment // dtd // processing instruction // cdata section // element // if ( ! szDoc[token.nL+1] || ! szDoc[token.nL+2] ) return 0; _TCHAR cFirstChar = szDoc[token.nL+1]; LPCTSTR szEndOfNode = NULL; if ( cFirstChar == _T('?') ) { nTypeFound = MNT_PROCESSING_INSTRUCTION; // processing instruction szEndOfNode = _T("?>"); } else if ( cFirstChar == _T('!') ) { _TCHAR cSecondChar = szDoc[token.nL+2]; if ( cSecondChar == _T('[') ) { nTypeFound = MNT_CDATA_SECTION; szEndOfNode = _T("]]>"); } else if ( cSecondChar == _T('-') ) { nTypeFound = MNT_COMMENT; szEndOfNode = _T("-->"); } else { // Document type requires tokenizing because of strings and brackets nTypeFound = 0; int nBrackets = 0; while ( x_FindToken(token) ) { if ( ! token.bIsString ) { _TCHAR cChar = szDoc[token.nL]; if ( cChar == _T('[') ) ++nBrackets; else if ( cChar == _T(']') ) --nBrackets; else if ( nBrackets == 0 && cChar == _T('>') ) { nTypeFound = MNT_DOCUMENT_TYPE; break; } } } if ( ! nTypeFound ) return 0; } } else if ( cFirstChar == _T('/') ) { // End tag means no node found within parent element return 0; } else { nTypeFound = MNT_ELEMENT; } // Search for end of node if not found yet if ( szEndOfNode ) { LPCTSTR pEnd = _tcsstr( &szDoc[token.nNext], szEndOfNode ); if ( ! pEnd ) return 0; // not well-formed token.nNext = (pEnd - szDoc) + _tcslen(szEndOfNode); } } else if ( szDoc[token.nL] ) { // It is text or whitespace because it did not start with < nTypeFound = MNT_WHITESPACE; if ( x_FindToken(token) ) { if ( szDoc[token.nL] == _T('<') ) token.nNext = token.nL; else { nTypeFound = MNT_TEXT; x_FindChar( token.szDoc, token.nNext, _T('<') ); } } } return nTypeFound; } CString CMarkup::x_GetTagName( int iPos ) const { // Return the tag name at specified element TokenPos token( m_csDoc ); token.nNext = m_aPos[iPos].nStartL + 1; if ( ! iPos || ! x_FindToken( token ) ) return _T(""); // Return substring of document return x_GetToken( token ); } bool CMarkup::x_FindAttr( CMarkup::TokenPos& token, LPCTSTR szAttr ) const { // If szAttr is NULL find next Attr, otherwise find named Attr // Return true if found int nAttr = 0; for ( int nCount = 0; x_FindToken(token); ++nCount ) { if ( ! token.bIsString ) { // Is it the right angle bracket? if ( m_csDoc[token.nL] == _T('>') || m_csDoc[token.nL] == _T('/') ) break; // Attr not found // Equal sign if ( m_csDoc[token.nL] == _T('=') ) continue; // Potential Attrute if ( ! nAttr && nCount ) { // Attrute name search? if ( ! szAttr || ! szAttr[0] ) return true; // return with token at Attr name // Compare szAttr if ( token.Match(szAttr) ) nAttr = nCount; } } else if ( nAttr && nCount == nAttr + 2 ) { return true; } } // Not found return false; } CString CMarkup::x_GetAttr( int iPos, LPCTSTR szAttr ) const { // Return the value of the Attr at specified element if ( ! iPos || m_nNodeType != MNT_ELEMENT ) return _T(""); TokenPos token( m_csDoc ); token.nNext = m_aPos[iPos].nStartL + 1; if ( szAttr && x_FindAttr( token, szAttr ) ) return x_TextFromDoc( token.nL, token.nR - ((token.nR")) != NULL ) return false; csNode = ""; break; } return true; } bool CMarkup::x_SetData( int iPos, LPCTSTR szData, int nCDATA ) { // Set data at specified position // if nCDATA==1, set content of element to a CDATA Section CString csInsert; // Set data in iPos element if ( ! iPos || m_aPos[iPos].iElemChild ) return false; // Build csInsert from szData based on nCDATA // If CDATA section not valid, use parsed text (PCDATA) instead if ( nCDATA != 0 ) if ( ! x_CreateNode(csInsert, MNT_CDATA_SECTION, szData) ) nCDATA = 0; if ( nCDATA == 0 ) csInsert = x_TextToDoc( szData ); // Decide where to insert int nInsertAt, nReplace; if ( m_aPos[iPos].IsEmptyElement() ) { nInsertAt = m_aPos[iPos].nEndL; nReplace = 1; // Pre-adjust since becomes data CString csTagName = x_GetTagName( iPos ); m_aPos[iPos].nStartR -= 1; m_aPos[iPos].nEndL -= (1 + csTagName.GetLength()); CString csFormat; csFormat = _T(">"); csFormat += csInsert; csFormat += _T(""), token.nNext ); if ( nEndCDATA != -1 && nEndCDATA < m_aPos[iPos].nEndL ) { return m_csDoc.Mid( token.nL+9, nEndCDATA-token.nL-9 ); } } return x_TextFromDoc( m_aPos[iPos].nStartR+1, m_aPos[iPos].nEndL-1 ); } return _T(""); } CString CMarkup::x_TextToDoc( LPCTSTR szText, bool bAttr ) const { // Convert text as seen outside XML document to XML friendly // replacing special characters with ampersand escape codes // E.g. convert "6>7" to "6>7" // // < less than // & ampersand // > greater than // // and for Attrutes: // // ' apostrophe or single quote // " double quote // static _TCHAR* szaReplace[] = { _T("<"),_T("&"),_T(">"),_T("'"),_T(""") }; const _TCHAR* pFind = bAttr?_T("<&>\'\""):_T("<&>"); CString csText; const _TCHAR* pSource = szText; int nDestSize = _tcslen(pSource); nDestSize += nDestSize / 10 + 7; _TCHAR* pDest = csText.GetBuffer(nDestSize); int nLen = 0; _TCHAR cSource = *pSource; _TCHAR* pFound; while ( cSource ) { if ( nLen > nDestSize - 6 ) { csText.ReleaseBuffer(nLen); nDestSize *= 2; pDest = csText.GetBuffer(nDestSize); } if ( (pFound=_tcschr(pFind,cSource)) != NULL ) { pFound = szaReplace[pFound-pFind]; _tcscpy(&pDest[nLen],pFound); nLen += _tcslen(pFound); } else { _tccpy( &pDest[nLen], pSource ); ++nLen; } pSource += _tclen( pSource ); cSource = *pSource; } csText.ReleaseBuffer(nLen); return csText; } CString CMarkup::x_TextFromDoc( int nLeft, int nRight ) const { // Convert XML friendly text to text as seen outside XML document // replacing ampersand escape codes with special characters // E.g. convert "6>7" to "6>7" // // Conveniently the result is always the same or shorter in length // static _TCHAR* szaCode[] = { _T("lt;"),_T("amp;"),_T("gt;"),_T("apos;"),_T("quot;") }; static int anCodeLen[] = { 3,4,3,5,5 }; static _TCHAR* szSymbol = _T("<&>\'\""); CString csText; const _TCHAR* pSource = m_csDoc; int nDestSize = nRight - nLeft + 1; _TCHAR* pDest = csText.GetBuffer(nDestSize); int nLen = 0; int nCharLen; int nChar = nLeft; while ( nChar <= nRight ) { if ( pSource[nChar] == _T('&') ) { // Look for matching &code; for ( int nMatch = 0; nMatch < 5; ++nMatch ) { if ( nChar <= nRight - anCodeLen[nMatch] && _tcsncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 ) { pDest[nLen++] = szSymbol[nMatch]; nChar += anCodeLen[nMatch] + 1; break; } } // If no match is found it means XML doc is invalid // no devastating harm done, ampersand code will just be left in result if ( nMatch == 5 ) { pDest[nLen++] = _T('&'); ++nChar; } } else { nCharLen = _tclen(&pSource[nChar]); _tccpy( &pDest[nLen], &pSource[nChar] ); nLen += nCharLen; nChar += nCharLen; } } csText.ReleaseBuffer(nLen); return csText; } void CMarkup::x_DocChange( int nLeft, int nReplace, const CString& csInsert ) { // Insert csInsert int m_csDoc at nLeft replacing nReplace chars // Do this with only one buffer reallocation if it grows // int nDocLength = m_csDoc.GetLength(); int nInsLength = csInsert.GetLength(); // Make sure nLeft and nReplace are within bounds nLeft = max( 0, min( nLeft, nDocLength ) ); nReplace = max( 0, min( nReplace, nDocLength-nLeft ) ); // Get pointer to buffer with enough room int nNewLength = nInsLength + nDocLength - nReplace; int nBufferLen = nNewLength; _TCHAR* pDoc = m_csDoc.GetBuffer( nBufferLen ); // Move part of old doc that goes after insert if ( nLeft+nReplace < nDocLength ) memmove( &pDoc[nLeft+nInsLength], &pDoc[nLeft+nReplace], (nDocLength-nLeft-nReplace)*sizeof(_TCHAR) ); // Copy insert memcpy( &pDoc[nLeft], csInsert, nInsLength*sizeof(_TCHAR) ); // Release m_csDoc.ReleaseBuffer( nNewLength ); } void CMarkup::x_Adjust( int iPos, int nShift, bool bAfterPos ) { // Loop through affected elements and adjust indexes // Algorithm: // 1. update children unless bAfterPos // (if no children or bAfterPos is true, end tag of iPos not affected) // 2. update next siblings and their children // 3. go up until there is a next sibling of a parent and update end tags // 4. step 2 int iPosTop = m_aPos[iPos].iElemParent; bool bPosFirst = bAfterPos; // mark as first to skip its children while ( iPos ) { // Were we at containing parent of affected position? bool bPosTop = false; if ( iPos == iPosTop ) { // Move iPosTop up one towards root iPosTop = m_aPos[iPos].iElemParent; bPosTop = true; } // Traverse to the next update position if ( ! bPosTop && ! bPosFirst && m_aPos[iPos].iElemChild ) { // Depth first iPos = m_aPos[iPos].iElemChild; } else if ( m_aPos[iPos].iElemNext ) { iPos = m_aPos[iPos].iElemNext; } else { // Look for next sibling of a parent of iPos // When going back up, parents have already been done except iPosTop while ( (iPos=m_aPos[iPos].iElemParent) != 0 && iPos != iPosTop ) if ( m_aPos[iPos].iElemNext ) { iPos = m_aPos[iPos].iElemNext; break; } } bPosFirst = false; // Shift indexes at iPos if ( iPos != iPosTop ) m_aPos[iPos].AdjustStart( nShift ); m_aPos[iPos].AdjustEnd( nShift ); } } void CMarkup::x_LocateNew( int iPosParent, int& iPosRel, int& nOffset, int nLength, int nFlags ) { // Determine where to insert new element or node // bool bInsert = (nFlags&1)?true:false; bool bHonorWhitespace = (nFlags&2)?true:false; int nStartL; if ( nLength ) { // Located at a non-element node if ( bInsert ) nStartL = nOffset; else nStartL = nOffset + nLength; } else if ( iPosRel ) { // Located at an element if ( bInsert ) // precede iPosRel nStartL = m_aPos[iPosRel].nStartL; else // follow iPosRel nStartL = m_aPos[iPosRel].nEndR + 1; } else if ( m_aPos[iPosParent].IsEmptyElement() ) { // Parent has no separate end tag, so split empty element nStartL = m_aPos[iPosParent].nStartR; } else { if ( bInsert ) // after start tag nStartL = m_aPos[iPosParent].nStartR + 1; else // before end tag nStartL = m_aPos[iPosParent].nEndL; } // Go up to start of next node, unless its splitting an empty element if ( ! bHonorWhitespace && ! m_aPos[iPosParent].IsEmptyElement() ) { TokenPos token( m_csDoc ); token.nNext = nStartL; if ( ! x_FindToken(token) || m_csDoc[token.nL] == _T('<') ) nStartL = token.nL; } // Determine iPosBefore int iPosBefore = 0; if ( iPosRel ) { if ( bInsert ) { // Is iPosRel past first sibling? int iPosPrev = m_aPos[iPosParent].iElemChild; if ( iPosPrev != iPosRel ) { // Find previous sibling of iPosRel while ( m_aPos[iPosPrev].iElemNext != iPosRel ) iPosPrev = m_aPos[iPosPrev].iElemNext; iPosBefore = iPosPrev; } } else { iPosBefore = iPosRel; } } else if ( m_aPos[iPosParent].iElemChild ) { if ( ! bInsert ) { // Find last element under iPosParent int iPosLast = m_aPos[iPosParent].iElemChild; int iPosNext = iPosLast; while ( iPosNext ) { iPosLast = iPosNext; iPosNext = m_aPos[iPosNext].iElemNext; } iPosBefore = iPosLast; } } nOffset = nStartL; iPosRel = iPosBefore; } bool CMarkup::x_AddElem( LPCTSTR szName, LPCTSTR szValue, bool bInsert, bool bAddChild ) { if ( bAddChild ) { // Adding a child element under main position if ( ! m_iPos ) return false; } else if ( m_iPosParent == 0 ) { // Adding root element if ( IsWellFormed() ) return false; // Locate after any version and DTD m_aPos[0].nEndL = m_csDoc.GetLength(); } // Locate where to add element relative to current node int iPosParent, iPosBefore, nOffset = 0, nLength = 0; if ( bAddChild ) { iPosParent = m_iPos; iPosBefore = m_iPosChild; } else { iPosParent = m_iPosParent; iPosBefore = m_iPos; } int nFlags = bInsert?1:0; x_LocateNew( iPosParent, iPosBefore, nOffset, nLength, nFlags ); bool bEmptyParent = m_aPos[iPosParent].IsEmptyElement(); if ( bEmptyParent ) nOffset += 2; // include CRLF // Create element and modify positions of affected elements // If no szValue is specified, an empty element is created // i.e. either value or // int iPos = x_GetFreePos(); m_aPos[iPos].nStartL = nOffset; // Set links m_aPos[iPos].iElemParent = iPosParent; m_aPos[iPos].iElemChild = 0; m_aPos[iPos].iElemNext = 0; if ( iPosBefore ) { // Link in after iPosBefore m_aPos[iPos].iElemNext = m_aPos[iPosBefore].iElemNext; m_aPos[iPosBefore].iElemNext = iPos; } else { // First child m_aPos[iPos].iElemNext = m_aPos[iPosParent].iElemChild; m_aPos[iPosParent].iElemChild = iPos; } // Create string for insert CString csInsert; int nLenName = _tcslen(szName); int nLenValue = szValue? _tcslen(szValue) : 0; if ( ! nLenValue ) { // empty element csInsert = _T("<"); csInsert += szName; csInsert += _T("/>\r\n"); m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 2; m_aPos[iPos].nEndL = m_aPos[iPos].nStartR - 1; m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + 1; } else { // value CString csValue = x_TextToDoc( szValue ); nLenValue = csValue.GetLength(); csInsert = _T("<"); csInsert += szName; csInsert += _T(">"); csInsert += csValue; csInsert += _T("\r\n"); m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 1; m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + nLenValue + 1; m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + nLenName + 2; } // Insert int nReplace = 0, nLeft = m_aPos[iPos].nStartL; if ( bEmptyParent ) { CString csParentTagName = x_GetTagName(iPosParent); CString csFormat; csFormat = _T(">\r\n"); csFormat += csInsert; csFormat += _T(" (len 4) becomes (len 11) // In x_Adjust everything will be adjusted 11 - 4 = 7 // But the nEndL of element A should only be adjusted 5 m_aPos[iPosParent].nEndL -= (csParentTagName.GetLength() + 1); } x_DocChange( nLeft, nReplace, csInsert ); x_Adjust( iPos, csInsert.GetLength() - nReplace ); if ( bAddChild ) x_SetPos( m_iPosParent, iPosParent, iPos ); else x_SetPos( iPosParent, iPos, 0 ); return true; } bool CMarkup::x_AddSubDoc( LPCTSTR szSubDoc, bool bInsert, bool bAddChild ) { // Add subdocument, parse, and modify positions of affected elements // int nOffset = 0, iPosParent, iPosBefore; if ( bAddChild ) { // Add a subdocument under main position, after current child position if ( ! m_iPos ) return false; iPosParent = m_iPos; iPosBefore = m_iPosChild; } else { iPosParent = m_iPosParent; iPosBefore = m_iPos; } int nFlags = bInsert?1:0; x_LocateNew( iPosParent, iPosBefore, nOffset, 0, nFlags ); bool bEmptyParent = m_aPos[iPosParent].IsEmptyElement(); if ( bEmptyParent ) nOffset += 2; // include CRLF // if iPosBefore is NULL, insert as first element under parent int nParentEndLBeforeAdd = m_aPos[iPosParent].nEndL; int iPosFreeBeforeAdd = m_iPosFree; // Skip version tag or DTD at start of subdocument TokenPos token( szSubDoc ); int nNodeType = x_ParseNode( token ); while ( nNodeType && nNodeType != MNT_ELEMENT ) { token.szDoc = &szSubDoc[token.nNext]; token.nNext = 0; nNodeType = x_ParseNode( token ); } CString csInsert = token.szDoc; // Insert subdocument m_aPos[iPosParent].nEndL = nOffset; int nReplace = 0, nLeft = nOffset; CString csParentTagName; if ( bEmptyParent ) { csParentTagName = x_GetTagName(iPosParent); CString csFormat; csFormat = _T(">\r\n"); csFormat += csInsert; csFormat += _T("