/* Abiword * Copyright (C) 2001 Christian Biesinger * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #include #include #include "ut_types.h" #include "ut_string.h" #include "ut_iconv.h" #include "ut_debugmsg.h" #include "pd_Document.h" #include "ie_imp_StarOffice.h" #include "ie_imp_StarOffice_encodings.h" #ifdef DEBUG #include #endif #define CHECK_OLE_RV(err) do { if ((err) != MS_OLE_ERR_OK) return UT_IE_BOGUSDOCUMENT; } while (0) #define RETURN_IF_FALSE(err) do { if (!(err)) return UT_IE_BOGUSDOCUMENT; } while (0) // ******************************************************************************** // Sniffer static const UT_Byte sdwSignature[] = {0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}; UT_Confidence_t IE_Imp_StarOffice_Sniffer::recognizeContents(const char* szBuf, UT_uint32 iNumBytes) { if (iNumBytes >= sizeof(sdwSignature)) { return (memcmp(szBuf, sdwSignature, sizeof(sdwSignature)) == 0) ? UT_CONFIDENCE_GOOD : UT_CONFIDENCE_ZILCH; } return UT_CONFIDENCE_ZILCH; } UT_Confidence_t IE_Imp_StarOffice_Sniffer::recognizeSuffix(const char* szSuffix) { return (UT_stricmp(szSuffix, ".sdw") == 0) ? UT_CONFIDENCE_PERFECT : UT_CONFIDENCE_ZILCH; } UT_Error IE_Imp_StarOffice_Sniffer::constructImporter(PD_Document *pDocument, IE_Imp **ppie) { *ppie = new IE_Imp_StarOffice(pDocument); if (!ppie) return UT_OUTOFMEM; return UT_OK; } bool IE_Imp_StarOffice_Sniffer::getDlgLabels(const char** pszDesc, const char** pszSuffixList, IEFileType* ft) { *pszDesc = "StarWriter up to 5.x (*.sdw)"; *pszSuffixList = "*.sdw"; *ft = getFileType(); return true; } // ******************************************************************************** // Header Class UT_Error DocHdr::load(MsOleStream* stream) { UT_DEBUGMSG(("SDW: entering DocHdr::load\n")); static const char sw3hdr[] = "SW3HDR"; static const char sw4hdr[] = "SW4HDR"; static const char sw5hdr[] = "SW5HDR"; char header[7]; bool err = streamRead(stream, header, 7); RETURN_IF_FALSE(err); if (memcmp(header, sw3hdr, sizeof(sw3hdr)) != 0 && memcmp(header, sw4hdr, sizeof(sw4hdr)) != 0 && memcmp(header, sw5hdr, sizeof(sw5hdr)) != 0) return UT_IE_BOGUSDOCUMENT; err = streamRead(stream, cLen); RETURN_IF_FALSE(err); err = streamRead(stream, nVersion); RETURN_IF_FALSE(err); err = streamRead(stream, nFileFlags); RETURN_IF_FALSE(err); err = streamRead(stream, nDocFlags); RETURN_IF_FALSE(err); err = streamRead(stream, nRecSzPos); RETURN_IF_FALSE(err); err = streamRead(stream, nDummy); RETURN_IF_FALSE(err); err = streamRead(stream, nDummy16); RETURN_IF_FALSE(err); err = streamRead(stream, cRedlineMode); RETURN_IF_FALSE(err); err = streamRead(stream, nCompatVer); RETURN_IF_FALSE(err); UT_DEBUGMSG(("SDW: clen %i nversion %i fileflags %i docflags %i recszpos %i readlinemode %i compatver %i\n", cLen, nVersion, nFileFlags, nDocFlags, nRecSzPos, cRedlineMode, nCompatVer)); // (see sw/source/core/sw3io/sw3doc.cxx line 700) if (nVersion >= SWG_MAJORVERSION && nCompatVer > 0) { // File is in a too new format return UT_IE_BOGUSDOCUMENT; } err = streamRead(stream, cPasswd, 16); RETURN_IF_FALSE(err); err = streamRead(stream, cSet); RETURN_IF_FALSE(err); err = streamRead(stream, cGui); RETURN_IF_FALSE(err); err = streamRead(stream, nDate); RETURN_IF_FALSE(err); err = streamRead(stream, nTime); RETURN_IF_FALSE(err); // Find the name of the used encoding for (unsigned int i = 0; i < sizeof(SOEncodings)/sizeof(SOEncodings[0]); i++) { if (SOEncodings[i].number == cSet) { UT_DEBUGMSG(("SDW: Found charset %s for encoding #%i\n", SOEncodings[i].name, cSet)); converter = iconv_open(UCS_2_INTERNAL, SOEncodings[i].name); if (UT_iconv_isValid(converter)) break; } } if (!UT_iconv_isValid(converter)) return UT_ERROR; if (nFileFlags & SWGF_BLOCKNAME) { char buf[64]; err = streamRead(stream, buf, 64); // XXX verify that the string is really null terminated sBlockName = (UT_UCSChar*)UT_convert_cd(buf, strlen(buf) + 1, converter, NULL, NULL); } if (nRecSzPos != 0 && nVersion >= SWG_RECSIZES) { // Read the Recsizes // XXX to be done see sw/source/core/sw3io/sw3imp.cxx#L1070 UT_ASSERT(UT_NOT_IMPLEMENTED); } // XXX Check the password, see sw/source/core/sw3io/sw3imp.cxx#L2721 and sw/source/core/sw3io/crypter.cxx#L77 if (nFileFlags & SWGF_BAD_FILE) return UT_IE_BOGUSDOCUMENT; return UT_OK; } // ******************************************************************************** // Actual Importer IE_Imp_StarOffice::IE_Imp_StarOffice(PD_Document *pDocument) : IE_Imp(pDocument), mOle(NULL), mDocStream(NULL) { } IE_Imp_StarOffice::~IE_Imp_StarOffice() { if (mDocStream) ms_ole_stream_close(&mDocStream); if (mOle) ms_ole_destroy(&mOle); } bool IE_Imp_StarOffice::readRecSize(MsOleStream* aStream, UT_uint32& aSize) { // Yes, that's correct, only 3 bytes. aSize = 0; bool rv = streamRead(aStream, (char*)&aSize, 3); #ifdef WORDS_BIGENDIAN aSize = ENDIAN32(aSize); #endif aSize -= 4; // Substract 4 for the rec type + size if (aSize == 0xFFFFFF && mDocHdr.nVersion >= SWG_LONGRECS) { // XXX need recsizes from header, see above UT_ASSERT(UT_NOT_IMPLEMENTED); } return rv; } bool IE_Imp_StarOffice::readFlagRec(MsOleStream* stream, UT_uint8& flags, UT_uint32* newPos) { if (!streamRead(stream, flags)) return false; if (newPos) *newPos = ms_ole_stream_tell(stream) + (flags & 0xF); return true; } bool IE_Imp_StarOffice::readByteString(MsOleStream* stream, char*& str) { UT_uint16 length; str = NULL; if (!streamRead(stream, length)) return false; str = new char[length + 1]; if (!streamRead(stream, str, length)) return false; str[length] = 0; return true; } bool IE_Imp_StarOffice::readByteString(MsOleStream* stream, UT_UCSChar*& str) { char* rawString; str = NULL; if (!readByteString(stream, rawString)) return false; str = (UT_UCSChar*)UT_convert_cd(rawString, strlen(rawString) + 1, mDocHdr.converter, NULL, NULL); #ifdef DEBUG if (!str) { UT_DEBUGMSG(("SDW: UT_convert_cd returned %i (%s)\n", errno, strerror(errno))); UT_DEBUGMSG(("SDW: Failed string was: \"%s\"\n", rawString)); } #endif delete[] rawString; return str ? true : false; } UT_Error IE_Imp_StarOffice::importFile(const char* szFilename) { UT_DEBUGMSG(("SDW: Starting import\n")); UT_Error rv; MsOleErr err = ms_ole_open(&mOle, szFilename); CHECK_OLE_RV(err); err = ms_ole_stream_open(&mDocStream, mOle, "/", "StarWriterDocument", 'r'); CHECK_OLE_RV(err); // Read the CompObj (no, I don't know what Comp stands for) if ((rv = readCompObj()) != UT_OK) return rv; if (!getDoc()->appendStrux(PTX_Section, NULL)) return UT_IE_NOMEMORY; if (!getDoc()->appendStrux(PTX_Block, NULL)) return UT_IE_NOMEMORY; UT_DEBUGMSG(("SDW: Attempting to load DocHdr\n")); if ((rv = mDocHdr.load(mDocStream)) != UT_OK) return rv; // do the actual reading char type; bool done = false; UT_uint32 recSize; while (readChar(mDocStream, type) && !done) { if (!readRecSize(mDocStream, recSize)) return UT_IE_BOGUSDOCUMENT; switch (type) { case SWG_CONTENTS: { UT_uint32 flagsEnd = 0; UT_uint32 nNodes; // sw/source/core/sw3io/sw3sectn.cxx#L129 if (mDocHdr.nVersion >= SWG_LAYFRAMES) { UT_uint8 flags; if (!readFlagRec(mDocStream, flags, &flagsEnd)) return UT_IE_BOGUSDOCUMENT; } if (mDocHdr.nVersion >= SWG_LONGIDX) streamRead(mDocStream, nNodes); else { if (mDocHdr.nVersion >= SWG_LAYFRAMES) { UT_uint16 sectidDummy; if (!streamRead(mDocStream, sectidDummy)) return UT_IE_BOGUSDOCUMENT; } UT_uint16 nodes16; streamRead(mDocStream, nodes16); nNodes = (UT_uint32)nodes16; } if (flagsEnd) { UT_ASSERT(flagsEnd >= ms_ole_stream_tell(mDocStream)); if (ms_ole_stream_tell(mDocStream) != flagsEnd) { UT_DEBUGMSG(("SDW: have not read all flags\n")); ms_ole_stream_lseek(mDocStream, flagsEnd, MsOleSeekSet); } } bool done2 = false; UT_uint32 size2; while (readChar(mDocStream, type) && !done2) { if (!readRecSize(mDocStream, size2)) return UT_IE_BOGUSDOCUMENT; switch (type) { case SWG_TEXTNODE: { // sw/source/core/sw3io/sw3nodes.cxx#L788 UT_DEBUGMSG(("SDW: Found Textnode!\n")); UT_uint8 flags; UT_uint32 newPos; if (!readFlagRec(mDocStream, flags, &newPos)) return UT_IE_BOGUSDOCUMENT; UT_uint16 coll; // ??? if (!streamRead(mDocStream, coll)) return UT_IE_BOGUSDOCUMENT; // XXX check flags if (ms_ole_stream_tell(mDocStream) != newPos) ms_ole_stream_lseek(mDocStream, newPos, MsOleSeekSet); // Read the actual text UT_UCSChar* str; if (!readByteString(mDocStream, str)) return UT_IE_BOGUSDOCUMENT; // TODO: Decrypt (before converting charset!) UT_uint32 len = UT_UCS_strlen(str); if (len) getDoc()->appendSpan(str, len); free(str); getDoc()->appendStrux(PTX_Block, NULL); break; } default: UT_DEBUGMSG(("SDW: SWG_CONTENT: Skipping %lu bytes for record type '%c'\n", size2, type)); if (ms_ole_stream_lseek(mDocStream, size2, MsOleSeekCur) < 0) return UT_IE_BOGUSDOCUMENT; } } break; } case SWG_EOF: done = true; break; default: UT_DEBUGMSG(("SDW: Skipping %lu bytes for record type '%c'\n", recSize, type)); // Skip the record if (ms_ole_stream_lseek(mDocStream, recSize, MsOleSeekCur) < 0) return UT_IE_BOGUSDOCUMENT; } } return UT_OK; } // ******************************************************************************** // CompObj functions UT_Error IE_Imp_StarOffice::readCompObj() { MsOleStream *compObj; MsOleErr err = ms_ole_stream_open(&compObj, mOle, "/", "\1CompObj", 'r'); CHECK_OLE_RV(err); // Get version of the file ms_ole_stream_lseek(compObj, 8, MsOleSeekSet); // see oo's sot/source/sdstor/stgole.cxx lines 144ff UT_sint32 marker; bool rv = streamRead(compObj, marker); RETURN_IF_FALSE(rv); if (marker == -1) { rv = streamRead(compObj, mCompObj.clsId); RETURN_IF_FALSE(rv); UT_sint32 length; rv = streamRead(compObj, length); RETURN_IF_FALSE(rv); mCompObj.username = new char[length + 1]; rv = streamRead(compObj, mCompObj.username, length); RETURN_IF_FALSE(rv); mCompObj.username[length] = 0; rv = streamRead(compObj, length); RETURN_IF_FALSE(rv); if (length == -1) // windows clipboard format streamRead(compObj, mCompObj.version); else if (length > 0) { mCompObj.versionString = new char[length + 1]; rv = streamRead(compObj, mCompObj.versionString, length); RETURN_IF_FALSE(rv); mCompObj.versionString[length] = 0; mCompObj.version = getVersion(mCompObj.versionString); } else if (length) return UT_IE_BOGUSDOCUMENT; } ms_ole_stream_close(&compObj); return UT_OK; } UT_uint32 IE_Imp_StarOffice::getVersion(const char* szVerString) { static const char so30ver[] = "StarWriter 3.0"; static const char so40ver[] = "StarWriter 4.0"; static const char so50ver[] = "StarWriter 5.0"; if (UT_strcmp(szVerString, so30ver) == 0) return SOFFICE_FILEFORMAT_31; else if (UT_strcmp(szVerString, so40ver) == 0) return SOFFICE_FILEFORMAT_40; else if (UT_strcmp(szVerString, so50ver) == 0) return SOFFICE_FILEFORMAT_50; else return 0; }