/* Abiword * Copyright (C) 2001 Christian Biesinger * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ #ifndef IE_IMP_STAROFFICE_H #define IE_IMP_STAROFFICE_H #include #include #include "ut_string_class.h" #include "ut_types.h" #include "ie_imp.h" class PD_Document; // The following struct definition was taken from the OpenOffice file // sot/inc/stg.hxx line 85ff, with changes because of the different // cross-platform toolkits. That file is available under the LGPL. struct ClsId { UT_sint32 n1; UT_sint16 n2, n3; UT_uint8 n4, n5, n6, n7, n8, n9, n10, n11; }; class CompObj { public: CompObj() : username(NULL), versionString(NULL), version(0) {} ~CompObj() { if (username) delete[] username; if (versionString) delete[] versionString; } #if 0 // commenting out, don't need them // no idea what this should be. Openoffice doesn't have an idea either, apparently - see // /sot/source/sdstor/stgole.cxx line 198 UT_uint16 streamVersion; // =1 UT_uint16 byteOrder; // 0xFFFE in native byteorder UT_uint32 winVer; // really windows version? UT_uint32 marker; // no idea what this is #endif ClsId clsId; char* username; char* versionString; UT_sint32 version; }; class DocHdr { public: DocHdr() : sBlockName(NULL), converter((UT_iconv_t)(-1)) {} ~DocHdr() { if (sBlockName) free(sBlockName); if (UT_iconv_isValid(converter)) UT_iconv_close(converter); } /*! Reads the document header * \param stream The OLE Stream to load from - should be the one * with the name "StarWriterDocument" */ UT_Error load(MsOleStream* stream); UT_uint8 cLen; // ??? UT_uint16 nVersion; UT_uint16 nFileFlags; UT_sint32 nDocFlags; UT_uint32 nRecSzPos; UT_sint32 nDummy; UT_uint16 nDummy16; // actually 2x dummy8 UT_uint8 cRedlineMode; // should actually be an enum, see sw/inc/redlenum.hxx#L83 UT_uint8 nCompatVer; UT_uint8 cPasswd[16]; // coded password UT_uint8 cSet; // the encoding to use UT_uint8 cGui; UT_uint32 nDate; UT_uint32 nTime; UT_UCSChar* sBlockName; // name of a text module UT_iconv_t converter; // Iconv handle for converting from the doc charset to UCS_2_INTERNAL }; // File Flags: (from sw/source/core/sw3io/sw3ids.hxx lines 65ff) #define SWGF_BLOCKNAME 0x0002 // Header has textmodule #define SWGF_HAS_PGNUMS 0x0100 // Stream has pagenumbers #define SWGF_BAD_FILE 0x8000 // There was an error writing the file // Document Flags: (from sw/source/core/sw3io/sw3doc.cxx 733ff) #define SWDF_BROWSEMODE1 0x1 // show document in browse mode? #define SWDF_BROWSEMODE2 0x2 // same as above, one of them need to be set #define SWDF_HTMLMODE 0x4 // document is in HTML Mode #define SWDF_HEADINBROWSE 0x8 // Show headers in Browse Mode #define SWDF_FOOTINBROWSE 0x10 // Show footers in browse mode #define SWDF_GLOBALDOC 0x20 // Is a global document (a global document can contain chapter documents... I think) #define SWDF_GLOBALDOCSAVELINK 0x40 // Include sections that are linked to the global document when saving #define SWDF_LABELDOC 0x80 // is a label ("etiketten") document // File versions (sw/source/core/sw3io/sw3ids.hxx 77ff) #define SWG_LAYFRAMES 0x5 // Layout Frames #define SWG_RECSIZES '%' // Record Sizes #define SWG_LONGIDX 0x201 #define SWG_LONGRECS 0x209 // Record-Length > 8/16MB #define SWG_MAJORVERSION SWG_LONGIDX // Document Sections #define SWG_CONTENTS 'N' // Textpart #define SWG_TEXTNODE 'T' #define SWG_EOF 'Z' // File format constants, from OpenOffice's tools/inc/solar.h line 471ff #define SOFFICE_FILEFORMAT_31 3450 #define SOFFICE_FILEFORMAT_40 3580 #define SOFFICE_FILEFORMAT_50 5050 #define SOFFICE_FILEFORMAT_60 6200 // Staroffice document sniffer. class ABI_EXPORT IE_Imp_StarOffice_Sniffer : public IE_ImpSniffer { public: IE_Imp_StarOffice_Sniffer() {} virtual ~IE_Imp_StarOffice_Sniffer() {} virtual UT_Confidence_t recognizeContents(const char* szBuf, UT_uint32 iNumBytes); virtual UT_Confidence_t recognizeSuffix(const char* szSuffix); virtual bool getDlgLabels(const char** szDesc, const char** szSuffixList, IEFileType *ft); virtual UT_Error constructImporter(PD_Document* pDocument, IE_Imp **ppie); }; // Actual Importer class ABI_EXPORT IE_Imp_StarOffice : public IE_Imp { public: IE_Imp_StarOffice(PD_Document *pDocument); ~IE_Imp_StarOffice(); virtual UT_Error importFile(const char* szFilename); protected: /*! Reads the \1CompObj Stream */ UT_Error readCompObj(); FILE* mFile; MsOle* mOle; MsOleStream *mDocStream; DocHdr mDocHdr; CompObj mCompObj; /*! Reads one character from the given MsOleStream. * \param aStream The OLE Stream * \param aChar Reference to the character * \return true on success, false on failure */ inline static bool readChar(MsOleStream* aStream, char& aChar) { return (bool)ms_ole_stream_read_copy(aStream, (guint8*)&aChar, 1); } /*! Reads the record size from the stream. That is, usually * three bytes starting from the current position. * \param aStream the stream to read from * \param aSize Reference to the size of the record * \return true on success, false on failure */ bool readRecSize(MsOleStream* stream, UT_uint32& aSize); /*! Reads the header of a flag record from the stream * \param flags Flags (also contain the length in the 4 least significant bytes) * \param newPos (optional) Pointer to a variable where the position after the * flags record is stored. * \return true on success, false on failure */ static bool readFlagRec(MsOleStream* stream, UT_uint8& flags, UT_uint32* newPos = NULL); /*! Reads a string from the file where the first sint32 contains the length. If it * is zero-terminated, length must include the byte for termination. The string will * be converted to the charset given in mDocHdr. * \param stream The stream to read from * \param str Reference to pointer to UT_UCSChar, where the string is stored. * Must be free'd. Is NULL if the function fails. * \return true on success, false on failure */ bool readByteString(MsOleStream* stream, UT_UCSChar*& str); /*! As above, but no charset conversion is done. str must be delete[]'d, not free'd! */ bool readByteString(MsOleStream* stream, char*& str); /*! Finds the version number, given a version string. * \return One of SOFFICE_FILEFORMAT_31, SOFFICE_FILEFORMAT_40 and * SOFFICE_FILEFORMAT_50 */ static UT_uint32 getVersion(const char* szVerString); }; /* Helper functions; all return true on success and false on error */ inline bool streamRead(MsOleStream* aStream, UT_uint8& aDest) { return (bool)ms_ole_stream_read_copy(aStream, (guint8*)&aDest, 1); } inline bool streamRead(MsOleStream* aStream, UT_sint8& aDest) { return streamRead(aStream, (UT_uint8)aDest); } #define ENDIAN16(x) ((((x) & 0xFF) << 8) | ((x) >> 8)) #define ENDIAN32(x) ((((x) & 0xFF) << 24) | \ ((((x) >> 8) & 0xFF) << 16) | \ ((((x) >> 16) & 0xFF) << 8) | \ ((x) >> 24)) inline bool streamRead(MsOleStream* aStream, UT_uint16& aDest, bool isLittleEndian = true) { #ifdef WORDS_BIGENDIAN const bool swap = isLittleEndian; #else const bool swap = !isLittleEndian; #endif bool rv = (bool)ms_ole_stream_read_copy(aStream, (guint8*)&aDest, 2); if (swap) aDest = ENDIAN16(aDest); return rv; } inline bool streamRead(MsOleStream* aStream, UT_sint16& aDest, bool isLittleEndian = true) { return streamRead(aStream, (UT_uint16)aDest, isLittleEndian); } inline bool streamRead(MsOleStream* aStream, UT_uint32& aDest, bool isLittleEndian = true) { #ifdef WORDS_BIGENDIAN const bool swap = isLittleEndian; #else const bool swap = !isLittleEndian; #endif bool rv = (bool)ms_ole_stream_read_copy(aStream, (guint8*)&aDest, 4); if (swap) aDest = ENDIAN32(aDest); return rv; } inline bool streamRead(MsOleStream* aStream, UT_sint32& aDest, bool isLittleEndian = true) { return streamRead(aStream, (UT_uint32)aDest, isLittleEndian); } // Class ID inline bool streamRead(MsOleStream* aStream, ClsId& aClsId) { bool rv = streamRead(aStream, aClsId.n1); if (!rv) return false; rv = streamRead(aStream, aClsId.n2); if (!rv) return false; rv = streamRead(aStream, aClsId.n3); if (!rv) return false; rv = streamRead(aStream, aClsId.n4); if (!rv) return false; rv = streamRead(aStream, aClsId.n5); if (!rv) return false; rv = streamRead(aStream, aClsId.n6); if (!rv) return false; rv = streamRead(aStream, aClsId.n7); if (!rv) return false; rv = streamRead(aStream, aClsId.n8); if (!rv) return false; rv = streamRead(aStream, aClsId.n9); if (!rv) return false; rv = streamRead(aStream, aClsId.n10); if (!rv) return false; rv = streamRead(aStream, aClsId.n11); if (!rv) return false; return true; } // for completeness... inline bool streamRead(MsOleStream* aStream, char* aBuffer, UT_uint32 length) { return (bool)ms_ole_stream_read_copy(aStream, (guint8*)aBuffer, length); } inline bool streamRead(MsOleStream* aStream, UT_uint8* aBuffer, UT_uint32 length) { return (bool)ms_ole_stream_read_copy(aStream, (guint8*)aBuffer, length); } #endif /* IE_IMP_STAROFFICE_H */