• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ie_imp_StarOffice.h

Go to the documentation of this file.
00001 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
00002 
00003 /* Abiword
00004  * Copyright (C) 2001 Christian Biesinger <cbiesinger@web.de>
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License
00008  * as published by the Free Software Foundation; either version 2
00009  * of the License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00019  * 02110-1301 USA.
00020  */
00021 
00022 #ifndef IE_IMP_STAROFFICE_H
00023 #define IE_IMP_STAROFFICE_H
00024 
00025 #include <stdio.h>
00026 #include <map>
00027 #include <string>
00028 
00029 #include "ut_types.h"
00030 #include "ut_iconv.h"
00031 #include "ie_imp.h"
00032 
00033 #include "sdw_cryptor.h"
00034 
00035 class PD_Document;
00036 
00037 // The following struct definition was taken from the OpenOffice file
00038 // sot/inc/stg.hxx line 85ff, with changes because of the different
00039 // cross-platform toolkits. That file is available under the LGPL.
00040 struct ClsId {
00041     UT_sint32 n1;
00042     UT_sint16 n2, n3;
00043     UT_uint8 n4, n5, n6, n7, n8, n9, n10, n11;
00044 };
00045 
00050 void readByteString(GsfInput* stream, char*& str, UT_uint16* aLength = NULL)
00051     noexcept(false);
00052 
00059 void readByteString(GsfInput* stream, UT_UCS4Char*& str, UT_iconv_t converter, SDWCryptor* cryptor = NULL) noexcept(false);
00060 
00061 class DocHdr {
00062     public:
00063         DocHdr() : sBlockName(NULL), converter(reinterpret_cast<UT_iconv_t>(-1)) {}
00064         ~DocHdr() { if (sBlockName) free(sBlockName); if (UT_iconv_isValid(converter)) UT_iconv_close(converter); }
00069         void load(GsfInput* stream) noexcept(false);
00070 
00071         UT_uint8 cLen; // ???
00072         UT_uint16 nVersion;
00073         UT_uint16 nFileFlags;
00074         UT_sint32 nDocFlags;
00075         UT_uint32 nRecSzPos;
00076         UT_sint32 nDummy;
00077         UT_uint16 nDummy16; // actually 2x dummy8
00078         UT_uint8 cRedlineMode; // should actually be an enum, see sw/inc/redlenum.hxx#L83
00079         UT_uint8 nCompatVer;
00080 
00081         UT_uint8 cPasswd[16]; // password verification data
00082 
00083         UT_uint8 cSet; // the encoding to use
00084         UT_uint8 cGui;
00085 
00086         UT_uint32 nDate;
00087         UT_uint32 nTime;
00088 
00089         UT_UCS4Char* sBlockName; // name of a text module
00090 
00091         UT_iconv_t converter; // Iconv handle for converting from the doc charset to UCS_2_INTERNAL
00092 
00093         SDWCryptor* cryptor; // used for decrypting the document or null if not encrypted
00094 };
00095 
00096 // A text attribute record
00097 struct TextAttr {
00098     TextAttr() : data(NULL), isOff(false), isPara(false) {}
00099     ~TextAttr() { if (data) delete[] data; }
00100     bool startSet, endSet; // true if the start/end attribute is valid
00101     UT_uint16 which;
00102     UT_uint16 ver;
00103     UT_uint16 start;
00104     UT_uint16 end;
00105 
00106     UT_uint8* data; // possible additional data. NULL if no data existant.
00107     gsf_off_t dataLen;
00108 
00109     std::string attrName;
00110     std::string attrVal;
00111     bool isOff; // if true, attrVal is undefined
00112     bool isPara; // should be applied to paragraph, not span
00113 };
00114 
// File flags (from sw/source/core/sw3io/sw3ids.hxx, lines 65ff)
#define SWGF_BLOCKNAME          0x0002  // header has a text module
#define SWGF_HAS_PASSWD         0x0008  // stream is password protected
#define SWGF_HAS_PGNUMS         0x0100  // stream has page numbers
#define SWGF_BAD_FILE           0x8000  // there was an error writing the file

// Document flags (from sw/source/core/sw3io/sw3doc.cxx, lines 733ff)
#define SWDF_BROWSEMODE1        0x1     // show document in browse mode?
#define SWDF_BROWSEMODE2        0x2     // same as above; one of the two needs to be set
#define SWDF_HTMLMODE           0x4     // document is in HTML mode
#define SWDF_HEADINBROWSE       0x8     // show headers in browse mode
#define SWDF_FOOTINBROWSE       0x10    // show footers in browse mode
#define SWDF_GLOBALDOC          0x20    // is a global document (a global document can contain chapter documents... I think)
#define SWDF_GLOBALDOCSAVELINK  0x40    // include sections that are linked to the global document when saving
#define SWDF_LABELDOC           0x80    // is a label ("etiketten") document

// File versions (from sw/source/core/sw3io/sw3ids.hxx, lines 77ff)
#define SWG_POOLIDS             0x3     // IDs for string pool strings
#define SWG_LAYFRAMES           0x5     // layout frames
#define SWG_RECSIZES            '%'     // record sizes
#define SWG_LONGIDX             0x201
#define SWG_LONGRECS            0x209   // record length > 8/16MB
#define SWG_MAJORVERSION        SWG_LONGIDX


// Document sections
#define SWG_ATTRIBUTE           'A'     // attribute of a text node
#define SWG_COMMENT             'C'     // comments
#define SWG_JOBSETUP            'J'     // printer job setup
#define SWG_CONTENTS            'N'     // text part
#define SWG_ATTRSET             'S'     // set of attributes
#define SWG_STRINGPOOL          '!'
#define SWG_TEXTNODE            'T'
#define SWG_EOF                 'Z'

// File format constants (from OpenOffice's tools/inc/solar.h, lines 471ff)
#define SOFFICE_FILEFORMAT_31   3450
#define SOFFICE_FILEFORMAT_40   3580
#define SOFFICE_FILEFORMAT_50   5050
#define SOFFICE_FILEFORMAT_60   6200

// Print job constants
#define JOBSET_FILE364_SYSTEM   0xFFFF
#define JOBSET_FILE605_SYSTEM   0xFFFE

#define IDX_NOCONV_FF           0xFFFC
00161 
00162 // Staroffice document sniffer.
00163 class IE_Imp_StarOffice_Sniffer : public IE_ImpSniffer
00164 {
00165     public:
00166         IE_Imp_StarOffice_Sniffer();
00167         virtual ~IE_Imp_StarOffice_Sniffer() {}
00168 
00169         virtual const IE_SuffixConfidence * getSuffixConfidence ();
00170         virtual const IE_MimeConfidence * getMimeConfidence ();
00171         virtual UT_Confidence_t recognizeContents(GsfInput * input);
00172         virtual bool getDlgLabels(const char** szDesc, const char** szSuffixList, IEFileType *ft);
00173         virtual UT_Error constructImporter(PD_Document* pDocument, IE_Imp **ppie);
00174 };
00175 
00176 // Actual Importer
00177 class IE_Imp_StarOffice : public IE_Imp
00178 {
00179     public:
00180         IE_Imp_StarOffice(PD_Document *pDocument);
00181         ~IE_Imp_StarOffice();
00182 
00183     protected:
00184         virtual UT_Error _loadFile(GsfInput * input);
00185 
00186     private:
00187         FILE* mFile;
00188         GsfInfile* mOle;
00189         GsfInput *mDocStream;
00190         DocHdr mDocHdr;
00191 
00193     typedef std::map<UT_uint16, std::basic_string<UT_UCS4Char> > stringpool_map;
00194     stringpool_map mStringPool;
00195 
00201         void readRecSize(GsfInput* stream, UT_uint32& aSize, gsf_off_t* aEOR = NULL) noexcept(false);
00209         void readByteString(GsfInput* stream, UT_UCS4Char*& str) noexcept(false) {
00210 			::readByteString(stream, str, mDocHdr.converter, mDocHdr.cryptor);
00211         }
00212 
00216         static UT_uint32 getVersion(const char* szVerString);
00217 };
00218 
00219 /* Helper functions; all throw an UT_IE_BOGUSDOCUMENT on error */
00224 void readFlagRec(GsfInput* stream, UT_uint8& flags, gsf_off_t* newPos = NULL) noexcept(false);
00225 
00230 inline void readChar(GsfInput* aStream, char& aChar) noexcept(false) {
00231     if (!gsf_input_read(aStream, 1, reinterpret_cast<guint8*>(&aChar)))
00232         throw UT_IE_BOGUSDOCUMENT;
00233 }
00234 
00235 inline void streamRead(GsfInput* aStream, UT_uint8& aDest) noexcept(false) {
00236     if (!gsf_input_read(aStream, 1, static_cast<guint8*>(&aDest)))
00237         throw UT_IE_BOGUSDOCUMENT;
00238 }
00239 
00240 inline void streamRead(GsfInput* aStream, UT_sint8& aDest) noexcept(false) {
00241     streamRead(aStream, reinterpret_cast<UT_uint8 &>(aDest));
00242 }
00243 
00244 inline void streamRead(GsfInput* aStream, char& aDest) noexcept(false) {
00245     streamRead(aStream, reinterpret_cast<UT_uint8 &>(aDest));
00246 }
00247 
00248 
00249 inline void streamRead(GsfInput* aStream, UT_uint16& aDest, bool isLittleEndian = true) noexcept(false) {
00250     guint8 buf [2];
00251     if (!gsf_input_read(aStream, 2, buf))
00252         throw UT_IE_BOGUSDOCUMENT;
00253     if (isLittleEndian) {
00254         aDest = buf [0] | (buf [1] << 8);
00255     }
00256     else {
00257         aDest = buf [1] | (buf [0] << 8);
00258     }
00259 }
00260 
00261 inline void streamRead(GsfInput* aStream, UT_sint16& aDest, bool isLittleEndian = true) noexcept(false) {
00262     streamRead(aStream, reinterpret_cast<UT_uint16 &>(aDest), isLittleEndian);
00263 }
00264 
00265 inline void streamRead(GsfInput* aStream, UT_uint32& aDest, bool isLittleEndian = true) noexcept(false) {
00266     guint8 buf [4];
00267     if (!gsf_input_read(aStream, 4, buf))
00268         throw UT_IE_BOGUSDOCUMENT;
00269     if (isLittleEndian) {
00270         aDest = buf [0] | (buf [1] << 8) | (buf [2] << 16) | (buf [3] << 24);
00271     }
00272     else {
00273         aDest = buf [3] | (buf [2] << 8) | (buf [1] << 16) | (buf [0] << 24);
00274     }
00275 }
00276 
00277 inline void streamRead(GsfInput* aStream, UT_sint32& aDest, bool isLittleEndian = true) noexcept(false) {
00278     streamRead(aStream, reinterpret_cast<UT_uint32 &>(aDest), isLittleEndian);
00279 }
00280 
00281 // reads the value as uint8
00282 inline void streamRead(GsfInput* aStream, bool& aDest) noexcept(false) {
00283     streamRead(aStream, reinterpret_cast<UT_uint8&>(aDest));
00284 }
00285 
00286 // Class ID
00287 inline void streamRead(GsfInput* aStream, ClsId& aClsId) noexcept(false) {
00288     streamRead(aStream, aClsId.n1);
00289     streamRead(aStream, aClsId.n2);
00290     streamRead(aStream, aClsId.n3);
00291     streamRead(aStream, aClsId.n4);
00292     streamRead(aStream, aClsId.n5);
00293     streamRead(aStream, aClsId.n6);
00294     streamRead(aStream, aClsId.n7);
00295     streamRead(aStream, aClsId.n8);
00296     streamRead(aStream, aClsId.n9);
00297     streamRead(aStream, aClsId.n10);
00298     streamRead(aStream, aClsId.n11);
00299 }
00300 #include "ut_debugmsg.h"
00301 
00302 // for completeness...
00303 inline void streamRead(GsfInput* aStream, char* aBuffer, UT_uint32 length) noexcept(false) {
00304     if (!gsf_input_read(aStream, length, reinterpret_cast<guint8 *>(aBuffer)))
00305         throw UT_IE_BOGUSDOCUMENT;
00306 }
00307 
00308 inline void streamRead(GsfInput* aStream, UT_uint8* aBuffer, UT_uint32 length) noexcept(false) {
00309     if (!gsf_input_read(aStream, length, static_cast<guint8*>(aBuffer)))
00310         throw UT_IE_BOGUSDOCUMENT;
00311 }
00312 
00313 // readRecSize must have been called already. readFlagRec must not.
00314 // aEoa = position of the end of the attr.
00315 void streamRead(GsfInput* aStream, TextAttr& aAttr, gsf_off_t aEoa) noexcept(false);
00316 
00317 #endif /* IE_IMP_STAROFFICE_H */

Generated on Sun Feb 14 2021 for AbiWord by  doxygen 1.7.1