• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ie_imp_StarOffice.h

Go to the documentation of this file.
00001 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
00002 
00003 /* Abiword
00004  * Copyright (C) 2001 Christian Biesinger <cbiesinger@web.de>
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License
00008  * as published by the Free Software Foundation; either version 2
00009  * of the License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
00019  * 02111-1307, USA.
00020  */
00021 
00022 #ifndef IE_IMP_STAROFFICE_H
00023 #define IE_IMP_STAROFFICE_H
00024 
00025 #include <stdio.h>
00026 #include <map>
00027 #include <string>
00028 
00029 #include "ut_string_class.h"
00030 #include "ut_types.h"
00031 #include "ut_iconv.h"
00032 #include "ie_imp.h"
00033 
00034 #include "sdw_cryptor.h"
00035 
00036 class PD_Document;
00037 
00038 // The following struct definition was taken from the OpenOffice file
00039 // sot/inc/stg.hxx line 85ff, with changes because of the different
00040 // cross-platform toolkits. That file is available under the LGPL.
00041 struct ClsId {
00042     UT_sint32 n1;
00043     UT_sint16 n2, n3;
00044     UT_uint8 n4, n5, n6, n7, n8, n9, n10, n11;
00045 };
00046 
00051 void readByteString(GsfInput* stream, char*& str, UT_uint16* aLength = NULL)
00052     throw(UT_Error);
00053 
00060 void readByteString(GsfInput* stream, UT_UCS4Char*& str, UT_iconv_t converter, SDWCryptor* cryptor = NULL) throw(UT_Error);
00061 
00062 class DocHdr {
00063     public:
00064         DocHdr() : sBlockName(NULL), converter(reinterpret_cast<UT_iconv_t>(-1)) {}
00065         ~DocHdr() { if (sBlockName) free(sBlockName); if (UT_iconv_isValid(converter)) UT_iconv_close(converter); }
00070         void load(GsfInput* stream) throw(UT_Error);
00071 
00072         UT_uint8 cLen; // ???
00073         UT_uint16 nVersion;
00074         UT_uint16 nFileFlags;
00075         UT_sint32 nDocFlags;
00076         UT_uint32 nRecSzPos;
00077         UT_sint32 nDummy;
00078         UT_uint16 nDummy16; // actually 2x dummy8
00079         UT_uint8 cRedlineMode; // should actually be an enum, see sw/inc/redlenum.hxx#L83
00080         UT_uint8 nCompatVer;
00081 
00082         UT_uint8 cPasswd[16]; // password verification data
00083 
00084         UT_uint8 cSet; // the encoding to use
00085         UT_uint8 cGui;
00086 
00087         UT_uint32 nDate;
00088         UT_uint32 nTime;
00089 
00090         UT_UCS4Char* sBlockName; // name of a text module
00091 
00092         UT_iconv_t converter; // Iconv handle for converting from the doc charset to UCS_2_INTERNAL
00093 
00094         SDWCryptor* cryptor; // used for decrypting the document or null if not encrypted
00095 };
00096 
00097 // A text attribute record
00098 struct TextAttr {
00099     TextAttr() : data(NULL), isOff(false), isPara(false) {}
00100     ~TextAttr() { if (data) delete[] data; }
00101     bool startSet, endSet; // true if the start/end attribute is valid
00102     UT_uint16 which;
00103     UT_uint16 ver;
00104     UT_uint16 start;
00105     UT_uint16 end;
00106 
00107     UT_uint8* data; // possible additional data. NULL if no data existant.
00108     gsf_off_t dataLen;
00109 
00110     UT_String attrName;
00111     UT_String attrVal;
00112     bool isOff; // if true, attrVal is undefined
00113     bool isPara; // should be applied to paragraph, not span
00114 };
00115 
// File flags, taken from sw/source/core/sw3io/sw3ids.hxx (lines 65ff).

// Header has a text module
#define SWGF_BLOCKNAME   0x0002
// Stream is password protected
#define SWGF_HAS_PASSWD  0x0008
// Stream has page numbers
#define SWGF_HAS_PGNUMS  0x0100
// There was an error writing the file
#define SWGF_BAD_FILE    0x8000
00121 
// Document flags, taken from sw/source/core/sw3io/sw3doc.cxx (733ff).

// Show document in browse mode?
#define SWDF_BROWSEMODE1        0x1
// Same as above; one of the two needs to be set
#define SWDF_BROWSEMODE2        0x2
// Document is in HTML mode
#define SWDF_HTMLMODE           0x4
// Show headers in browse mode
#define SWDF_HEADINBROWSE       0x8
// Show footers in browse mode
#define SWDF_FOOTINBROWSE       0x10
// Is a global document (a global document can contain chapter
// documents... I think)
#define SWDF_GLOBALDOC          0x20
// Include sections that are linked to the global document when saving
#define SWDF_GLOBALDOCSAVELINK  0x40
// Is a label ("etiketten") document
#define SWDF_LABELDOC           0x80
00131 
// File versions, taken from sw/source/core/sw3io/sw3ids.hxx (77ff).

// IDs for Stringpool-Strings
#define SWG_POOLIDS       0x3
// Layout Frames
#define SWG_LAYFRAMES     0x5
// Record Sizes
#define SWG_RECSIZES      '%'
#define SWG_LONGIDX       0x201
// Record-Length > 8/16MB
#define SWG_LONGRECS      0x209
// The major version is defined as an alias of SWG_LONGIDX.
#define SWG_MAJORVERSION  SWG_LONGIDX
00139 
00140 
// Document sections: each constant is a single-character tag.

// Attribute of a textnode
#define SWG_ATTRIBUTE   'A'
// Comments
#define SWG_COMMENT     'C'
// Printer Job Setup
#define SWG_JOBSETUP    'J'
// Textpart
#define SWG_CONTENTS    'N'
// Set of attributes
#define SWG_ATTRSET     'S'
#define SWG_STRINGPOOL  '!'
#define SWG_TEXTNODE    'T'
#define SWG_EOF         'Z'
00150 
// File format constants, taken from OpenOffice's tools/inc/solar.h
// (line 471ff).
#define SOFFICE_FILEFORMAT_31  3450
#define SOFFICE_FILEFORMAT_40  3580
#define SOFFICE_FILEFORMAT_50  5050
#define SOFFICE_FILEFORMAT_60  6200
00156 
// Print job constants.
#define JOBSET_FILE364_SYSTEM  0xFFFF
#define JOBSET_FILE605_SYSTEM  0xFFFE

#define IDX_NOCONV_FF          0xFFFC
00162 
00163 // Staroffice document sniffer.
00164 class IE_Imp_StarOffice_Sniffer : public IE_ImpSniffer
00165 {
00166     public:
00167         IE_Imp_StarOffice_Sniffer();
00168         virtual ~IE_Imp_StarOffice_Sniffer() {}
00169 
00170         virtual const IE_SuffixConfidence * getSuffixConfidence ();
00171         virtual const IE_MimeConfidence * getMimeConfidence ();
00172         virtual UT_Confidence_t recognizeContents(GsfInput * input);
00173         virtual bool getDlgLabels(const char** szDesc, const char** szSuffixList, IEFileType *ft);
00174         virtual UT_Error constructImporter(PD_Document* pDocument, IE_Imp **ppie);
00175 };
00176 
00177 // Actual Importer
00178 class IE_Imp_StarOffice : public IE_Imp
00179 {
00180     public:
00181         IE_Imp_StarOffice(PD_Document *pDocument);
00182         ~IE_Imp_StarOffice();
00183 
00184     protected:
00185         virtual UT_Error _loadFile(GsfInput * input);
00186 
00187     private:
00188         FILE* mFile;
00189         GsfInfile* mOle;
00190         GsfInput *mDocStream;
00191         DocHdr mDocHdr;
00192 
00194     typedef std::map<UT_uint16, std::basic_string<UT_UCS4Char> > stringpool_map;
00195     stringpool_map mStringPool;
00196 
00202         void readRecSize(GsfInput* stream, UT_uint32& aSize, gsf_off_t* aEOR = NULL) throw(UT_Error);
00210         void readByteString(GsfInput* stream, UT_UCS4Char*& str) throw(UT_Error) {
00211 			::readByteString(stream, str, mDocHdr.converter, mDocHdr.cryptor);
00212         }
00213 
00217         static UT_uint32 getVersion(const char* szVerString);
00218 };
00219 
00220 /* Helper functions; all throw UT_IE_BOGUSDOCUMENT on error */
00225 void readFlagRec(GsfInput* stream, UT_uint8& flags, gsf_off_t* newPos = NULL) throw(UT_Error);
00226 
00231 inline void readChar(GsfInput* aStream, char& aChar) throw(UT_Error) {
00232     if (!gsf_input_read(aStream, 1, reinterpret_cast<guint8*>(&aChar)))
00233         throw UT_IE_BOGUSDOCUMENT;
00234 }
00235 
00236 inline void streamRead(GsfInput* aStream, UT_uint8& aDest) throw(UT_Error) {
00237     if (!gsf_input_read(aStream, 1, static_cast<guint8*>(&aDest)))
00238         throw UT_IE_BOGUSDOCUMENT;
00239 }
00240 
00241 inline void streamRead(GsfInput* aStream, UT_sint8& aDest) throw(UT_Error) {
00242     streamRead(aStream, reinterpret_cast<UT_uint8 &>(aDest));
00243 }
00244 
00245 inline void streamRead(GsfInput* aStream, char& aDest) throw(UT_Error) {
00246     streamRead(aStream, reinterpret_cast<UT_uint8 &>(aDest));
00247 }
00248 
00249 
00250 inline void streamRead(GsfInput* aStream, UT_uint16& aDest, bool isLittleEndian = true) throw(UT_Error) {
00251     guint8 buf [2];
00252     if (!gsf_input_read(aStream, 2, buf))
00253         throw UT_IE_BOGUSDOCUMENT;
00254     if (isLittleEndian) {
00255         aDest = buf [0] | (buf [1] << 8);
00256     }
00257     else {
00258         aDest = buf [1] | (buf [0] << 8);
00259     }
00260 }
00261 
00262 inline void streamRead(GsfInput* aStream, UT_sint16& aDest, bool isLittleEndian = true) throw(UT_Error) {
00263     streamRead(aStream, reinterpret_cast<UT_uint16 &>(aDest), isLittleEndian);
00264 }
00265 
00266 inline void streamRead(GsfInput* aStream, UT_uint32& aDest, bool isLittleEndian = true) throw(UT_Error) {
00267     guint8 buf [4];
00268     if (!gsf_input_read(aStream, 4, buf))
00269         throw UT_IE_BOGUSDOCUMENT;
00270     if (isLittleEndian) {
00271         aDest = buf [0] | (buf [1] << 8) | (buf [2] << 16) | (buf [3] << 24);
00272     }
00273     else {
00274         aDest = buf [3] | (buf [2] << 8) | (buf [1] << 16) | (buf [0] << 24);
00275     }
00276 }
00277 
00278 inline void streamRead(GsfInput* aStream, UT_sint32& aDest, bool isLittleEndian = true) throw(UT_Error) {
00279     streamRead(aStream, reinterpret_cast<UT_uint32 &>(aDest), isLittleEndian);
00280 }
00281 
00282 // reads the value as uint8
00283 inline void streamRead(GsfInput* aStream, bool& aDest) throw(UT_Error) {
00284     streamRead(aStream, reinterpret_cast<UT_uint8&>(aDest));
00285 }
00286 
00287 // Class ID
00288 inline void streamRead(GsfInput* aStream, ClsId& aClsId) throw(UT_Error) {
00289     streamRead(aStream, aClsId.n1);
00290     streamRead(aStream, aClsId.n2);
00291     streamRead(aStream, aClsId.n3);
00292     streamRead(aStream, aClsId.n4);
00293     streamRead(aStream, aClsId.n5);
00294     streamRead(aStream, aClsId.n6);
00295     streamRead(aStream, aClsId.n7);
00296     streamRead(aStream, aClsId.n8);
00297     streamRead(aStream, aClsId.n9);
00298     streamRead(aStream, aClsId.n10);
00299     streamRead(aStream, aClsId.n11);
00300 }
00301 #include "ut_debugmsg.h"
00302 
00303 // for completeness...
00304 inline void streamRead(GsfInput* aStream, char* aBuffer, UT_uint32 length) throw(UT_Error) {
00305     if (!gsf_input_read(aStream, length, reinterpret_cast<guint8 *>(aBuffer)))
00306         throw UT_IE_BOGUSDOCUMENT;
00307 }
00308 
00309 inline void streamRead(GsfInput* aStream, UT_uint8* aBuffer, UT_uint32 length) throw(UT_Error) {
00310     if (!gsf_input_read(aStream, length, static_cast<guint8*>(aBuffer)))
00311         throw UT_IE_BOGUSDOCUMENT;
00312 }
00313 
00314 // readRecSize must have been called already. readFlagRec must not.
00315 // aEoa = position of the end of the attr.
00316 void streamRead(GsfInput* aStream, TextAttr& aAttr, gsf_off_t aEoa) throw(UT_Error);
00317 
00318 #endif /* IE_IMP_STAROFFICE_H */

Generated on Sun May 27 2012 for AbiWord by  doxygen 1.7.1