• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ie_imp_MsWord_97.h

Go to the documentation of this file.
00001 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
00002 
00003 /* AbiWord
00004  * Copyright (C) 2001 AbiSource, Inc.
00005  * Copyright (C) 2001 Dom Lachowicz <dominicl@seas.upenn.edu>
00006  * Copyright (C) 2001-2003 Tomas Frydrych
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License
00010  * as published by the Free Software Foundation; either version 2
00011  * of the License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00021  * 02110-1301 USA.
00022  */
00023 
00024 #ifndef IE_IMP_MSWORD_H
00025 #define IE_IMP_MSWORD_H
00026 
00027 // The importer/reader for Microsoft Word Documents
00028 
00029 #include "ie_imp.h"
00030 #include "ut_string_class.h"
00031 #include "fl_DocLayout.h"
00032 #include "fl_AutoLists.h"
00033 #include "ut_units.h"
00034 //
00035 // forward decls so that we don't have to #include "wv.h" here
00036 //
00037 typedef struct _wvParseStruct wvParseStruct;  // passed as `ps` to the wv callbacks and most private helpers declared below
00038 typedef struct _Blip Blip;  // taken by _handleImage() and _handlePositionedImage()
00039 typedef struct _CHP CHP;  // taken by _specCharProc() and _generateCharProps()
00040 typedef struct _PAP PAP;  // taken by _table_close(), _cell_open() and _generateParaProps()
00041 class PD_Document;  // only used through a pointer in this header
00042 class pf_Frag;  // only used through pointers in this header
00043 class UT_Stack;  // NOTE(review): only forward-declared here, yet IE_Imp_MsWord_97 holds a UT_Stack by value (m_stackField); the full definition must arrive through one of the #includes above — confirm
00044 
00045 struct field;  // not defined in this header; only used through pointers (_isTOCsupported, _insertTOC)
00046 
00047 struct bookmark  // one bookmark record; IE_Imp_MsWord_97 refers to these through m_pBookmarks / m_iBookmarksCount
00048 {
00049     gchar * name;  // presumably the bookmark's name (cf. _getBookmarkName); ownership is not visible in this header — confirm in the .cpp
00050     UT_uint32  pos;  // presumably the document position the record applies to — confirm
00051     bool       start;  // presumably true for a bookmark start marker, false for its end — confirm
00052 };
00053 
00054 struct footnote  // one note record; the same type backs both m_pFootnotes and m_pEndnotes
00055 {
00056     UT_uint32  type;  // meaning of the values is not visible in this header
00057     UT_uint32  ref_pos;  // presumably the position of the note reference in the main text — confirm
00058     UT_uint32  txt_pos;  // presumably where the note's own text starts — confirm
00059     UT_uint32  txt_len;  // presumably the length of the note's own text — confirm
00060     UT_uint32  pid;  // presumably an id assigned by the importer — confirm
00061 };
00062 
00063 
00064 struct textbox  // one text box record; IE_Imp_MsWord_97 refers to these through m_pTextboxes / m_iTextboxCount
00065 {
00066     UT_uint32  lid;  // id of the box; textboxPos carries a field of the same name — presumably they are matched on it, confirm
00067     UT_uint32  ref_pos;  // presumably the position in the main text that anchors the box — confirm
00068     UT_uint32  txt_pos;  // presumably where the box's own text starts — confirm
00069     UT_uint32  txt_len;  // presumably the length of the box's own text — confirm
00070     UT_sint32  iLeft;  // geometry fields are signed; units are not visible in this header
00071     UT_sint32  iWidth;
00072     UT_sint32  iTop;
00073     UT_sint32  iHeight;
00074     UT_sint32  iPosType;  // meaning of the values is not visible in this header
00075     UT_sint32  iBorderWidth;
00076 };
00077 
00078 struct textboxPos  // pairs a text box id with a fragment pointer; held by pointer in IE_Imp_MsWord_97::m_vecTextboxPos
00079 {
00080     UT_uint32 lid;  // same field name as textbox::lid — presumably the same id space, confirm
00081     pf_Frag * endFrame;  // presumably the fragment that closes the box's frame; ownership not visible here — confirm
00082 };
00083 
00084 typedef enum  // kind tag stored in header::type
00085     {
00086         HF_HeaderFirst = 0,  // enumerators count up from 0 in declaration order
00087         HF_FooterFirst,
00088         HF_HeaderOdd,
00089         HF_FooterOdd,
00090         HF_HeaderEven,
00091         HF_FooterEven,
00092         HF_Unsupported  // last value; presumably marks header/footer kinds the importer does not handle — confirm
00093     }_headerTypes;
00094 
00095 
00096 struct header  // one header/footer record; IE_Imp_MsWord_97 refers to these through m_pHeaders / m_iHeadersCount
00097 {
00098     _headerTypes type;  // which header/footer kind this record is
00099     UT_uint32    pos;  // presumably where the header's text starts — confirm
00100     UT_uint32    len;  // presumably the length of the header's text — confirm
00101     UT_uint32    pid;  // presumably an id assigned by the importer — confirm
00102 
00103     struct _d  // nested helper type; its single instance is the member `d` below
00104     {
00105         UT_Vector hdr;  // element type is not visible in this header
00106         UT_Vector frag;  // element type is not visible in this header
00107     }d;
00108 };
00109 
00110 class ABI_EXPORT MsColSpan  // column span record; held by pointer in IE_Imp_MsWord_97::m_vecColumnWidths
00111 {
00112 public:
00113     MsColSpan(void):iLeft(0),iRight(0),width(0){}  // all three fields start at zero
00114     virtual ~MsColSpan(void) {}  // empty; being virtual makes the class polymorphic
00115     UT_sint32 iLeft;  // presumably the left edge of the span — units not visible here, confirm
00116     UT_sint32 iRight;  // presumably the right edge of the span — confirm
00117     UT_sint32 width;  // presumably iRight - iLeft; nothing in this header enforces that — confirm
00118 };
00119 
00120 class ABI_EXPORT emObject  // object record held by pointer in IE_Imp_MsWord_97::m_vecEmObjects
00121 {
00122 public:
00123     UT_String props1;  // meaning of the two strings is not visible in this header
00124     UT_String props2;
00125     PTObjectType objType;  // NOTE(review): no constructor is declared, so this has no defined initial value under default-initialization; creators must set it
00126 };
00127 
00128 //
00129 // The Sniffer/Manager/Creator Class for DOC
00130 //
00131 class ABI_EXPORT IE_Imp_MsWord_97_Sniffer : public IE_ImpSniffer  // sniffer for the importer declared further down in this header
00132 {
00133     friend class IE_Imp;  // grants IE_Imp access to non-public members
00134 
00135 public:
00136     IE_Imp_MsWord_97_Sniffer();
00137     virtual ~IE_Imp_MsWord_97_Sniffer() {}
00138 
00139     virtual const IE_SuffixConfidence * getSuffixConfidence ();  // the virtuals below are presumably overrides of IE_ImpSniffer — confirm against ie_imp.h
00140     virtual const IE_MimeConfidence * getMimeConfidence ();
00141     virtual UT_Confidence_t recognizeContents (const char * szBuf,
00142                                     UT_uint32 iNumbytes);  // overload working on an in-memory buffer of iNumbytes bytes
00143     virtual UT_Confidence_t recognizeContents (GsfInput * input);  // overload working on a GsfInput
00144     virtual bool getDlgLabels (const char ** szDesc,
00145                                const char ** szSuffixList,
00146                                IEFileType * ft);  // all three parameters are pointers, presumably written as outputs — confirm
00147     virtual UT_Error constructImporter (PD_Document * pDocument,
00148                                         IE_Imp ** ppie);  // ppie is presumably an out-parameter receiving the new importer — confirm
00149 };
00150 
00151 // how many chars to buffer in our fields implementation; NOTE(review): within this header only the compiled-out (#if 0) m_command / m_argument arrays are sized by it
00152 #define FLD_SIZE 40000
00153 
00154 //
00155 // The import class for the MSFT Word DOC format
00156 //
00157 class ABI_EXPORT IE_Imp_MsWord_97 : public IE_Imp  // the importer itself; the public _xxxProc members are what wv's callbacks call into
00158 {
00159 public:
00160     IE_Imp_MsWord_97 (PD_Document * pDocument);
00161     ~IE_Imp_MsWord_97 ();  // NOTE(review): not spelled `virtual` here; it is virtual only if IE_Imp's destructor is — confirm
00162 
00163     virtual bool        supportsLoadStylesOnly() {return true;}  // unconditionally reports true
00164 
00165     // wv's callbacks need access to these, so they have to be public
00166     int             _specCharProc (wvParseStruct *ps, UT_uint16 eachchar,
00167                                    CHP * achp);
00168     int             _charProc (wvParseStruct *ps, UT_uint16 eachchar,
00169                                UT_Byte chartype,  UT_uint16 lid);
00170     int             _docProc  (wvParseStruct *ps, UT_uint32 tag);
00171     int             _eleProc  (wvParseStruct *ps, UT_uint32 tag,
00172                                void *props, int dirty);  // same (ps, tag, props, dirty) shape as the private _begin*/_end* handlers below
00173 
00174 protected:
00175 
00176     UT_Error            _loadFile (GsfInput * input);  // presumably overrides an IE_Imp hook — confirm against ie_imp.h
00177 
00178 private:
00179 
00180     void       _handleMetaData(wvParseStruct *ps);
00181 
00182     int        _beginSect (wvParseStruct *ps, UT_uint32 tag,
00183                            void *props, int dirty);  // the _begin*/_end* pairs share _eleProc's signature; presumably dispatched from it — confirm
00184     int        _endSect (wvParseStruct *ps, UT_uint32 tag,
00185                          void *props, int dirty);
00186 
00187     int        _beginPara (wvParseStruct *ps, UT_uint32 tag,
00188                            void *props, int dirty);
00189     int        _endPara (wvParseStruct *ps, UT_uint32 tag,
00190                          void *props, int dirty);
00191 
00192     int        _beginChar (wvParseStruct *ps, UT_uint32 tag,
00193                            void *props, int dirty);
00194     int        _endChar (wvParseStruct *ps, UT_uint32 tag,
00195                          void *props, int dirty);
00196     int        _beginComment (wvParseStruct *ps, UT_uint32 tag,
00197                            void *props, int dirty);
00198     int        _endComment (wvParseStruct *ps, UT_uint32 tag,
00199                          void *props, int dirty);
00200     gchar * _getBookmarkName(const wvParseStruct * ps, UT_uint32 pos);  // returns a non-const gchar*; who frees it is not visible here — confirm
00201     bool       _insertBookmarkIfAppropriate(UT_uint32 iPos);
00202     bool       _insertBookmark(bookmark * bm);
00203     UT_Error   _handleImage (Blip *, long width, long height, long cropt, long cropb, long cropl, long cropr);  // crop* are presumably top/bottom/left/right crop amounts — confirm
00204     UT_Error   _handlePositionedImage (Blip *, UT_String & sImageName);  // sImageName is a non-const reference, presumably an output — confirm
00205     bool       _handleCommandField (char *command);
00206     bool       _handleFieldEnd (char * command, UT_uint32 iPos);
00207     int        _fieldProc (wvParseStruct *ps, UT_uint16 eachchar,
00208                            UT_Byte chartype, UT_uint16 lid);  // same parameter list as the public _charProc
00209     void       _appendChar (UT_UCSChar ch);
00210     void       _flush ();
00211 
00212     void        _table_open();  // table/row/cell helpers; the matching state lives in m_bInTable, m_bRowOpen, m_bCellOpen below
00213     void        _table_close(const wvParseStruct *ps, const PAP *apap);
00214     void        _row_open(const wvParseStruct *ps);
00215     void        _row_close();
00216     void        _cell_open(const wvParseStruct *ps, const PAP *apap);
00217     void        _cell_close();
00218     void        _handleStyleSheet(const wvParseStruct *ps);
00219     void        _generateCharProps(UT_String &s, const CHP * achp, wvParseStruct *ps);  // s is a non-const reference, presumably receiving the generated properties — confirm
00220     void        _generateParaProps(UT_String &s, const PAP * apap, wvParseStruct *ps);
00221     int         _handleBookmarks(const wvParseStruct *ps);
00222     void        _handleNotes(const wvParseStruct *ps);
00223     void        _handleTextBoxes(const wvParseStruct *ps);
00224     bool        _insertNoteIfAppropriate(UT_uint32 iDocPosition,UT_UCS4Char c);
00225     bool        _insertFootnote(const footnote * f, UT_UCS4Char c);
00226     bool        _insertEndnote(const footnote * f, UT_UCS4Char c);  // endnotes reuse the footnote record type
00227     bool        _handleNotesText(UT_uint32 iPos);
00228     bool        _handleTextboxesText(UT_uint32 iPos);
00229     bool        _findNextTextboxSection();
00230     bool        _findNextFNoteSection();
00231     bool        _findNextENoteSection();
00232     bool        _shouldUseInsert()const;
00233     bool        _ensureInBlock();
00234     bool        _appendStrux(PTStruxType pts, const PP_PropertyVector & attributes);  // each _append* helper has a *HdrFtr twin with an identical parameter list
00235     bool        _appendObject(PTObjectType pto, const PP_PropertyVector & attributes);
00236     bool        _appendSpan(const UT_UCSChar * p, UT_uint32 length);
00237     bool        _appendStruxHdrFtr(PTStruxType pts, const PP_PropertyVector & attributes);
00238     bool        _appendObjectHdrFtr(PTObjectType pto, const PP_PropertyVector & attributes);
00239     bool        _appendSpanHdrFtr(const UT_UCSChar * p, UT_uint32 length);
00240     bool        _appendFmt(const PP_PropertyVector & attributes);
00241     void        _handleHeaders(const wvParseStruct *ps);
00242     bool        _handleHeadersText(UT_uint32 iPos, bool bDoBlockIns);
00243     bool        _insertHeaderSection(bool bDoBlockIns);
00244     bool        _build_ColumnWidths(UT_NumberVector & colWidths);
00245     bool        _isVectorFull(UT_NumberVector & vec);
00246     void        setNumberVector(UT_NumberVector & vec, UT_sint32 i, UT_sint32 val);  // NOTE(review): this and findMatchSpan lack the leading underscore the other private helpers use
00247     bool        findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight);
00248     bool        _ignorePosition(UT_uint32 pos);
00249 
00250     bool        _isTOCsupported(field *f);
00251     bool        _insertTOC(field *f);
00252 
00253 
00254     UT_UCS4String       m_pTextRun;  // held by value despite the m_p prefix
00255     //UT_uint32         m_iImageCount;
00256     UT_uint32           m_nSections;
00257     bool                m_bSetPageSize;
00258 #if 0  // compiled out: fixed-size field buffers sized by FLD_SIZE; the active branch below keeps a UT_Stack instead
00259     UT_UCS2Char m_command [FLD_SIZE];
00260     UT_UCS2Char m_argument [FLD_SIZE];
00261     UT_UCS2Char *m_fieldWhich;
00262     UT_sint32   m_fieldI;
00263     char *      m_fieldC;
00264     UT_sint32   m_fieldRet;
00265     UT_sint32   m_fieldDepth;
00266 #else
00267     UT_Stack    m_stackField;  // by-value member, so UT_Stack must be a complete type at this point
00268 #endif
00269     //char *      m_fieldA;
00270     bool       m_bIsLower;
00271 
00272     bool m_bInSect;
00273     bool m_bInPara;
00274     bool m_bLTRCharContext;
00275     bool m_bLTRParaContext;
00276     bool m_bBidiMode;
00277     bool m_bInLink;
00278     bookmark * m_pBookmarks;  // pointer + count pairs follow for bookmarks, footnotes, endnotes and text boxes; allocation and ownership are not visible here
00279     UT_uint32  m_iBookmarksCount;
00280     footnote * m_pFootnotes;
00281     UT_uint32  m_iFootnotesCount;
00282     footnote * m_pEndnotes;
00283     UT_uint32  m_iEndnotesCount;
00284     textbox *  m_pTextboxes;
00285     UT_sint32  m_iTextboxCount;  // NOTE(review): signed, unlike the UT_uint32 counts above
00286     UT_Vector  m_vLists;
00287     UT_uint32  m_iListIdIncrement[9];  // 9 slots; presumably one per list level — confirm
00288     UT_uint32  m_iMSWordListId;
00289 
00290     bool m_bEncounteredRevision;
00291     bool        m_bInTable;                     // are we in a table ?
00292     int         m_iRowsRemaining;               // number of rows left to process
00293     int         m_iCellsRemaining;              // number of cells left to process in the current row
00294     int         m_iCurrentRow;                  // presumably the index of the row being processed — confirm
00295     int         m_iCurrentCell;                 // presumably the index of the cell being processed in the current row — confirm
00296     bool        m_bRowOpen;                     // row strux open ?
00297     bool        m_bCellOpen;                    // cell strux open ?
00298     UT_NumberVector m_vecColumnSpansForCurrentRow;  // placeholder for horizontal cell spans
00299     UT_GenericVector<MsColSpan *>   m_vecColumnWidths;  // stores raw MsColSpan pointers; who deletes them is not visible here
00300     UT_GenericVector<emObject*>   m_vecEmObjects;               // Objects between cell
00301                                               // struxes
00302     UT_NumberVector m_vecColumnPositions;
00303     UT_String   m_charProps;
00304     UT_String   m_charRevs;
00305     UT_String   m_charStyle;
00306     UT_String   m_paraProps;
00307     UT_String   m_paraStyle;
00308 
00309     UT_uint32   m_iFootnotesStart;  // the *Start / *End pairs below presumably bound each sub-document's position range — confirm
00310     UT_uint32   m_iFootnotesEnd;
00311     UT_uint32   m_iEndnotesStart;
00312     UT_uint32   m_iEndnotesEnd;
00313     UT_uint32   m_iNextFNote;
00314     UT_uint32   m_iNextENote;
00315     bool        m_bInFNotes;
00316     bool        m_bInENotes;
00317     pf_Frag *   m_pNotesEndSection;
00318     header *    m_pHeaders;
00319     UT_uint32   m_iHeadersCount;
00320     UT_uint32   m_iHeadersStart;
00321     UT_uint32   m_iHeadersEnd;
00322     UT_uint32   m_iCurrentHeader;
00323     bool        m_bInHeaders;
00324     UT_uint32   m_iCurrentSectId;
00325     UT_uint32   m_iAnnotationsStart;
00326     UT_uint32   m_iAnnotationsEnd;
00327     UT_uint32   m_iMacrosStart;
00328     UT_uint32   m_iMacrosEnd;
00329     UT_uint32   m_iTextStart;
00330     UT_uint32   m_iTextEnd;
00331     bool        m_bPageBreakPending;
00332     bool        m_bLineBreakPending;
00333     UT_NumberVector m_vListIdMap;
00334     bool        m_bSymbolFont;
00335     UT_Dimension m_dim;
00336     UT_sint32    m_iLeft;
00337     UT_sint32    m_iRight;
00338     UT_uint32    m_iTextboxesStart;
00339     UT_uint32    m_iTextboxesEnd;
00340     UT_sint32    m_iNextTextbox;
00341     UT_uint32    m_iPrevHeaderPosition;
00342     bool         m_bEvenOddHeaders;
00343 
00344     UT_sint32    m_bInTOC;  // NOTE(review): 'b' prefix but declared UT_sint32, not bool — presumably a counter or tri-state; confirm
00345     bool         m_bTOCsupported;
00346     bool         m_bInTextboxes;
00347     pf_Frag *    m_pTextboxEndSection;
00348     UT_GenericVector<textboxPos *> m_vecTextboxPos;  // stores raw textboxPos pointers; who deletes them is not visible here
00349     UT_sint32    m_iLeftCellPos;
00350     UT_uint32    m_iLastAppendedHeader;
00351 };
00352 
00353 #endif /* IE_IMP_MSWORD_H */

Generated on Sun Feb 14 2021 for AbiWord by  doxygen 1.7.1