00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef IE_IMP_MSWORD_H
00025 #define IE_IMP_MSWORD_H
00026
00027
00028
00029 #include "ie_imp.h"
00030 #include "ut_string_class.h"
00031 #include "fl_DocLayout.h"
00032 #include "fl_AutoLists.h"
00033 #include "ut_units.h"
00034
00035
00036
/* Forward declarations for types that the declarations below refer to
 * through pointers (wvParseStruct, Blip, CHP, PAP, PD_Document, pf_Frag,
 * field). Their definitions live outside this header.
 * NOTE(review): UT_Stack is forward-declared here but is also embedded by
 * value (m_stackField in IE_Imp_MsWord_97), which requires the complete
 * type -- presumably one of the headers included above provides it;
 * confirm. */
00037 typedef struct _wvParseStruct wvParseStruct;
00038 typedef struct _Blip Blip;
00039 typedef struct _CHP CHP;
00040 typedef struct _PAP PAP;
00041 class PD_Document;
00042 class pf_Frag;
00043 class UT_Stack;
00044
00045 struct field;
00046
/* One bookmark record. IE_Imp_MsWord_97 keeps an array of these in
 * m_pBookmarks together with its length in m_iBookmarksCount. */
00047 struct bookmark
00048 {
00049 gchar * name; /* bookmark name; ownership of the string is not visible in this header */
00050 UT_uint32 pos; /* position of this record -- NOTE(review): presumably a document character position; confirm */
00051 bool start; /* NOTE(review): presumably true for a start marker and false for an end marker; confirm */
00052 };
00053
/* Record describing one note. The same struct is used for footnotes and
 * endnotes: IE_Imp_MsWord_97 declares both m_pFootnotes and m_pEndnotes
 * as footnote*. */
00054 struct footnote
00055 {
00056 UT_uint32 type; /* note type code; the meaning of the values is not visible in this header */
00057 UT_uint32 ref_pos; /* NOTE(review): presumably position of the note reference in the main text; confirm */
00058 UT_uint32 txt_pos; /* NOTE(review): presumably position where the note's own text begins; confirm */
00059 UT_uint32 txt_len; /* NOTE(review): presumably length of the note's text; confirm */
00060 UT_uint32 pid; /* identifier for the note -- NOTE(review): origin of the value not visible here; confirm */
00061 };
00062
00063
/* Record describing one text box. IE_Imp_MsWord_97 keeps an array of
 * these in m_pTextboxes with its length in m_iTextboxCount.
 * NOTE(review): units of the geometry fields are not visible in this
 * header; confirm against the code that fills them in. */
00064 struct textbox
00065 {
00066 UT_uint32 lid; /* identifier of the text box; textboxPos carries a field of the same name */
00067 UT_uint32 ref_pos; /* NOTE(review): presumably position of the anchor/reference in the main text; confirm */
00068 UT_uint32 txt_pos; /* NOTE(review): presumably position where the text box's text begins; confirm */
00069 UT_uint32 txt_len; /* NOTE(review): presumably length of the text box's text; confirm */
00070 UT_sint32 iLeft; /* left coordinate (signed) */
00071 UT_sint32 iWidth; /* width (signed) */
00072 UT_sint32 iTop; /* top coordinate (signed) */
00073 UT_sint32 iHeight; /* height (signed) */
00074 UT_sint32 iPosType; /* positioning type code; the meaning of the values is not visible here */
00075 UT_sint32 iBorderWidth; /* border width (signed) */
00076 };
00077
/* Pairs a text box identifier with a fragment pointer. IE_Imp_MsWord_97
 * stores pointers to these in m_vecTextboxPos. */
00078 struct textboxPos
00079 {
00080 UT_uint32 lid; /* text box identifier; struct textbox carries a field of the same name */
00081 pf_Frag * endFrame; /* NOTE(review): presumably the fragment that ends the text box's frame; ownership not visible here */
00082 };
00083
/* Kinds of header/footer distinguished by the importer. Values start at 0
 * and increase by one in declaration order, so HF_Unsupported is the
 * largest value. Used as the type of header::type. */
00084 typedef enum
00085 {
00086 HF_HeaderFirst = 0,
00087 HF_FooterFirst,
00088 HF_HeaderOdd,
00089 HF_FooterOdd,
00090 HF_HeaderEven,
00091 HF_FooterEven,
00092 HF_Unsupported /* catch-all for header/footer kinds not covered by the enumerators above */
00093 }_headerTypes;
00094
00095
/* Record describing one header or footer. IE_Imp_MsWord_97 keeps an array
 * of these in m_pHeaders with its length in m_iHeadersCount. */
00096 struct header
00097 {
00098 _headerTypes type; /* which kind of header/footer this is */
00099 UT_uint32 pos; /* NOTE(review): presumably position where this header's text begins; confirm */
00100 UT_uint32 len; /* NOTE(review): presumably length of this header's text; confirm */
00101 UT_uint32 pid; /* identifier for this header -- origin of the value not visible here */
00102
    /* Two untyped vectors attached to the header. What they hold is not
     * visible in this header -- NOTE(review): presumably related header
     * records and document fragments; confirm in the implementation. */
00103 struct _d
00104 {
00105 UT_Vector hdr;
00106 UT_Vector frag;
00107 }d;
00108 };
00109
/* Plain data holder for a column span: a left value, a right value and a
 * width. IE_Imp_MsWord_97 stores pointers to these in m_vecColumnWidths.
 * NOTE(review): units of the three values are not visible in this header. */
00110 class ABI_EXPORT MsColSpan
00111 {
00112 public:
    /* Initializes all three members to 0. */
00113 MsColSpan(void):iLeft(0),iRight(0),width(0){}
    /* Empty virtual destructor. */
00114 virtual ~MsColSpan(void) {}
00115 UT_sint32 iLeft;
00116 UT_sint32 iRight;
00117 UT_sint32 width;
00118 };
00119
/* Plain data holder: two property strings plus an object type.
 * IE_Imp_MsWord_97 stores pointers to these in m_vecEmObjects.
 * NOTE(review): no constructor is declared, so objType is not initialized
 * by this class itself; code that creates an emObject must assign it
 * before it is read. */
00120 class ABI_EXPORT emObject
00121 {
00122 public:
00123 UT_String props1;
00124 UT_String props2;
00125 PTObjectType objType;
00126 };
00127
00128
00129
00130
/* Sniffer class for the MS Word importer, derived from IE_ImpSniffer.
 * Everything except the empty destructor is only declared here and is
 * defined elsewhere. IE_Imp is a friend of this class. */
00131 class ABI_EXPORT IE_Imp_MsWord_97_Sniffer : public IE_ImpSniffer
00132 {
00133 friend class IE_Imp;
00134
00135 public:
00136 IE_Imp_MsWord_97_Sniffer();
00137 virtual ~IE_Imp_MsWord_97_Sniffer() {}
00138
    /* Confidence tables keyed by file suffix and by MIME type. */
00139 virtual const IE_SuffixConfidence * getSuffixConfidence ();
00140 virtual const IE_MimeConfidence * getMimeConfidence ();
    /* Two overloads of content recognition: one takes a raw byte buffer
     * with its length, the other a GsfInput. */
00141 virtual UT_Confidence_t recognizeContents (const char * szBuf,
00142 UT_uint32 iNumbytes);
00143 virtual UT_Confidence_t recognizeContents (GsfInput * input);
    /* Reports a description, a suffix list and a file type through the
     * three output pointers. */
00144 virtual bool getDlgLabels (const char ** szDesc,
00145 const char ** szSuffixList,
00146 IEFileType * ft);
    /* Creates an importer for pDocument and returns it through ppie.
     * NOTE(review): presumably an IE_Imp_MsWord_97; confirm in the .cpp. */
00147 virtual UT_Error constructImporter (PD_Document * pDocument,
00148 IE_Imp ** ppie);
00149 };
00150
00151
/* Element count for the fixed-size field buffers m_command / m_argument.
 * Within this header those buffers sit inside an "#if 0" region of
 * IE_Imp_MsWord_97, so the macro has no active use here; other files may
 * still reference it. */
00152 #define FLD_SIZE 40000
00153
00154
00155
00156
/* Importer class for MS Word documents, derived from IE_Imp.
 *
 * This header only declares the interface and the importer's state; all
 * member functions except supportsLoadStylesOnly() are defined elsewhere.
 * The public section exposes four processing entry points that take a
 * wvParseStruct; the private section holds per-element handlers
 * (sections, paragraphs, characters, comments, tables, bookmarks, notes,
 * text boxes, headers, fields, TOC) and the bookkeeping members they use.
 *
 * NOTE(review): the destructor is not marked virtual here; whether it is
 * virtual depends on IE_Imp's destructor, which is not visible in this
 * header. */
00157 class ABI_EXPORT IE_Imp_MsWord_97 : public IE_Imp
00158 {
00159 public:
00160 IE_Imp_MsWord_97 (PD_Document * pDocument);
00161 ~IE_Imp_MsWord_97 ();
00162
    /* Always reports true. */
00163 virtual bool supportsLoadStylesOnly() {return true;}
00164
00165
    /* Public processing entry points.
     * NOTE(review): presumably invoked from the wv parser's callback
     * handlers, which would explain why they are public; confirm in the
     * implementation file. */
00166 int _specCharProc (wvParseStruct *ps, UT_uint16 eachchar,
00167 CHP * achp);
00168 int _charProc (wvParseStruct *ps, UT_uint16 eachchar,
00169 UT_Byte chartype, UT_uint16 lid);
00170 int _docProc (wvParseStruct *ps, UT_uint32 tag);
00171 int _eleProc (wvParseStruct *ps, UT_uint32 tag,
00172 void *props, int dirty);
00173
00174 protected:
00175
    /* Loads the document from the given GsfInput and returns a UT_Error. */
00176 UT_Error _loadFile (GsfInput * input);
00177
00178 private:
00179
00180 void _handleMetaData(wvParseStruct *ps);
00181
    /* Begin/end handlers for sections, paragraphs, character runs and
     * comments. All share the same parameter list as _eleProc. */
00182 int _beginSect (wvParseStruct *ps, UT_uint32 tag,
00183 void *props, int dirty);
00184 int _endSect (wvParseStruct *ps, UT_uint32 tag,
00185 void *props, int dirty);
00186
00187 int _beginPara (wvParseStruct *ps, UT_uint32 tag,
00188 void *props, int dirty);
00189 int _endPara (wvParseStruct *ps, UT_uint32 tag,
00190 void *props, int dirty);
00191
00192 int _beginChar (wvParseStruct *ps, UT_uint32 tag,
00193 void *props, int dirty);
00194 int _endChar (wvParseStruct *ps, UT_uint32 tag,
00195 void *props, int dirty);
00196 int _beginComment (wvParseStruct *ps, UT_uint32 tag,
00197 void *props, int dirty);
00198 int _endComment (wvParseStruct *ps, UT_uint32 tag,
00199 void *props, int dirty);
    /* Bookmark, image and field helpers. Ownership of the gchar* returned
     * by _getBookmarkName is not visible in this header. */
00200 gchar * _getBookmarkName(const wvParseStruct * ps, UT_uint32 pos);
00201 bool _insertBookmarkIfAppropriate(UT_uint32 iPos);
00202 bool _insertBookmark(bookmark * bm);
00203 UT_Error _handleImage (Blip *, long width, long height, long cropt, long cropb, long cropl, long cropr);
00204 UT_Error _handlePositionedImage (Blip *, UT_String & sImageName);
00205 bool _handleCommandField (char *command);
00206 bool _handleFieldEnd (char * command, UT_uint32 iPos);
00207 int _fieldProc (wvParseStruct *ps, UT_uint16 eachchar,
00208 UT_Byte chartype, UT_uint16 lid);
    /* Text accumulation: append one character / flush.
     * NOTE(review): presumably these operate on m_pTextRun; confirm. */
00209 void _appendChar (UT_UCSChar ch);
00210 void _flush ();
00211
    /* Table, row and cell open/close handlers. */
00212 void _table_open();
00213 void _table_close(const wvParseStruct *ps, const PAP *apap);
00214 void _row_open(const wvParseStruct *ps);
00215 void _row_close();
00216 void _cell_open(const wvParseStruct *ps, const PAP *apap);
00217 void _cell_close();
    /* Style sheet handling and generation of property strings into the
     * caller-supplied UT_String s. */
00218 void _handleStyleSheet(const wvParseStruct *ps);
00219 void _generateCharProps(UT_String &s, const CHP * achp, wvParseStruct *ps);
00220 void _generateParaProps(UT_String &s, const PAP * apap, wvParseStruct *ps);
    /* Bookmark, note (footnote/endnote) and text box handling. */
00221 int _handleBookmarks(const wvParseStruct *ps);
00222 void _handleNotes(const wvParseStruct *ps);
00223 void _handleTextBoxes(const wvParseStruct *ps);
00224 bool _insertNoteIfAppropriate(UT_uint32 iDocPosition,UT_UCS4Char c);
00225 bool _insertFootnote(const footnote * f, UT_UCS4Char c);
00226 bool _insertEndnote(const footnote * f, UT_UCS4Char c);
00227 bool _handleNotesText(UT_uint32 iPos);
00228 bool _handleTextboxesText(UT_uint32 iPos);
00229 bool _findNextTextboxSection();
00230 bool _findNextFNoteSection();
00231 bool _findNextENoteSection();
    /* Helpers for adding struxes, objects, spans and formatting, each with
     * a separate "HdrFtr" variant for struxes, objects and spans. */
00232 bool _shouldUseInsert()const;
00233 bool _ensureInBlock();
00234 bool _appendStrux(PTStruxType pts, const PP_PropertyVector & attributes);
00235 bool _appendObject(PTObjectType pto, const PP_PropertyVector & attributes);
00236 bool _appendSpan(const UT_UCSChar * p, UT_uint32 length);
00237 bool _appendStruxHdrFtr(PTStruxType pts, const PP_PropertyVector & attributes);
00238 bool _appendObjectHdrFtr(PTObjectType pto, const PP_PropertyVector & attributes);
00239 bool _appendSpanHdrFtr(const UT_UCSChar * p, UT_uint32 length);
00240 bool _appendFmt(const PP_PropertyVector & attributes);
    /* Header/footer handling. */
00241 void _handleHeaders(const wvParseStruct *ps);
00242 bool _handleHeadersText(UT_uint32 iPos, bool bDoBlockIns);
00243 bool _insertHeaderSection(bool bDoBlockIns);
    /* Column-width and number-vector helpers used for tables. */
00244 bool _build_ColumnWidths(UT_NumberVector & colWidths);
00245 bool _isVectorFull(UT_NumberVector & vec);
00246 void setNumberVector(UT_NumberVector & vec, UT_sint32 i, UT_sint32 val);
00247 bool findMatchSpan(UT_sint32 iLeft,UT_sint32 iRight);
00248 bool _ignorePosition(UT_uint32 pos);
00249
    /* Table-of-contents helpers operating on a field record. */
00250 bool _isTOCsupported(field *f);
00251 bool _insertTOC(field *f);
00252
00253
    /* NOTE(review): carries a "p" prefix but is a UT_UCS4String value, not
     * a pointer. */
00254 UT_UCS4String m_pTextRun;
00255
00256 UT_uint32 m_nSections;
00257 bool m_bSetPageSize;
    /* The "#if 0" branch (fixed-size field buffers sized by FLD_SIZE) is
     * compiled out; the active branch keeps field state in m_stackField. */
00258 #if 0
00259 UT_UCS2Char m_command [FLD_SIZE];
00260 UT_UCS2Char m_argument [FLD_SIZE];
00261 UT_UCS2Char *m_fieldWhich;
00262 UT_sint32 m_fieldI;
00263 char * m_fieldC;
00264 UT_sint32 m_fieldRet;
00265 UT_sint32 m_fieldDepth;
00266 #else
00267 UT_Stack m_stackField;
00268 #endif
00269
00270 bool m_bIsLower;
00271
    /* Parser context flags. */
00272 bool m_bInSect;
00273 bool m_bInPara;
00274 bool m_bLTRCharContext;
00275 bool m_bLTRParaContext;
00276 bool m_bBidiMode;
00277 bool m_bInLink;
    /* Record arrays with their element counts. Ownership of the arrays is
     * not visible in this header.
     * NOTE(review): m_iTextboxCount is signed (UT_sint32) while the other
     * three counts are UT_uint32. */
00278 bookmark * m_pBookmarks;
00279 UT_uint32 m_iBookmarksCount;
00280 footnote * m_pFootnotes;
00281 UT_uint32 m_iFootnotesCount;
00282 footnote * m_pEndnotes;
00283 UT_uint32 m_iEndnotesCount;
00284 textbox * m_pTextboxes;
00285 UT_sint32 m_iTextboxCount;
    /* List bookkeeping; m_iListIdIncrement has exactly 9 slots. */
00286 UT_Vector m_vLists;
00287 UT_uint32 m_iListIdIncrement[9];
00288 UT_uint32 m_iMSWordListId;
00289
    /* Revision flag and table state (remaining/current row and cell
     * counters, open flags, column span/width/position vectors). */
00290 bool m_bEncounteredRevision;
00291 bool m_bInTable;
00292 int m_iRowsRemaining;
00293 int m_iCellsRemaining;
00294 int m_iCurrentRow;
00295 int m_iCurrentCell;
00296 bool m_bRowOpen;
00297 bool m_bCellOpen;
00298 UT_NumberVector m_vecColumnSpansForCurrentRow;
00299 UT_GenericVector<MsColSpan *> m_vecColumnWidths;
00300 UT_GenericVector<emObject*> m_vecEmObjects;
00301
00302 UT_NumberVector m_vecColumnPositions;
    /* Character and paragraph property/style strings. */
00303 UT_String m_charProps;
00304 UT_String m_charRevs;
00305 UT_String m_charStyle;
00306 UT_String m_paraProps;
00307 UT_String m_paraStyle;
00308
    /* Start/end values for the footnote, endnote, header, annotation,
     * macro and main-text ranges, plus per-range cursors and "in range"
     * flags.
     * NOTE(review): presumably character positions within the Word text
     * stream; confirm in the implementation. */
00309 UT_uint32 m_iFootnotesStart;
00310 UT_uint32 m_iFootnotesEnd;
00311 UT_uint32 m_iEndnotesStart;
00312 UT_uint32 m_iEndnotesEnd;
00313 UT_uint32 m_iNextFNote;
00314 UT_uint32 m_iNextENote;
00315 bool m_bInFNotes;
00316 bool m_bInENotes;
00317 pf_Frag * m_pNotesEndSection;
00318 header * m_pHeaders;
00319 UT_uint32 m_iHeadersCount;
00320 UT_uint32 m_iHeadersStart;
00321 UT_uint32 m_iHeadersEnd;
00322 UT_uint32 m_iCurrentHeader;
00323 bool m_bInHeaders;
00324 UT_uint32 m_iCurrentSectId;
00325 UT_uint32 m_iAnnotationsStart;
00326 UT_uint32 m_iAnnotationsEnd;
00327 UT_uint32 m_iMacrosStart;
00328 UT_uint32 m_iMacrosEnd;
00329 UT_uint32 m_iTextStart;
00330 UT_uint32 m_iTextEnd;
00331 bool m_bPageBreakPending;
00332 bool m_bLineBreakPending;
00333 UT_NumberVector m_vListIdMap;
00334 bool m_bSymbolFont;
00335 UT_Dimension m_dim;
00336 UT_sint32 m_iLeft;
00337 UT_sint32 m_iRight;
00338 UT_uint32 m_iTextboxesStart;
00339 UT_uint32 m_iTextboxesEnd;
00340 UT_sint32 m_iNextTextbox;
00341 UT_uint32 m_iPrevHeaderPosition;
00342 bool m_bEvenOddHeaders;
00343
    /* NOTE(review): m_bInTOC carries a "b" (bool) prefix but is declared
     * UT_sint32 -- presumably used as a counter rather than a flag;
     * confirm before treating it as a boolean. */
00344 UT_sint32 m_bInTOC;
00345 bool m_bTOCsupported;
00346 bool m_bInTextboxes;
00347 pf_Frag * m_pTextboxEndSection;
00348 UT_GenericVector<textboxPos *> m_vecTextboxPos;
00349 UT_sint32 m_iLeftCellPos;
00350 UT_uint32 m_iLastAppendedHeader;
00351 };
00352
00353 #endif