• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ie_imp_Text.h

Go to the documentation of this file.
00001 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
00002 
00003 /* AbiWord
00004  * Copyright (C) 1998 AbiSource, Inc.
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License
00008  * as published by the Free Software Foundation; either version 2
00009  * of the License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00019  * 02110-1301 USA.
00020  */
00021 
00022 
00023 #ifndef IE_IMP_TEXT_H
00024 #define IE_IMP_TEXT_H
00025 
00026 #include <stdio.h>
00027 #include "ie_imp.h"
00028 #include "ut_mbtowc.h"
00029 #include "pd_Document.h"
00030 
00031 class pf_Frag_Strux; // forward declaration; this header only uses it through a pointer (IE_Imp_Text::m_pBlock)
00032 
00033 // Abstract base of the importer's input streams; the concrete stream class can be File or Clipboard.
00034 // Derived classes implement _getByte(); clients use init(), getChar() and peekChar().
00035 class ABI_EXPORT ImportStream
00036 {
00037  public:
00038     ImportStream();
00039     virtual ~ImportStream();
00040     bool init(const char *szEncoding); // takes an encoding name; NOTE(review): presumably configures m_Mbtowc -- confirm in the .cpp
00041     bool getChar(UT_UCSChar &b); // character is delivered through b; NOTE(review): bool presumably reports success vs. end of input -- confirm
00042     UT_UCSChar peekChar() { return m_ucsLookAhead; } // returns the stored look-ahead character; changes no state
00043  protected:
00044     virtual bool _getByte(unsigned char &b) = 0; // pure virtual: every concrete stream must provide a byte through b
00045     virtual bool getRawChar(UT_UCSChar &b); // overridable; NOTE(review): presumably the unconverted read path (see m_bRaw) -- confirm
00046 // Inline accessors giving derived classes access to the private EOF flag and look-ahead character.
00047     bool _get_eof () const { return m_bEOF; }
00048     void _set_eof (bool b) { m_bEOF = b; }
00049     UT_UCSChar _lookAhead () const { return m_ucsLookAhead; }
00050     void _lookAhead ( UT_UCSChar c ) { m_ucsLookAhead = c; } // setter overload of _lookAhead()
00051 
00052  private:
00053     UT_UCS4_mbtowc m_Mbtowc; // converter object; NOTE(review): type presumably declared in ut_mbtowc.h
00054     UT_UCSChar m_ucsLookAhead; // value returned by peekChar() and _lookAhead()
00055     bool m_bEOF; // flag read/written through _get_eof() / _set_eof()
00056     bool m_bRaw; // no accessor in this header; NOTE(review): presumably selects the getRawChar() path -- confirm
00057 };
00058 
00059 // File stream class: the ImportStream variant constructed from a GsfInput.
00060 // NOTE(review): whether this class owns or closes m_pFile is not visible in this header.
00061 class ABI_EXPORT ImportStreamFile : public ImportStream
00062 {
00063 public:
00064     ImportStreamFile(GsfInput *pFile); // single-argument and not explicit, so GsfInput* converts implicitly
00065     ~ImportStreamFile();
00066     bool getChar(); // NOTE(review): hides ImportStream::getChar(UT_UCSChar&) for calls made through this type; no definition visible here -- confirm it is needed
00067 protected:
00068     bool _getByte(unsigned char &b); // implements the pure virtual ImportStream::_getByte
00069 private:
00070     GsfInput *m_pFile; // NOTE(review): presumably the pointer passed to the constructor -- confirm
00071 };
00072 
00073 // Clipboard stream class: the ImportStream variant constructed from an in-memory byte buffer.
00074 // NOTE(review): the buffer is held through const pointers; lifetime/ownership is not visible in this header.
00075 class ABI_EXPORT ImportStreamClipboard : public ImportStream
00076 {
00077 public:
00078     ImportStreamClipboard(const unsigned char *pClipboard, UT_uint32 iLength); // buffer pointer plus length; NOTE(review): iLength presumably in bytes
00079     ~ImportStreamClipboard();
00080     //  bool getChar();  (declaration disabled, so the base-class getChar(UT_UCSChar&) stays visible here)
00081 protected:
00082     bool _getByte(unsigned char &b); // implements the pure virtual ImportStream::_getByte
00083 private:
00084     const unsigned char *m_p; // NOTE(review): presumably the current read position -- confirm
00085     const unsigned char *m_pEnd; // NOTE(review): presumably pClipboard + iLength (end of buffer) -- confirm
00086 };
00087 
00088 // Sniffer for the importer/reader for Plain Text Files.
00089 // Declares the confidence queries, dialog labels and importer construction for IE_Imp_Text.
00090 class ABI_EXPORT IE_Imp_Text_Sniffer : public IE_ImpSniffer
00091 {
00092     friend class IE_Imp; // friends may use the protected detection helpers and UCS2_Endian below
00093     friend class IE_Imp_Text;
00094 
00095 public:
00096     IE_Imp_Text_Sniffer();
00097     virtual ~IE_Imp_Text_Sniffer();
00098 
00099     virtual const IE_SuffixConfidence * getSuffixConfidence ();
00100     virtual const IE_MimeConfidence * getMimeConfidence ();
00101     virtual UT_Confidence_t recognizeContents (const char * szBuf,
00102                                     UT_uint32 iNumbytes); // NOTE(review): iNumbytes presumably the length of szBuf in bytes
00103     const char * recognizeContentsType (const char * szBuf,
00104                                     UT_uint32 iNumbytes); // non-virtual, unlike its neighbours; NOTE(review): meaning of the returned string not visible here
00105     virtual bool getDlgLabels (const char ** szDesc,
00106                                const char ** szSuffixList,
00107                                IEFileType * ft); // all three parameters are pointers to caller-provided storage (out-parameters by type)
00108     virtual UT_Error constructImporter (PD_Document * pDocument,
00109                                         IE_Imp ** ppie); // NOTE(review): presumably returns the new importer through *ppie -- confirm
00110 
00111 protected:
00112     enum UCS2_Endian { UE_BigEnd = -1, UE_NotUCS = 0, UE_LittleEnd }; // UE_LittleEnd is implicitly 1
00113 
00114     static bool _recognizeUTF8 (const char * szBuf,
00115                                 UT_uint32 iNumbytes);
00116     static UCS2_Endian _recognizeUCS2 (const char * szBuf,
00117                                        UT_uint32 iNumbytes,
00118                                        bool bDeep); // NOTE(review): effect of bDeep is not visible in this header
00119 };
00120 
00121 // Sniffer for the importer/reader for Plain Text Files with selectable encoding.
00122 // Same public interface shape as IE_Imp_Text_Sniffer, except getMimeConfidence() is inline and returns NULL.
00123 class ABI_EXPORT IE_Imp_EncodedText_Sniffer : public IE_ImpSniffer
00124 {
00125     friend class IE_Imp;
00126     friend class IE_Imp_Text;
00127 
00128 public:
00129     IE_Imp_EncodedText_Sniffer();
00130     virtual ~IE_Imp_EncodedText_Sniffer();
00131 
00132     virtual const IE_SuffixConfidence * getSuffixConfidence ();
00133     virtual const IE_MimeConfidence * getMimeConfidence () { return NULL; } // always NULL: this sniffer provides no MIME confidence data
00134 
00135     virtual UT_Confidence_t recognizeContents (const char * szBuf,
00136                         UT_uint32 iNumbytes); // NOTE(review): iNumbytes presumably the length of szBuf in bytes
00137     virtual bool getDlgLabels (const char ** szDesc,
00138                                const char ** szSuffixList,
00139                                IEFileType * ft);
00140     virtual UT_Error constructImporter (PD_Document * pDocument,
00141                                         IE_Imp ** ppie); // NOTE(review): presumably returns the new importer through *ppie -- confirm
00142 
00143 protected: // no protected members are declared
00144 };
00145 // Plain-text importer class (derives from IE_Imp); both sniffer classes in this header declare it a friend.
00146 class ABI_EXPORT IE_Imp_Text : public IE_Imp
00147 {
00148 public:
00149     IE_Imp_Text(PD_Document * pDocument, bool bEncoded=false); // callable with just the document (bEncoded defaults to false); not explicit
00150     IE_Imp_Text(PD_Document * pDocument, const char * encoding); // variant taking an encoding name
00151     virtual ~IE_Imp_Text();
00152 
00153     virtual bool        pasteFromBuffer(PD_DocumentRange * pDocRange,
00154                                         const unsigned char * pData, UT_uint32 lenData, const char * szEncoding = 0); // szEncoding defaults to a null pointer
00155 
00156 protected:
00157     virtual UT_Error    _loadFile(GsfInput * fp);
00158     UT_Error            _recognizeEncoding(GsfInput * fp); // overload taking the input object
00159     UT_Error            _recognizeEncoding(const char *szBuf, UT_uint32 iNumbytes); // overload taking a buffer and a count
00160     virtual UT_Error    _constructStream(ImportStream *& pStream, GsfInput * fp); // pStream is a reference to a pointer, so the callee can set the caller's pointer
00161     UT_Error            _writeHeader(GsfInput * fp);
00162     UT_Error            _parseStream(ImportStream * pStream);
00163     bool                _doEncodingDialog(const char *szEncoding);
00164     void                _setEncoding(const char *szEncoding);
00165 
00166     bool _insertBlock ();
00167     bool _insertSpan (UT_GrowBuf &b);
00168 
00169  private:
00170     const char *    m_szEncoding; // NOTE(review): ownership of the pointed-to string is not visible in this header
00171     bool m_bExplicitlySetEncoding;
00172     bool            m_bIsEncoded;
00173     bool            m_bIs16Bit;
00174     bool            m_bUseBOM;
00175     bool            m_bBigEndian;
00176     bool            m_bBlockDirectionPending;
00177     bool            m_bFirstBlockData;
00178     pf_Frag_Strux * m_pBlock; // pointer to the forward-declared pf_Frag_Strux; NOTE(review): presumably the block being filled -- confirm
00179 };
00180 
00181 #endif /* IE_IMP_TEXT_H */

Generated on Sun Feb 14 2021 for AbiWord by  doxygen 1.7.1