• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ut_TextIterator.h

Go to the documentation of this file.
00001 /* AbiWord
00002  * Copyright (C) 2003 Tomas Frydrych <tomas@frydrych.uklinux.net>
00003  *
00004  * This program is free software; you can redistribute it and/or
00005  * modify it under the terms of the GNU General Public License
00006  * as published by the Free Software Foundation; either version 2
00007  * of the License, or (at your option) any later version.
00008  *
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software
00016  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00017  * 02110-1301 USA.
00018  */
00019 
00020 #ifndef UT_ITERATOR_H
00021 #define UT_ITERATOR_H
00022 
00024 //
00025 //  UT_TextIterator class is an abstraction of a text iterator, making
00026 //  it possible to iterate sequentially over textual data without
00027 //  having to know anything about how that data might be stored.
00028 //
00029 //  This class is pure virtual, its sole purpose is to define generic
00030 //  interface so that we can pass a generic type into and out of
00031 //  functions. For example of implementation see pd_Iterator.h/cpp
00032 //
00033 //  Notes on implementation
00034 //  -----------------------
00035 //  Any derived classes should implement the individual functions to
00036 //  conform to the behaviour outlined in the comments in the class
00037 //  definition below.
00038 //
00039 //  In addition, the actual iterator implementations should provide a
00040 //  mechanism allowing to restrict upper and lower bounds (either at
00041 //  construction or subsequently), so that when passing iterators into
00042 //  functions it is not necessary to pass with them a length
00043 //  parameter. For example, PD_StruxIterator can provide access to the
00044 //  entire document from the start of the strux onwards; we might want
00045 //  to restrict this to the part that only belongs to a particular
00046 //  TextRun, etc.
00047 //
00048 //  Notes on use
00049 //  ------------
00050 //  When passing iterators into functions, the iterator should be set
00051 //  at the position where processing is to start, i.e., the user is
00052 //  not expected to reposition the iterator before commencing
00053 //  processing. Also, the upper boundary should be restricted
00054 //  appropriately to indicate where the processing is to stop; this is
00055 //  preferable to passing an extra length parameter.
00056 //
00057 //  Tomas, November, 2003
00058 //
00059 
00061 // the following are values that the getChar() function can fall back
00062 // on when things are not entirely right ...
00063 //
00064 //    UT_IT_NOT_CHARACTER: when at the current position we have
00065 //                         something other than text (image, etc)
00066 //
00067 //    UT_IT_ERROR: when things are really not going as they should
00068 //                 NB: this is just to have something to fall back on,
00069 //                 not an error reporting mechanism; for that see
00070 //                 getStatus() below
00071 #define UT_IT_NOT_CHARACTER UCS_SPACE
00072 #define UT_IT_ERROR 0xffffffff
00073 
00074 #include "ut_types.h"
00075 
00076 class PD_Document;
00077 class pt_PieceTable;
00078 class pf_Frag;
00079 
00081 //
00082 // The following enum defines possible iterator states:
00083 //
00084 //     OK: need I say more?
00085 //
00086 //     OutOfBounds: last positioning operation took the iterator
00087 //                  out of bounds; this error state is recoverable
00088 //                  by using the indexing operator [], or calling
00089 //                  setPosition() but the use of relative increment
00090 //                  operators (++, --, +=, -=) in this state will
00091 //                  lead to undefined results.
00092 //
00093 //     Error: any other error; this state is irrecoverable, clean up
00094 //            and go home
00095 //
00096 enum UTIterStatus
00097 {
00098     UTIter_OK,          // iterator is in a usable state
00099     UTIter_OutOfBounds, // last positioning went out of bounds; recoverable via [] or setPosition()
00100     UTIter_Error        // any other error; irrecoverable
00101 };
00102 
00103 
00104 class ABI_EXPORT UT_TextIterator
00105 {
00106   public:
00107     virtual ~UT_TextIterator() {}
00108 
00110     // data accessor; retrieves character at present position
00111     //
00112     // NB: I.getChar() is functionally equivalent to I[I.getPosition()]
00113     //
00114     virtual UT_UCS4Char getChar() = 0;
00115 
00117     // position accessor; returns a value representing current position
00118     //
00119     // NB: The position can be expressed in an arbitrary coordinate
00120     // system, typically one that makes sense to the actual
00121     // implementation; when an iterator is passed into a function, the
00122     // starting position might not be 0.
00123     //
00124     virtual UT_uint32   getPosition() const = 0;
00125 
00127     // moves iterator to position pos
00128     //
00129     virtual void setPosition(UT_uint32 pos) = 0;
00130 
00132     // set and retrieve upper bounds
00133     //
00134     virtual void      setUpperLimit(UT_uint32 maxpos) = 0;
00135     virtual UT_uint32 getUpperLimit() const = 0;
00136 
00138     // returns the current state of the iterator (see definition of
00139     // UTIterStatus above)
00140     //
00141     virtual UTIterStatus getStatus() const = 0;
00142 
00144     // finds first occurrence of given string, looking in direction
00145     // indicated by bForward
00146     // failure is indicated through getStatus() == UTIter_OutOfBounds;
00147     //
00148     virtual UT_uint32 find(UT_UCS4Char * what, UT_uint32 iLen, bool bForward = true) = 0;
00149     virtual UT_uint32 find(UT_TextIterator & text, UT_uint32 iLen, bool bForward = true) = 0;
00150 
00152     // makes a copy of the iterator in its present state
00153     //
00154     virtual UT_TextIterator * makeCopy() const = 0;
00155 
00157     // increment operators
00158     //
00159     // NB: We intentionally define prefix operators only, as postfix
00160     // versions provide no real advantage, and are less efficient
00161     //
00162     virtual UT_TextIterator & operator ++ () = 0;
00163     virtual UT_TextIterator & operator -- () = 0;
00164     virtual UT_TextIterator & operator += (UT_sint32 i) = 0;
00165     virtual UT_TextIterator & operator -= (UT_sint32 i) = 0;
00166 
00168     // subscript operator []; repositions iterator and returns
00169     // character at new position
00170     //
00171     // NB(1): the operator physically advances the iterator to position
00172     // pos before returning, i.e.,
00173     //
00174     //     UT_UCS4Char c = I[p];
00175     //
00176     // and
00177     //
00178     //     I.setPosition(p);
00179     //     UT_UCS4Char c = I.getChar();
00180     //
00181     // are exactly equivalent, leaving the iterator in the same state
00182     //
00183     // NB(2): if passed an iterator as an argument in a function, you
00184     // need to know the initial position to use this operator for
00185     // processing which is relative to the state of iterator when
00186     // passed to you, i.e., f1() and f2() below do exactly the same
00187     // thing, f3() does not.
00188     //
00189     // function f1(UT_TextIterator & I, UT_uint32 len)
00190     // {
00191     //    UT_uint32 pos = I.getPosition();
00192     //
00193     //    for(UT_uint32 i = pos; i < len + pos; i++)
00194     //    {
00195     //       UT_UCS4Char c = I[i];
00196     //       // do something with c ...
00197     //    }
00198     // }
00199     //
00200     // function f2(UT_TextIterator & I, UT_uint32 len)
00201     // {
00202     //    for(UT_uint32 i = 0; i < len; ++i, ++I)
00203     //    {
00204     //       UT_UCS4Char c = I.getChar();
00205     //       // do something with c ...
00206     //    }
00207     // }
00208     //
00209     // In contrast, f3() will start at the leftmost edge of the
00210     // theoretical iterator range, which is probably not what you
00211     // want; the actual implementation of the iterator can in fact
00212     // restrict valid range of the subscript to an arbitrary range
00213     // (i.e., I[0] may produce OutOfBounds state).
00214     //
00215     // function f3(UT_TextIterator & I, UT_uint32 len)
00216     // {
00217     //    for(UT_uint32 i = 0; i < len; i++)
00218     //    {
00219     //       UT_UCS4Char c = I[i];
00220     //       // do something with c ...
00221     //    }
00222     // }
00223     //
00224     // Bottom Line: unless told otherwise, assume that
00225     // processing is to start from I.getPosition(), not 0.
00226     //
00227     virtual UT_UCS4Char   operator [](UT_uint32 pos) = 0;
00228 
00229 };
00230 
00231 
00232 #endif //UT_ITERATOR_H

Generated on Sun Feb 14 2021 for AbiWord by  doxygen 1.7.1