/* AbiWord
 * Copyright (C) 2003 Tomas Frydrych <tomas@frydrych.uklinux.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA.
 */

#ifndef UT_ITERATOR_H
#define UT_ITERATOR_H

//
// The UT_TextIterator class is an abstraction of a text iterator,
// making it possible to iterate sequentially over textual data
// without having to know anything about how that data might be
// stored.
//
// This class is pure virtual; its sole purpose is to define a generic
// interface so that we can pass a generic type into and out of
// functions. For an example of an implementation see
// pd_Iterator.h/cpp
//
// Notes on implementation
// -----------------------
// Any derived classes should implement the individual functions to
// conform to the behaviour outlined in the comments in the class
// definition below.
//
// In addition, the actual iterator implementations should provide a
// mechanism allowing to restrict upper and lower bounds (either at
// construction or subsequently), so that when passing iterators into
// functions it is not necessary to pass a length parameter with
// them. For example, PD_StruxIterator can provide access to the
// entire document from the start of the strux onwards; we might want
// to restrict this to the part that only belongs to a particular
// TextRun, etc.
//
// Notes on use
// ------------
// When passing iterators into functions, the iterator should be set
// at the position where processing is to start, i.e., the user is
// not expected to reposition the iterator before commencing
// processing. Also, the upper boundary should be restricted
// appropriately to indicate where the processing is to stop; this is
// preferable to passing an extra length parameter.
//
// Tomas, November, 2003
//

// The following are values that the getChar() function can fall back
// on when things are not entirely right ...
//
// UT_IT_NOT_CHARACTER: when at the current position we have
//                      something other than text (image, etc)
//
// UT_IT_ERROR: when things are really not going as they should
//              NB: this is just to have something to fall back on,
//              not an error reporting mechanism; for that see
//              getStatus() below
//
// NOTE(review): UCS_SPACE is not defined in this header; presumably it
// is supplied by ut_types.h, included just below -- confirm. Being
// macros, these are only expanded at the point of use, so defining
// them ahead of the #include is harmless.
#define UT_IT_NOT_CHARACTER UCS_SPACE
#define UT_IT_ERROR 0xffffffff

#include "ut_types.h"

// Forward declarations. None of these types is referenced by the
// declarations in this header.
// NOTE(review): presumably kept for the benefit of files that include
// this header -- confirm before removing.
class PD_Document;
class pt_PieceTable;
class pf_Frag;

//
// The following enum defines possible iterator states:
//
// OK: need I say more?
//
// OutOfBounds: last positioning operation took the iterator
//              out of bounds; this error state is recoverable
//              by using the indexing operator [], or calling
//              setPosition() but the use of relative increment
//              operators (++, --, +=, -=) in this state will
//              lead to undefined results.
//
// Error: any other error; this state is irrecoverable, clean up
//        and go home
//
enum UTIterStatus
{
	UTIter_OK,          // no error
	UTIter_OutOfBounds, // last positioning operation left the valid range (recoverable)
	UTIter_Error        // any other error (irrecoverable)
};


// Abstract text-iterator interface: every member except the (empty)
// virtual destructor is pure virtual, so the class carries no state
// or behaviour of its own and must be implemented by a derived class.
//
// NB: the order of the virtual declarations below should not be
// changed casually -- the class is exported (ABI_EXPORT) and
// implemented elsewhere.
class ABI_EXPORT UT_TextIterator
{
  public:
	// virtual, so that deleting an implementation through a
	// UT_TextIterator pointer runs the derived destructor
	virtual ~UT_TextIterator() {}

	// data accessor; retrieves the character at the present position
	//
	// NB: I.getChar() is functionally equivalent to I[I.getPosition()]
	//
	// (see UT_IT_NOT_CHARACTER and UT_IT_ERROR for the fall-back
	// values that can be returned when there is no valid character)
	//
	virtual UT_UCS4Char getChar() = 0;

	// position accessor; returns a value representing the current
	// position
	//
	// NB: The position can be expressed in an arbitrary coordinate
	// system, typically one that makes sense to the actual
	// implementation; when an iterator is passed into a function, the
	// starting position might not be 0.
	//
	virtual UT_uint32 getPosition() const = 0;

	// moves the iterator to position pos
	//
	virtual void setPosition(UT_uint32 pos) = 0;

	// set and retrieve the upper bound
	//
	virtual void setUpperLimit(UT_uint32 maxpos) = 0;
	virtual UT_uint32 getUpperLimit() const = 0;

	// returns the current state of the iterator (see the definition
	// of UTIterStatus above)
	//
	virtual UTIterStatus getStatus() const = 0;

	// finds the first occurrence of the given string, looking in the
	// direction indicated by bForward;
	// failure is indicated through getStatus() == UTIter_OutOfBounds
	//
	// NOTE(review): neither the meaning of the return value nor that
	// of iLen is specified here; presumably the return value is the
	// position of the match and iLen the number of characters of
	// 'what' / 'text' to match -- confirm against an implementation.
	//
	virtual UT_uint32 find(UT_UCS4Char * what, UT_uint32 iLen, bool bForward = true) = 0;
	virtual UT_uint32 find(UT_TextIterator & text, UT_uint32 iLen, bool bForward = true) = 0;

	// makes a copy of the iterator in its present state
	//
	// NOTE(review): ownership of the returned object is not stated;
	// presumably the caller is responsible for deleting it -- confirm.
	//
	virtual UT_TextIterator * makeCopy() const = 0;

	// increment operators
	//
	// NB: We intentionally define prefix operators only, as post-fix
	// versions provide no real advantage, and are less efficient
	//
	// NB: using these while getStatus() == UTIter_OutOfBounds leads
	// to undefined results; recover with setPosition() or [] first.
	//
	// NOTE(review): the argument of += and -= is signed; the effect
	// of a negative value is not specified here -- confirm.
	//
	virtual UT_TextIterator & operator ++ () = 0;
	virtual UT_TextIterator & operator -- () = 0;
	virtual UT_TextIterator & operator += (UT_sint32 i) = 0;
	virtual UT_TextIterator & operator -= (UT_sint32 i) = 0;

	// subscript operator []; repositions the iterator and returns
	// the character at the new position
	//
	// NB(1): the operator physically advances the iterator to
	// position pos before returning, i.e.,
	//
	//     UT_UCS4Char c = I[p];
	//
	// and
	//
	//     I.setPosition(p);
	//     UT_UCS4Char c = I.getChar();
	//
	// are exactly equivalent, leaving the iterator in the same state
	//
	// NB(2): if passed an iterator as an argument in a function, you
	// need to know the initial position to use this operator for
	// processing which is relative to the state of the iterator when
	// passed to you, i.e., f1() and f2() below do exactly the same
	// thing, f3() does not.
	//
	// void f1(UT_TextIterator & I, UT_uint32 len)
	// {
	//     UT_uint32 pos = I.getPosition();
	//
	//     for(UT_uint32 i = pos; i < len + pos; i++)
	//     {
	//         UT_UCS4Char c = I[i];
	//         // do something with c ...
	//     }
	// }
	//
	// void f2(UT_TextIterator & I, UT_uint32 len)
	// {
	//     for(UT_uint32 i = 0; i < len; ++i, ++I)
	//     {
	//         UT_UCS4Char c = I.getChar();
	//         // do something with c ...
	//     }
	// }
	//
	// In contrast, f3() will start at the leftmost edge of the
	// theoretical iterator range, which is probably not what you
	// want; the actual implementation of the iterator can in fact
	// restrict the valid range of the subscript to an arbitrary range
	// (i.e., I[0] may produce the OutOfBounds state).
	//
	// void f3(UT_TextIterator & I, UT_uint32 len)
	// {
	//     for(UT_uint32 i = 0; i < len; i++)
	//     {
	//         UT_UCS4Char c = I[i];
	//         // do something with c ...
	//     }
	// }
	//
	// Bottom Line: unless told otherwise, assume that
	//              processing is to start from I.getPosition(), not 0.
	//
	virtual UT_UCS4Char operator [](UT_uint32 pos) = 0;

};


#endif // UT_ITERATOR_H