• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ut_stringbuf.h

Go to the documentation of this file.
00001 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
00002 
00003 // ut_stringbuf.h
00004 //
00005 #ifndef UT_STRINGBUF_H
00006 #define UT_STRINGBUF_H
00007 
00008 //
00009 // Copyright (C) 2001 Mike Nordell <tamlin@algonet.se>
00010 // Copyright (c) 2007 Hubert Figuiere <hub@figuiere.net>
00011 //
00012 // This class is free software; you can redistribute it and/or
00013 // modify it under the terms of the GNU General Public License
00014 // as published by the Free Software Foundation; either version 2
00015 // of the License, or (at your option) any later version.
00016 //
00017 // This class is distributed in the hope that it will be useful,
00018 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 // GNU General Public License for more details.
00021 //
00022 // You should have received a copy of the GNU General Public License
00023 // along with this program; if not, write to the Free Software
00024 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
00025 // 02110-1301 USA.
00026 //
00027 
00028 #include <stdlib.h> // size_t
00029 
00030 #include <string>
00031 #include <algorithm>
00032 
00033 /* pre-emptive dismissal; ut_types.h is needed by just about everything,
00034  * so even if it's commented out in-file that's still a lot of work for
00035  * the preprocessor to do...
00036  */
00037 #ifndef UT_TYPES_H
00038 #include "ut_types.h"
00039 #endif
00040 #include "ut_assert.h"
00041 #include "ut_unicode.h"
00042 
00044 
// Growth factor used by UT_StringImpl::grow_common(): when a buffer has to be
// reallocated, the new capacity is at least the current size() times this value.
#define g_rGrowBy 1.5f
00046 
00047 
// Growable, zero-terminated buffer of char_type units.
//
// The buffer is owned by the object (allocated with new[], released with
// delete[]).  An object that has never been assigned to, or that has been
// clear()ed, owns no storage at all: data() is null and capacity() is 0.
template <typename char_type>
class UT_StringImpl
{
public:
    // Construction / destruction.
    UT_StringImpl();
    UT_StringImpl(const UT_StringImpl& rhs);
    UT_StringImpl(const char_type* sz, size_t n);       // first n units of sz
    UT_StringImpl(const std::basic_string<char_type>& s);
    ~UT_StringImpl();

    // Note: returns void, so assignments cannot be chained.
    void        operator=(const UT_StringImpl& rhs);

    // Content modification; n is always a count of char_type units.
    void        assign(const char_type* sz, size_t n);
    void        append(const char_type* sz, size_t n);
    void        append(const UT_StringImpl& rhs);

    void        swap(UT_StringImpl& rhs);
    void        clear();                // releases all storage
    void        reserve(size_t n);      // room for n units plus terminator

    // Observers.
    bool                empty()     const { return m_pEnd == m_psz; }
    size_t              size()      const { return static_cast<size_t>(m_pEnd - m_psz); }
    size_t              capacity()  const { return m_size; }    // allocated units, terminator slot included
    const char_type*    data()      const { return m_psz; }
    char_type*          data()            { return m_psz; }
    // The generic version (defined in this header) only asserts and returns "".
    const char*         utf8_data();

private:
    // Reallocation helpers; all of them funnel into grow_common().
    void    grow_nocopy(size_t n);
    void    grow_copy(size_t n);
    void    grow_common(size_t n, bool bCopy);

    // Null-tolerant raw copy of n char_type units.
    static void copy(char_type* pDest, const char_type* pSrc, size_t n);

    // Declaration order matters: the constructors initialise m_pEnd from m_psz.
    char_type*  m_psz;          // start of the buffer (null when no storage)
    char_type*  m_pEnd;         // one past the last character, i.e. the terminator
    size_t      m_size;         // allocated units
    char*       m_utf8string;   // owned, released with delete[]; never set in this header
};
00088 
00089 
00090 class UT_UTF8String;
00091 
00092 class ABI_EXPORT UT_UTF8Stringbuf
00093 {
00094 public:
00095     typedef UT_UCSChar   UCS2Char;
00096     typedef unsigned int UCS4Char;
00097 
00098     static UCS4Char charCode (const char * str);
00099 
00100     UT_UTF8Stringbuf ();
00101     UT_UTF8Stringbuf (const UT_UTF8Stringbuf & rhs);
00102     UT_UTF8Stringbuf (const char * sz, size_t n = 0 /* 0 == null-termination */);
00103 
00104     ~UT_UTF8Stringbuf ();
00105 
00106     void        operator=(const UT_UTF8Stringbuf & rhs);
00107 
00108     void        assign (const char * sz, size_t n = 0 /* 0 == null-termination */);
00109     void        append (const char * sz, size_t n = 0 /* 0 == null-termination */);
00110     void        append (const UT_UTF8Stringbuf & rhs);
00111 
00112     void        appendUCS2 (const UT_UCS2Char * sz, size_t n /* == 0 => null-termination */);
00113     void        appendUCS4 (const UT_UCS4Char * sz, size_t n /* == 0 => null-termination */);
00114 
00115     void        escape (const UT_UTF8String & str1,
00116                         const UT_UTF8String & str2);  // replaces <str1> with <str2> in the current string
00117     void        escapeXML ();  // escapes '<', '>', '"', & '&' in the current string
00118     void        decodeXML ();  // unescapes '<', '>', '"', & '&' in the current string
00119     void        escapeMIME (); // translates the current string to
00120                                // MIME "quoted-printable" format
00121     void        escapeURL ();  // makes string conform to RFC 1738
00122     void        decodeURL ();
00123 
00124     void        clear ();
00125     void        reserve(size_t n);
00126 
00127     bool        empty ()    const { return m_psz == m_pEnd; }
00128     size_t      byteLength ()   const { return m_pEnd - m_psz; }
00129     size_t      utf8Length ()   const { return m_strlen; }
00130     const char *    data ()     const { return m_psz; }
00131 
00132     class ABI_EXPORT UTF8Iterator
00133     {
00134     public:
00135         UTF8Iterator (const UT_UTF8Stringbuf * strbuf);
00136         ~UTF8Iterator ();
00137 
00138         void operator=(const char * position);
00139 
00140         UTF8Iterator & operator++() { advance (); return *this; } // prefix operators
00141         UTF8Iterator & operator--() { retreat (); return *this; }
00142 
00143         const char * current (); // return 0 if current position is invalid
00144         const char * start ();   // return 0 if no string exists
00145         const char * end ();     // return 0 if no string exists
00146         const char * advance (); // return 0 if unable to advance
00147         const char * retreat (); // return 0 if unable to retreat
00148 
00149     private:
00150         const UT_UTF8Stringbuf * m_strbuf;
00151 
00152         const char * m_utfbuf;
00153         const char * m_utfptr;
00154 
00155         bool sync ();
00156     };
00157 
00158 private:
00159     void    insert (char *& ptr, const char * str, size_t utf8length);
00160 
00161     char *  m_psz;
00162     char *  m_pEnd;
00163     size_t  m_strlen;
00164     size_t  m_buflen;
00165 
00166     bool    grow (size_t length);
00167 };
00168 
00169 
00170 
00172 //
00173 //  Generic string implementation
00174 //
00175 //  String is built of char_type units
00176 //  Encoding could be any single-byte or multi-byte encoding
00177 //
00179 
00180 template <typename char_type>
00181 UT_StringImpl<char_type>::UT_StringImpl()
00182     :   m_psz(0),
00183         m_pEnd(0),
00184         m_size(0),
00185         m_utf8string(0)
00186 {
00187 }
00188 
00189 template <typename char_type>
00190 UT_StringImpl<char_type>::UT_StringImpl(const UT_StringImpl<char_type>& rhs)
00191     :   m_psz(new char_type[rhs.capacity()]),
00192         m_pEnd(m_psz + rhs.size()),
00193         m_size(rhs.capacity()),
00194         m_utf8string(0)
00195 {
00196     copy(m_psz, rhs.m_psz, rhs.capacity());
00197 }
00198 
00199 template <typename char_type>
00200 UT_StringImpl<char_type>::UT_StringImpl(const char_type* sz, size_t n)
00201 :   m_psz(new char_type[n+1]),
00202     m_pEnd(m_psz + n),
00203     m_size(n+1),
00204     m_utf8string(0)
00205 {
00206     copy(m_psz, sz, n);
00207     m_psz[n] = 0;
00208 }
00209 
00210 template <typename char_type>
00211 UT_StringImpl<char_type>::UT_StringImpl(const std::basic_string<char_type> &s)
00212 :   m_psz(new char_type[s.size()+1]),
00213     m_pEnd(m_psz + s.size()),
00214     m_size(s.size()+1),
00215     m_utf8string(0)
00216 {
00217     // string is terminated here, so we know
00218     strcpy(m_psz, s.c_str());
00219 }
00220 
00221 
00222 template <typename char_type>
00223 UT_StringImpl<char_type>::~UT_StringImpl()
00224 {
00225     clear();
00226 }
00227 
00228 
00229 template <typename char_type>
00230 void UT_StringImpl<char_type>::operator=(const UT_StringImpl<char_type>& rhs)
00231 {
00232     if (this != &rhs)
00233     {
00234         clear();
00235         assign(rhs.m_psz, rhs.size());
00236     }
00237 }
00238 
00239 template <typename char_type>
00240 void UT_StringImpl<char_type>::assign(const char_type* sz, size_t n)
00241 {
00242     if (n)
00243     {
00244         if (n >= capacity())
00245         {
00246             grow_nocopy(n);
00247         }
00248         copy(m_psz, sz, n);
00249         m_psz[n] = 0;
00250         m_pEnd = m_psz + n;
00251         delete[] m_utf8string;
00252         m_utf8string = 0;
00253     } else {
00254         clear();
00255     }
00256 }
00257 
00258 template <typename char_type>
00259 void UT_StringImpl<char_type>::append(const char_type* sz, size_t n)
00260 {
00261     if (!n)
00262     {
00263         return;
00264     }
00265     if (!capacity())
00266     {
00267         assign(sz, n);
00268         return;
00269     }
00270     const size_t nLen = size();
00271     grow_copy(nLen + n);
00272     copy(m_psz + nLen, sz, n);
00273     m_psz[nLen + n] = 0;
00274     m_pEnd += n;
00275 }
00276 
00277 template <typename char_type>
00278 void UT_StringImpl<char_type>::append(const UT_StringImpl<char_type>& rhs)
00279 {
00280     append(rhs.m_psz, rhs.size());
00281 }
00282 
00283 template <typename char_type>
00284 void UT_StringImpl<char_type>::swap(UT_StringImpl<char_type>& rhs)
00285 {
00286     std::swap(m_psz , rhs.m_psz );
00287     std::swap(m_pEnd, rhs.m_pEnd);
00288     std::swap(m_size, rhs.m_size);
00289     std::swap(m_utf8string, rhs.m_utf8string);
00290 }
00291 
00292 template <typename char_type>
00293 void UT_StringImpl<char_type>::clear()
00294 {
00295     if (m_psz)
00296     {
00297         delete[] m_psz;
00298         m_psz = 0;
00299         m_pEnd = 0;
00300         m_size = 0;
00301     }
00302     if(m_utf8string) {
00303         delete[] m_utf8string;
00304         m_utf8string = 0;
00305     }
00306 }
00307 
00308 template <typename char_type>
00309 void UT_StringImpl<char_type>::reserve(size_t n)
00310 {
00311     grow_nocopy(n);
00312 }
00313 
00314 
00315 template <typename char_type>
00316 const char* UT_StringImpl<char_type>::utf8_data()
00317 {
00318     UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
00319     return "";
00320 }
00321 
00322 
00323 template <typename char_type>
00324 void UT_StringImpl<char_type>::grow_nocopy(size_t n)
00325 {
00326     grow_common(n, false);
00327 }
00328 
00329 template <typename char_type>
00330 void UT_StringImpl<char_type>::grow_copy(size_t n)
00331 {
00332     grow_common(n, true);
00333 }
00334 
00335 template <typename char_type>
00336 void UT_StringImpl<char_type>::grow_common(size_t n, bool bCopy)
00337 {
00338     ++n;    // allow for zero termination
00339     if (n > capacity())
00340     {
00341         const size_t nCurSize = size();
00342         n = std::max(n, static_cast<size_t>(nCurSize * g_rGrowBy));
00343         char_type* pNew = new char_type[n];
00344         if (bCopy && m_psz)
00345         {
00346             copy(pNew, m_psz, size() + 1);
00347         }
00348         delete[] m_psz;
00349         m_psz  = pNew;
00350         m_pEnd = m_psz + nCurSize;
00351         m_size = n;
00352         delete[] m_utf8string;
00353         m_utf8string = 0;
00354     }
00355 }
00356 
00357 template <typename char_type>
00358 void UT_StringImpl<char_type>::copy(char_type* pDest, const char_type* pSrc, size_t n)
00359 {
00360     if (pDest && pSrc && n)
00361         memcpy(pDest, pSrc, n * sizeof(char_type));
00362 }
00363 
00364 
00365 #endif  // UT_STRINGBUF_H

Generated on Sun Feb 14 2021 for AbiWord by  doxygen 1.7.1