• Main Page
  • Related Pages
  • Namespaces
  • Classes
  • Files
  • File List
  • File Members

ut_stringbuf.h

Go to the documentation of this file.
00001 /* -*- mode: C++; tab-width: 4; c-basic-offset: 4; -*- */
00002 
00003 // ut_stringbuf.h
00004 //
00005 #ifndef UT_STRINGBUF_H
00006 #define UT_STRINGBUF_H
00007 
00008 //
00009 // Copyright (C) 2001 Mike Nordell <tamlin@algonet.se>
00010 // Copyright (c) 2007 Hubert Figuiere <hub@figuiere.net>
00011 //
00012 // This class is free software; you can redistribute it and/or
00013 // modify it under the terms of the GNU General Public License
00014 // as published by the Free Software Foundation; either version 2
00015 // of the License, or (at your option) any later version.
00016 //
00017 // This class is distributed in the hope that it will be useful,
00018 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 // GNU General Public License for more details.
00021 //
00022 // You should have received a copy of the GNU General Public License
00023 // along with this program; if not, write to the Free Software
00024 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
00025 // 02111-1307, USA.
00026 //
00027 
00028 #include <stdlib.h> // size_t
00029 
00030 #include <string>
00031 #include <algorithm>
00032 
00033 /* pre-emptive dismissal; ut_types.h is needed by just about everything,
00034  * so even if it's commented out in-file that's still a lot of work for
00035  * the preprocessor to do...
00036  */
00037 #ifndef UT_TYPES_H
00038 #include "ut_types.h"
00039 #endif
00040 #include "ut_assert.h"
00041 #include "ut_unicode.h"
00042 
00044 
// Growth factor used by UT_StringImpl::grow_common(): when the buffer must be
// reallocated, the new capacity is the larger of the requested size and the
// current string length multiplied by this factor.
#define g_rGrowBy 1.5f
00046 
00047 
// Heap-backed, zero-terminated string buffer made of char_type units.
//
// The buffer is described by three values: m_psz (start of the allocation),
// m_pEnd (one past the last character, i.e. where the terminator lives) and
// m_size (number of char_type slots allocated, terminator slot included).
// A default-constructed or cleared object owns no buffer at all: every
// pointer is null and the capacity is zero.
template <typename char_type>
class UT_StringImpl
{
public:
    // Creates an empty string without allocating.
    UT_StringImpl();
    // Deep copy: allocates a buffer with the same capacity as rhs.
    UT_StringImpl(const UT_StringImpl& rhs);
    // Copies n units from sz and appends a zero terminator.
    UT_StringImpl(const char_type* sz, size_t n);
    // Builds the string from a std::basic_string of the same unit type.
    UT_StringImpl(const std::basic_string<char_type>& s);
    // Releases every buffer owned by the object.
    ~UT_StringImpl();

    // Replaces the contents with a copy of rhs (self-assignment is ignored).
    // Note: returns void, so assignments cannot be chained.
    void        operator=(const UT_StringImpl& rhs);

    // Replaces the contents with n units read from sz; n == 0 clears.
    void        assign(const char_type* sz, size_t n);
    // Adds n units read from sz to the end of the string; n == 0 is a no-op.
    void        append(const char_type* sz, size_t n);
    // Adds the contents of rhs to the end of the string.
    void        append(const UT_StringImpl& rhs);

    // Exchanges all state (buffers included) with rhs.
    void        swap(UT_StringImpl& rhs);
    // Frees the buffers and returns to the empty, unallocated state.
    void        clear();
    // Makes sure the buffer has room for n units plus the terminator.
    void        reserve(size_t n);

    // True when the string holds no units.
    bool                empty()     const { return size() == 0; }
    // Number of units stored, terminator excluded.
    size_t              size()      const { return static_cast<size_t>(m_pEnd - m_psz); }
    // Number of units allocated, terminator slot included.
    size_t              capacity()  const { return m_size; }
    // Start of the buffer; null when nothing has been allocated.
    const char_type*    data()      const { return m_psz; }
    char_type*          data()            { return m_psz; }
    // UTF-8 view of the string. The generic definition in this header only
    // asserts and returns an empty string.
    const char*         utf8_data();

private:
    // Enlarge the buffer; the _nocopy flavour does not carry the old
    // contents over, the _copy flavour does.
    void    grow_nocopy(size_t n);
    void    grow_copy(size_t n);
    void    grow_common(size_t n, bool bCopy);

    // memcpy-based copy of n units; does nothing for null pointers or n == 0.
    static void copy(char_type* pDest, const char_type* pSrc, size_t n);

    char_type*  m_psz;          // start of the allocation (null when unallocated)
    char_type*  m_pEnd;         // one past the last stored unit
    size_t      m_size;         // allocated units, terminator slot included
    char*       m_utf8string;   // UTF-8 buffer; the code in this header only frees and resets it
};
00088 
00089 
// Forward declaration: only references to UT_UTF8String are needed below.
class UT_UTF8String;

// Growable buffer of UTF-8 bytes.
//
// Two lengths are tracked separately: the byte length (m_pEnd - m_psz) and
// m_strlen, which utf8Length() reports (presumably the number of UTF-8
// encoded characters -- confirm against the implementation file).
class ABI_EXPORT UT_UTF8Stringbuf
{
public:
    typedef UT_UCSChar   UCS2Char;
    typedef unsigned int UCS4Char;

    // Returns the character code found at str (implementation not in this header).
    static UCS4Char charCode (const char * str);

    UT_UTF8Stringbuf ();
    UT_UTF8Stringbuf (const UT_UTF8Stringbuf & rhs);
    UT_UTF8Stringbuf (const char * sz, size_t n = 0 /* 0 == null-termination */);

    ~UT_UTF8Stringbuf ();

    // Note: returns void, so assignments cannot be chained.
    void        operator=(const UT_UTF8Stringbuf & rhs);

    // For assign/append, n == 0 means "read sz up to its null terminator".
    void        assign (const char * sz, size_t n = 0 /* 0 == null-termination */);
    void        append (const char * sz, size_t n = 0 /* 0 == null-termination */);
    void        append (const UT_UTF8Stringbuf & rhs);

    // Append text given as UCS-2 / UCS-4 units; n has no default here and must
    // be passed explicitly (0 selects null-termination).
    void        appendUCS2 (const UT_UCS2Char * sz, size_t n /* == 0 => null-termination */);
    void        appendUCS4 (const UT_UCS4Char * sz, size_t n /* == 0 => null-termination */);

    void        escape (const UT_UTF8String & str1,
                        const UT_UTF8String & str2);  // replaces <str1> with <str2> in the current string
    void        escapeXML ();  // escapes '<', '>', '"', & '&' in the current string
    void        decodeXML ();  // unescapes '<', '>', '"', & '&' in the current string
    void        escapeMIME (); // translates the current string to
                               // MIME "quoted-printable" format
    void        escapeURL ();  // makes string conform to RFC 1738
    void        decodeURL ();  // presumably the inverse of escapeURL() -- confirm

    // Lower-cases the string; ownership of the returned pointer is not
    // visible from this header -- check the implementation before use.
    UT_UTF8Stringbuf * lowerCase ();

    void        clear ();
    void        reserve(size_t n);

    bool        empty ()    const { return m_psz == m_pEnd; }     // no bytes stored
    size_t      byteLength ()   const { return m_pEnd - m_psz; }  // length in bytes
    size_t      utf8Length ()   const { return m_strlen; }        // value of m_strlen
    const char *    data ()     const { return m_psz; }           // start of the buffer

    // Cursor over a UT_UTF8Stringbuf. The accessors return 0 instead of a
    // pointer when the requested position is not available.
    class ABI_EXPORT UTF8Iterator
    {
    public:
        UTF8Iterator (const UT_UTF8Stringbuf * strbuf);
        ~UTF8Iterator ();

        // Repositions the iterator at the given address.
        void operator=(const char * position);

        UTF8Iterator & operator++() { advance (); return *this; } // prefix operators
        UTF8Iterator & operator--() { retreat (); return *this; }

        const char * current (); // return 0 if current position is invalid
        const char * start ();   // return 0 if no string exists
        const char * end ();     // return 0 if no string exists
        const char * advance (); // return 0 if unable to advance
        const char * retreat (); // return 0 if unable to retreat

    private:
        const UT_UTF8Stringbuf * m_strbuf; // buffer being iterated (not owned, as far as this header shows)

        const char * m_utfbuf;
        const char * m_utfptr;

        bool sync ();
    };

private:
    void    insert (char *& ptr, const char * str, size_t utf8length);

    char *  m_psz;      // start of the byte buffer
    char *  m_pEnd;     // one past the last stored byte
    size_t  m_strlen;   // value reported by utf8Length()
    size_t  m_buflen;   // presumably the allocated size of the buffer -- confirm

    bool    grow (size_t length);
};
00170 
00171 
00172 
00174 //
00175 //  Generic string implementation
00176 //
00177 //  String is built of char_type units
00178 //  Encoding could be any single-byte or multi-byte encoding
00179 //
00181 
00182 template <typename char_type>
00183 UT_StringImpl<char_type>::UT_StringImpl()
00184     :   m_psz(0),
00185         m_pEnd(0),
00186         m_size(0),
00187         m_utf8string(0)
00188 {
00189 }
00190 
00191 template <typename char_type>
00192 UT_StringImpl<char_type>::UT_StringImpl(const UT_StringImpl<char_type>& rhs)
00193     :   m_psz(new char_type[rhs.capacity()]),
00194         m_pEnd(m_psz + rhs.size()),
00195         m_size(rhs.capacity()),
00196         m_utf8string(0)
00197 {
00198     copy(m_psz, rhs.m_psz, rhs.capacity());
00199 }
00200 
00201 template <typename char_type>
00202 UT_StringImpl<char_type>::UT_StringImpl(const char_type* sz, size_t n)
00203 :   m_psz(new char_type[n+1]),
00204     m_pEnd(m_psz + n),
00205     m_size(n+1),
00206     m_utf8string(0)
00207 {
00208     copy(m_psz, sz, n);
00209     m_psz[n] = 0;
00210 }
00211 
00212 template <typename char_type>
00213 UT_StringImpl<char_type>::UT_StringImpl(const std::basic_string<char_type> &s)
00214 :   m_psz(new char_type[s.size()+1]),
00215     m_pEnd(m_psz + s.size()),
00216     m_size(s.size()+1),
00217     m_utf8string(0)
00218 {
00219     // string is terminated here, so we know
00220     strcpy(m_psz, s.c_str());
00221 }
00222 
00223 
00224 template <typename char_type>
00225 UT_StringImpl<char_type>::~UT_StringImpl()
00226 {
00227     clear();
00228 }
00229 
00230 
00231 template <typename char_type>
00232 void UT_StringImpl<char_type>::operator=(const UT_StringImpl<char_type>& rhs)
00233 {
00234     if (this != &rhs)
00235     {
00236         clear();
00237         assign(rhs.m_psz, rhs.size());
00238     }
00239 }
00240 
00241 template <typename char_type>
00242 void UT_StringImpl<char_type>::assign(const char_type* sz, size_t n)
00243 {
00244     if (n)
00245     {
00246         if (n >= capacity())
00247         {
00248             grow_nocopy(n);
00249         }
00250         copy(m_psz, sz, n);
00251         m_psz[n] = 0;
00252         m_pEnd = m_psz + n;
00253         delete[] m_utf8string;
00254         m_utf8string = 0;
00255     } else {
00256         clear();
00257     }
00258 }
00259 
00260 template <typename char_type>
00261 void UT_StringImpl<char_type>::append(const char_type* sz, size_t n)
00262 {
00263     if (!n)
00264     {
00265         return;
00266     }
00267     if (!capacity())
00268     {
00269         assign(sz, n);
00270         return;
00271     }
00272     const size_t nLen = size();
00273     grow_copy(nLen + n);
00274     copy(m_psz + nLen, sz, n);
00275     m_psz[nLen + n] = 0;
00276     m_pEnd += n;
00277 }
00278 
00279 template <typename char_type>
00280 void UT_StringImpl<char_type>::append(const UT_StringImpl<char_type>& rhs)
00281 {
00282     append(rhs.m_psz, rhs.size());
00283 }
00284 
00285 template <typename char_type>
00286 void UT_StringImpl<char_type>::swap(UT_StringImpl<char_type>& rhs)
00287 {
00288     std::swap(m_psz , rhs.m_psz );
00289     std::swap(m_pEnd, rhs.m_pEnd);
00290     std::swap(m_size, rhs.m_size);
00291     std::swap(m_utf8string, rhs.m_utf8string);
00292 }
00293 
00294 template <typename char_type>
00295 void UT_StringImpl<char_type>::clear()
00296 {
00297     if (m_psz)
00298     {
00299         delete[] m_psz;
00300         m_psz = 0;
00301         m_pEnd = 0;
00302         m_size = 0;
00303     }
00304     if(m_utf8string) {
00305         delete[] m_utf8string;
00306         m_utf8string = 0;
00307     }
00308 }
00309 
00310 template <typename char_type>
00311 void UT_StringImpl<char_type>::reserve(size_t n)
00312 {
00313     grow_nocopy(n);
00314 }
00315 
00316 
00317 template <typename char_type>
00318 const char* UT_StringImpl<char_type>::utf8_data()
00319 {
00320     UT_ASSERT(UT_SHOULD_NOT_HAPPEN);
00321     return "";
00322 }
00323 
00324 
00325 template <typename char_type>
00326 void UT_StringImpl<char_type>::grow_nocopy(size_t n)
00327 {
00328     grow_common(n, false);
00329 }
00330 
00331 template <typename char_type>
00332 void UT_StringImpl<char_type>::grow_copy(size_t n)
00333 {
00334     grow_common(n, true);
00335 }
00336 
00337 template <typename char_type>
00338 void UT_StringImpl<char_type>::grow_common(size_t n, bool bCopy)
00339 {
00340     ++n;    // allow for zero termination
00341     if (n > capacity())
00342     {
00343         const size_t nCurSize = size();
00344         n = std::max(n, static_cast<size_t>(nCurSize * g_rGrowBy));
00345         char_type* pNew = new char_type[n];
00346         if (bCopy && m_psz)
00347         {
00348             copy(pNew, m_psz, size() + 1);
00349         }
00350         delete[] m_psz;
00351         m_psz  = pNew;
00352         m_pEnd = m_psz + nCurSize;
00353         m_size = n;
00354         delete[] m_utf8string;
00355         m_utf8string = 0;
00356     }
00357 }
00358 
00359 template <typename char_type>
00360 void UT_StringImpl<char_type>::copy(char_type* pDest, const char_type* pSrc, size_t n)
00361 {
00362     if (pDest && pSrc && n)
00363         memcpy(pDest, pSrc, n * sizeof(char_type));
00364 }
00365 
00366 
00367 #endif  // UT_STRINGBUF_H

Generated on Mon May 28 2012 for AbiWord by  doxygen 1.7.1