diff -uNr abi-old/src/af/gr/xp/gr_Graphics.cpp abi-new/src/af/gr/xp/gr_Graphics.cpp
--- abi-old/src/af/gr/xp/gr_Graphics.cpp	2005-03-05 05:30:33.000000000 +0000
+++ abi-new/src/af/gr/xp/gr_Graphics.cpp	2005-03-09 09:48:56.000000000 +0000
@@ -1095,23 +1095,35 @@
 }
 
 /*!
-    return true if linebreak at character c is permissible
-    the built-in class is too simple to differentiate between breaks before and after character
+ *  return true if linebreak after/before character c is permissible.
 */
-bool GR_Graphics::canBreak(GR_RenderInfo & ri, UT_sint32 &iNext, bool /* bAfter */)
+bool GR_Graphics::canBreak(GR_RenderInfo & ri, UT_sint32 &iNext, bool bAfter)
 {
+	UT_return_val_if_fail(getApp(), false);
+	const class XAP_EncodingManager *EncMan=getApp()->getEncodingManager();
+	UT_UCS4Char c[3];
+
+	UT_return_val_if_fail(EncMan, false);
+	
 	iNext = -1; // we do not bother with this
 	UT_return_val_if_fail(ri.m_pText && ri.m_pText->getStatus() == UTIter_OK, false);
 	
 	*(ri.m_pText) += ri.m_iOffset;
 	UT_return_val_if_fail(ri.m_pText->getStatus() == UTIter_OK, false);
 	
-	UT_UCS4Char c = ri.m_pText->getChar();
+	/* For CJK we need the characters either side of c[1] as well.
+	 * NOTE(review): status is not re-checked after stepping, so c[0]/c[2] may
+	 * be invalid at text ends; iterator is left one past m_iOffset -- confirm. */
+	c[1] = ri.m_pText->getChar();
+	--(*ri.m_pText);
+	c[0] = ri.m_pText->getChar();
+	++(*ri.m_pText); ++(*ri.m_pText);	
+	c[2] = ri.m_pText->getChar();
 
-	UT_return_val_if_fail(getApp(), false);
-	return getApp()->getEncodingManager()->can_break_at(c);
+	return EncMan->can_break_at(c, bAfter);
 }
 
+
 /*!
    resetJustification() makes the data represented by ri unjustified
    and returns value by which the total width changed as a result such
diff -uNr abi-old/src/af/xap/xp/xap_EncodingManager.cpp abi-new/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-old/src/af/xap/xp/xap_EncodingManager.cpp	2005-03-05 05:30:41.000000000 +0000
+++ abi-new/src/af/xap/xp/xap_EncodingManager.cpp	2005-03-09 09:49:09.369271800 +0000
@@ -536,6 +536,26 @@
 	return search_map_with_opt_suffix(m,fallback_key,fallback_key_final);
 }
 
+/*
+ * Returns true if "c" occurs in "list", an array of UT_UCS4Char that ends
+ * with a 0 entry, and false otherwise.
+ * 
+ * Note: For performance reasons the scan stops at the first entry that is
+ *       greater than "c", so the entries MUST be in NUMERICAL ORDER
+ *       starting with the smallest value. See cjkCantStartLine below.
+ */
+static bool isInList(UT_UCS4Char c, const UT_UCS4Char *list) 
+{
+	const UT_UCS4Char *entry = list;
+
+	while (*entry != 0 && *entry <= c) {
+		if (*entry == c)
+			return true;
+		++entry;
+	}
+	return false;
+}
+
 /* ************************* here begin tables *************************/
 
 /* this array describes mapping form current encoding to Tex's encoding name.
@@ -646,6 +666,7 @@
 };
 
 
+#if 0  // This table is made obsolete by rules in GR_Graphics::canBreak()
 /*
  TODO I'm pretty sure you can't break Korean at any character.
  TODO And what about Japanese Katakana and Hiragana?
@@ -656,6 +677,110 @@
 	{"1",cjk_languages},
 	{NULL}
 };
+#endif
+
+/*
+ * A list of characters that cannot appear at the beginning of a line.
+ * These must be in ascending NUMERICAL ORDER and end with a 0 entry,
+ * because isInList() stops at the first entry greater than its argument.
+ * This is mainly for CJK languages in which we can break between 
+ * any character.
+ */
+static const UT_UCS4Char cjkCantStartLine[]=
+{
+	0x0021,  // ASCII exclamation mark.
+	0x0029,  // ASCII right parenthesis.
+	0x002c,  // ASCII comma.
+	0x002e,  // ASCII full stop.
+	0x003a,  // ASCII colon.
+	0x003b,  // ASCII semicolon.
+	0x003e,  // ASCII greater-than sign.
+	0x003f,  // ASCII question mark.
+	0x2019,  // Right single quotation mark.  (fullwidth?)
+	0x201d,  // Right double quotation marks. (fullwidth?)
+	0x3001,  // Ideographic comma.
+	0x3002,  // Ideographic full stop.
+	0x300b,  // Right double angle bracket.
+	0xff01,  // Fullwidth exclamation mark.
+	0xff09,  // Fullwidth right parenthesis.
+	0xff0c,  // Fullwidth comma.
+	0xff1a,  // Fullwidth colon.
+	0xff1b,  // Fullwidth semicolon.
+	0xff1e,  // Fullwidth greater-than sign.
+	0xff1f,  // Fullwidth question mark.
+	0xff3d,  // Fullwidth right square bracket.
+	0xff5d,  // Fullwidth right curly bracket.
+	0x0      // List terminator.
+};
+
+/* Characters that cannot end a line. NUMERICAL ORDER, ends with a 0 entry. */
+static const UT_UCS4Char cjkCantEndLine[]=
+{
+	0x0028,  // ASCII left parenthesis.
+	0x003c,  // ASCII less-than sign.
+	0x201c,  // Left double quotation mark.
+	0x300a,  // Left double angle bracket.
+	0xff08,  // Fullwidth left parenthesis.
+	0xff1c,  // Fullwidth less-than sign.
+	0xff3b,  // Fullwidth left square bracket.
+	0xff5b,  // Fullwidth left curly bracket.
+	0x0      // List terminator.
+};
+
+/*
+ * Characters that, when in pairs, must not be split. NUMERICAL ORDER.
+ */
+static const UT_UCS4Char cjkMustNotSplit[]=
+{
+	0x2014,  // EM Dash.
+	0x2026,  // Horizontal ellipsis.
+	0x0      // List terminator; isInList() scans until it reads this.
+};
+
+static const UT_UCS4Char canBreakAfter[] =  // A break is allowed AFTER these.
+{                                           // NUMERICAL ORDER, ends with 0.
+	0x0020,   // UCS_SPACE
+	0x0021,   // !
+	0x0029,   // )
+	0x002c,   // ,
+	0x002d,   // UCS_MINUS
+	0x002e,   // .
+	0x003a,   // :
+	0x003b,   // ;
+	0x003e,   // >
+	0x003f,   // ?
+	0x005d,   // ]
+	0x007d,   // }
+	0x2010,   // UCS_HYPHEN
+	0x2013,   // UCS_EN_DASH
+	0x2014,   // UCS_EM_DASH
+	0x3002,   // Ideographic full stop
+	0x300b,   // Right double angle bracket
+	0xff01,   // Fullwidth exclamation mark
+	0xff09,   // Fullwidth right parenthesis
+	0xff0c,   // Fullwidth comma
+	0xff0d,   // Fullwidth hyphen-minus
+	0xff1a,   // Fullwidth colon
+	0xff1b,   // Fullwidth semi-colon
+	0xff1f,   // Fullwidth question mark
+	0xff3d,   // Fullwidth right square bracket
+	0xff5d,   // Fullwidth right curly bracket
+	0x0       // List terminator
+};
+
+static const UT_UCS4Char canBreakBefore[] = // A break is allowed BEFORE these.
+{                                           // NUMERICAL ORDER, ends with 0.
+	0x0028,   // (
+	0x003c,   // <
+	0x005b,   // [
+	0x007b,   // {
+	0x300a,   // Left double angle bracket
+	0xff08,   // Fullwidth left parenthesis
+	0xff3b,   // Fullwidth left square bracket
+	0xff5b,   // Fullwidth left curly bracket
+	0x0       // List terminator
+};
+
 
 /*
  This table is useful since some iconv implementations don't know some cpNNNN 
@@ -961,6 +1086,39 @@
 
 /* ************************* here end tables *************************/
 
+/*
+ * Classifies the character c for line breaking purposes. The returned
+ * structure specifies:
+ *
+ *  1. If this is a CJK character or not.
+ *  2. If it is forbidden from ending a line.
+ *  3. If it is forbidden from starting a line.
+ *  4. If pairs of this character must be kept together and not broken
+ *     across lines.
+ */
+struct SCJKProp XAP_EncodingManager::charCJKProp(UT_UCS4Char c) const
+{
+	struct SCJKProp result;
+
+	// Code point ranges that are treated as CJK, in ascending order.
+	// 0x3200-0x4dbf covers three ranges that directly follow each other.
+	result.cjk =
+	    (c>=0x3000  && c<=0x303f)  ||
+	    (c>=0x3200  && c<=0x4dbf)  ||
+	    (c>=0x4e00  && c<=0x9faf)  ||
+	    (c>=0xf900  && c<=0xfaff)  ||
+	    (c>=0xfe30  && c<=0xfe4f)  ||
+	    (c>=0x20000 && c<=0x2a6df) ||
+	    (c>=0x2f800 && c<=0x2fa1f);
+
+	// The other three flags are lookups in the sorted tables above.
+	result.cantStartLine = isInList(c, cjkCantStartLine);
+	result.cantEndLine   = isInList(c, cjkCantEndLine);
+	result.mustNotSplit  = isInList(c, cjkMustNotSplit);
+
+	return result;
+}
+
 const XAP_LangInfo* XAP_EncodingManager::findLangInfo(const char* key,XAP_LangInfo::fieldidx idx)
 {
 	if (idx > XAP_LangInfo::max_idx)
@@ -1111,8 +1269,11 @@
 	{	
 	    const char* str = search_rmap_with_opt_suffix(langcode_to_cjk,SEARCH_PARAMS);
 	    is_cjk_ = *str == '1';
-	    str = search_rmap_with_opt_suffix(can_break_words_data,SEARCH_PARAMS);
-	    can_break_words_ = *str == '1';
+// 		This is made obsolete by new CJK line break handling in
+// 		gr_Graphics::canBreak.
+//
+//	    str = search_rmap_with_opt_suffix(can_break_words_data,SEARCH_PARAMS);
+//	    can_break_words_ = *str == '1';
 	}
 	{
 	    if (cjk_locale()) {
@@ -1160,10 +1321,13 @@
 
 int XAP_EncodingManager__swap_stou,XAP_EncodingManager__swap_utos;
 
+
+#if 0 // This is made obsolete by new CJK line breaking in gr_Graphics::canBreak
 bool XAP_EncodingManager::can_break_words() const
 {
     return can_break_words_;
 }
+#endif
 
 /*
     I'm not sure whether any non-cjk language doesn't make distinction
@@ -1172,38 +1336,117 @@
 */
 bool XAP_EncodingManager::single_case() const { return cjk_locale(); }
 
+#if 0 // This function is now performed by XAP_EncodingManager::charCJKProp
 bool XAP_EncodingManager::is_cjk_letter(UT_UCSChar c) const
 {
     if (!cjk_locale())
 	return 0;
     return (c>0xff);
 }
+#endif
 
 bool XAP_EncodingManager::noncjk_letters(const UT_UCSChar* str,int len) const
 {
     if (!cjk_locale())
 	return 1;
     for(int i=0;i<len;++i) {
-	if (is_cjk_letter(str[i]))
+//	if (is_cjk_letter(str[i]))
+	if (charCJKProp(str[i]).cjk)
 	    return 0;
     };
     return 1;
 }
 
+
 /*
-    This one correlates with can_break_words() very tightly.
-        Under CJK locales it returns 1 for cjk letters. 
-    Under non-CJK locales returns 0.
+ * It appears that without the benefit of spaces in CJK documents we have to be
+ * much more consistent about whether a line can be broken before or after a
+ * character. So using the above lists, canBreakBefore and canBreakAfter, we
+ * deal with punctuation more accurately. Eg, you can break after a comma but
+ * not before.
 */
-bool XAP_EncodingManager::can_break_at(const UT_UCSChar c) const
+static bool westernCanBreakAt(const UT_UCSChar c, bool bAfter) 
 {
-    if (c == UCS_SPACE 
+	if (bAfter) 
+	{
+		if (isInList(c, canBreakAfter))
+			return true;
+	}
+	else
+	{
+		if (isInList(c, canBreakBefore))
+			return true;
+	}
+	return false;
+
+/*    if (c == UCS_SPACE 
 	 || c == UCS_MINUS 
 	 || c == UCS_HYPHEN
 	 || c == UCS_EN_DASH
 	 || c == UCS_EM_DASH)
 	return 1;
-    return is_cjk_letter(c);
+
+	return 0; */
+//    return is_cjk_letter(c); // CJK line breaking is now handled in 
+//                                GR_Graphics::canBreak().
+}
+ 
+/*
+ *  When editing Chinese documents certain characters cannot be allowed
+ *  to start a line (eg. ".,)>>}") and certain others cannot end a line
+ *  (eg. "(<<{"). Also, a few characters like "--" must occur in pairs
+ *  which cannot be split. This function returns true or false in such a
+ *  way as to ensure these rules. The characters affected are defined
+ *  in the tables in xap_EncodingManager.cpp
+ *
+ *  c[] should be an array of three characters. c[1] is the character at
+ *  which we're being asked to break. c[0] is the char before and c[2] the
+ *  char after. bAfter selects whether the break is after (true) or
+ *  before (false) c[1].
+ */
+bool XAP_EncodingManager::can_break_at(const UT_UCSChar c[], bool bAfter) const
+{
+	struct SCJKProp cjkProp[3]; int i;
+
+	// Get the CJK properties of the three chars.
+	for (i=0; i<3; i++) 
+		cjkProp[i] = charCJKProp(c[i]);
+	
+#ifdef UT_DEBUG
+	// This is just for debugging
+	const UT_uint32 iSize=256;
+	UT_uint32 iNumChars=0;
+	char debugbuf[iSize];
+	iNumChars +=snprintf(debugbuf, iSize, "Break %s ", bAfter?"after":"before");
+	for (i=0; i<3; i++)  {
+		iNumChars += snprintf(debugbuf+iNumChars, iSize-iNumChars, 
+		"c%d: %x(%s%s%s%s) ", i, c[i],
+		cjkProp[i].cjk?"C":".",
+		cjkProp[i].cantStartLine?"S":".",
+		cjkProp[i].cantEndLine?"E":".",
+		cjkProp[i].mustNotSplit?"N":".");
+	}
+	snprintf(debugbuf+iNumChars, iSize-iNumChars, "\n");
+    UT_DEBUGMSG(("%s",debugbuf));
+#endif
+	
+	if (bAfter) // We're being asked about breaking after.
+	{
+		if (cjkProp[1].mustNotSplit && c[1]==c[2])
+			return false;
+		if (cjkProp[1].cantEndLine || !cjkProp[1].cjk) 
+			return westernCanBreakAt(c[1], bAfter);
+		return !cjkProp[2].cantStartLine;
+	}
+
+	// Otherwise we're being asked about breaking before c[1].
+	if (cjkProp[1].mustNotSplit && c[1]==c[0])
+		return false;
+	if (cjkProp[1].cantStartLine || !cjkProp[1].cjk)
+		return westernCanBreakAt(c[1], bAfter);
+	// A break here leaves c[0] at the end of the previous line, so it is
+	// c[0] (not c[1]) that must be allowed to end a line.
+	return !cjkProp[0].cantEndLine;
 }
 
 
@@ -1267,7 +1510,10 @@
 		"--->8--------------\n"
 		
 		"	WinLanguageCode is 0x%04x, WinCharsetCode is %d\n"
-		"	cjk_locale %d, can_break_words %d, swap_utos %d, swap_stou %d\n",
+		"	cjk_locale %d, "
+		// "can_break_words %d, " Made obsolete by new CJK line breaking in
+		//                        gr_Graphics:canBreak
+		"swap_utos %d, swap_stou %d\n",
 		getLanguageISOName(), getLanguageISOTerritory() ? getLanguageISOTerritory() : "NULL",
 		getNativeEncodingName(),getNativeSystemEncodingName(),
 		getNative8BitEncodingName(),getNativeNonUnicodeEncodingName(),
@@ -1275,7 +1521,7 @@
 		fallbackChar(1072),
 		getTexPrologue(),
 		getWinLanguageCode(), getWinCharsetCode(),
-		int(cjk_locale()), int(can_break_words()),int(swap_utos),int(swap_stou)
+		int(cjk_locale()), /*int(can_break_words()),*/ int(swap_utos),int(swap_stou)
 		));
 	UT_ASSERT( UT_iconv_isValid(iconv_handle_N2U) && UT_iconv_isValid(iconv_handle_U2N) );
 }
diff -uNr abi-old/src/af/xap/xp/xap_EncodingManager.h abi-new/src/af/xap/xp/xap_EncodingManager.h
--- abi-old/src/af/xap/xp/xap_EncodingManager.h	2005-03-05 05:30:41.000000000 +0000
+++ abi-new/src/af/xap/xp/xap_EncodingManager.h	2005-03-09 09:49:09.385269368 +0000
@@ -38,6 +38,17 @@
 #include "ut_iconv.h"
 #include "ut_xml.h"
 
+/*
+ * CJK line breaking properties of one character (see charCJKProp()).
+ */
+struct SCJKProp {
+	bool cjk;            // in one of the code point ranges treated as CJK
+	bool cantEndLine;    // listed as forbidden from ending a line
+	bool cantStartLine;  // listed as forbidden from starting a line
+	bool mustNotSplit;   // a pair of this character must not be split
+};
+
+
 struct ABI_EXPORT XAP_LangInfo
 {
 	/*no memeber can have NULL value. If string is empty, then value is
@@ -169,7 +180,7 @@
 	/*  whether words can be broken at any character of the word (wide 
 	    character, not byte). True for japanese.
 	*/
-	virtual bool can_break_words() const;
+//	virtual bool can_break_words() const;
 
 	/*
 	    returns true if there is no distinction between upper and lower
@@ -189,12 +200,18 @@
 		Under CJK locales it returns 1 for cjk letters. 
 	    Under non-CJK locales returns 0.
 	*/
-	virtual bool can_break_at(const UT_UCSChar c) const;
+	virtual bool can_break_at(const UT_UCSChar c[], bool bAfter) const;
 
 	/*
 	    This should be as precise as possible.
 	*/
-	virtual bool is_cjk_letter(UT_UCSChar c) const;
+//	virtual bool is_cjk_letter(UT_UCSChar c) const;
+
+	/*
+	 *   Returns a structure that indicates whether c is a CJK character
+	 *   and what its line breaking properties are.
+	 */
+	virtual struct SCJKProp charCJKProp(UT_UCS4Char c) const;
 	
 	/*
 	    This is rather smart wrapper for wvLIDToCodePageConverter.
diff -uNr abi-old/src/text/fmt/xp/fp_TextRun.cpp abi-new/src/text/fmt/xp/fp_TextRun.cpp
--- abi-old/src/text/fmt/xp/fp_TextRun.cpp	2005-03-05 05:30:49.000000000 +0000
+++ abi-new/src/text/fmt/xp/fp_TextRun.cpp	2005-03-09 09:49:16.001263584 +0000
@@ -355,7 +355,7 @@
 		PD_StruxIterator text(getBlock()->getStruxDocHandle(),
 							  getBlockOffset() + fl_BLOCK_STRUX_OFFSET);
 		UT_return_val_if_fail(text.getStatus() == UTIter_OK, false);
-		text.setUpperLimit(text.getPosition() + getLength() - 1);
+		//text.setUpperLimit(text.getPosition() + getLength() - 1);
 		
 		UT_return_val_if_fail(m_pRenderInfo, false);
 		m_pRenderInfo->m_pText = &text;
@@ -389,7 +389,7 @@
 							  getBlockOffset() + fl_BLOCK_STRUX_OFFSET );
 
 		UT_return_val_if_fail(text.getStatus() == UTIter_OK, false);
-		text.setUpperLimit(text.getPosition() + getLength() - 1);
+//		text.setUpperLimit(text.getPosition() + getLength() - 1);
 		
 		UT_return_val_if_fail(m_pRenderInfo, false);
 		m_pRenderInfo->m_pText = &text;
@@ -532,7 +532,7 @@
 						  offset + fl_BLOCK_STRUX_OFFSET);
 
 	m_pRenderInfo->m_pText = &text;
-	text.setUpperLimit(text.getPosition() + getLength() - 1);
+//	text.setUpperLimit(text.getPosition() + getLength() - 1);
 	UT_uint32 iPosStart = text.getPosition();
 	
 	//bool bReverse = (getVisDirection() == UT_BIDI_RTL);
@@ -564,8 +564,8 @@
 			text.setPosition(iPos);
 		}
 		
-		if (bForce || iNext == (UT_sint32)i || bCanBreak)
-		   //	&& ((i + offset) != (getBlockOffset() + getLength() - 1))
+		if (bForce || iNext == (UT_sint32)i ||	// grouping made explicit; same as before
+		    (bCanBreak && ((i + offset) != (getBlockOffset() + getLength() - 1)))) // KAY: Enabled
 		{
 			UT_sint32 ispace = 0;
 			if(c == UCS_SPACE)
diff -uNr abi-old/src/text/fmt/xp/fv_View.cpp abi-new/src/text/fmt/xp/fv_View.cpp
--- abi-old/src/text/fmt/xp/fv_View.cpp	2005-03-05 05:30:49.000000000 +0000
+++ abi-new/src/text/fmt/xp/fv_View.cpp	2005-03-09 09:49:16.205232576 +0000
@@ -9856,8 +9856,10 @@
 			// CJK-FIXME: this can work incorrectly under CJK locales
 			// since it can give 'true' for UCS with value >0xff (like
 			// quotes, etc).
+			// New function might be better. - R.Kay
 			if (newWord ||
-				XAP_EncodingManager::get_instance()->is_cjk_letter(pSpan[i]))
+//				XAP_EncodingManager::get_instance()->is_cjk_letter(pSpan[i]))
+				(XAP_EncodingManager::get_instance()->charCJKProp(pSpan[i]).cjk))
                         {
 
                                 wCount.word++;