From: James French <frenchie@frenchie.id.au>
Date: Mon, 2 Aug 2010 16:18:48 +0000 (+0800)
Subject: A step closer to real unicode support
X-Git-Tag: 0.7.1~1
X-Git-Url: https://git.ucc.asn.au/?p=frenchie%2Ficalparse.git;a=commitdiff_plain;h=de767479276e2919b36d44af4e6c981ee81d56a5

A step closer to real unicode support

Doesn't split multi-byte characters in half at line folds
---

diff --git a/icalparse.py b/icalparse.py
index edfca4c..3949c3d 100755
--- a/icalparse.py
+++ b/icalparse.py
@@ -53,25 +53,42 @@ def lineFolder(oldcal, length=75):
 	cal = []
 	sl = length - 1
 
-	for line in oldcal:
-		line = line.encode('utf-8')
+	for uline in oldcal:
+		line = uline.encode('utf-8')
+
 		# Line fits inside length, do nothing
-		if len(line.rstrip()) <= length:
+		if len(line) <= length:
 			cal.append(line)
+
 		else:
-			brokenline = [line[0:length]]
 			ll = length
-			while ll < len(line) + 1:
-				brokenline.append(line[ll:sl+ll])
-				ll += sl
-			brokenline = '\r\n '.join(brokenline)
-			cal.append(brokenline)
+			foldedline = []
+			while uline:
+				# This algorithm prevents slicing multi-byte chars in half
+
+				# Convert up to length octets to unicode, dropping any
+				# half characters
+				ufold = unicode(line[0:75], 'utf-8', 'ignore')
+				fold = ufold.encode('utf-8')
+
+				# Remove what we've converted from the line
+				uline = uline.replace(ufold,u'',1)
+				line = uline.encode('utf-8')
+
+				# And add the fold to the list
+				foldedline.append(fold)
+
+				# Subsequent lines are shorter as they include a space
+				ll = length - 1
+
+			# Finally, add the fold 'marker' to the line
+			cal.append('\r\n '.join(foldedline))
 
 	return cal
 
 
 def splitFields(cal):
-	'''Takes a list of lines in a calendar file and returns a list of tuples	
+	'''Takes a list of lines in a calendar file and returns a list of tuples
 	as (key, value) pairs'''
 
 	ical = [tuple(x.split(':',1)) for x in cal]