When using QUOTE_NONNUMERIC, we now test for "numericness" with

PyNumber_Check, rather than trying to convert to a float. Reimplemented writer - now raises exceptions when it sees a quotechar but neither doublequote or escapechar are set. Doublequote results are now more consistent (eg, single quote should generate """", rather than "", which is ambiguous).
2024-11-24 10:24:35 +08:00 · 2005-01-12 07:44:42 +00:00 · 2005-01-12 07:44:42 +00:00 · c89f284df8
commit c89f284df8
parent 31d8896ee2
3 changed files with 81 additions and 92 deletions
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@ -152,25 +152,35 @@ class Test_Csv(unittest.TestCase):
                         (bigstring, bigstring))

    def test_write_quoting(self):
-        self._write_test(['a','1','p,q'], 'a,1,"p,q"')
+        self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
        self.assertRaises(csv.Error,
                          self._write_test,
-                          ['a','1','p,q'], 'a,1,"p,q"',
+                          ['a',1,'p,q'], 'a,1,p,q',
                          quoting = csv.QUOTE_NONE)
-        self._write_test(['a','1','p,q'], 'a,1,"p,q"',
+        self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
                         quoting = csv.QUOTE_MINIMAL)
-        self._write_test(['a','1','p,q'], '"a",1,"p,q"',
+        self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
                         quoting = csv.QUOTE_NONNUMERIC)
-        self._write_test(['a','1','p,q'], '"a","1","p,q"',
+        self._write_test(['a',1,'p,q'], '"a","1","p,q"',
                         quoting = csv.QUOTE_ALL)

    def test_write_escape(self):
-        self._write_test(['a','1','p,q'], 'a,1,"p,q"',
+        self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
                         escapechar='\\')
-# FAILED - needs to be fixed [am]:
-#        self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
-#                         escapechar='\\', doublequote = 0)
-        self._write_test(['a','1','p,q'], 'a,1,p\\,q',
+        self.assertRaises(csv.Error,
+                          self._write_test,
+                          ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
+                          escapechar=None, doublequote=False) 
+        self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
+                         escapechar='\\', doublequote = False)
+        self._write_test(['"'], '""""', 
+                         escapechar='\\', quoting = csv.QUOTE_MINIMAL)
+        self._write_test(['"'], '\\"', 
+                         escapechar='\\', quoting = csv.QUOTE_MINIMAL,
+                         doublequote = False)
+        self._write_test(['"'], '\\"', 
+                         escapechar='\\', quoting = csv.QUOTE_NONE)
+        self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
                         escapechar='\\', quoting = csv.QUOTE_NONE)

    def test_writerows(self):
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -45,6 +45,9 @@ Library
  + quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
    dictates.
  + the parser now removes the escapechar prefix from escaped characters.
+  + QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting
+    to cast to float.
+  + writer doublequote handling improved.
  + Dialect classes passed to the module are no longer instantiated by
    the module before being parsed (the former validation scheme required
    this, but the mechanism was unreliable).
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@ -944,81 +944,65 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
 {
        DialectObj *dialect = self->dialect;
 	int i, rec_len;
+	char *lineterm;
+
+#define ADDCH(c) \
+	do {\
+		if (copy_phase) \
+			self->rec[rec_len] = c;\
+		rec_len++;\
+	} while(0)
+
+	lineterm = PyString_AsString(dialect->lineterminator);
+	if (lineterm == NULL)
+		return -1;

 	rec_len = self->rec_len;

-	/* If this is not the first field we need a field separator.
-	 */
-	if (self->num_fields > 0) {
-		if (copy_phase)
-			self->rec[rec_len] = dialect->delimiter;
-		rec_len++;
-	}
-	/* Handle preceding quote.
-	 */
-	switch (dialect->quoting) {
-	case QUOTE_ALL:
-		*quoted = 1;
-		if (copy_phase)
-			self->rec[rec_len] = dialect->quotechar;
-		rec_len++;
-		break;
-	case QUOTE_MINIMAL:
-	case QUOTE_NONNUMERIC:
-		/* We only know about quoted in the copy phase.
-		 */
-		if (copy_phase && *quoted) {
-			self->rec[rec_len] = dialect->quotechar;
-			rec_len++;
-		}
-		break;
-	case QUOTE_NONE:
-		break;
-	}
-	/* Copy/count field data.
-	 */
+	/* If this is not the first field we need a field separator */
+	if (self->num_fields > 0)
+		ADDCH(dialect->delimiter);
+
+	/* Handle preceding quote */
+	if (copy_phase && *quoted)
+		ADDCH(dialect->quotechar);
+
+	/* Copy/count field data */
 	for (i = 0;; i++) {
 		char c = field[i];
+		int want_escape = 0;

 		if (c == '\0')
 			break;
-		/* If in doublequote mode we escape quote chars with a
-		 * quote.
-		 */
-		if (dialect->quoting != QUOTE_NONE && 
-                    c == dialect->quotechar && dialect->doublequote) {
-			if (copy_phase)
-				self->rec[rec_len] = dialect->quotechar;
-			*quoted = 1;
-			rec_len++;
-		}

-		/* Some special characters need to be escaped.  If we have a
-		 * quote character switch to quoted field instead of escaping
-		 * individual characters.
-		 */
-		if (!*quoted
-		    && (c == dialect->delimiter || 
-                        c == dialect->escapechar || 
-                        c == '\n' || c == '\r')) {
-			if (dialect->quoting != QUOTE_NONE)
-				*quoted = 1;
-			else if (dialect->escapechar) {
-				if (copy_phase)
-					self->rec[rec_len] = dialect->escapechar;
-				rec_len++;
-			}
+		if (c == dialect->delimiter ||
+		    c == dialect->escapechar ||
+		    c == dialect->quotechar ||
+		    strchr(lineterm, c)) {
+			if (dialect->quoting == QUOTE_NONE)
+				want_escape = 1;
 			else {
-				PyErr_Format(error_obj, 
-                                             "delimiter must be quoted or escaped");
-				return -1;
+				if (c == dialect->quotechar) {
+					if (dialect->doublequote)
+						ADDCH(dialect->quotechar);
+					else
+						want_escape = 1;
+				}
+				if (!want_escape)
+					*quoted = 1;
+			}
+			if (want_escape) {
+				if (!dialect->escapechar) {
+					PyErr_Format(error_obj, 
+						     "need to escape, but no escapechar set");
+					return -1;
+				}
+				ADDCH(dialect->escapechar);
 			}
 		}
 		/* Copy field character into record buffer.
 		 */
-		if (copy_phase)
-			self->rec[rec_len] = c;
-		rec_len++;
+		ADDCH(c);
 	}

 	/* If field is empty check if it needs to be quoted.
@ -1033,20 +1017,14 @@ join_append_data(WriterObj *self, char *field, int quote_empty,
 			*quoted = 1;
 	}

-	/* Handle final quote character on field.
-	 */
 	if (*quoted) {
 		if (copy_phase)
-			self->rec[rec_len] = dialect->quotechar;
+			ADDCH(dialect->quotechar);
 		else
-			/* Didn't know about leading quote until we found it
-			 * necessary in field data - compensate for it now.
-			 */
-			rec_len++;
-		rec_len++;
+			rec_len += 2;
 	}
-
 	return rec_len;
+#undef ADDCH
 }

 static int
@ -1146,18 +1124,16 @@ csv_writerow(WriterObj *self, PyObject *seq)
 		if (field == NULL)
 			return NULL;

-		quoted = 0;
-		if (dialect->quoting == QUOTE_NONNUMERIC) {
-			PyObject *num;
-
-			num = PyNumber_Float(field);
-			if (num == NULL) {
-				quoted = 1;
-				PyErr_Clear();
-			}
-			else {
-				Py_DECREF(num);
-			}
+		switch (dialect->quoting) {
+		case QUOTE_NONNUMERIC:
+			quoted = !PyNumber_Check(field);
+			break;
+		case QUOTE_ALL:
+			quoted = 1;
+			break;
+		default:
+			quoted = 0;
+			break;
 		}

 		if (PyString_Check(field)) {