Describe the behavior of the SQL_ASCII encoding more accurately.

tglsfdc · tglsfdc · commit a9980ec37b3c · 2005-10-13T21:43:43.000Z
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.73 2005/06/21 04:02:29 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.74 2005/10/13 21:43:43 tgl Exp $ -->
 
 <chapter id="charset">
  <title>Localization</>
@@ -517,8 +517,8 @@ initdb --locale=sv_SE
         </row>
         <row>
          <entry><literal>SQL_ASCII</literal></entry>
-         <entry><acronym>ASCII</acronym></entry>
-         <entry>English</entry>
+         <entry>unspecified (see text)</entry>
+         <entry><emphasis>any</></entry>
          <entry>1</entry>
          <entry></entry>
         </row>
@@ -533,7 +533,7 @@ initdb --locale=sv_SE
          <entry><literal>UTF8</literal></entry>
          <entry>Unicode, 8-bit</entry>
          <entry><emphasis>all</></entry>
-         <entry>1-3</entry>
+         <entry>1-4</entry>
          <entry><literal>Unicode</></entry>
         </row>
         <row>
@@ -595,6 +595,21 @@ initdb --locale=sv_SE
       JDBC driver does not support <literal>MULE_INTERNAL</>, <literal>LATIN6</>,
       <literal>LATIN8</>, and <literal>LATIN10</>.
      </para>
+
+     <para>
+      The <literal>SQL_ASCII</> setting behaves considerably differently
+      from the other settings.  When the server character set is
+      <literal>SQL_ASCII</>, the server interprets byte values 0-127
+      according to the ASCII standard, while byte values 128-255 are taken
+      as uninterpreted characters.  No encoding conversion will be done when
+      the setting is <literal>SQL_ASCII</>.  Thus, this setting is not so
+      much a declaration that a specific encoding is in use as a declaration
+      of ignorance about the encoding.  In most cases, if you are
+      working with any non-ASCII data, it is unwise to use the
+      <literal>SQL_ASCII</> setting, because
+      <productname>PostgreSQL</productname> will be unable to help you by
+      converting or validating non-ASCII characters.
+     </para>
     </sect2>
     
    <sect2>
@@ -884,9 +899,7 @@ $ <userinput>psql -l</userinput>
         </row>
         <row>
          <entry><literal>SQL_ASCII</literal></entry>
-         <entry><emphasis>SQL_ASCII</emphasis>,
-          <literal>MULE_INTERNAL</literal>,
-          <literal>UTF8</literal>
+         <entry><emphasis>any (no conversion will be performed)</emphasis>
          </entry>
         </row>
         <row>
@@ -956,7 +969,7 @@ $ <userinput>psql -l</userinput>
      </table>
 
     <para>
-     To enable the automatic character set conversion, you have to
+     To enable automatic character set conversion, you have to
      tell <productname>PostgreSQL</productname> the character set
      (encoding) you would like to use in the client. There are several
      ways to accomplish this:
@@ -1070,6 +1083,13 @@ RESET client_encoding;
      hexadecimal byte values in parentheses, e.g.,
      <literal>(826C)</literal>.
     </para>
+
+    <para>
+     If the client character set is defined as <literal>SQL_ASCII</>,
+     encoding conversion is disabled, regardless of the server's character
+     set.  Just as for the server, use of <literal>SQL_ASCII</> is unwise
+     unless you are working with all-ASCII data.
+    </para>
    </sect2>
 
    <sect2>