[Mew-dist 04011] Re: charset=shift_jis

SAKAI Kiyotaka ksakai at example.com
1998年 2月 27日 (金) 14:47:50 JST


>> In article <19980227133203E.nagae at example.com>, nagae at example.com (Nagae Hidetake) writes:

>   SHIFT_JIS は私も挑戦してみたのですが,文字単位で独立していないので,
> 美しい実装が思いつきません.うーむ.

euc-jp と shitft_jis に対応させました。
さっきのパッチの後に当てて下さい。

# sjis -> jis への変換ルーチンは、Japanese.pm にあったものを借りて使っ
# ています。
-- 
酒井 清隆 (E-mail: ksakai at example.com)


Index: EncDec.pm.in
===================================================================
RCS file: /home/cvsroot/im-86/IM.in/EncDec.pm.in,v
retrieving revision 1.2
diff -u -r1.2 EncDec.pm.in
--- EncDec.pm.in	1998/02/27 01:27:25	1.2
+++ EncDec.pm.in	1998/02/27 05:45:25
@@ -176,8 +176,12 @@
 	$ret = iso_8859_to_ctext($ret, $1);
     } elsif ($cs =~ /cn-gb/i) {
 	$ret = cn_gb_to_ctext($ret);
+    } elsif ($cs =~ /euc-jp/i) {
+	$ret = euc_jp_to_ctext($ret);
     } elsif ($cs =~ /euc-kr/i) {
 	$ret = euc_kr_to_ctext($ret);
+    } elsif ($cs =~ /shift_jis/i) {
+	$ret = shift_jis_to_ctext($ret);
     }
     return $ret;
 }
@@ -185,7 +189,7 @@
 sub iso_8859_to_ctext ($$) {
     my ($str, $num) = @_;
     my @index = ("A", "A", "B", "C", "D", "L", "G", "F", "H", "M");
-    $str =~ s/([\x80-\xff]+)/"\e-$index[$num]$1\e-A"/ge;
+    $str =~ s/([\x80-\xff]+)/\e-$index[$num]$1\e-A/g;
     return $str;
 }
 
@@ -195,6 +199,16 @@
     return $str;
 }
 
+sub euc_jp_to_ctext ($) {
+    my $str = shift;
+    $str =~ s/((\x8f[\xa0-\xff][\xa0-\xff])+)/"\e\$(D"
+	. remove_msb(remove_ss($1, "\x8f")) . "\e-A"/ge;
+    $str =~ s/(([\xa0-\xff][\xa0-\xff])+)/"\e\$(B"
+	. remove_msb($1) . "\e(B"/ge;
+    $str =~ s/((\x8e[\x80-\xff])+)/"\e)I" . remove_ss($1, "\x8e") . "\e-A"/ge;
+    return $str;
+}
+
 sub euc_kr_to_ctext ($) {
     my $str = shift;
     $str =~ s/([\x80-\xff]+)/"\e\$(C" . remove_msb($1) . "\e(B"/ge;
@@ -203,8 +217,45 @@
 
 sub remove_msb ($) {
     my $str = shift;
-    $str =~ s/([\x80-\xff])/chr(ord($1)-0x80)/ge;
+    $str =~ tr/\x80-\xff/\x00-\x7f/;
     return $str;
+}
+
+sub remove_ss ($$) {
+    my ($str, $si) = @_;
+    $str =~ s/$si//g;
+    return $str;
+}
+
+sub shift_jis_to_ctext ($) {
+    my $str = shift;
+    $str =~ s/(([\x81-\x9f\xe0-\xef].)+)/"\e\$(B" . s2j($1) . "\e(B"/ge;
+    $str =~ s/([\xa0-\xdf]+)/\e)I\1\e-A/g;
+    return $str;
+}
+
+sub s2j($) {
+    my $str = shift;
+    my ($c1, $c2);
+    my $ret = "";
+
+    while ($str) {
+	($c1, $c2, $str) = unpack('CCa*', $str);
+	if (0xa1 <= $c1 && $c1 <= 0xdf) {
+	    $c2 = $c1;
+	    $c1 = 0x8e;
+	} elsif ($c2 >= 0x9f) {
+	    $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe0 : 0x60);
+	    $c2 += 2;
+	} else {
+	    $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe1 : 0x61);
+	    $c2 += 0x60 + ($c2 < 0x7f);
+	}
+	$c1 &= 0x7f;
+	$c2 &= 0x7f;
+	$ret .= pack('CC', $c1, $c2);
+    }
+    return $ret;
 }
 
 ##################################################



Mew-dist メーリングリストの案内