[Mew-dist 04011] Re: charset=shift_jis
SAKAI Kiyotaka
ksakai at example.com
1998年 2月 27日 (金) 14:47:50 JST
>> In article <19980227133203E.nagae at example.com>, nagae at example.com (Nagae Hidetake) writes:
> SHIFT_JIS は私も挑戦してみたのですが,文字単位で独立していないので,
> 美しい実装が思いつきません.うーむ.
euc-jp と shitft_jis に対応させました。
さっきのパッチの後に当てて下さい。
# sjis -> jis への変換ルーチンは、Japanese.pm にあったものを借りて使っ
# ています。
--
酒井 清隆 (E-mail: ksakai at example.com)
Index: EncDec.pm.in
===================================================================
RCS file: /home/cvsroot/im-86/IM.in/EncDec.pm.in,v
retrieving revision 1.2
diff -u -r1.2 EncDec.pm.in
--- EncDec.pm.in 1998/02/27 01:27:25 1.2
+++ EncDec.pm.in 1998/02/27 05:45:25
@@ -176,8 +176,12 @@
$ret = iso_8859_to_ctext($ret, $1);
} elsif ($cs =~ /cn-gb/i) {
$ret = cn_gb_to_ctext($ret);
+ } elsif ($cs =~ /euc-jp/i) {
+ $ret = euc_jp_to_ctext($ret);
} elsif ($cs =~ /euc-kr/i) {
$ret = euc_kr_to_ctext($ret);
+ } elsif ($cs =~ /shift_jis/i) {
+ $ret = shift_jis_to_ctext($ret);
}
return $ret;
}
@@ -185,7 +189,7 @@
sub iso_8859_to_ctext ($$) {
my ($str, $num) = @_;
my @index = ("A", "A", "B", "C", "D", "L", "G", "F", "H", "M");
- $str =~ s/([\x80-\xff]+)/"\e-$index[$num]$1\e-A"/ge;
+ $str =~ s/([\x80-\xff]+)/\e-$index[$num]$1\e-A/g;
return $str;
}
@@ -195,6 +199,16 @@
return $str;
}
+sub euc_jp_to_ctext ($) {
+ my $str = shift;
+ $str =~ s/((\x8f[\xa0-\xff][\xa0-\xff])+)/"\e\$(D"
+ . remove_msb(remove_ss($1, "\x8f")) . "\e-A"/ge;
+ $str =~ s/(([\xa0-\xff][\xa0-\xff])+)/"\e\$(B"
+ . remove_msb($1) . "\e(B"/ge;
+ $str =~ s/((\x8e[\x80-\xff])+)/"\e)I" . remove_ss($1, "\x8e") . "\e-A"/ge;
+ return $str;
+}
+
sub euc_kr_to_ctext ($) {
my $str = shift;
$str =~ s/([\x80-\xff]+)/"\e\$(C" . remove_msb($1) . "\e(B"/ge;
@@ -203,8 +217,45 @@
sub remove_msb ($) {
my $str = shift;
- $str =~ s/([\x80-\xff])/chr(ord($1)-0x80)/ge;
+ $str =~ tr/\x80-\xff/\x00-\x7f/;
return $str;
+}
+
+sub remove_ss ($$) {
+ my ($str, $si) = @_;
+ $str =~ s/$si//g;
+ return $str;
+}
+
+sub shift_jis_to_ctext ($) {
+ my $str = shift;
+ $str =~ s/(([\x81-\x9f\xe0-\xef].)+)/"\e\$(B" . s2j($1) . "\e(B"/ge;
+ $str =~ s/([\xa0-\xdf]+)/\e)I\1\e-A/g;
+ return $str;
+}
+
+sub s2j($) {
+ my $str = shift;
+ my ($c1, $c2);
+ my $ret = "";
+
+ while ($str) {
+ ($c1, $c2, $str) = unpack('CCa*', $str);
+ if (0xa1 <= $c1 && $c1 <= 0xdf) {
+ $c2 = $c1;
+ $c1 = 0x8e;
+ } elsif ($c2 >= 0x9f) {
+ $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe0 : 0x60);
+ $c2 += 2;
+ } else {
+ $c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe1 : 0x61);
+ $c2 += 0x60 + ($c2 < 0x7f);
+ }
+ $c1 &= 0x7f;
+ $c2 &= 0x7f;
+ $ret .= pack('CC', $c1, $c2);
+ }
+ return $ret;
}
##################################################
Mew-dist メーリングリストの案内