[Mew-dist 10313] Re: Japanese nickname in Addrbook
Kazumasa Utashiro
utashiro at example.com
1999年 8月 21日 (土) 01:21:35 JST
From: Kazu Yamamoto (山本和彦) <kazu at example.com>
Subject: [Mew-dist 10285] Re: Japanese nickname in Addrbook
Date: Fri, 20 Aug 1999 17:34:39 +0900
> > IM の初期の頃の議論で $&, $`, $' をプログラム中で 1回でも使用すると、
> > パターンマッチごとに $&, $`, $' に対応する文字列をコピーして持つように
> > なるためパフォーマンスが悪くなるという話だったので、$&, $`, $' は敢え
> > て使わないようになっていたのですが、実際のところ、どうなんでしょうね。
>
> Perl ハッカーで「正規表現」監訳者である歌代さん(上司ともいう)が添削し
> て下さることになりましたので、125 では直ると思います。
im-124 の中を探すと、これだけみつかります。
./IM/Japanese.pm:226: $line =~ s/(($C_sjis)+|($C_sjis_kana)+)/s2j($&, $')/geo;
./IM/Japanese.pm:290: $line =~ s/(($C_euc)+|($C_euc_kana)+)/e2j($&, $')/geo;
./IM/Japanese.pm:316: $line =~ s/$C_sjis|$C_sjis_kana/s2e($&)/geo;
./IM/Scan.pm:895: $_ = $';
./imput.in:414: &add_header(\@Header, 0, $1, $');
./imsetup.in:208: $mhdir = $mail = $';
./imsetup.in:212: $mhalias = $';
./imsetup.in:215: $mhalias = $';
./imsetup.in:218: $address = $';
./imsetup.in:291: $domain = $';
imput と imsetup は影響範囲が狭いのでとりあえず放置することにして、
Japanese.pm と Scan.pm を修正しました。
From: Motonori Nakamura <motonori at example.com>
Subject: Re: naughtiness check sub
Date: Fri, 22 Aug 1997 09:20:51 +0900
> sub conv_from_euc {
> my $line = shift;
> while ($line =~ /(.*)($C_euc|$C_euc_kana)(.*)/s) {
> $line = $1 . &e2j($2, $5) . $5;
> }
> return $line;
> }
>
> なんてすると、コピーが発生しまくって、「生麦生米生卵」という
> 短い文字列に対してでさえ 10倍遅くなってしまうのですが、
> 何か妙案はあるでしょうか?
については jcode.pl と同様に、処理のレベルを一段増やすことで対応しまし
た。Perl5 を前提とすれば、lookahead を使って今までの構造を維持すること
もできるのですが、それがそれほど優れた方法とも思えないのでやめました。
ついでに jcode.pl の copyright もうるさいのでとっちゃいました。
Scan.pm の方は定石ね。
実は eyeball-check しかしていないので、誰かテストしてください。
--utashiro
-------------- next part --------------
*** IM/Japanese.pm.bak Fri Aug 20 15:15:03 1999
--- IM/Japanese.pm Sat Aug 21 00:28:42 1999
***************
*** 223,254 ****
#
sub conv_from_sjis ($) {
my $line = shift;
! $line =~ s/(($C_sjis)+|($C_sjis_kana)+)/s2j($&, $')/geo;
return $line;
}
-
- # s2e() is taken from jcode.pl-1.9/2.3 by utashiro at example.com
- ######################################################################
- #
- # jcode.pl: Perl library for Japanese character code conversion
- #
- # Copyright (c) 1995,1996,1997 Kazumasa Utashiro <utashiro at example.com>
- # Internet Initiative Japan Inc.
- # 1-4 Sanban-cho, Chiyoda-ku, Tokyo 102, Japan
- #
- # Copyright (c) 1992,1993,1994 Kazumasa Utashiro
- # Software Research Associates, Inc.
- #
- # Original version was developed under the name of srekcah at example.com
- # February 1992 and it was called kconv.pl at the beginning. This
- # address was a pen name for group of individuals and it is no longer
- # valid.
- #
- # Use and redistribution for any purpose, without significant
- # modification, is granted as long as all copyright notices are
- # retained. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
- # ANY EXPRESS OR IMPLIED WARRANTIES ARE DISCLAIMED.
- ######################################################################
sub s2e ($) {
my $code = shift;
my ($c1, $c2) = unpack('CC', $code);
--- 223,236 ----
#
sub conv_from_sjis ($) {
my $line = shift;
! $line =~ s/((?:$C_sjis|$C_sjis_kana)+)/sjis2jis($1).$E_asc/geo;
! return $line;
! }
! sub sjis2jis ($) {
! my $line = shift;
! $line =~ s/((?:$C_sjis)+|(?:$C_sjis_kana)+)/s2j($1)/geo;
return $line;
}
sub s2e ($) {
my $code = shift;
my ($c1, $c2) = unpack('CC', $code);
***************
*** 264,281 ****
}
return pack('CC', $c1, $c2);
}
! # s2j() is based on _sjis2jis() of jcode.pl-1.9/2.0 by utashiro at example.com
! sub s2j ($$) {
! my ($cur, $rest) = @_;
! if ($cur =~ /^($C_sjis_kana+)/o) {
$cur =~ tr/\xa1-\xdf/\x21-\x5f/;
! return "$E_kana$cur" if ($rest =~ /^($C_sjis|$C_sjis_kana)/o);
! return "$E_kana$cur$E_asc";
} else {
$cur =~ s/(..)/s2e($1)/ge;
$cur =~ tr/\xa1-\xfe/\x21-\x7e/;
! return "$E_jp$cur" if ($rest =~ /^($C_sjis|$C_sjis_kana)/o);
! return "$E_jp$cur$E_asc";
}
}
--- 246,260 ----
}
return pack('CC', $c1, $c2);
}
! sub s2j ($) {
! my $cur = @_;
! if ($cur =~ /^$C_sjis_kana/o) {
$cur =~ tr/\xa1-\xdf/\x21-\x5f/;
! return "$E_kana$cur";
} else {
$cur =~ s/(..)/s2e($1)/ge;
$cur =~ tr/\xa1-\xfe/\x21-\x7e/;
! return "$E_jp$cur";
}
}
***************
*** 287,306 ****
#
sub conv_from_euc ($) {
my $line = shift;
! $line =~ s/(($C_euc)+|($C_euc_kana)+)/e2j($&, $')/geo;
return $line;
}
- # e2j() is based on _euc2jis() of jcode.pl-1.9/2.0 by utashiro at example.com
sub e2j ($$) {
! my ($cur, $rest) = @_;
$cur =~ tr/\xa1-\xfe/\x21-\x7e/;
if ($cur =~ /\x8e/) {
$cur =~ tr/\x8e//d;
! return "$E_kana$cur" if ($rest =~ /^($C_euc|$C_euc_kana)/o);
! return "$E_kana$cur$E_asc";
} else {
! return "$E_jp$cur" if ($rest =~ /^($C_euc|$C_euc_kana)/o);
! return "$E_jp$cur$E_asc";
}
}
--- 266,287 ----
#
sub conv_from_euc ($) {
my $line = shift;
! $line =~ s/((?:$C_euc|$C_euc_kana)+)/euc2jis($1).$E_asc/geo;
! return $line;
! }
! sub euc2jis ($) {
! my $line = shift;
! $line =~ s/((?:$C_euc)+|(?:$C_euc_kana)+)/e2j($1)/geo;
return $line;
}
sub e2j ($$) {
! my $cur = @_;
$cur =~ tr/\xa1-\xfe/\x21-\x7e/;
if ($cur =~ /\x8e/) {
$cur =~ tr/\x8e//d;
! return "$E_kana$cur";
} else {
! return "$E_jp$cur";
}
}
***************
*** 313,319 ****
sub conv_euc_from_sjis ($) {
my $line = shift;
! $line =~ s/$C_sjis|$C_sjis_kana/s2e($&)/geo;
return $line;
}
--- 294,300 ----
sub conv_euc_from_sjis ($) {
my $line = shift;
! $line =~ s/($C_sjis|$C_sjis_kana)/s2e($1)/geo;
return $line;
}
*** IM/Scan.pm.bak Fri Aug 20 15:15:04 1999
--- IM/Scan.pm Sat Aug 21 00:20:28 1999
***************
*** 888,898 ****
$_ = $line;
s/"([^"]+)"/w2n($1)/geo;
s/,\s+/,/g;
! if (/^(\S+)\s+(\S+)\s+(\S+)/) {
$key = $1;
! $addr = $2;
! $petname = $3;
! $_ = $';
next if ($key =~ /:$/);
next if $petname eq '*';
} else {
--- 888,897 ----
$_ = $line;
s/"([^"]+)"/w2n($1)/geo;
s/,\s+/,/g;
! if (s/^(\S+)\s+(\S+)\s+(\S+)//) {
$key = $1;
! $addr = $2;
! $petname = $3;
next if ($key =~ /:$/);
next if $petname eq '*';
} else {
Mew-dist メーリングリストの案内