[Mew-dist 07260] Re: IM::Iso2022.pm

Yutaka KAWASE yutaka at example.com
1999年 1月 23日 (土) 01:59:45 JST


かわせです。なんべんもすいません・・・

   引用しているメイルは、
   From: Yutaka KAWASE <yutaka at example.com>
   Subject: [Mew-dist 07243] Re: IM::Iso2022.pm
   Date: Fri, 22 Jan 1999 02:06:22 +0900
   Message-ID: <19990122020613Q.yutaka at example.com>
   のものです。

> > 実は、ここには、
> > 
> > $rcode = &header_iso2022jp_conv (\@Header, code_conv_flag);
> > 
> > が抜けています。これを呼べば、配列 @Header に入っているヘッダ
> > フィールド群を一括変換してくれます。

添付のようなスクリプトを実行してみるのですが・・・うまくいかないのです。

Iso2022jp.pm に 沢山 print() を挿入して様子をみてみると、無限ループに
なってしまってます。

何が悪いのでしょうか・・・

--
yutaka kawase <yutaka at example.com>

-------------- next part --------------
# -*-Perl-*-
################################################################
###
###			     Iso2022jp.pm
###
### Author:  Internet Message Group <img at example.com>
### Created: Apr 23, 1997
### Revised: Sep  5, 1998
###

my $PM_VERSION = "IM::Iso2022jp.pm version 980905(IM100)";

package IM::Iso2022jp;
require 5.003;
require Exporter;

use IM::Util;
use IM::EncDec qw(b_encode_string q_encode_string);
use IM::Header qw(hdr_cat);
use IM::Japanese qw(code_check conv_iso2022jp);
use integer;
use strict;
use vars qw(@ISA @EXPORT);

@ISA = qw(Exporter);
@EXPORT = qw(set_debug_encode
	struct_iso2022jp_mimefy
	line_iso2022jp_mimefy
	header_iso2022jp_conv
);

=head1 NAME

Iso2022jp - MIME header encoder for ISO-2022-JP character set

=head1 SYNOPSIS

$encoded_string_for_structured_header = &struct_iso2022jp_mimefy(string);
$encoded_string_for_unstructured_header = &line_iso2022jp_mimefy(string);

=head1 DESCRIPTION

=cut

use vars qw($Jp_Bin $Jp_Qin $Jp_out
	    $Jis_kanji $Jis_roman
	    $C_pascii);
($Jp_Bin, $Jp_Qin, $Jp_out) = ('=?ISO-2022-JP?B?', '=?ISO-2022-JP?Q?', '?=');
($Jis_kanji, $Jis_roman) = ('\e\$[\@B]', '\e\([BJ]');
$C_pascii = '[\x21-\x7e]+';

##### STRUCTURED HEADER LINE ISO-2022-JP MIME CONVERSION #####
#
# struct_iso2022jp_mimefy(lines)
#	lines: continuous header lines to be converted
#	return value: (lines, err)
#	  lines: converted lines (NULL if error)
#
sub struct_iso2022jp_mimefy ($) {
    my $line_in = shift;
    my ($c, $groupsyntax);
    my ($inquote, $incomment, $addrquote) = (0, 0, 0);
    my ($groupcolon, $need_space, $need_encode) = (0, 0, 0);
    my ($line_out, $line_work) = ('', '');
    my ($n);
    im_debug("encoding structured: $line_in\n") if (&debug('encode'));
    while ($line_in ne '') {
	if ($line_in =~ /^($Jis_kanji[^\e]+$Jis_roman([ \t]*$Jis_kanji[^\e]+$Jis_roman)*)(.*)/os){
	    $c = $1;
	    $line_in = $3;
	    $need_encode = 1;
	} elsif ($line_in =~ /^($Jis_roman)(.*)/os) {	# XXXX
	    $c = $1;
	    $line_in = $2;
	    $need_encode = 1;
	} else {
	    ($c, $line_in) = unpack('a a*', $line_in);
	}
	if (!$inquote && $c =~ /^\s$/) {
	    # split/encode
	    if ($line_work ne '' && $need_encode) {
		$need_encode = 0;
		$line_out =~ /([^\n]*)$/;
		$n = length($1);
		$line_out .= &word_iso2022jp_mimefy($n, $line_work,
						    $need_space, 1).$c;
	    } else {
		$line_out = &hdr_cat($line_out, $line_work.$c, '');
	    }
	    $line_work = '';
#	    $need_space = 0;
	    next;
	} elsif ($inquote) {
	    $line_work .= $c;
	    if ($c eq '"') {
		$inquote = 0;
	    } elsif ($c eq '\\') {
		my $tmp;
		($tmp, $line_in) = unpack('a a*', $line_in);
		$line_work .= $tmp;
	    }
	    next;
	} elsif ($incomment) {
	    if ($c eq '(') {
		$incomment++;
	    } elsif ($c eq ')') {
		$incomment--;
		if ($incomment == 0) {
		    # encode
		    if ($line_work ne '' && $need_encode) {
			$need_encode = 0;
			$line_out =~ /([^\n]*)$/;
			$n = length($1);
			$line_out .= &word_iso2022jp_mimefy($n, $line_work,
							    $need_space, 1).$c;
		    } else {
			$line_out = &hdr_cat($line_out, $line_work.$c, '');
		    }
		    $line_work = '';
		    $need_space = 1;
		    next;
		}
	    } elsif ($c eq '\\') {
		$line_work .= $c;
		($c, $line_in) = unpack('a a*', $line_in);
	    }
	    $line_work .= $c;
	    next;
	} elsif ($c eq '"') {
	    $inquote = 1;
	} elsif ($c eq '(') {	# beggining of a comment
	    $incomment++;
	    # encode and split
	    if ($line_work ne '' && $need_encode) {
		$need_encode = 0;
		$line_out =~ /([^\n]*)$/;
		$n = length($1);
		$line_out .= &word_iso2022jp_mimefy($n, $line_work, 0, 1).$c;
	    } else {
		$line_out = &hdr_cat($line_out, $line_work.$c, '');
	    }
	    $line_work = '';
	    $need_space = 0;
	    next;
	} elsif ($c eq ')') {
	    im_err("Unbalanced comment parenthesis ('(', ')'): "
		   ."$line_out$line_work -- $c -- $line_in\n");
#	    &error_exit;
	    return '';
	} elsif ($c eq '<') {
	    # encode
	    $addrquote++;
	    if ($addrquote == 1) {
		if ($line_work ne '' && $need_encode) {
		    $need_encode = 0;
		    $line_out =~ /([^\n]*)$/;
		    $n = length($1);
		    $line_work = &word_iso2022jp_mimefy($n, $line_work,
							$need_space, 1).' ';
		    $line_out .= $line_work;
		} else {
		    $line_out = &hdr_cat($line_out, $line_work, '');
		}
		$line_work = $c;
		$need_space = 1;
		next;
	    }
	} elsif ($c eq '>') {
	    $addrquote--;
	    if ($addrquote == 0) {
		# split
		$line_out = &hdr_cat($line_out, $line_work.$c, '');
		$line_work = '';
		$need_space = 1;
		next;
	    }
	} elsif ($c eq '\\') {
	    $line_work .= $c;
	    ($c, $line_in) = unpack('a a*', $line_in);
	} elsif ($c eq ':') {
	    $line_work .= $c;
	    ($c, $line_in) = unpack('a a*', $line_in);
	    $groupcolon = 1 if ($c ne ':');
	} elsif ($c eq ';') {
	    if ($groupcolon) {
		$groupcolon = 0;
		$groupsyntax = 1;
	    }
	} elsif ($c eq ',') {
	    unless ($groupcolon) {
		# trail
		if ($line_work ne '' && $need_encode) {
		    $need_encode = 0;
		    $line_out =~ /([^\n]*)$/;
		    $n = length($1);
		    $line_out .= &word_iso2022jp_mimefy($n, $line_work,
							$need_space, 1).' '.$c;
		} else {
		    $line_out = &hdr_cat($line_out, $line_work.$c, '');
		}
		$line_work = '';
		$need_space = 1;
		next;
	    }
	}
	$line_work .= $c;
    }
    # trail
    if ($line_work ne '' && $need_encode) {
	$need_encode = 0;
	$line_out =~ /([^\n]*)$/;
	$n = length($1);
	$line_out .= &word_iso2022jp_mimefy($n, $line_work, $need_space, 1);
    } else {
	$line_out = &hdr_cat($line_out, $line_work, '');
    }
    im_debug("encoded structured: $line_out\n") if (&debug('encode'));
    if ($addrquote) {
	im_err("Unbalanced address quotes ('<', '>'): $line_out\n");
#	&error_exit;
	return '';
    }
    if ($inquote) {
	im_err("Unbalanced quotes ('\"'): $line_out\n");
#	&error_exit;
	return '';
    }
    if ($incomment) {
	im_err("Unbalanced comment parenthesis ('(', ')'):  $line_out\n");
#	&error_exit;
	return '';
    }
    if ($line_out =~ /$Jis_kanji[^\e]+$Jis_roman/o){
	im_err("invalid iso-2022-jp charset location in structured field: "
	       . "$line_out\n");
#	&error_exit;
	return '';
    }
    return $line_out;
}

##### UNSTRUCTURED HEADER LINE ISO-2022-JP MIME CONVERSION #####
#
# line_iso2022jp_mimefy(lines)
#	lines: continuous header lines to be converted
#	return value: converted lines
#
sub line_iso2022jp_mimefy ($) {
    my ($line_in) = @_;
    my ($line_out, $this_word, $this_space, $this_code, $follow, $n);
    $follow = 0;
    $this_space = '';
    $line_out = '';
    im_debug("encoding unstructured: $line_in\n") if (&debug('encode'));
    while ($line_in ne '') {
	if ($line_in =~ /^\n([ \t]*)(.*)/s) {	# fold headdings
	    $line_in = $2;
	    if ($this_space ne '') {
		$line_out .= $this_space;
		$this_space = '';
	    }
	    if ($1 ne '') {
		$line_out .= "\n$1";
	    } else {
		$line_out .= "\n";
	    }
	    $follow = 0;
	    next;
	}
	$this_space = '';
	if ($line_in =~ /^([ \t]+)(.*)/s) {	# just spaces
	    $line_in = $2;
	    $this_space = $1;
	}
	$this_word = '';
	$this_code = 'us-ascii';
	while ($line_in ne '') {
	    if ($line_in =~ /^($C_pascii)(.*)/os) {
		$line_in = $2;
		$this_word .= $1;
	    } elsif ($line_in =~ /^($Jis_kanji[^\e]+$Jis_roman([ \t]*$Jis_kanji[^\e]+$Jis_roman)*)(.*)/os) {
		last
		  if ($this_code ne 'us-ascii' && $this_code ne 'iso-2022-jp');
		$line_in = $3;
		$this_word .= $1;
		$this_code = 'iso-2022-jp';
	    } elsif ($line_in =~ /^($Jis_roman)(.*)/os){	# XXX
		last
		  if ($this_code ne 'us-ascii' && $this_code ne 'iso-2022-jp');
		$line_in = $2;
		$this_word .= $1;
		$this_code = 'iso-2022-jp';
	    } elsif ($line_in =~ /^[ \t]+/) {	# just spaces
		last;
	    } elsif ($line_in =~ /^\n[ \t]*/) {	# fold headdings
		last;
	    } else {
		# anything else (XXX should be Q-encoded?)
		last if ($this_code ne 'us-ascii' && $this_code ne 'unknown');
		(my $tmp, $line_in) = unpack('a a*', $line_in);
		$this_word .= $tmp;
		$this_code = 'unknown';
	    }
	}
	if ($this_code eq 'us-ascii' || $this_code eq 'unknown') {
	    $line_out = &hdr_cat($line_out, $this_word, $this_space);
	    $this_space = '';
	    $follow = 0;
	} elsif ($this_code eq 'iso-2022-jp') {
	    # ISO-2022-JP encoding
	    im_debug("encoding: $this_word\n") if (&debug('encode'));
	    if ($this_space ne '') {
		if ($follow) {
		    $this_word = $this_space . $this_word;
		} else {
		    $line_out .= $this_space;
		}
	    }
	    $line_out =~ /([^\n]*)$/;
	    $n = length($1);
	    $line_out .= &word_iso2022jp_mimefy($n, $this_word, $follow, 0);
	    $this_space = '';
	    $follow = 1;
	}
    }
    return $line_out;
}

##### WORD ISO-2022-JP MIME CONVERSION #####
#
# word_iso2022jp_mimefy(size, word, need_pre_space, struct)
#	size: length already occupied in the last line
#	word: word to be converted
#	need_pre_space: space should be prepended
#	struct: true if in structured field
#	return value: encoded words
#
sub word_iso2022jp_mimefy ($$$$) {
    my ($size, $word_in, $need_pre_space, $struct) = @_;
    my ($word_out) = '';
    my ($word_conv, $n, $word_sub, $word_rest);

    if ($main::NoFolding) {
	if ($main::HdrQEncoding) {
	    $word_out .= $Jp_Qin;
	    $word_out .= &q_encode_string($word_in, $struct);
	} else {
	    $word_out .= $Jp_Bin;
	    $word_out .= &b_encode_string($word_in);
	}
	$word_out .= $Jp_out;
	return $word_out;
    }

    $size = $main::Folding_length - $size;
    im_debug("encoding word($size): $word_in\n") if (&debug('encode'));
    if ($size - length($Jp_Bin) - length($Jp_out) - 12 <= 0) {
	$word_out .= "\n\t";
	$size = $main::Folding_length;
    } elsif ($need_pre_space) {
	$word_out .= ' ';
    }
    while ($word_in ne '') {
	print "[debug] -- top of while --\n";
	$word_conv = '';
	$word_out =~ /([^\n]*)$/;
	$n = int(($size - (length($1) + length($Jp_Bin)
			   + length($Jp_out) + 12))/4*3);
	while (($n > 0) && $word_in ne '') {
	    print "[debug] dummy\n";
#	    if ($word_in !~ /$Jis_kanji/o) {
#		# us-ascii part
#		($word_sub, $word_in) = unpack("a$n a*", $word_in);
#		$word_conv .= $word_sub;
#		$n -= length($word_sub);
#		next;
#	    }
	    if ($word_in =~ /^([^\e]+)(.*)/s) {
		print "[debug] dummy\n";
		# us-ascii part
		($word_sub, $word_in) = unpack("a$n a*", $1);
		$word_in .= $2;
		$word_conv .= $word_sub;
		$n -= length($word_sub);
		next;
	    } elsif ($word_in =~ /^($Jis_roman)([^\e]+)(.*)/s) {
		print "[debug] dummy\n";
		# JIS roman part
		if ($n < 3) {
		    $n = 0;
		    next;
		}
		($word_sub, $word_in) = unpack("a$n a*", $2); # work_in?
		$word_sub = $1 . $word_sub;
		$word_in .= $3;
		$word_conv .= $word_sub;
		$n -= length($word_sub);
		next;
	    } elsif ($word_in =~ /($Jis_kanji)([^\e]+)($Jis_roman)(.*)/os) {
		print "[debug] dummy\n";
		# iso-2022-jp part
		$n = int($n/2)*2 - 6;
		if ($n < 2) {
		    $n = 0;
		    next;
		}
		($word_sub, $word_rest) = unpack("a$n a*", $2);
		if ($word_rest) {
		    $word_in = "$1$word_rest$3$4";
		} else {
		    $word_in = $4;
		}
		$word_conv .= "$1$word_sub$3";
		$n -= length($word_sub)+6;
		next;
	    } else {
		print "[debug] dummy\n";
		# Unsupported charset (XXX)
		$word_conv .= $word_in;
		$word_in = '';
	    }
	    print "[debug] dummy\n";
	}
	print "[debug] checkpoint A\n";
	if ($word_conv ne '') {
	    print "[debug] dummy\n";
	    if ($main::HdrQEncoding) {
		print "[debug] dummy\n";
		$word_out .= $Jp_Qin;
		$word_out .= &q_encode_string($word_conv, $struct);
	    } else {
		print "[debug] dummy\n";
		$word_out .= $Jp_Bin;
		$word_out .= &b_encode_string($word_conv);
	    }
	    print "[debug] dummy\n";
	    $word_out .= $Jp_out;
	}
	print "[debug] checkpoint B\n";
	if ($word_in) {
	    print "[debug] checkpoint C |$word_out|\n";
	    $word_out .= "\n\t";
	}
	$size = $main::Folding_length;
	print "[debug] --end of while--\n";
    }
    return $word_out;
}

##### HEADER ISO-2022-JP CONVERSION #####
#
# header_iso2022jp_conv(Header)
#	return value: status
#	  0: success
#	 -1: failure
#
sub header_iso2022jp_conv ($$) {
    my ($header, $code_conv) = @_;
    my ($i, $c);
    my ($field_name, $field_value);
    for ($i = 0; $i <= $#$header; $i++) {
	im_debug("Iso2022jp: converting: $$header[$i]\n") if (&debug('encode'));
	$c = &code_check($$header[$i]);
	if ($code_conv) {
	    if ($c eq 'sORe') {
		if ($main::Body_code ne '') {
		    $c = lc($main::Body_code);
		} else {
		    $c = lc($main::Default_code);
		}
	    }
	    im_debug("Iso2022jp: code conversion from $c\n")
		if (&debug('encode'));
	    if ($c eq 'sjis' || $c eq 'euc') {
		$$header[$i] = &conv_iso2022jp($$header[$i], uc($c));
	    }
	    $c = 'jis';
	}
	if ($c eq 'jis') {
	    if ($$header[$i] =~ /^([\w-]+:\s*)(\S.*)/s) {
		$field_name = $1;
		$field_value = $2;
		if ($field_name =~ /^Apparently-To:/i
		 || $field_name =~ /^(Resent-)?(To|Cc|Bcc|Dcc|From|Sender|Reply-To):/i
		 || $field_name =~ /^Originator:/i
		 || $field_name =~ /^Errors-To:/i
		 || $field_name =~ /^Return-Receipt-To:/i) {
		    # structured field
		    my $l = &struct_iso2022jp_mimefy($field_value);
		    return -1 if ($l eq '');
		    $$header[$i] = "$field_name$l";
		} else {
		    $$header[$i] = $field_name.&line_iso2022jp_mimefy($field_value);
		}
	    }
	}
	im_debug("Iso2022jp: converted: $$header[$i]\n")
	  if (&debug('encode'));
    }
    return 0;
}

1;

### Copyright (C) 1997, 1998 IM developing team.
### All rights reserved.
### 
### Redistribution and use in source and binary forms, with or without
### modification, are permitted provided that the following conditions
### are met:
### 
### 1. Redistributions of source code must retain the above copyright
###    notice, this list of conditions and the following disclaimer.
### 2. Redistributions in binary form must reproduce the above copyright
###    notice, this list of conditions and the following disclaimer in the
###    documentation and/or other materials provided with the distribution.
### 3. Neither the name of the team nor the names of its contributors
###    may be used to endorse or promote products derived from this software
###    without specific prior written permission.
### 
### THIS SOFTWARE IS PROVIDED BY THE TEAM AND CONTRIBUTORS ``AS IS'' AND
### ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
### IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
### PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE TEAM OR CONTRIBUTORS BE
### LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
### CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
### SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
### BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
### WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
### OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
### IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------- next part --------------
#!/usr/bin/perl
require 'jcode.pl';
use IM::Iso2022jp;
@header = (
	   'To: 川瀬裕 <yutaka\@mailhsot.net>',
	   'Cc: かわせゆたか <yutaka\@get.ne.jp>',
	   'Subject: へへへのへ',
	   'X-header: ほほほ'
	   );

for (@header) {
    $line = $_;
    &jcode'convert(*line, 'jis');
    push(@jisheader, $line);
}

for (@jisheader) {
    print "$_\n";
}

if ( &header_iso2022jp_conv(\@jisheader) ) {
print "--------\n";
for (@jisheader) {
    print "$_\n";
}
}


Mew-dist メーリングリストの案内