# ==========================================================================
# Resume.pm - XML-based resume parser superclass
# Andrew Ho (andrew@zeuscat.com)
#
# This file contains embedded documentation in Perl POD format; use
# "perldoc Resume" to view, or utilities such as pod2html or pod2man to
# convert; or, search for the string "=head" in this file.
#
# $Id$
# ==========================================================================

package Resume;
require 5.004;

use strict;
use XML::Parser ();
use vars qw($VERSION @ISA @METAINFORMATION);


# --------------------------------------------------------------------------
# Package globals

$VERSION = 0.90;
@ISA     = ('XML::Parser');

# Unbuffer the currently selected output handle as soon as the module loads.
$| = 1;

# Element names whose rendered content is also recorded as meta information
# under the element's own name (see _process).
@METAINFORMATION = qw(author email keywords);

# debug MESSAGE
# Print MESSAGE and a newline to STDERR, but only for pre-1.0 versions.
# An undefined MESSAGE prints as an empty line.
sub debug($) {
    my $message = shift;
    $message = '' unless defined $message;
    print STDERR $message, "\n" if $VERSION < 1;
}

# _dispatch_table ARGS
# Plain helper function (not a method): turn dispatch arguments into a hash
# reference. A lone hash reference is used as is; anything else is treated
# as a key/value list.
sub _dispatch_table {
    return $_[0] if @_ == 1 && ref($_[0]) eq 'HASH';
    return { @_ };
}


# --------------------------------------------------------------------------
# Constructor

# new CLASS, DISPATCH
# Build a parser object. DISPATCH is a list of (tag name => code ref) pairs,
# or a single hash reference holding such pairs. The XML itself is parsed by
# an internal XML::Parser object using the 'Tree' style.
sub new {
    my $class = shift;
    my $self  = bless {
        _dispatch => _dispatch_table(@_),
        _parser   => XML::Parser->new( Style => 'Tree' ),
    }, $class;
    return $self;
}

# dispatch DISPATCH
# Replace the dispatch table. Accepts the same arguments as new(): either a
# key/value list or a single hash reference. Returns the table now in use.
sub dispatch {
    my $self = shift;
    return $self->{_dispatch} = _dispatch_table(@_);
}


# --------------------------------------------------------------------------
# Public parser methods

# parse ARGS
# Hand ARGS to the internal XML::Parser's parse() and format the resulting
# tree. Scalar context yields the formatted text; list context yields the
# text followed by the collected meta information key/value pairs.
sub parse {
    my $self   = shift;
    my $parsed = $self->{_parser}->parse(@_);
    return $self->_process($parsed->[0], $parsed->[1], 0);
}

# parsefile ARGS
# Same as parse(), but ARGS are handed to XML::Parser's parsefile().
sub parsefile {
    my $self   = shift;
    my $parsed = $self->{_parser}->parsefile(@_);
    return $self->_process($parsed->[0], $parsed->[1], 0);
}


# --------------------------------------------------------------------------
# Internal methods

# _tag TAG, ATTRIBUTE, TEXT, METAINFORMATION
# Pick the formatter for one element, in this order: the dispatch entry
# named after the tag, the dispatch entry named "_default", the built-in
# meta handler (tag "meta", any letter case), the built-in default
# formatter. Dispatch table subs are called as plain functions (no object
# argument); the built-ins are called as methods.
sub _tag {
    my $self     = shift;
    my($tag)     = @_;
    my $dispatch = $self->{_dispatch};

    if($dispatch && exists $dispatch->{$tag}) {
        return $dispatch->{$tag}->(@_);
    } elsif($dispatch && exists $dispatch->{_default}) {
        return $dispatch->{_default}->(@_);
    } elsif(lc($tag) eq 'meta') {
        return $self->_meta(@_);
    } else {
        return $self->_default(@_);
    }
}

# _default TAG, ATTRIBUTE, TEXT
# Re-emit the element as markup: an opening tag carrying the attributes
# (sorted by name), the already formatted TEXT, the closing tag and a
# newline. Attribute values are written out as they are.
sub _default {
    my $self = shift;
    my($tag, $attribute, $text) = @_;

    my $attributes = join ' ',
        map { join '', $_, '="', $attribute->{$_}, '"' }
        sort keys %{$attribute};
    $attributes = $attributes ? ' ' . $attributes : '';

    return join '', '<', $tag, $attributes, '>', $text, '</', $tag, ">\n";
}

# _meta TAG, ATTRIBUTE, TEXT
# Handle a meta element. It never produces output text; when it has both a
# "name" and a "content" attribute it returns that pair after the empty
# text, so _process records it as meta information.
sub _meta {
    my $self = shift;
    my($tag, $attribute, $text) = @_;

    if(exists $attribute->{name} && exists $attribute->{content}) {
        return('', $attribute->{name}, $attribute->{content});
    } else {
        return '';
    }
}

# _process TAG, CONTENT, INDENT
# Recursively format one node of an XML::Parser 'Tree' style parse tree.
#   TAG     - element name, or 0 for a text node
#   CONTENT - the text (for tag 0), otherwise an array reference holding the
#             attribute hash reference followed by (tag, content) pairs for
#             the children
#   INDENT  - nesting depth; only passed down, not used for formatting
# Returns the formatted text in scalar context. In list context the text is
# followed by meta information key/value pairs gathered from this node and
# everything below it. Dies if CONTENT is neither of the expected shapes.
sub _process {
    my $self = shift;
    my($tag, $ref, $indent) = @_;

    my $returnvalue     = '';
    my %metainformation = ();

    if($tag eq '0') {
        # Text node: trim both ends and collapse whitespace runs.
        $ref =~ s/^\s+//;
        $ref =~ s/\s+$//;
        $ref =~ s/\s+/\ /gsm;
        # Test the length, not the truth, so that the text "0" survives.
        $returnvalue = $ref if length $ref;

    } elsif('ARRAY' eq ref $ref) {
        my($attribute, @children) = @{$ref};
        my $childreturn = '';

        # Children come as consecutive (tag, content) pairs.
        while(@children) {
            my($child_tag, $child_content) = splice(@children, 0, 2);
            my($child_text, %child_meta) =
                $self->_process($child_tag, $child_content, $indent + 1);
            $childreturn .= $child_text;
            while(my($key, $value) = each %child_meta) {
                $metainformation{$key} = $value;
            }
        }

        # Elements listed in @METAINFORMATION publish their own content.
        $metainformation{$tag} = $childreturn
            if grep { $_ eq $tag } @METAINFORMATION;

        # A formatter may return further key/value pairs after its text.
        my %tag_meta;
        ($returnvalue, %tag_meta) =
            $self->_tag($tag, $attribute, $childreturn, %metainformation);
        while(my($key, $value) = each %tag_meta) {
            $metainformation{$key} = $value;
        }

    } else {
        die "got ref " . ref $ref;
    }

    return wantarray ? ($returnvalue, %metainformation) : $returnvalue;
}


# --------------------------------------------------------------------------
# Return a true value explicitly so that loading the module succeeds.

1;

__END__

=head1 NAME

Resume.pm - XML-based resume parser superclass

=head1 SYNOPSIS

    use Resume;

    $parser = Resume->new(%dispatch_table);
    print scalar $parser->parse($text);

    $parser->dispatch(%new_dispatch_table);
    print scalar $parser->parsefile($filename);

    # list context also returns the collected meta information
    ($output, %metainformation) = $parser->parse($text);

=head1 DESCRIPTION

This module is an XML parser, built on the Tree style of XML::Parser,
which provides a dispatch table based method of formatting XML text. Its
purpose is to assist in the transformation of XML into a structured
document, for example, a resume.

Catch an arbitrary XML element:

    <element attribute="value">text</element>

With an entry in the dispatch table like this:

    element => sub {
        my( $tag, $attribute, $text, %metainformation ) = @_;
        %attributes = %{$attribute};
        # return a string
    }

As the parser parses the XML document and sees E<lt>elementE<gt>, it calls
the element subroutine and returns its results.

An entry named C<_default> in the dispatch table is called for every
element that has no entry of its own. Without one, a C<meta> element
with C<name> and C<content> attributes is turned into meta information
and produces no output, and any other element is written back out as
markup.

Meta information is set by the global @METAINFORMATION variable. Meta
information is passed from child elements to parent elements. A dispatch
table subroutine may return additional meta information as key/value
pairs following its string.

=head1 BUGS

The dispatch table method does not scale.

=head1 SEE ALSO

L<XML::Parser>, L<perl>

=head1 AUTHOR

Andrew Ho E<lt>F<andrew@zeuscat.com>E<gt>

=cut