| ###################################################################### |
| # Copyright (C) 1999, International Business Machines |
| # Corporation and others. All Rights Reserved. |
| ###################################################################### |
| # See: ftp://elsie.nci.nih.gov/pub/tzdata<year> |
| # where <year> is "1999b" or a similar string. |
| ###################################################################### |
| # This package handles the parsing of time zone files. |
| # Author: Alan Liu |
| ###################################################################### |
| # Usage: |
| # Call ParseFile for each file to be imported. Then call Postprocess |
| # to remove unused rules and links. |
| |
| package TZ; |
| use strict; |
| use Carp; |
| use vars qw(@ISA @EXPORT $VERSION $YEAR $STANDARD); |
| require 'dumpvar.pl'; |
| |
| @ISA = qw(Exporter); |
| @EXPORT = qw(ParseFile |
| Postprocess |
| ); |
| $VERSION = '0.1'; |
| |
| $STANDARD = '-'; # Name of the Standard Time rule |
| |
| ###################################################################### |
| # Param: File name |
| # Param: Ref to hash of zones |
| # Param: Ref to hash of rules |
| # Param: Current year |
| sub ParseFile { |
| my ($FILE, $ZONES, $RULES, $YEAR) = @_; |
| |
| local(*FILE); |
| open(FILE,"<$FILE") or confess "Can't open $FILE: $!"; |
| my $zone; # Current zone |
| my $badLineCount = 0; |
| while (<FILE>) { |
| s/\#.*//; |
| next if (!/\S/); |
| #|# Zone NAME GMTOFF RULES FORMAT [UNTIL] |
| #|Zone America/Montreal -4:54:16 - LMT 1884 |
| #| -5:00 Mont E%sT |
| #|Zone America/Thunder_Bay -5:57:00 - LMT 1895 |
| #| -5:00 Canada E%sT 1970 |
| #| -5:00 Mont E%sT 1973 |
| #| -5:00 - EST 1974 |
| #| -5:00 Canada E%sT |
| my ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil); |
| if (/^zone/i) { |
| # Zone block start |
| if (/^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/i |
| || /^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)()/i) { |
| $zone = $1; |
| ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) = |
| ($2, $3, $4, $5); |
| } else { |
| print STDERR "Can't parse in $FILE: $_"; |
| ++$badLineCount; |
| } |
| } elsif (/^\s/ && $zone) { |
| # Zone continuation |
| if (/^\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/ |
| || /^\s+(\S+)\s+(\S+)\s+(\S+)()/) { |
| ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) = |
| ($1, $2, $3, $4); |
| } else { |
| print STDERR "Can't parse in $FILE: $_"; |
| ++$badLineCount; |
| } |
| } elsif (/^rule/i) { |
| # Here is where we parse a single line of the rule table. |
| # Our goal is to accept only rules applying to the current |
| # year. This is normally a matter of accepting rules |
| # that match the current year. However, in some cases this |
| # is more complicated. For example: |
| #|# Tonga |
| #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S |
| #|Rule Tonga 1999 max - Oct Sat>=1 2:00s 1:00 S |
| #|Rule Tonga 2000 max - Apr Sun>=16 2:00s 0 - |
| # To handle this properly, we save every rule we encounter |
| # (thus overwriting older ones with newer ones, since rules |
| # are listed in order), and also use slot [2] to mark when |
| # we see a current year rule. When that happens, we stop |
| # saving rules. Thus we match the latest rule we see, or |
| # a matching rule if we find one. The format of slot [2] |
| # is just a 2 bit flag ([2]&1 means slot [0] matched, |
| # [2]&2 means slot [1] matched). |
| |
| # Note that later, when the rules are post processed |
| # (see Postprocess), the slot [2] will be overwritten |
| # with the compressed rule string used to implement |
| # equality testing. |
| |
| $zone = undef; |
| # Rule |
| #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S |
| #|Rule US 1918 1919 - Mar lastSun 2:00 1:00 W # War |
| #|Rule US 1918 1919 - Oct lastSun 2:00 0 S |
| #|Rule US 1942 only - Feb 9 2:00 1:00 W # War |
| #|Rule US 1945 only - Sep 30 2:00 0 S |
| #|Rule US 1967 max - Oct lastSun 2:00 0 S |
| #|Rule US 1967 1973 - Apr lastSun 2:00 1:00 D |
| #|Rule US 1974 only - Jan 6 2:00 1:00 D |
| #|Rule US 1975 only - Feb 23 2:00 1:00 D |
| #|Rule US 1976 1986 - Apr lastSun 2:00 1:00 D |
| #|Rule US 1987 max - Apr Sun>=1 2:00 1:00 D |
| if (/^rule\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+ |
| (\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/xi) { |
| my ($name, $from, $to, $type, $in, $on, $at, $save, $letter) = |
| ($1, $2, $3, $4, $5, $6, $7, $8, $9); |
| my $i = $save ? 0:1; |
| |
| if (!exists $RULES->{$name}) { |
| $RULES->{$name} = []; |
| } |
| my $ruleArray = $RULES->{$name}; |
| |
| # Check our bit mask to see if we've already matched |
| # a current rule. If so, do nothing. If not, then |
| # save this rule line as the best one so far. |
| if (@{$ruleArray} < 3 || |
| !($ruleArray->[2] & $i)) { |
| my $h = $ruleArray->[$i]; |
| $ruleArray->[$i]->{from} = $from; |
| $ruleArray->[$i]->{to} = $to; |
| $ruleArray->[$i]->{type} = $type; |
| $ruleArray->[$i]->{in} = $in; |
| $ruleArray->[$i]->{on} = $on; |
| $ruleArray->[$i]->{at} = $at; |
| $ruleArray->[$i]->{save} = $save; |
| $ruleArray->[$i]->{letter} = $letter; |
| |
| # Does this rule match the current year? If so, |
| # set the bit mask so we don't overwrite this rule. |
| # This makes us ingore rules for subsequent years |
| # that are already listed in the database -- as long |
| # as we have an overriding rule for the current year. |
| if (($from == $YEAR && $to =~ /only/i) || |
| ($from <= $YEAR && |
| (($to =~ /^\d/ && $YEAR <= $to) || $to =~ /max/i))) { |
| $ruleArray->[2] |= $i; |
| } |
| } |
| } else { |
| print STDERR "Can't parse in $FILE: $_"; |
| ++$badLineCount; |
| } |
| } elsif (/^link/i) { |
| #|# Old names, for S5 users |
| #| |
| #|# Link LINK-FROM LINK-TO |
| #|Link America/New_York EST5EDT |
| #|Link America/Chicago CST6CDT |
| #|Link America/Denver MST7MDT |
| #|Link America/Los_Angeles PST8PDT |
| #|Link America/Indianapolis EST |
| #|Link America/Phoenix MST |
| #|Link Pacific/Honolulu HST |
| if (/^link\s+(\S+)\s+(\S+)/i) { |
| # We currently only record a single link -- if there |
| # are more than one, we should modify this. |
| my ($from, $to) = ($1, $2); |
| $ZONES->{$from}->{link} = $to; |
| } else { |
| print STDERR "Can't parse in $FILE: $_"; |
| ++$badLineCount; |
| } |
| } else { |
| # Unexpected line |
| print STDERR "Ignoring in $FILE: $_"; |
| ++$badLineCount; |
| } |
| if ($zoneRule && |
| ($zoneUntil !~ /\S/ || ($zoneUntil =~ /^\d/ && |
| $zoneUntil >= $YEAR))) { |
| $ZONES->{$zone}->{gmtoff} = $zoneGmtoff; |
| $ZONES->{$zone}->{rule} = $zoneRule; |
| $ZONES->{$zone}->{format} = $zoneFormat; |
| $ZONES->{$zone}->{until} = $zoneUntil; |
| } |
| } |
| close(FILE); |
| } |
| |
| ###################################################################### |
| # Param: Ref to hash of zones |
| # Param: Ref to hash of rules |
| sub Postprocess { |
| my ($ZONES, $RULES) = @_; |
| my %ruleInUse; |
| |
| # Eliminate zone links that have no corresponding zone |
| foreach (keys %$ZONES) { |
| if (exists $ZONES->{$_}->{link} && !exists $ZONES->{$_}->{rule}) { |
| if (0) { |
| print STDERR |
| "Deleting link from historical/nonexistent zone: ", |
| $_, " -> ", $ZONES->{$_}->{link}, "\n"; |
| } |
| delete $ZONES->{$_}; |
| } |
| } |
| |
| # Check that each zone has a corresponding rule |
| foreach (sort keys %$ZONES) { |
| my $ruleName = $ZONES->{$_}->{rule}; |
| next if ($ruleName eq $STANDARD); |
| if (exists $RULES->{$ruleName}) { |
| $ruleInUse{$ruleName} = 1; |
| } else { |
| # This means the zone is using the standard rule now |
| $ZONES->{$_}->{rule} = $STANDARD; |
| } |
| } |
| |
| # Check that both parts are there for rules |
| # Check for unused rules |
| # Make coded string for comparisons |
| foreach (keys %$RULES) { |
| if (!exists $ruleInUse{$_}) { |
| if (0) { |
| print STDERR "Deleting historical/unused rule: $_\n"; |
| } |
| delete $RULES->{$_}; |
| } elsif (!$RULES->{$_}->[0] || !$RULES->{$_}->[1]) { |
| print STDERR "Rule doesn't have both parts: $_\n"; |
| } else { |
| # Generate code |
| # This has all the data about a rule; it can be used |
| # to see if two rules behave identically |
| $RULES->{$_}->[2] = |
| lc($RULES->{$_}->[0]->{in} . "," . |
| $RULES->{$_}->[0]->{on} . "," . |
| $RULES->{$_}->[0]->{at} . "," . |
| $RULES->{$_}->[0]->{save} . ";" . |
| $RULES->{$_}->[1]->{in} . "," . |
| $RULES->{$_}->[1]->{on} . "," . |
| $RULES->{$_}->[1]->{at}); # [1]->{save} is always zero |
| } |
| } |
| } |