# tools/scripts/cpysearch/Cpy.pm - external/github.com/unicode-org/icu - Git at Google

 #
 # Copyright (C) 2017 and later: Unicode, Inc. and others.
 # License & terms of use: http://www.unicode.org/copyright.html
 #
 #  ***********************************************************************
 #  * COPYRIGHT:
 #  * Copyright (c) 2011, International Business Machines Corporation
 #  * and others. All Rights Reserved.
 #  ***********************************************************************
 #
 # Common functionality between cpysearch.pl and cpyscan.pl
 #

 package Cpy;
 use strict;
 use warnings;
 use base 'Exporter';

 our @EXPORT = qw(any glob_to_regex should_ignore);

 # any(CODE, LIST)
 # Run CODE once per element of LIST, with $_ aliased to that element, and
 # stop at the first element for which CODE returns a true value.
 # Returns 1 on such a match, or 0 if no element matched (including when LIST
 # is empty).
 sub any(&@) {
     my $predicate = shift;
     local $_;
     for (@_) {
         # The bare &-call (no parentheses) hands the current @_ through to
         # the callback.
         return 1 if &$predicate;
     }
     return 0;
 }

 # glob_to_regex(GLOB)
 # Perl doesn't have fnmatch. Closest thing is File::FnMatch but that's
 # UNIX-only (see its caveats). So, as a workaround, convert globs to regular
 # expressions. Translated from Python's fnmatch module.
 #
 # Translation rules:
 #   *                   -> .*
 #   ?                   -> .
 #   [seq]               -> [seq]  (a leading ! becomes ^, a leading ^ is
 #                                  escaped, backslashes are doubled)
 #   [ with no closing ] -> \[
 #   anything else       -> quotemeta() of that character
 #
 # Returns the regex source as a string. No anchors are added.
 sub glob_to_regex($) {
     my $glob  = shift;
     my $len   = length($glob);
     my $i     = 0;
     my $regex = "";

     while ($i < $len) {
         my $c = substr($glob, $i++, 1);
         my $out;

         if    ($c eq '*') { $out = '.*' }
         elsif ($c eq '?') { $out = '.' }
         elsif ($c eq '[') { # glob classes
             my $j = $i;

             # Get the closing index of the class. A ] appearing first
             # (optionally after !) is part of the class, not its end.
             $j++ if    $j < $len && substr($glob, $j, 1) eq '!';
             $j++ if    $j < $len && substr($glob, $j, 1) eq ']';
             $j++ while $j < $len && substr($glob, $j, 1) ne ']';

             # Didn't find closing bracket. Use literal [
             if ($j >= $len) { $out = "\\[" }

             else {
                 # The complete class contents (except the brackets). This
                 # must be read from the glob, not from the regex built so
                 # far.
                 my $s = substr($glob, $i, $j - $i);
                 $s =~ s/\\/\\\\/g;
                 $i = $j + 1; # change position to outside class

                 # Negation
                 if    (substr($s, 0, 1) eq '!') { $s = '^' . substr($s, 1) }
                 # Literal ^
                 elsif (substr($s, 0, 1) eq '^') { $s = '\\' . $s }

                 $out = "[$s]";
             }
         }
         else { $out = quotemeta($c) }

         $regex .= $out;
     }
     return $regex;
 }

 # Load cpyskip.txt contents.
 # Try local .cpyskip.txt
 # no support for HTTP fetch.
 # Dies at module load time (with the OS error) if the file cannot be opened.
 our $cpyskip_file = ".cpyskip.txt";
 our @cpyskip_lines;
 {
     # Lexical handle: nothing outside this block needs it once the file has
     # been slurped.
     open(my $cpyskip_fh, "<", $cpyskip_file)
         or die "Could not open $cpyskip_file: $!";
     @cpyskip_lines = <$cpyskip_fh>;
     close $cpyskip_fh;
     # print "Using local cpyskip.txt\n";
 }

 # Keep only lines whose first non-blank character is not '#', which drops
 # blank lines and comments, then turn each remaining glob into a regex
 # source string. Line endings are stripped as CR and/or LF so a CRLF
 # checkout does not leave a stray \r in every pattern. Leading whitespace is
 # not stripped.
 our @ignore_globs = map  { s/[\r\n]+\z//; glob_to_regex($_) }
                     grep { /^\s*[^#\s]+/ }
                     @cpyskip_lines;

 #for my $rgx (@ignore_globs) {print $rgx . "\n"}
 #exit(0);

 # list of file extensions to ignore
 our @ignore_extensions = qw(svn dll ilk idb pdb dsp dsw opt ncb vcproj sln suo
     cvsignore cnv res icu exe obj bin exp lib out plg jar spp stub policy ttf
     TTF otf);

 # Alternation of the extensions above, each preceded by an escaped dot,
 # e.g. "\.svn|\.dll|...".
 our $ignore_exts = join '|',
                    map { "\\.$_" }
                    @ignore_extensions;

 # ignore regex
 # This is a double-quoted string, so every backslash meant for the regex
 # engine must be doubled; a single "\." would collapse to "." and match any
 # character. No anchors are included in the pattern.
 our $ignore_regex = "data/out/build|CVS|\\~|\\#|Debug|Release|positions|unidata|sources\\.txt|$ignore_exts";

 # should_ignore(FILENAME)
 # Decide whether FILENAME is to be skipped. Returns 1 when FILENAME is the
 # skip-list file itself, when it matches $ignore_regex, or when it matches
 # any pattern in @ignore_globs; returns 0 otherwise.
 sub should_ignore($) {
     my ($path) = @_;

     if ($path eq $cpyskip_file || $path =~ /$ignore_regex/) {
         return 1;
     }

     foreach my $pattern (@ignore_globs) {
         return 1 if $path =~ /$pattern/;
     }

     return 0;
 }

 1;
	#
	# Copyright (C) 2017 and later: Unicode, Inc. and others.
	# License & terms of use: http://www.unicode.org/copyright.html
	#
	# ***********************************************************************
	# * COPYRIGHT:
	# * Copyright (c) 2011, International Business Machines Corporation
	# * and others. All Rights Reserved.
	# ***********************************************************************
	#
	# Common functionality between cpysearch.pl and cpyscan.pl
	#

	package Cpy;
	use strict;
	use warnings;
	use base 'Exporter';

	our @EXPORT = qw(any glob_to_regex should_ignore);

	# any(CODE, LIST)
	# Call CODE for each element of LIST in turn, with $_ aliased to the
	# element. Returns 1 as soon as CODE yields a true value, or 0 if it
	# never does (an empty LIST therefore gives 0).
	sub any(&@) {
	    my $callback = shift;
	    local $_;
	    foreach (@_) {
	        # Called without parentheses, so the callback receives the
	        # current @_.
	        if (&$callback) {
	            return 1;
	        }
	    }
	    return 0;
	}

	# glob_to_regex(GLOB)
	# Perl doesn't have fnmatch. Closest thing is File::FnMatch but that's
	# UNIX-only (see its caveats). So, as a workaround, convert globs to regular
	# expressions. Translated from Python's fnmatch module.
	#
	# Translation rules:
	#   *                   -> .*
	#   ?                   -> .
	#   [seq]               -> [seq]  (a leading ! becomes ^, a leading ^ is
	#                                  escaped, backslashes are doubled)
	#   [ with no closing ] -> \[
	#   anything else       -> quotemeta() of that character
	#
	# Returns the regex source as a string. No anchors are added.
	sub glob_to_regex($) {
	    my $glob  = shift;
	    my $len   = length($glob);
	    my $i     = 0;
	    my $regex = "";

	    while ($i < $len) {
	        my $c = substr($glob, $i++, 1);
	        my $out;

	        # '*' is the multi-character wildcard.
	        if    ($c eq '*') { $out = '.*' }
	        elsif ($c eq '?') { $out = '.' }
	        elsif ($c eq '[') { # glob classes
	            my $j = $i;

	            # Get the closing index of the class. A ] appearing first
	            # (optionally after !) is part of the class, not its end.
	            $j++ if    $j < $len && substr($glob, $j, 1) eq '!';
	            $j++ if    $j < $len && substr($glob, $j, 1) eq ']';
	            $j++ while $j < $len && substr($glob, $j, 1) ne ']';

	            # Didn't find closing bracket. Use literal [
	            if ($j >= $len) { $out = "\\[" }

	            else {
	                # The complete class contents (except the brackets).
	                # This must be read from the glob, not from the regex
	                # built so far.
	                my $s = substr($glob, $i, $j - $i);
	                $s =~ s/\\/\\\\/g;
	                $i = $j + 1; # change position to outside class

	                # Negation
	                if    (substr($s, 0, 1) eq '!') { $s = '^' . substr($s, 1) }
	                # Literal ^
	                elsif (substr($s, 0, 1) eq '^') { $s = '\\' . $s }

	                $out = "[$s]";
	            }
	        }
	        else { $out = quotemeta($c) }

	        $regex .= $out;
	    }
	    return $regex;
	}

	# Load cpyskip.txt contents.
	# Try local .cpyskip.txt
	# no support for HTTP fetch.
	# Dies at module load time (with the OS error) if the file cannot be opened.
	our $cpyskip_file = ".cpyskip.txt";
	our @cpyskip_lines;
	{
	    # Lexical handle: nothing outside this block needs it once the file
	    # has been slurped.
	    open(my $cpyskip_fh, "<", $cpyskip_file)
	        or die "Could not open $cpyskip_file: $!";
	    @cpyskip_lines = <$cpyskip_fh>;
	    close $cpyskip_fh;
	    # print "Using local cpyskip.txt\n";
	}

	# Keep only lines whose first non-blank character is not '#', which drops
	# blank lines and comments, then turn each remaining glob into a regex
	# source string. Line endings are stripped as CR and/or LF so a CRLF
	# checkout does not leave a stray \r in every pattern. Leading whitespace
	# is not stripped.
	our @ignore_globs = map  { s/[\r\n]+\z//; glob_to_regex($_) }
	                    grep { /^\s*[^#\s]+/ }
	                    @cpyskip_lines;

	#for my $rgx (@ignore_globs) {print $rgx . "\n"}
	#exit(0);

	# list of file extensions to ignore
	our @ignore_extensions = qw(svn dll ilk idb pdb dsp dsw opt ncb vcproj sln suo
	    cvsignore cnv res icu exe obj bin exp lib out plg jar spp stub policy ttf
	    TTF otf);

	# Alternation of the extensions above, each preceded by an escaped dot,
	# e.g. "\.svn|\.dll|...". The separator must be a bare '|': in a
	# single-quoted string '\|' keeps its backslash and would make the regex
	# look for a literal pipe character instead of alternating.
	our $ignore_exts = join '|',
	                   map { "\\.$_" }
	                   @ignore_extensions;

	# ignore regex
	# This is a double-quoted string, so every backslash meant for the regex
	# engine must be doubled; a single "\." would collapse to "." and match
	# any character. No anchors are included in the pattern.
	our $ignore_regex = "data/out/build|CVS|\\~|\\#|Debug|Release|positions|unidata|sources\\.txt|$ignore_exts";

	# should_ignore(FILENAME)
	# Decide whether FILENAME is to be skipped. Returns 1 when FILENAME is the
	# skip-list file itself, when it matches $ignore_regex, or when it matches
	# any pattern in @ignore_globs; returns 0 otherwise.
	sub should_ignore($) {
	    my ($path) = @_;

	    if ($path eq $cpyskip_file || $path =~ /$ignore_regex/) {
	        return 1;
	    }

	    foreach my $pattern (@ignore_globs) {
	        return 1 if $path =~ /$pattern/;
	    }

	    return 0;
	}

	1;