 # src/tools/docwriter/sources.py - third_party/freetype2 - Git at Google

 #
 #  sources.py
 #
 #    Convert source code comments to multi-line blocks (library file).
 #
 #  Copyright 2002-2018 by
 #  David Turner.
 #
 #  This file is part of the FreeType project, and may only be used,
 #  modified, and distributed under the terms of the FreeType project
 #  license, LICENSE.TXT.  By continuing to use, modify, or distribute
 #  this file you indicate that you have read the license and
 #  understand and accept it fully.

 """Utility for parsing source files.

 This library file contains definitions of classes needed to decompose C
 source code files into a series of multi-line 'blocks'.  There are two
 kinds of blocks.

   * Normal blocks, which contain source code or ordinary comments.

   * Documentation blocks, which have restricted formatting, and whose text
     always starts with a documentation markup tag like `<Function>',
     `<Type>', etc.

 The routines to process the content of documentation blocks are contained
 in file `content.py'; the classes and methods found here only deal with
 text parsing and basic documentation block extraction.
 """

 from __future__ import print_function

 import fileinput
 import logging
 import re

 log = logging.getLogger( __name__ )

 ################################################################
 ##
 ##  SOURCE BLOCK FORMAT CLASS
 ##
 ##  A simple class containing compiled regular expressions to detect
 ##  potential documentation format block comments within C source code.
 ##
 ##  The `column' pattern must contain a group to `unbox' the content of
 ##  documentation comment blocks.
 ##
 ##  Later on, paragraphs are converted to long lines, which simplifies the
 ##  regular expressions that act upon the text.
 ##
 class  SourceBlockFormat( object ):
     """The compiled `start', `column', and `end' patterns of one layout of
     documentation comment blocks, together with an identifier."""

     def  __init__( self, iden, start, column, end ):
         """Compile the three pattern strings with `re.VERBOSE' and store
         them, along with `iden', as attributes of the new object."""
         sources  = ( start, column, end )
         compiled = [re.compile( src, re.VERBOSE ) for src in sources]

         self.id = iden
         self.start, self.column, self.end = compiled


 #
 # Format 1 documentation comment blocks.
 #
 #    /************************************/ (at least 2 asterisks)
 #    /*                                  */
 #    /*                                  */
 #    /*                                  */
 #    /************************************/ (at least 2 asterisks)
 #
 # The first and the last line of such a block have the same shape, which
 # is why the `start' pattern is also passed as the `end' pattern below.
 # Group 1 of `column' is the text between the opening `/*' and the
 # closing `*/' of a line; its first character must not be an asterisk.
 #
 start = r'''
   \s*      # any number of whitespace
   /\*{2,}/ # followed by '/' and at least two asterisks then '/'
   \s*$     # probably followed by whitespace
 '''

 column = r'''
   \s*      # any number of whitespace
   /\*{1}   # followed by '/' and precisely one asterisk
   ([^*].*) # followed by anything (group 1)
   \*{1}/   # followed by one asterisk and a '/'
   \s*$     # probably followed by whitespace
 '''

 re_source_block_format1 = SourceBlockFormat( 1, start, column, start )


 #
 # Format 2 documentation comment blocks.
 #
 #    /************************************ (at least 2 asterisks)
 #     *
 #     *                                    (1 asterisk)
 #     *
 #     */                                   (1 or more asterisks)
 #
 # A column line starts with a single asterisk that is followed by neither
 # another asterisk nor `/'; group 1 of `column' is everything after that
 # asterisk.  Note that the assignments below rebind the module-level names
 # `start' and `column' already used for format 1.
 #
 start = r'''
   \s*     # any number of whitespace
   /\*{2,} # followed by '/' and at least two asterisks
   \s*$    # probably followed by whitespace
 '''

 column = r'''
   \s*           # any number of whitespace
   \*{1}(?![*/]) # followed by precisely one asterisk not followed by `/'
   (.*)          # then anything (group1)
 '''

 end = r'''
   \s*  # any number of whitespace
   \*+/ # followed by at least one asterisk, then '/'
 '''

 re_source_block_format2 = SourceBlockFormat( 2, start, column, end )


 #
 # The list of supported documentation block formats.  We could add new ones
 # quite easily.  `SourceProcessor.process_normal_line' tests each line
 # against the `start' pattern of every entry of this list.
 #
 re_source_block_formats = [re_source_block_format1, re_source_block_format2]


 #
 # The following regular expressions correspond to markup tags within the
 # documentation comment blocks.  They are equivalent despite their different
 # syntax.
 #
 # A markup tag name consists of word characters (letters, digits, `_') or
 # the character `-'; it is to be found in group 1.  As written, the
 # patterns also accept an empty name.
 #
 # Notice that a markup tag _must_ begin a new paragraph.
 #
 re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' )  # <xxxx> format
 re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' )  # @xxxx: format

 #
 # The list of supported markup tags.  We could add new ones quite easily.
 #
 re_markup_tags = [re_markup_tag1, re_markup_tag2]


 #
 # A regular expression to detect a cross reference, after markup tags have
 # been stripped off.
 #
 # Two syntax forms are supported:
 #
 #   @<name>
 #   @<name>[<id>]
 #
 # where both `<name>' and `<id>' consist of alphanumeric characters, `_',
 # and `-'.  Use `<id>' if there are multiple, valid `<name>' entries.
 #
 # Example: @foo[bar]
 #
 # The named group `name' holds the reference without the leading `@' but
 # including an optional `[<id>]' part; the named group `rest' holds
 # everything that follows.
 #
 re_crossref = re.compile( r"""
                             @
                             (?P<name>(?:\w|-)+
                                      (?:\[(?:\w|-)+\])?)
                             (?P<rest>.*)
                           """, re.VERBOSE )

 #
 # Two regular expressions to detect italic and bold markup, respectively.
 # Group 1 is the marked-up word without its delimiters, group 2 the rest
 # of the line.
 #
 # Note that the markup is limited to words consisting of letters, digits,
 # the characters `_' and `-', or an apostrophe (but not as the first
 # character).
 #
 re_italic = re.compile( r"_((?:\w|-)(?:\w|'|-)*)_(.*)" )     #  _italic_
 re_bold   = re.compile( r"\*((?:\w|-)(?:\w|'|-)*)\*(.*)" )   #  *bold*

 #
 # The regular expression code below, which identifies a URL, has been
 # adapted (with slight modifications) from
 #
 #   https://mail.python.org/pipermail/tutor/2002-September/017228.html
 #
 # `any_sym' is the content of a character class with all characters that
 # may appear in a URL; `punc' is the punctuation that is not taken as part
 # of the URL if it directly precedes a non-URL character or the end of
 # the string.  Group 1 of `re_url' is the URL itself.
 #
 urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
 ltrs = r'\w'
 gunk = r'/#~:.?+=&%@!\-'
 punc = r'.:?\-'
 any_sym = ''.join( ( ltrs, gunk, punc ) )
 url  = r"""
          (
            \b(?![<])             # start at word boundary ignore if < found
            %(urls)s :            # need resource and a colon
            [%(any)s] +?          # followed by one or more of any valid
                                  # character, but be conservative and
                                  # take only what you need to...
            (?=                   # [look-ahead non-consumptive assertion]
              [%(punc)s]*         # either 0 or more punctuation
              (?:                 # [non-grouping parentheses]
                [^%(any)s] | $    # followed by a non-url char
                                  # or end of the string
              )
            )
          )
         """ % dict( urls = urls, any = any_sym, punc = punc )

 re_url = re.compile( url, re.MULTILINE | re.VERBOSE )

 #
 # A regular expression that stops collection of comments for the current
 # block.  It matches optional leading whitespace followed by a C comment
 # that contains nothing but optional whitespace.
 #
 re_source_sep = re.compile( r'\s*/\*\s*\*/' )   #  /* */

 #
 # A regular expression to find possible C identifiers while outputting
 # source code verbatim, covering things like `*foo' or `(bar'.  Group 1 is
 # the prefix, group 2 the identifier -- since we scan lines from left to
 # right, sequentially splitting the source code into prefix and identifier
 # is fully sufficient for our purposes.
 #
 # Both groups may be empty: the prefix is a (possibly empty) run of
 # non-word characters, the identifier a (possibly empty) run of word
 # characters.
 #
 re_source_crossref = re.compile( r'(\W*)(\w*)' )

 #
 # A regular expression that matches a list of reserved C source keywords
 # and preprocessor directive names as whole words; group 1 is the keyword.
 #
 # The pattern is written as a raw string so that `\b' reaches the regular
 # expression engine without doubled backslashes.
 #
 re_source_keywords = re.compile( r'''\b ( typedef   |
                                           struct    |
                                           enum      |
                                           union     |
                                           const     |
                                           char      |
                                           int       |
                                           short     |
                                           long      |
                                           void      |
                                           signed    |
                                           unsigned  |
                                           include   |
                                           define    |
                                           undef     |
                                           if        |
                                           ifdef     |
                                           ifndef    |
                                           else      |
                                           endif   ) \b''', re.VERBOSE )


 ################################################################
 ##
 ##  SOURCE BLOCK CLASS
 ##
 ##  There are two important fields in a `SourceBlock' object.
 ##
 ##    self.lines
 ##      A list of text lines for the corresponding block.
 ##
 ##    self.content
 ##      For documentation comment blocks only, this is the block content
 ##      that has been `unboxed' from its decoration.  This is an empty
 ##      list for all other blocks (i.e., sources or ordinary comments
 ##      with no starting markup tag).
 ##
 class  SourceBlock( object ):
     """A run of consecutive source lines, recorded together with the block
     format that was active in the processor when the run was closed.

     `lines' is a copy of the raw lines.  `content' is the list of unboxed
     comment lines if at least one of them starts with a markup tag;
     otherwise it is an empty list.
     """

     def  __init__( self, processor, filename, lineno, lines ):
         """Create a block from `lines' (which get copied).

         `processor' must provide a `format' attribute, which is either
         `None' or an object whose `column' pattern has a group 1 that
         yields the unboxed text of a comment line.
         """
         self.processor = processor
         self.filename  = filename
         self.lineno    = lineno
         self.lines     = lines[:]
         self.format    = processor.format
         self.content   = []

         # without a block format there is nothing to unbox
         if self.format is None:
             return

         # extract comment lines; lines not matching the column pattern
         # (like the decoration at the start and the end) are dropped
         unboxed = []

         for line in self.lines:
             m = self.format.column.match( line )
             if m:
                 unboxed.append( m.group( 1 ) )

         # now, look for a markup tag; the first non-empty line starting
         # with one turns the block into a documentation block
         for text in unboxed:
             text = text.strip()
             if text and any( tag.match( text ) for tag in re_markup_tags ):
                 self.content = unboxed
                 return

     def  location( self ):
         """Return the position of the block as the string
         `(<filename>:<lineno>)'."""
         return "(" + self.filename + ":" + repr( self.lineno ) + ")"

     # debugging only -- not used in normal operations
     def  dump( self ):
         """Print the unboxed content, framed by two marker lines, if there
         is any; print the raw lines otherwise."""
         if self.content:
             print( "{{{content start---" )
             for text in self.content:
                 print( text )
             print( "---content end}}}" )
             return

         for line in self.lines:
             print( line )


 ################################################################
 ##
 ##  SOURCE PROCESSOR CLASS
 ##
 ##  The `SourceProcessor' is in charge of reading a C source file and
 ##  decomposing it into a series of different `SourceBlock' objects.
 ##
 ##  A SourceBlock object holds one of the following kinds of data.
 ##
 ##    - A documentation comment block using one of the layouts above.  Its
 ##      exact format will be discussed later.
 ##
 ##    - Normal source lines, including comments.
 ##
 ##
 class  SourceProcessor( object ):
     """Split a source file into a list of `SourceBlock' objects.

     While a file is being read, `format' is the block format of the
     comment block currently collected (or `None'), `lines' holds the
     lines collected so far, and `lineno' is the line number at which the
     most recent comment block started.
     """

     def  __init__( self ):
         """Initialize a source processor."""
         self.blocks   = []
         self.filename = None
         self.format   = None
         # set here too so that the attribute exists before `parse_file'
         self.lineno   = 0
         self.lines    = []

     def  reset( self ):
         """Reset a block processor and clean up all its blocks."""
         self.blocks = []
         self.format = None

     def  parse_file( self, filename ):
         """Parse a C source file and add its blocks to the processor's
         list (which gets emptied first)."""
         self.reset()

         self.filename = filename
         log.debug( "Parsing file %s.", filename )

         # `fileinput.input' refuses to start while a previous input is
         # still active, so close it first
         fileinput.close()
         self.format = None
         self.lineno = 0
         self.lines  = []

         for line in fileinput.input( filename ):
             # strip the trailing newline so that the stored lines carry no
             # line terminator
             if line.endswith( '\012' ):
                 line = line[:-1]

             if self.format is None:
                 self.process_normal_line( line )
             elif self.format.end.match( line ):
                 # A normal block end.  Add it to `lines' and create a
                 # new block
                 self.lines.append( line )
                 self.add_block_lines()
             elif self.format.column.match( line ):
                 # A normal column line.  Add it to `lines'.
                 self.lines.append( line )
             else:
                 # An unexpected block end.  Create a new block, but
                 # don't process the line.
                 self.add_block_lines()

                 # we need to process the line again
                 self.process_normal_line( line )

         # record the last lines
         self.add_block_lines()

     def  process_normal_line( self, line ):
         """Process a normal line and check whether it is the start of a new
         block.

         If it is, the lines collected so far are turned into a block, and
         the matching format and the current line number of `fileinput'
         are recorded.  The line itself is always appended to `lines'.
         """
         for f in re_source_block_formats:
             if f.start.match( line ):
                 self.add_block_lines()
                 self.format = f
                 self.lineno = fileinput.filelineno()

         self.lines.append( line )

     def  add_block_lines( self ):
         """Add the current accumulated lines and create a new block.

         Nothing happens if no lines have been accumulated; otherwise
         `format' and `lines' are reset afterwards.
         """
         if self.lines:
             block = SourceBlock( self,
                                  self.filename,
                                  self.lineno,
                                  self.lines )

             self.blocks.append( block )
             self.format = None
             self.lines  = []

     # debugging only, not used in normal operations
     def  dump( self ):
         """Print all blocks in a processor."""
         for b in self.blocks:
             b.dump()

 # eof
	#
	# sources.py
	#
	# Convert source code comments to multi-line blocks (library file).
	#
	# Copyright 2002-2018 by
	# David Turner.
	#
	# This file is part of the FreeType project, and may only be used,
	# modified, and distributed under the terms of the FreeType project
	# license, LICENSE.TXT. By continuing to use, modify, or distribute
	# this file you indicate that you have read the license and
	# understand and accept it fully.

	"""Utility for parsing source files.

	This library file contains definitions of classes needed to decompose C
	source code files into a series of multi-line 'blocks'. There are two
	kinds of blocks.

	* Normal blocks, which contain source code or ordinary comments.

	* Documentation blocks, which have restricted formatting, and whose text
	always starts with a documentation markup tag like `<Function>',
	`<Type>', etc.

	The routines to process the content of documentation blocks are contained
	in file `content.py'; the classes and methods found here only deal with
	text parsing and basic documentation block extraction.
	"""

	from __future__ import print_function

	import fileinput
	import logging
	import re

	log = logging.getLogger( __name__ )

	################################################################
	##
	## SOURCE BLOCK FORMAT CLASS
	##
	## A simple class containing compiled regular expressions to detect
	## potential documentation format block comments within C source code.
	##
	## The `column' pattern must contain a group to `unbox' the content of
	## documentation comment blocks.
	##
	## Later on, paragraphs are converted to long lines, which simplifies the
	## regular expressions that act upon the text.
	##
	class SourceBlockFormat( object ):
	    """The compiled `start', `column', and `end' patterns of one layout
	    of documentation comment blocks, together with an identifier."""

	    def __init__( self, iden, start, column, end ):
	        """Create a block pattern, used to recognize special
	        documentation blocks.  The three pattern strings are compiled
	        with `re.VERBOSE'."""
	        self.id = iden
	        self.start = re.compile( start, re.VERBOSE )
	        self.column = re.compile( column, re.VERBOSE )
	        self.end = re.compile( end, re.VERBOSE )


	#
	# Format 1 documentation comment blocks.
	#
	# /************************************/ (at least 2 asterisks)
	# /* */
	# /* */
	# /* */
	# /************************************/ (at least 2 asterisks)
	#
	# The opening and the closing line of a format 1 block have the same
	# shape, so `start' is also passed as the `end' pattern.
	start = r'''
	  \s*      # any number of whitespace
	  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
	  \s*$     # probably followed by whitespace
	'''

	# Group 1 is the text between `/*' and `*/'; it must not start with an
	# asterisk.
	column = r'''
	  \s*      # any number of whitespace
	  /\*{1}   # followed by '/' and precisely one asterisk
	  ([^*].*) # followed by anything (group 1)
	  \*{1}/   # followed by one asterisk and a '/'
	  \s*$     # probably followed by whitespace
	'''

	re_source_block_format1 = SourceBlockFormat( 1, start, column, start )


	#
	# Format 2 documentation comment blocks.
	#
	# /************************************ (at least 2 asterisks)
	# *
	# * (1 asterisk)
	# *
	# */ (1 or more asterisks)
	#
	# These assignments rebind the module-level names `start' and `column'
	# already used for format 1.
	start = r'''
	  \s*     # any number of whitespace
	  /\*{2,} # followed by '/' and at least two asterisks
	  \s*$    # probably followed by whitespace
	'''

	# Group 1 is everything after the single leading asterisk.
	column = r'''
	  \s*           # any number of whitespace
	  \*{1}(?![*/]) # followed by precisely one asterisk not followed by
	                # another asterisk or `/'
	  (.*)          # then anything (group1)
	'''

	end = r'''
	  \s*  # any number of whitespace
	  \*+/ # followed by at least one asterisk, then '/'
	'''

	re_source_block_format2 = SourceBlockFormat( 2, start, column, end )


	#
	# The list of supported documentation block formats. We could add new ones
	# quite easily. `SourceProcessor.process_normal_line' tests each line
	# against the `start' pattern of every entry of this list.
	#
	re_source_block_formats = [re_source_block_format1, re_source_block_format2]


	#
	# The following regular expressions correspond to markup tags within the
	# documentation comment blocks. They are equivalent despite their different
	# syntax.
	#
	# A markup tag consists of letters or character `-', to be found in group 1.
	#
	# Notice that a markup tag _must_ begin a new paragraph.
	#
	# Group 1 is the tag name: a (possibly empty) run of word characters
	# and `-'.  Leading whitespace is skipped.
	re_markup_tag1 = re.compile( r'''\s*<((?:\w|-)*)>''' ) # <xxxx> format
	re_markup_tag2 = re.compile( r'''\s*@((?:\w|-)*):''' ) # @xxxx: format

	#
	# The list of supported markup tags. We could add new ones quite easily.
	#
	re_markup_tags = [re_markup_tag1, re_markup_tag2]


	#
	# A regular expression to detect a cross reference, after markup tags have
	# been stripped off.
	#
	# Two syntax forms are supported:
	#
	# @<name>
	# @<name>[<id>]
	#
	# where both `<name>' and `<id>' consist of alphanumeric characters, `_',
	# and `-'. Use `<id>' if there are multiple, valid `<name>' entries.
	#
	# Example: @foo[bar]
	#
	# Group `name' is the reference without the leading `@' (including an
	# optional `[<id>]' part); group `rest' is everything that follows.
	re_crossref = re.compile( r"""
	                            @
	                            (?P<name>(?:\w|-)+
	                                     (?:\[(?:\w|-)+\])?)
	                            (?P<rest>.*)
	                          """, re.VERBOSE )

	#
	# Two regular expressions to detect italic and bold markup, respectively.
	# Group 1 is the markup, group 2 the rest of the line.
	#
	# Note that the markup is limited to words consisting of letters, digits,
	# the characters `_' and `-', or an apostrophe (but not as the first
	# character).
	#
	# Group 1 is the marked-up word without its delimiters, group 2 the
	# rest of the line.
	re_italic = re.compile( r"_((?:\w|-)(?:\w|'|-)*)_(.*)" )     #  _italic_
	re_bold   = re.compile( r"\*((?:\w|-)(?:\w|'|-)*)\*(.*)" )   #  *bold*

	#
	# This regular expression code to identify an URL has been taken from
	#
	# https://mail.python.org/pipermail/tutor/2002-September/017228.html
	#
	# (with slight modifications).
	#
	# `any_sym' is the content of a character class with all characters
	# that may appear in a URL; `punc' is the punctuation that is not taken
	# as part of the URL if it directly precedes a non-URL character or the
	# end of the string.  Group 1 of `re_url' is the URL itself.
	urls = r'(?:https?|telnet|gopher|file|wais|ftp)'
	ltrs = r'\w'
	gunk = r'/#~:.?+=&%@!\-'
	punc = r'.:?\-'
	any_sym = "%(ltrs)s%(gunk)s%(punc)s" % { 'ltrs' : ltrs,
	                                         'gunk' : gunk,
	                                         'punc' : punc }
	url = r"""
	        (
	          \b(?![<])             # start at word boundary ignore if < found
	          %(urls)s :            # need resource and a colon
	          [%(any)s] +?          # followed by one or more of any valid
	                                # character, but be conservative and
	                                # take only what you need to...
	          (?=                   # [look-ahead non-consumptive assertion]
	            [%(punc)s]*         # either 0 or more punctuation
	            (?:                 # [non-grouping parentheses]
	              [^%(any)s] | $    # followed by a non-url char
	                                # or end of the string
	            )
	          )
	        )
	       """ % {'urls' : urls,
	              'any'  : any_sym,
	              'punc' : punc }

	re_url = re.compile( url, re.VERBOSE | re.MULTILINE )

	#
	# A regular expression that stops collection of comments for the current
	# block.
	#
	re_source_sep = re.compile( r'\s*/\*\s*\*/' )   #  /* */ (empty comment)

	#
	# A regular expression to find possible C identifiers while outputting
	# source code verbatim, covering things like `*foo' or `(bar'. Group 1 is
	# the prefix, group 2 the identifier -- since we scan lines from left to
	# right, sequentially splitting the source code into prefix and identifier
	# is fully sufficient for our purposes.
	#
	re_source_crossref = re.compile( r'(\W*)(\w*)' )   # both groups may be empty

	#
	# A regular expression that matches a list of reserved C source keywords.
	#
	# Matches the listed keywords and preprocessor directive names as whole
	# words; group 1 is the keyword.  A raw string keeps `\b' free of
	# doubled backslashes.
	re_source_keywords = re.compile( r'''\b ( typedef   |
	                                          struct    |
	                                          enum      |
	                                          union     |
	                                          const     |
	                                          char      |
	                                          int       |
	                                          short     |
	                                          long      |
	                                          void      |
	                                          signed    |
	                                          unsigned  |
	                                          include   |
	                                          define    |
	                                          undef     |
	                                          if        |
	                                          ifdef     |
	                                          ifndef    |
	                                          else      |
	                                          endif   ) \b''', re.VERBOSE )


	################################################################
	##
	## SOURCE BLOCK CLASS
	##
	## There are two important fields in a `SourceBlock' object.
	##
	## self.lines
	## A list of text lines for the corresponding block.
	##
	## self.content
	## For documentation comment blocks only, this is the block content
	## that has been `unboxed' from its decoration. This is an empty
	## list for all other blocks (i.e., sources or ordinary comments
	## with no starting markup tag).
	##
	class SourceBlock( object ):
	    """A run of consecutive source lines, recorded together with the
	    block format that was active in the processor when the run was
	    closed.

	    `lines' is a copy of the raw lines.  `content' is the list of
	    unboxed comment lines if at least one of them starts with a markup
	    tag; otherwise it is an empty list.
	    """

	    def __init__( self, processor, filename, lineno, lines ):
	        """Create a block from `lines' (which get copied).

	        `processor' must provide a `format' attribute, which is either
	        `None' or an object whose `column' pattern has a group 1 that
	        yields the unboxed text of a comment line.
	        """
	        self.processor = processor
	        self.filename = filename
	        self.lineno = lineno
	        self.lines = lines[:]
	        self.format = processor.format
	        self.content = []

	        # without a block format there is nothing to unbox
	        if self.format is None:
	            return

	        # extract comment lines; lines not matching the column pattern
	        # (like the decoration at the start and the end) are dropped
	        unboxed = []

	        for line in self.lines:
	            m = self.format.column.match( line )
	            if m:
	                unboxed.append( m.group( 1 ) )

	        # now, look for a markup tag; the first non-empty line starting
	        # with one turns the block into a documentation block
	        for text in unboxed:
	            text = text.strip()
	            if text and any( tag.match( text ) for tag in re_markup_tags ):
	                self.content = unboxed
	                return

	    def location( self ):
	        """Return the position of the block as the string
	        `(<filename>:<lineno>)'."""
	        return "(" + self.filename + ":" + repr( self.lineno ) + ")"

	    # debugging only -- not used in normal operations
	    def dump( self ):
	        """Print the unboxed content, framed by two marker lines, if
	        there is any; print the raw lines otherwise."""
	        if self.content:
	            print( "{{{content start---" )
	            for text in self.content:
	                print( text )
	            print( "---content end}}}" )
	            return

	        for line in self.lines:
	            print( line )


	################################################################
	##
	## SOURCE PROCESSOR CLASS
	##
	## The `SourceProcessor' is in charge of reading a C source file and
	## decomposing it into a series of different `SourceBlock' objects.
	##
	## A SourceBlock object holds one of the following kinds of data.
	##
	## - A documentation comment block using one of the layouts above. Its
	## exact format will be discussed later.
	##
	## - Normal source lines, including comments.
	##
	##
	class SourceProcessor( object ):
	    """Split a source file into a list of `SourceBlock' objects.

	    While a file is being read, `format' is the block format of the
	    comment block currently collected (or `None'), `lines' holds the
	    lines collected so far, and `lineno' is the line number at which the
	    most recent comment block started.
	    """

	    def __init__( self ):
	        """Initialize a source processor."""
	        self.blocks = []
	        self.filename = None
	        self.format = None
	        # set here too so that the attribute exists before `parse_file'
	        self.lineno = 0
	        self.lines = []

	    def reset( self ):
	        """Reset a block processor and clean up all its blocks."""
	        self.blocks = []
	        self.format = None

	    def parse_file( self, filename ):
	        """Parse a C source file and add its blocks to the processor's
	        list (which gets emptied first)."""
	        self.reset()

	        self.filename = filename
	        log.debug( "Parsing file %s.", filename )

	        # `fileinput.input' refuses to start while a previous input is
	        # still active, so close it first
	        fileinput.close()
	        self.format = None
	        self.lineno = 0
	        self.lines = []

	        for line in fileinput.input( filename ):
	            # strip the trailing newline so that the stored lines carry
	            # no line terminator
	            if line.endswith( '\012' ):
	                line = line[:-1]

	            if self.format is None:
	                self.process_normal_line( line )
	            elif self.format.end.match( line ):
	                # A normal block end. Add it to `lines' and create a
	                # new block
	                self.lines.append( line )
	                self.add_block_lines()
	            elif self.format.column.match( line ):
	                # A normal column line. Add it to `lines'.
	                self.lines.append( line )
	            else:
	                # An unexpected block end. Create a new block, but
	                # don't process the line.
	                self.add_block_lines()

	                # we need to process the line again
	                self.process_normal_line( line )

	        # record the last lines
	        self.add_block_lines()

	    def process_normal_line( self, line ):
	        """Process a normal line and check whether it is the start of a
	        new block.

	        If it is, the lines collected so far are turned into a block,
	        and the matching format and the current line number of
	        `fileinput' are recorded.  The line itself is always appended to
	        `lines'.
	        """
	        for f in re_source_block_formats:
	            if f.start.match( line ):
	                self.add_block_lines()
	                self.format = f
	                self.lineno = fileinput.filelineno()

	        self.lines.append( line )

	    def add_block_lines( self ):
	        """Add the current accumulated lines and create a new block.

	        Nothing happens if no lines have been accumulated; otherwise
	        `format' and `lines' are reset afterwards.
	        """
	        if self.lines:
	            block = SourceBlock( self,
	                                 self.filename,
	                                 self.lineno,
	                                 self.lines )

	            self.blocks.append( block )
	            self.format = None
	            self.lines = []

	    # debugging only, not used in normal operations
	    def dump( self ):
	        """Print all blocks in a processor."""
	        for b in self.blocks:
	            b.dump()

	# eof