[autofit] Speed up computation of `af_reverse_character_map_new`. (4/4)
With this commit, the start-up time for creating the reverse character map
gets reduced from more than 300% to about 25% (as tested with `arial.ttf`
version 7.00).
* src/autofit/afadjust.c [FT_CONFIG_OPTION_USE_HARFBUZZ]: Include
`afgsub.h`.
(add_substitute): New auxiliary function to recursively add elements to
the reverse cmap.
(af_reverse_character_map_new): New code that replaces the old code
removed in the previous commit.
diff --git a/src/autofit/afadjust.c b/src/autofit/afadjust.c
index 3a30b75..fdab1ae 100644
--- a/src/autofit/afadjust.c
+++ b/src/autofit/afadjust.c
@@ -18,6 +18,9 @@
*/
#include "afadjust.h"
+#ifdef FT_CONFIG_OPTION_USE_HARFBUZZ
+# include "afgsub.h"
+#endif
#include <freetype/freetype.h>
#include <freetype/internal/ftobjs.h>
@@ -1169,6 +1172,108 @@
}
+#ifdef FT_CONFIG_OPTION_USE_HARFBUZZ
+
+ static FT_Error
+ add_substitute( FT_UInt glyph_idx,
+ size_t value,
+ FT_UInt32 codepoint,
+ FT_Hash reverse_map,
+ FT_Hash subst_map,
+ FT_Memory memory )
+ {
+ FT_Error error;
+
+ FT_UInt first_substitute = value & 0xFFFF;
+
+ FT_UInt used = reverse_map->used;
+
+
+ /*
+ OpenType features like 'unic' map lowercase letter glyphs to uppercase
+ forms (and vice versa), which could lead to the use of wrong entries
+ in the adjustment database. For this reason we don't overwrite,
+ prioritizing cmap entries.
+
+ XXX Note, however, that this cannot cover all cases since there might
+ be contradictory entries for glyphs not in the cmap. A possible
+ solution might be to specially mark pairs of related lowercase and
+ uppercase characters in the adjustment database that have diacritics
+ on different vertical sides (for example, U+0122 'Ģ' and U+0123 'ģ').
+ The auto-hinter could then perform a topological analysis to do the
+ right thing.
+ */
+ error = ft_hash_num_insert_no_overwrite( first_substitute, codepoint,
+ reverse_map, memory );
+ if ( error )
+ return error;
+
+ if ( reverse_map->used > used )
+ {
+ size_t* subst = ft_hash_num_lookup( first_substitute, subst_map );
+
+
+ if ( subst )
+ {
+ error = add_substitute( first_substitute, *subst, codepoint,
+ reverse_map, subst_map, memory );
+ if ( error )
+ return error;
+ }
+ }
+
+ /* The remaining substitutes. */
+ if ( value & 0xFFFF0000U )
+ {
+ FT_UInt num_substitutes = value >> 16;
+
+ FT_UInt i;
+
+
+ for ( i = 1; i <= num_substitutes; i++ )
+ {
+ size_t* substitute = ft_hash_num_lookup( glyph_idx + ( i << 16 ),
+ subst_map );
+
+
+ used = reverse_map->used;
+
+ error = ft_hash_num_insert_no_overwrite( *substitute,
+ codepoint,
+ reverse_map,
+ memory );
+ if ( error )
+ return error;
+
+ if ( reverse_map->used > used )
+ {
+ size_t* subst = ft_hash_num_lookup( *substitute, subst_map );
+
+
+ if ( subst )
+ {
+ error = add_substitute( *substitute, *subst, codepoint,
+ reverse_map, subst_map, memory );
+ if ( error )
+ return error;
+ }
+ }
+ }
+ }
+
+ return FT_Err_Ok;
+ }
+
+#endif /* FT_CONFIG_OPTION_USE_HARFBUZZ */
+
+
+ /* Construct a 'reverse cmap' (i.e., a mapping from glyph indices to */
+ /* character codes) for all glyphs that an input code point could turn */
+ /* into. */
+ /* */
+ /* If HarfBuzz support is not available, this is the direct inversion */
+ /* of the cmap table, otherwise the mapping gets extended with data */
+ /* from the 'GSUB' table. */
FT_LOCAL_DEF( FT_Error )
af_reverse_character_map_new( FT_Hash *map,
AF_StyleMetrics metrics )
@@ -1184,19 +1289,6 @@
FT_UInt32 codepoint;
FT_Offset i;
-#ifdef FT_CONFIG_OPTION_USE_HARFBUZZ
- /* The next four variables are initialized to avoid compiler warnings. */
- hb_font_t *hb_font = NULL;
- hb_face_t *hb_face = NULL;
-
- hb_set_t *gsub_lookups = NULL;
-
- hb_script_t script;
-
- unsigned int script_count = 1;
- hb_tag_t script_tags[2] = { HB_TAG_NONE, HB_TAG_NONE };
-#endif
-
FT_TRACE4(( "af_reverse_character_map_new:"
" building reverse character map (style `%s')\n",
@@ -1219,9 +1311,69 @@
if ( error )
goto Exit;
+ /* Initialize reverse cmap with data directly from the cmap table. */
+ for ( i = 0; i < AF_ADJUSTMENT_DATABASE_LENGTH; i++ )
+ {
+ FT_Int cmap_glyph;
+
+
+ /*
+ We cannot restrict `codepoint` to character ranges; we have no
+ control what data the script-specific portion of the GSUB table
+ actually holds.
+
+ An example is `arial.ttf` version 7.00; in this font, there are
+ lookups for Cyrillic (lookup 43), Greek (lookup 44), and Latin
+ (lookup 45) that map capital letter glyphs to small capital glyphs.
+ It is tempting to expect that script-specific versions of the 'c2sc'
+ feature only use script-specific lookups. However, this is not the
+ case in this font: the feature uses all three lookups regardless of
+ the script.
+
+ The auto-hinter, while assigning glyphs to styles, uses the first
+ coverage result it encounters for a particular glyph. For example,
+ if the coverage for Cyrillic is tested before Latin (as is currently
+ the case), glyphs without a cmap entry that are covered in 'c2sc'
+ are treated as Cyrillic.
+
+ If we now look at glyph 3498, which is a small-caps version of the
+ Latin character 'A grave' (U+00C0, glyph 172), we can see that it is
+ registered as belonging to a Cyrillic style due to the algorithm
+ just described. As a result, checking only for characters from the
+ Latin range would miss this glyph; we thus have to test all
+ character codes in the database.
+ */
+ codepoint = adjustment_database[i].codepoint;
+
+ cmap_glyph = FT_Get_Char_Index( face, codepoint );
+ if ( cmap_glyph == 0 )
+ continue;
+
+ error = ft_hash_num_insert( cmap_glyph, codepoint, *map, memory );
+ if ( error )
+ goto Exit;
+ }
+
#ifdef FT_CONFIG_OPTION_USE_HARFBUZZ
{
+ hb_font_t *hb_font;
+ hb_face_t *hb_face;
+
+ hb_set_t *gsub_lookups;
+ hb_script_t script;
+
+ unsigned int script_count = 1;
+ hb_tag_t script_tags[2] = { HB_TAG_NONE, HB_TAG_NONE };
+
+ FT_Hash subst_map = NULL;
+
+ hb_codepoint_t idx;
+ FT_UInt hash_idx;
+ FT_Int glyph_idx;
+ size_t value;
+
+
/* No need to check whether HarfBuzz has allocation issues; */
/* it continues to work in such cases and simply returns */
/* 'empty' objects that do nothing. */
@@ -1245,8 +1397,7 @@
#ifdef FT_DEBUG_LEVEL_TRACE
{
- hb_codepoint_t idx;
- FT_Bool have_idx = FALSE;
+ FT_Bool have_idx = FALSE;
FT_TRACE4(( " GSUB lookups to check:\n" ));
@@ -1267,17 +1418,91 @@
}
#endif
+ if ( FT_QNEW( subst_map ) )
+ goto Exit_HarfBuzz;
+
+ error = ft_hash_num_init( subst_map, memory );
+ if ( error )
+ goto Exit_HarfBuzz;
+
+ idx = HB_SET_VALUE_INVALID;
+ while ( hb( set_next )( gsub_lookups, &idx ) )
+ {
+ FT_UInt32 offset = globals->gsub_lookups_single_alternate[idx];
+
+
+ /* Put all substitutions into a single hash table. Note that */
+ /* the hash values usually contain more than a single character */
+ /* code; this can happen if different 'SingleSubst' subtables */
+ /* map a given glyph index to different substitutions, or if */
+ /* 'AlternateSubst' subtable entries are present. */
+ if ( offset )
+ af_map_lookup( globals, subst_map, offset );
+ }
+
+ /*
+ Now iterate over the collected substitution data in `subst_map`
+ (using recursion to resolve one-to-many mappings) and insert the
+ data into the reverse cmap.
+
+ As an example, suppose we have the following cmap and substitution
+ data:
+
+ cmap: X -> a
+ Y -> b
+ Z -> c
+
+ substitutions: a -> b
+ b -> c, d
+ d -> e
+
+ The reverse map now becomes as follows.
+
+ a -> X
+ b -> Y
+ c -> Z (via cmap, ignoring mapping from 'b')
+ d -> Y (via 'b')
+ e -> Y (via 'b' and 'd')
+ */
+
+ hash_idx = 0;
+ while ( ft_hash_num_iterator( &hash_idx,
+ &glyph_idx,
+ &value,
+ subst_map ) )
+ {
+ size_t* val;
+
+
+ /* Ignore keys that do not point to the first substitute. */
+ if ( glyph_idx & 0xFFFF0000 )
+ continue;
+
+ /* Ignore glyph indices that are not related to accents. */
+ val = ft_hash_num_lookup( glyph_idx, *map );
+ if ( !val )
+ continue;
+
+ codepoint = *val;
+
+ error = add_substitute( glyph_idx, value, codepoint,
+ *map, subst_map, memory );
+ if ( error )
+ break;
+ }
+
+ Exit_HarfBuzz:
+ hb( set_destroy )( gsub_lookups );
+
+ ft_hash_num_free( subst_map, memory );
+ FT_FREE( subst_map );
+
+ if ( error )
+ goto Exit;
}
#endif /* FT_CONFIG_OPTION_USE_HARFBUZZ */
-#ifdef FT_CONFIG_OPTION_USE_HARFBUZZ
- if ( hb( version_atleast )( 7, 2, 0 ) )
- {
- hb( set_destroy )( gsub_lookups );
- }
-#endif
-
FT_TRACE4(( " reverse character map built successfully"
" with %d entries\n", ( *map )->used ));