# ----------------------------------------------------------------------------
# pyglet
# Copyright (c) 2006-2008 Alex Holkner
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of pyglet nor the names of its
# contributors may be used to endorse or promote products
# derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# ----------------------------------------------------------------------------
# $Id: ttf.py 1579 2008-01-15 14:47:19Z Alex.Holkner $
"""
Implementation of the Truetype file format.
Typical applications will not need to use this module directly; look at
`pyglyph.font` instead.
References:
* http://developer.apple.com/fonts/TTRefMan/RM06
* http://www.microsoft.com/typography/otspec
"""
__docformat__ = 'restructuredtext'
__version__ = '$Id: ttf.py 1579 2008-01-15 14:47:19Z Alex.Holkner $'
import codecs
import os
import mmap
import struct
class TruetypeInfo:
"""Information about a single Truetype face.
The class memory-maps the font file to read the tables, so
it is vital that you call the `close` method to avoid large memory
leaks. Once closed, you cannot call any of the ``get_*`` methods.
Not all tables have been implemented yet (or likely ever will).
Currently only the name and metric tables are read; in particular
there is no glyph or hinting information.
"""
_name_id_lookup = {
'copyright': 0,
'family': 1,
'subfamily': 2,
'identifier': 3,
'name': 4,
'version': 5,
'postscript': 6,
'trademark': 7,
'manufacturer': 8,
'designer': 9,
'description': 10,
'vendor-url': 11,
'designer-url': 12,
'license': 13,
'license-url': 14,
'preferred-family': 16,
'preferred-subfamily': 17,
'compatible-name': 18,
'sample': 19,
}
_platform_id_lookup = {
'unicode': 0,
'macintosh': 1,
'iso': 2,
'microsoft': 3,
'custom': 4
}
_microsoft_encoding_lookup = {
1: 'utf_16_be',
2: 'shift_jis',
4: 'big5',
6: 'johab',
10: 'utf_16_be'
}
_macintosh_encoding_lookup = {
0: 'mac_roman'
}
def __init__(self, filename):
"""Read the given truetype file.
:Parameters:
`filename`
The name of any Windows, OS2 or Macintosh Truetype file.
The object must be closed (see `close`) after use.
An exception will be raised if the file does not exist or cannot
be read.
"""
if not filename: filename = ''
len = os.stat(filename).st_size
self._fileno = os.open(filename, os.O_RDONLY)
if hasattr(mmap, 'MAP_SHARED'):
self._data = mmap.mmap(self._fileno, len, mmap.MAP_SHARED,
mmap.PROT_READ)
else:
self._data = mmap.mmap(self._fileno, len, None, mmap.ACCESS_READ)
offsets = _read_offset_table(self._data, 0)
self._tables = {}
for table in _read_table_directory_entry.array(self._data,
offsets.size, offsets.num_tables):
self._tables[table.tag] = table
self._names = None
self._horizontal_metrics = None
self._character_advances = None
self._character_kernings = None
self._glyph_kernings = None
self._character_map = None
self._glyph_map = None
self._font_selection_flags = None
self.header = \
_read_head_table(self._data, self._tables['head'].offset)
self.horizontal_header = \
_read_horizontal_header(self._data, self._tables['hhea'].offset)
def get_font_selection_flags(self):
"""Return the font selection flags, as defined in OS/2 table"""
if not self._font_selection_flags:
OS2_table = \
_read_OS2_table(self._data, self._tables['OS/2'].offset)
self._font_selection_flags = OS2_table.fs_selection
return self._font_selection_flags
def is_bold(self):
"""Returns True iff the font describes itself as bold."""
return bool(self.get_font_selection_flags() & 0x20)
def is_italic(self):
"""Returns True iff the font describes itself as italic."""
return bool(self.get_font_selection_flags() & 0x1)
def get_names(self):
"""Returns a dictionary of names defined in the file.
The key of each item is a tuple of ``platform_id``, ``name_id``,
where each ID is the number as described in the Truetype format.
The value of each item is a tuple of
``encoding_id``, ``language_id``, ``value``, where ``value`` is
an encoded string.
"""
if self._names:
return self._names
naming_table = \
_read_naming_table(self._data, self._tables['name'].offset)
name_records = \
_read_name_record.array(self._data,
self._tables['name'].offset + naming_table.size,
naming_table.count)
storage = naming_table.string_offset + self._tables['name'].offset
self._names = {}
for record in name_records:
value = self._data[record.offset + storage:\
record.offset + storage + record.length]
key = record.platform_id, record.name_id
value = (record.encoding_id, record.language_id, value)
if not key in self._names:
self._names[key] = []
self._names[key].append(value)
return self._names
def get_name(self, name, platform=None, languages=None):
"""Returns the value of the given name in this font.
:Parameters:
`name`
Either an integer, representing the name_id desired (see
font format); or a string describing it, see below for
valid names.
`platform`
Platform for the requested name. Can be the integer ID,
or a string describing it. By default, the Microsoft
platform is searched first, then Macintosh.
`languages`
A list of language IDs to search. The first language
which defines the requested name will be used. By default,
all English dialects are searched.
If the name is not found, ``None`` is returned. If the name
is found, the value will be decoded and returned as a unicode
string. Currently only some common encodings are supported.
Valid names to request are (supply as a string)::
'copyright'
'family'
'subfamily'
'identifier'
'name'
'version'
'postscript'
'trademark'
'manufacturer'
'designer'
'description'
'vendor-url'
'designer-url'
'license'
'license-url'
'preferred-family'
'preferred-subfamily'
'compatible-name'
'sample'
Valid platforms to request are (supply as a string)::
'unicode'
'macintosh'
'iso'
'microsoft'
'custom'
"""
names = self.get_names()
if type(name) == str:
name = self._name_id_lookup[name]
if not platform:
for platform in ('microsoft','macintosh'):
value = self.get_name(name, platform, languages)
if value:
return value
if type(platform) == str:
platform = self._platform_id_lookup[platform]
if not (platform, name) in names:
return None
if platform == 3: # setup for microsoft
encodings = self._microsoft_encoding_lookup
if not languages:
# Default to english languages for microsoft
languages = (0x409,0x809,0xc09,0x1009,0x1409,0x1809)
elif platform == 1: # setup for macintosh
encodings = self.__macintosh_encoding_lookup
if not languages:
# Default to english for macintosh
languages = (0,)
for record in names[(platform, name)]:
if record[1] in languages and record[0] in encodings:
decoder = codecs.getdecoder(encodings[record[0]])
return decoder(record[2])[0]
return None
def get_horizontal_metrics(self):
"""Return all horizontal metric entries in table format."""
if not self._horizontal_metrics:
ar = _read_long_hor_metric.array(self._data,
self._tables['hmtx'].offset,
self.horizontal_header.number_of_h_metrics)
self._horizontal_metrics = ar
return self._horizontal_metrics
def get_character_advances(self):
"""Return a dictionary of character->advance.
They key of the dictionary is a unit-length unicode string,
and the value is a float giving the horizontal advance in
em.
"""
if self._character_advances:
return self._character_advances
ga = self.get_glyph_advances()
gmap = self.get_glyph_map()
self._character_advances = {}
for i in range(len(ga)):
if i in gmap and not gmap[i] in self._character_advances:
self._character_advances[gmap[i]] = ga[i]
return self._character_advances
def get_glyph_advances(self):
"""Return a dictionary of glyph->advance.
They key of the dictionary is the glyph index and the value is a float
giving the horizontal advance in em.
"""
hm = self.get_horizontal_metrics()
return [float(m.advance_width) / self.header.units_per_em for m in hm]
def get_character_kernings(self):
"""Return a dictionary of (left,right)->kerning
The key of the dictionary is a tuple of ``(left, right)``
where each element is a unit-length unicode string. The
value of the dictionary is the horizontal pairwise kerning
in em.
"""
if not self._character_kernings:
gmap = self.get_glyph_map()
kerns = self.get_glyph_kernings()
self._character_kernings = {}
for pair, value in kerns.items():
lglyph, rglyph = pair
lchar = lglyph in gmap and gmap[lglyph] or None
rchar = rglyph in gmap and gmap[rglyph] or None
if lchar and rchar:
self._character_kernings[(lchar, rchar)] = value
return self._character_kernings
def get_glyph_kernings(self):
"""Return a dictionary of (left,right)->kerning
The key of the dictionary is a tuple of ``(left, right)``
where each element is a glyph index. The value of the dictionary is
the horizontal pairwise kerning in em.
"""
if self._glyph_kernings:
return self._glyph_kernings
header = \
_read_kern_header_table(self._data, self._tables['kern'].offset)
offset = self._tables['kern'].offset + header.size
kernings = {}
for i in range(header.n_tables):
header = _read_kern_subtable_header(self._data, offset)
if header.coverage & header.horizontal_mask \
and not header.coverage & header.minimum_mask \
and not header.coverage & header.perpendicular_mask:
if header.coverage & header.format_mask == 0:
self._add_kernings_format0(kernings, offset + header.size)
offset += header.length
self._glyph_kernings = kernings
return kernings
def _add_kernings_format0(self, kernings, offset):
header = _read_kern_subtable_format0(self._data, offset)
kerning_pairs = _read_kern_subtable_format0Pair.array(self._data,
offset + header.size, header.n_pairs)
for pair in kerning_pairs:
if (pair.left, pair.right) in kernings:
kernings[(pair.left, pair.right)] += pair.value \
/ float(self.header.units_per_em)
else:
kernings[(pair.left, pair.right)] = pair.value \
/ float(self.header.units_per_em)
def get_glyph_map(self):
"""Calculate and return a reverse character map.
Returns a dictionary where the key is a glyph index and the
value is a unit-length unicode string.
"""
if self._glyph_map:
return self._glyph_map
cmap = self.get_character_map()
self._glyph_map = {}
for ch, glyph in cmap.items():
if not glyph in self._glyph_map:
self._glyph_map[glyph] = ch
return self._glyph_map
def get_character_map(self):
"""Return the character map.
Returns a dictionary where the key is a unit-length unicode
string and the value is a glyph index. Currently only
format 4 character maps are read.
"""
if self._character_map:
return self._character_map
cmap = _read_cmap_header(self._data, self._tables['cmap'].offset)
records = _read_cmap_encoding_record.array(self._data,
self._tables['cmap'].offset + cmap.size, cmap.num_tables)
self._character_map = {}
for record in records:
if record.platform_id == 3 and record.encoding_id == 1:
# Look at Windows Unicode charmaps only
offset = self._tables['cmap'].offset + record.offset
format_header = _read_cmap_format_header(self._data, offset)
if format_header.format == 4:
self._character_map = \
self._get_character_map_format4(offset)
break
return self._character_map
def _get_character_map_format4(self, offset):
# This is absolutely, without question, the *worst* file
# format ever. Whoever the fuckwit is that thought this up is
# a fuckwit.
header = _read_cmap_format4Header(self._data, offset)
seg_count = header.seg_count_x2 / 2
array_size = struct.calcsize('>%dH' % seg_count)
end_count = self._read_array('>%dH' % seg_count,
offset + header.size)
start_count = self._read_array('>%dH' % seg_count,
offset + header.size + array_size + 2)
id_delta = self._read_array('>%dh' % seg_count,
offset + header.size + array_size + 2 + array_size)
id_range_offset_address = \
offset + header.size + array_size + 2 + array_size + array_size
id_range_offset = self._read_array('>%dH' % seg_count,
id_range_offset_address)
character_map = {}
for i in range(0, seg_count):
if id_range_offset[i] != 0:
if id_range_offset[i] == 65535:
continue # Hack around a dodgy font (babelfish.ttf)
for c in range(start_count[i], end_count[i] + 1):
addr = id_range_offset[i] + 2*(c - start_count[i]) + \
id_range_offset_address + 2*i
g = struct.unpack('>H', self._data[addr:addr+2])[0]
if g != 0:
character_map[unichr(c)] = (g + id_delta[i]) % 65536
else:
for c in range(start_count[i], end_count[i] + 1):
g = (c + id_delta[i]) % 65536
if g != 0:
character_map[unichr(c)] = g
return character_map
def _read_array(self, format, offset):
size = struct.calcsize(format)
return struct.unpack(format, self._data[offset:offset+size])
def close(self):
"""Close the font file.
This is a good idea, since the entire file is memory mapped in
until this method is called. After closing cannot rely on the
``get_*`` methods.
"""
self._data.close()
os.close(self._fileno)
def _read_table(*entries):
""" Generic table constructor used for table formats listed at
end of file."""
fmt = '>'
names = []
for entry in entries:
name, type = entry.split(':')
names.append(name)
fmt += type
class _table_class:
size = struct.calcsize(fmt)
def __init__(self, data, offset):
items = struct.unpack(fmt, data[offset:offset+self.size])
self.pairs = zip(names, items)
for name, value in self.pairs:
setattr(self, name, value)
def __repr__(self):
s = '{' + ', '.join(['%s = %s' % (name, value) \
for name, value in self.pairs]) + '}'
return s
@staticmethod
def array(data, offset, count):
tables = []
for i in range(count):
tables.append(_table_class(data, offset))
offset += _table_class.size
return tables
return _table_class
# Table formats (see references)
_read_offset_table = _read_table('scalertype:I',
'num_tables:H',
'search_range:H',
'entry_selector:H',
'range_shift:H')
_read_table_directory_entry = _read_table('tag:4s',
'check_sum:I',
'offset:I',
'length:I')
_read_head_table = _read_table('version:i',
'font_revision:i',
'check_sum_adjustment:L',
'magic_number:L',
'flags:H',
'units_per_em:H',
'created:Q',
'modified:Q',
'x_min:h',
'y_min:h',
'x_max:h',
'y_max:h',
'mac_style:H',
'lowest_rec_p_pEM:H',
'font_direction_hint:h',
'index_to_loc_format:h',
'glyph_data_format:h')
_read_OS2_table = _read_table('version:H',
'x_avg_char_width:h',
'us_weight_class:H',
'us_width_class:H',
'fs_type:H',
'y_subscript_x_size:h',
'y_subscript_y_size:h',
'y_subscript_x_offset:h',
'y_subscript_y_offset:h',
'y_superscript_x_size:h',
'y_superscript_y_size:h',
'y_superscript_x_offset:h',
'y_superscript_y_offset:h',
'y_strikeout_size:h',
'y_strikeout_position:h',
's_family_class:h',
'panose1:B',
'panose2:B',
'panose3:B',
'panose4:B',
'panose5:B',
'panose6:B',
'panose7:B',
'panose8:B',
'panose9:B',
'panose10:B',
'ul_unicode_range1:L',
'ul_unicode_range2:L',
'ul_unicode_range3:L',
'ul_unicode_range4:L',
'ach_vend_id:I',
'fs_selection:H',
'us_first_char_index:H',
'us_last_char_index:H',
's_typo_ascender:h',
's_typo_descender:h',
's_typo_line_gap:h',
'us_win_ascent:H',
'us_win_descent:H',
'ul_code_page_range1:L',
'ul_code_page_range2:L',
'sx_height:h',
's_cap_height:h',
'us_default_char:H',
'us_break_char:H',
'us_max_context:H')
_read_kern_header_table = _read_table('version_num:H',
'n_tables:H')
_read_kern_subtable_header = _read_table('version:H',
'length:H',
'coverage:H')
_read_kern_subtable_header.horizontal_mask = 0x1
_read_kern_subtable_header.minimum_mask = 0x2
_read_kern_subtable_header.perpendicular_mask = 0x4
_read_kern_subtable_header.override_mask = 0x5
_read_kern_subtable_header.format_mask = 0xf0
_read_kern_subtable_format0 = _read_table('n_pairs:H',
'search_range:H',
'entry_selector:H',
'range_shift:H')
_read_kern_subtable_format0Pair = _read_table('left:H',
'right:H',
'value:h')
_read_cmap_header = _read_table('version:H',
'num_tables:H')
_read_cmap_encoding_record = _read_table('platform_id:H',
'encoding_id:H',
'offset:L')
_read_cmap_format_header = _read_table('format:H',
'length:H')
_read_cmap_format4Header = _read_table('format:H',
'length:H',
'language:H',
'seg_count_x2:H',
'search_range:H',
'entry_selector:H',
'range_shift:H')
_read_horizontal_header = _read_table('version:i',
'Advance:h',
'Descender:h',
'LineGap:h',
'advance_width_max:H',
'min_left_side_bearing:h',
'min_right_side_bearing:h',
'x_max_extent:h',
'caret_slope_rise:h',
'caret_slope_run:h',
'caret_offset:h',
'reserved1:h',
'reserved2:h',
'reserved3:h',
'reserved4:h',
'metric_data_format:h',
'number_of_h_metrics:H')
_read_long_hor_metric = _read_table('advance_width:H',
'lsb:h')
_read_naming_table = _read_table('format:H',
'count:H',
'string_offset:H')
_read_name_record = _read_table('platform_id:H',
'encoding_id:H',
'language_id:H',
'name_id:H',
'length:H',
'offset:H')
|