# -*- coding: utf-8 -*-
"""
pygments.lexers.math
~~~~~~~~~~~~~~~~~~~~
Lexers for math languages.
:copyright: Copyright 2006-2010 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
from pygments.lexer import Lexer,RegexLexer,bygroups,include,do_insertions
from pygments.token import Comment,String,Punctuation,Keyword,Name,\
Operator, Number, Text, Generic
from pygments.lexers.agile import PythonLexer
__all__ = ['MuPADLexer', 'MatlabLexer', 'MatlabSessionLexer', 'NumPyLexer',
'RConsoleLexer', 'SLexer']
class MuPADLexer(RegexLexer):
"""
A `MuPAD <http://www.mupad.com>`_ lexer.
Contributed by Christopher Creutzig <christopher@creutzig.de>.
*New in Pygments 0.8.*
"""
name = 'MuPAD'
aliases = ['mupad']
filenames = ['*.mu']
tokens = {
'root' : [
(r'//.*?$', Comment.Single),
(r'/\*', Comment.Multiline, 'comment'),
(r'"(?:[^"\\]|\\.)*"', String),
(r'\(|\)|\[|\]|\{|\}', Punctuation),
(r'''(?x)\b(?:
next|break|end|
axiom|end_axiom|category|end_category|domain|end_domain|inherits|
if|%if|then|elif|else|end_if|
case|of|do|otherwise|end_case|
while|end_while|
repeat|until|end_repeat|
for|from todowntostepend_for import
proc|local|option|save|begin|end_proc|
delete|frame
)\b''', Keyword),
(r'''(?x)\b(?:
DOM_ARRAY|DOM_BOOL|DOM_COMPLEX|DOM_DOMAIN|DOM_EXEC|DOM_EXPR|
DOM_FAIL|DOM_FLOAT|DOM_FRAME|DOM_FUNC_ENV|DOM_HFARRAY|DOM_IDENT|
DOM_INT|DOM_INTERVAL|DOM_LIST|DOM_NIL|DOM_NULL|DOM_POLY|DOM_PROC|
DOM_PROC_ENV|DOM_RAT|DOM_SET|DOM_STRING|DOM_TABLE|DOM_VAR
)\b''', Name.Class),
(r'''(?x)\b(?:
PI|EULER|E|CATALAN|
NIL|FAIL|undefined|infinity|
TRUE|FALSE|UNKNOWN
)\b''',
Name.Constant),
(r'\b(?:dom|procname)\b', Name.Builtin.Pseudo),
(r'\.|,|:|;|=|\+|-|\*|/|\^|@|>|<|\$|\||!|\'|%|~=', Operator),
(r'''(?x)\b(?:
and|or|not|xor|
assuming|
div|mod|
union|minus|intersect|in|subset
)\b''',
Operator.Word),
(r'\b(?:I|RDN_INF|RD_NINF|RD_NAN)\b', Number),
#(r'\b(?:adt|linalg|newDomain|hold)\b', Name.Builtin),
(r'''(?x)
((?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
(?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*)\s*([(])''',
bygroups(Name.Function, Punctuation)),
(r'''(?x)
(?:[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)
(?:::[a-zA-Z_#][a-zA-Z_#0-9]*|`[^`]*`)*''', Name.Variable),
(r'[0-9]+(?:\.[0-9]*)?(?:e[0-9]+)?', Number),
(r'\.[0-9]+(?:e[0-9]+)?', Number),
(r'.', Text)
],
'comment' : [
(r'[^*/]', Comment.Multiline),
(r'/\*', Comment.Multiline, '#push'),
(r'\*/', Comment.Multiline, '#pop'),
(r'[*/]', Comment.Multiline)
]
}
class MatlabLexer(RegexLexer):
"""
For Matlab (or GNU Octave) source code.
Contributed by Ken Schutte <kschutte@csail.mit.edu>.
*New in Pygments 0.10.*
"""
name = 'Matlab'
aliases = ['matlab', 'octave']
filenames = ['*.m']
mimetypes = ['text/matlab']
#
# These lists are generated automatically.
# Run the following in bash shell:
#
# for f in elfun specfun elmat; do
# echo -n "$f = "
# matlab -nojvm -r "help $f;exit;" | perl -ne \
# 'push(@c,$1) if /^ (\w+)\s+-/; END {print q{["}.join(q{","},@c).qq{"]\n};}'
# done
#
# elfun: Elementary math functions
# specfun: Special Math functions
# elmat: Elementary matrices and matrix manipulation
#
# taken from Matlab version 7.4.0.336 (R2007a)
#
elfun = ["sin","sind","sinh","asin","asind","asinh","cos","cosd","cosh",
"acos","acosd","acosh","tan","tand","tanh","atan","atand","atan2",
"atanh","sec","secd","sech","asec","asecd","asech","csc","cscd",
"csch","acsc","acscd","acsch","cot","cotd","coth","acot","acotd",
"acoth","hypot","exp","expm1","log","log1p","log10","log2","pow2",
"realpow","reallog","realsqrt","sqrt","nthroot","nextpow2","abs",
"angle","complex","conj","imag","real","unwrap","isreal","cplxpair",
"fix","floor","ceil","round","mod","rem","sign"]
specfun = ["airy","besselj","bessely","besselh","besseli","besselk","beta",
"betainc","betaln","ellipj","ellipke","erf","erfc","erfcx",
"erfinv","expint","gamma","gammainc","gammaln","psi","legendre",
"cross","dot","factor","isprime","primes","gcd","lcm","rat",
"rats","perms","nchoosek","factorial","cart2sph","cart2pol",
"pol2cart","sph2cart","hsv2rgb","rgb2hsv"]
elmat = ["zeros","ones","eye","repmat","rand","randn","linspace","logspace",
"freqspace","meshgrid","accumarray","size","length","ndims","numel",
"disp","isempty","isequal","isequalwithequalnans","cat","reshape",
"diag","blkdiag","tril","triu","fliplr","flipud","flipdim","rot90",
"find","end","sub2ind","ind2sub","bsxfun","ndgrid","permute",
"ipermute","shiftdim","circshift","squeeze","isscalar","isvector",
"ans","eps","realmax","realmin","pi","i","inf","nan","isnan",
"isinf","isfinite","j","why","compan","gallery","hadamard","hankel",
"hilb","invhilb","magic","pascal","rosser","toeplitz","vander",
"wilkinson"]
tokens = {
'root': [
# line starting with '!' is sent as a system command. not sure what
# label to use...
(r'^!.*', String.Other),
(r'%.*$', Comment),
(r'^\s*function', Keyword, 'deffunc'),
# from 'iskeyword' on version 7.4.0.336 (R2007a):
(r'(break|case|catch|classdef|continue|else|elseif|end|for|function|'
r'global|if|otherwise|parfor|persistent|return|switch|try|while)\b',
Keyword),
("(" + "|".join(elfun+specfun+elmat) + r')\b', Name.Builtin),
# operators:
(r'-|==|~=|<|>|<=|>=|&&|&|~|\|\|?', Operator),
# operators requiring escape for re:
(r'\.\*|\*|\+|\.\^|\.\\|\.\/|\/|\\', Operator),
# punctuation:
(r'\[|\]|\(|\)|\{|\}|:|@|\.|,', Punctuation),
(r'=|:|;', Punctuation),
# quote can be transpose, instead of string:
# (not great, but handles common cases...)
(r'(?<=[\w\)\]])\'', Operator),
(r'(?<![\w\)\]])\'', String, 'string'),
('[a-zA-Z_][a-zA-Z0-9_]*', Name),
(r'.', Text),
],
'string': [
(r'[^\']*\'', String, '#pop')
],
'deffunc': [
(r'(\s*)(?:(.+)(\s*)(=)(\s*))?(.+)(\()(.*)(\))(\s*)',
bygroups(Text.Whitespace, Text, Text.Whitespace, Punctuation,
Text.Whitespace, Name.Function, Punctuation, Text,
Punctuation, Text.Whitespace), '#pop'),
],
}
def analyse_text(text):
if re.match('^\s*%', text, re.M): # comment
return 0.9
elif re.match('^!\w+', text, re.M): # system cmd
return 0.9
return 0.1
line_re = re.compile('.*?\n')
class MatlabSessionLexer(Lexer):
"""
For Matlab (or GNU Octave) sessions. Modeled after PythonConsoleLexer.
Contributed by Ken Schutte <kschutte@csail.mit.edu>.
*New in Pygments 0.10.*
"""
name = 'Matlab session'
aliases = ['matlabsession']
def get_tokens_unprocessed(self, text):
mlexer = MatlabLexer(**self.options)
curcode = ''
insertions = []
for match in line_re.finditer(text):
line = match.group()
if line.startswith('>>'):
insertions.append((len(curcode),
[(0, Generic.Prompt, line[:3])]))
curcode += line[3:]
elif line.startswith('???'):
idx = len(curcode)
# without is showing error on same line as before...?
line = "\n" + line
token = (0, Generic.Traceback, line)
insertions.append( (idx, [token,]) )
else:
if curcode:
for item in do_insertions(
insertions, mlexer.get_tokens_unprocessed(curcode)):
yield item
curcode = ''
insertions = []
yield match.start(), Generic.Output, line
if curcode: # or item:
for item in do_insertions(
insertions, mlexer.get_tokens_unprocessed(curcode)):
yield item
class NumPyLexer(PythonLexer):
'''
A Python lexer recognizing Numerical Python builtins.
*New in Pygments 0.10.*
'''
name = 'NumPy'
aliases = ['numpy']
# override the mimetypes to not inherit them from python
mimetypes = []
filenames = []
EXTRA_KEYWORDS = set([
'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
'set_numeric_ops', 'set_printoptions', 'set_string_function',
'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
])
def get_tokens_unprocessed(self, text):
for index, token, value in \
PythonLexer.get_tokens_unprocessed(self, text):
if token is Name and value in self.EXTRA_KEYWORDS:
yield index, Keyword.Pseudo, value
else:
yield index, token, value
class RConsoleLexer(Lexer):
"""
For R console transcripts or R CMD BATCH output files.
"""
name = 'RConsole'
aliases = ['rconsole', 'rout']
filenames = ['*.Rout']
def get_tokens_unprocessed(self, text):
slexer = SLexer(**self.options)
current_code_block = ''
insertions = []
for match in line_re.finditer(text):
line = match.group()
if line.startswith('>') or line.startswith('+'):
# Colorize the prompt as such,
# then put rest of line into current_code_block
insertions.append((len(current_code_block),
[(0, Generic.Prompt, line[:2])]))
current_code_block += line[2:]
else:
# We have reached a non-prompt line!
# If we have stored prompt lines, need to process them first.
if current_code_block:
# Weave together the prompts and highlight code.
for item in do_insertions(insertions,
slexer.get_tokens_unprocessed(current_code_block)):
yield item
# Reset vars for next code block.
current_code_block = ''
insertions = []
# Now process the actual line itself, this is output from R.
yield match.start(), Generic.Output, line
# If we happen to end on a code block with nothing after it, need to
# process the last code block. This is neither elegant nor DRY so
# should be changed.
if current_code_block:
for item in do_insertions(insertions,
slexer.get_tokens_unprocessed(current_code_block)):
yield item
class SLexer(RegexLexer):
"""
For S, S-plus, and R source code.
*New in Pygments 0.10.*
"""
name = 'S'
aliases = ['splus', 's', 'r']
filenames = ['*.S', '*.R']
mimetypes = ['text/S-plus', 'text/S', 'text/R']
tokens = {
'comments': [
(r'#.*$', Comment.Single),
],
'valid_name': [
(r'[a-zA-Z][0-9a-zA-Z\._]+', Text),
(r'`.+`', String.Backtick),
],
'punctuation': [
(r'\[|\]|\[\[|\]\]|\$|\(|\)|@|:::?|;|,', Punctuation),
],
'keywords': [
(r'for(?=\s*\()|while(?=\s*\()|if(?=\s*\()|(?<=\s)else|'
r'(?<=\s)break(?=;|$)|return(?=\s*\()|function(?=\s*\()',
Keyword.Reserved)
],
'operators': [
(r'<-|-|==|<=|>=|<|>|&&|&|!=|\|\|?', Operator),
(r'\*|\+|\^|/|%%|%/%|=', Operator),
(r'%in%|%*%', Operator)
],
'builtin_symbols': [
(r'(NULL|NA|TRUE|FALSE|NaN)\b', Keyword.Constant),
(r'(T|F)\b', Keyword.Variable),
],
'numbers': [
(r'(?<![0-9a-zA-Z\)\}\]`\"])(?=\s*)[-\+]?[0-9]+'
r'(\.[0-9]*)?(E[0-9][-\+]?(\.[0-9]*)?)?', Number),
(r'\.[0-9]*(E[0-9][-\+]?(\.[0-9]*)?)?', Number),
],
'statements': [
include('comments'),
# whitespaces
(r'\s+', Text),
(r'\'', String, 'string_squote'),
(r'\"', String, 'string_dquote'),
include('builtin_symbols'),
include('numbers'),
include('keywords'),
include('punctuation'),
include('operators'),
include('valid_name'),
],
'root': [
include('statements'),
# blocks:
(r'\{|\}', Punctuation),
#(r'\{', Punctuation, 'block'),
(r'.', Text),
],
#'block': [
# include('statements'),
# ('\{', Punctuation, '#push'),
# ('\}', Punctuation, '#pop')
#],
'string_squote': [
(r'[^\']*\'', String, '#pop'),
],
'string_dquote': [
(r'[^\"]*\"', String, '#pop'),
],
}
def analyse_text(text):
return '<-' in text
|