RxCompiler.cs :  » 2.6.4-mono-.net-core » System.Text » System » Text » RegularExpressions » C# / CSharp Open Source

Home
C# / CSharp Open Source
1.2.6.4 mono .net core
2.2.6.4 mono core
3.Aspect Oriented Frameworks
4.Bloggers
5.Build Systems
6.Business Application
7.Charting Reporting Tools
8.Chat Servers
9.Code Coverage Tools
10.Content Management Systems CMS
11.CRM ERP
12.Database
13.Development
14.Email
15.Forum
16.Game
17.GIS
18.GUI
19.IDEs
20.Installers Generators
21.Inversion of Control Dependency Injection
22.Issue Tracking
23.Logging Tools
24.Message
25.Mobile
26.Network Clients
27.Network Servers
28.Office
29.PDF
30.Persistence Frameworks
31.Portals
32.Profilers
33.Project Management
34.RSS RDF
35.Rule Engines
36.Script
37.Search Engines
38.Sound Audio
39.Source Control
40.SQL Clients
41.Template Engines
42.Testing
43.UML
44.Web Frameworks
45.Web Service
46.Web Testing
47.Wiki Engines
48.Windows Presentation Foundation
49.Workflows
50.XML Parsers
C# / C Sharp
C# / C Sharp by API
C# / CSharp Tutorial
C# / CSharp Open Source » 2.6.4 mono .net core » System.Text 
System.Text » System » Text » RegularExpressions » RxCompiler.cs
using System;
using System.Collections;
using System.Globalization;
using System.Reflection;
using System.Reflection.Emit;

namespace System.Text.RegularExpressions{

  /* This behaves like a growing list of tuples (base, offsetpos) */
  class RxLinkRef: LinkRef {
    // Flat storage of pairs: even slots hold the position of a branch
    // instruction in the program stream, odd slots hold the position where
    // that instruction's jump offset is stored.
    public int[] offsets;
    // Number of slots of `offsets` in use (next slot to write).
    public int current = 0;

    public RxLinkRef ()
    {
      offsets = new int [8];
    }

    // Records the start of the branch instruction in the program stream.
    // Opens a new pair, so it is only legal when the previous pair is
    // complete (current is even); grows the array when it is full.
    // Throws InvalidOperationException when a pair is still half-written.
    public void PushInstructionBase (int offset)
    {
      if ((current & 1) != 0)
        throw new InvalidOperationException ("PushInstructionBase called while an offset position is still pending.");
      if (current == offsets.Length) {
        int[] grown = new int [offsets.Length * 2];
        Array.Copy (offsets, grown, offsets.Length);
        offsets = grown;
      }
      offsets [current++] = offset;
    }

    // Records the position in the program stream where the jump offset is
    // stored. Completes the pair opened by PushInstructionBase.
    // Throws InvalidOperationException when no pair is open.
    public void PushOffsetPosition (int offset)
    {
      if ((current & 1) == 0)
        throw new InvalidOperationException ("PushOffsetPosition called without a matching PushInstructionBase.");
      // No growth check is needed here: the array length is always even
      // (8, 16, ...) and PushInstructionBase grows it before writing an even
      // slot, so the following odd slot is always in range.
      offsets [current++] = offset;
    }

  }

  class RxCompiler : ICompiler {
    // Bytecode buffer the Emit* methods write into; grown on demand by MakeRoom.
    protected byte[] program = new byte [32];
    // Index of the next byte to write in `program`; also the number of bytes emitted so far.
    protected int curpos = 0;

    // Nothing to set up beyond the field initializers above.
    public RxCompiler () {
    }

    // Guarantees that `bytes` more bytes can be written starting at curpos.
    // The buffer is doubled (contents preserved) until the request fits.
    void MakeRoom (int bytes)
    {
      int required = curpos + bytes;
      while (required > program.Length) {
        byte[] grown = new byte [program.Length * 2];
        Buffer.BlockCopy (program, 0, grown, 0, program.Length);
        program = grown;
      }
    }

    // Appends one byte to the program and advances the write position.
    void Emit (byte val)
    {
      MakeRoom (1);
      program [curpos++] = val;
    }

    // Appends an opcode using its single-byte encoding.
    void Emit (RxOp opcode)
    {
      byte encoded = (byte)opcode;
      Emit (encoded);
    }

    // Appends a 16-bit value, least significant byte first.
    void Emit (ushort val)
    {
      MakeRoom (2);
      program [curpos++] = (byte)val;
      program [curpos++] = (byte)(val >> 8);
    }

    // Appends a 32-bit value, least significant byte first.
    void Emit (int val)
    {
      MakeRoom (4);
      for (int shift = 0; shift < 32; shift += 8)
        program [curpos++] = (byte)(val >> shift);
    }

    // Records the current write position as the start of an instruction that
    // refers to `lref`. Must be paired with a following EmitLink on the same link.
    // A direct cast is used so that a link of the wrong type fails with
    // InvalidCastException instead of a misleading NullReferenceException.
    void BeginLink (LinkRef lref) {
      RxLinkRef link = (RxLinkRef)lref;
      link.PushInstructionBase (curpos);
    }

    // Reserves a 16-bit placeholder (written as 0) for the jump offset of
    // `lref` and records where it lives so ResolveLink can patch it later.
    // A direct cast is used so that a link of the wrong type fails with
    // InvalidCastException instead of a misleading NullReferenceException.
    void EmitLink (LinkRef lref)
    {
      RxLinkRef link = (RxLinkRef)lref;
      link.PushOffsetPosition (curpos);
      Emit ((ushort)0);
    }

    // ICompiler implementation
    // Rewinds the write position to the start of the buffer. The buffer
    // itself is kept; later emits overwrite its previous contents.
    public void Reset ()
    {
      this.curpos = 0;
    }

    // Snapshots the bytes emitted so far into an exactly-sized array and wraps
    // them in an interpreter factory (no evaluation delegate is supplied).
    public IMachineFactory GetMachineFactory ()
    {
      int size = curpos;
      byte[] snapshot = new byte [size];
      Array.Copy (program, snapshot, size);

      return new RxInterpreterFactory (snapshot, null);
    }

    // Appends the RxOp.False opcode (no operands).
    public void EmitFalse ()
    {
      this.Emit (RxOp.False);
    }

    // Appends the RxOp.True opcode (no operands).
    public void EmitTrue ()
    {
      this.Emit (RxOp.True);
    }

    /* Overridden by CILCompiler */
    public virtual void EmitOp (RxOp op, bool negate, bool ignore, bool reverse) {
      // The emitted opcode is the base opcode plus a variant index built from
      // the flags: negate contributes 1, ignore 2 and reverse 4.
      int variant = (negate ? 1 : 0) | (ignore ? 2 : 0) | (reverse ? 4 : 0);

      Emit ((RxOp)((int)op + variant));
    }

    public virtual void EmitOpIgnoreReverse (RxOp op, bool ignore, bool reverse) {
      // The emitted opcode is the base opcode plus a variant index built from
      // the flags: ignore contributes 1 and reverse 2.
      int variant = (ignore ? 1 : 0) | (reverse ? 2 : 0);

      Emit ((RxOp)((int)op + variant));
    }

    public virtual void EmitOpNegateReverse (RxOp op, bool negate, bool reverse) {
      // The emitted opcode is the base opcode plus a variant index built from
      // the flags: negate contributes 1 and reverse 2.
      int variant = (negate ? 1 : 0) | (reverse ? 2 : 0);

      Emit ((RxOp)((int)op + variant));
    }

    // Emits a single-character match. With `ignore` set the character is
    // lower-cased first. Characters below 256 use the one-byte Char form,
    // everything else the two-byte UnicodeChar form.
    public void EmitCharacter (char c, bool negate, bool ignore, bool reverse)
    {
      char ch = ignore ? Char.ToLower (c) : c;
      if (ch >= 256) {
        EmitOp (RxOp.UnicodeChar, negate, ignore, reverse);
        Emit ((ushort)ch);
        return;
      }
      EmitOp (RxOp.Char, negate, ignore, reverse);
      Emit ((byte)ch);
    }

    // Emits a CategoryUnicode test whose operand is the UnicodeCategory value
    // encoded as one byte.
    void EmitUniCat (UnicodeCategory cat, bool negate, bool reverse)
    {
      byte operand = (byte)cat;
      EmitOpNegateReverse (RxOp.CategoryUnicode, negate, reverse);
      Emit (operand);
    }

    // Emits a CategoryGeneral test whose operand is the Category value
    // encoded as one byte.
    void EmitCatGeneral (Category cat, bool negate, bool reverse)
    {
      byte operand = (byte)cat;
      EmitOpNegateReverse (RxOp.CategoryGeneral, negate, reverse);
      Emit (operand);
    }

    // Emits the instruction(s) that test the current character against `cat`.
    // Depending on the category this is one of:
    //   - a dedicated opcode with no operand (Any, Word, Digit, ...),
    //   - a CategoryUnicode opcode carrying a UnicodeCategory byte (EmitUniCat),
    //   - a character range (EmitRange) for EcmaDigit and the Unicode blocks,
    //   - a CategoryGeneral opcode carrying the Category byte (EmitCatGeneral).
    // `negate` and `reverse` are passed through to the chosen emitter; ranges
    // are always emitted with ignore-case off.
    // Throws NotImplementedException for a category with no case below.
    public void EmitCategory (Category cat, bool negate, bool reverse)
    {
      switch (cat) {
      // Any and EcmaAny share the same opcode.
      case Category.Any:
      case Category.EcmaAny:
        EmitOpNegateReverse (RxOp.CategoryAny, negate, reverse);
        break;
      case Category.AnySingleline:
        EmitOpNegateReverse (RxOp.CategoryAnySingleline, negate, reverse);
        break;
      case Category.Word:
        EmitOpNegateReverse (RxOp.CategoryWord, negate, reverse);
        break;
      case Category.Digit:
        EmitOpNegateReverse (RxOp.CategoryDigit, negate, reverse);
        break;
      case Category.WhiteSpace:
        EmitOpNegateReverse (RxOp.CategoryWhiteSpace, negate, reverse);
        break;
      /* FIXME: translate EcmaWord, EcmaWhiteSpace into Bitmaps? EcmaWhiteSpace will fit very well with the IL engine */
      case Category.EcmaWord:
        EmitOpNegateReverse (RxOp.CategoryEcmaWord, negate, reverse);
        break;
      // ECMA digits are exactly '0'..'9', so a plain range is emitted.
      case Category.EcmaDigit:
        EmitRange ('0', '9', negate, false, reverse);
        break;
      case Category.EcmaWhiteSpace:
        EmitOpNegateReverse (RxOp.CategoryEcmaWhiteSpace, negate, reverse);
        break;
      case Category.UnicodeSpecials:
        EmitOpNegateReverse (RxOp.CategoryUnicodeSpecials, negate, reverse);
        break;
      // Unicode categories...
      // letter
      case Category.UnicodeLu: EmitUniCat (UnicodeCategory.UppercaseLetter, negate, reverse); break;
      case Category.UnicodeLl: EmitUniCat (UnicodeCategory.LowercaseLetter, negate, reverse); break;
      case Category.UnicodeLt: EmitUniCat (UnicodeCategory.TitlecaseLetter, negate, reverse); break;
      case Category.UnicodeLm: EmitUniCat (UnicodeCategory.ModifierLetter, negate, reverse); break;
      case Category.UnicodeLo: EmitUniCat (UnicodeCategory.OtherLetter, negate, reverse); break;
      // mark
      case Category.UnicodeMn: EmitUniCat (UnicodeCategory.NonSpacingMark, negate, reverse); break;
      case Category.UnicodeMe: EmitUniCat (UnicodeCategory.EnclosingMark, negate, reverse); break;
      case Category.UnicodeMc: EmitUniCat (UnicodeCategory.SpacingCombiningMark, negate, reverse); break;
      // number
      case Category.UnicodeNd: EmitUniCat (UnicodeCategory.DecimalDigitNumber, negate, reverse); break;
      case Category.UnicodeNl: EmitUniCat (UnicodeCategory.LetterNumber, negate, reverse); break;
      case Category.UnicodeNo: EmitUniCat (UnicodeCategory.OtherNumber, negate, reverse); break;
      // separator
      case Category.UnicodeZs: EmitUniCat (UnicodeCategory.SpaceSeparator, negate, reverse); break;
      case Category.UnicodeZl: EmitUniCat (UnicodeCategory.LineSeparator, negate, reverse); break;
      case Category.UnicodeZp: EmitUniCat (UnicodeCategory.ParagraphSeparator, negate, reverse); break;
      // punctuation
      case Category.UnicodePd: EmitUniCat (UnicodeCategory.DashPunctuation, negate, reverse); break;
      case Category.UnicodePs: EmitUniCat (UnicodeCategory.OpenPunctuation, negate, reverse); break;
      case Category.UnicodePi: EmitUniCat (UnicodeCategory.InitialQuotePunctuation, negate, reverse); break;
      case Category.UnicodePe: EmitUniCat (UnicodeCategory.ClosePunctuation, negate, reverse); break;
      case Category.UnicodePf: EmitUniCat (UnicodeCategory.FinalQuotePunctuation, negate, reverse); break;
      case Category.UnicodePc: EmitUniCat (UnicodeCategory.ConnectorPunctuation, negate, reverse); break;
      case Category.UnicodePo: EmitUniCat (UnicodeCategory.OtherPunctuation, negate, reverse); break;
      // symbol
      case Category.UnicodeSm: EmitUniCat (UnicodeCategory.MathSymbol, negate, reverse); break;
      case Category.UnicodeSc: EmitUniCat (UnicodeCategory.CurrencySymbol, negate, reverse); break;
      case Category.UnicodeSk: EmitUniCat (UnicodeCategory.ModifierSymbol, negate, reverse); break;
      case Category.UnicodeSo: EmitUniCat (UnicodeCategory.OtherSymbol, negate, reverse); break;
      // other
      case Category.UnicodeCc: EmitUniCat (UnicodeCategory.Control, negate, reverse); break;
      case Category.UnicodeCf: EmitUniCat (UnicodeCategory.Format, negate, reverse); break;
      case Category.UnicodeCo: EmitUniCat (UnicodeCategory.PrivateUse, negate, reverse); break;
      case Category.UnicodeCs: EmitUniCat (UnicodeCategory.Surrogate, negate, reverse); break;
      case Category.UnicodeCn: EmitUniCat (UnicodeCategory.OtherNotAssigned, negate, reverse); break; 
      // Unicode block ranges...
      // Each block is emitted as an inclusive [first, last] code unit range.
      case Category.UnicodeBasicLatin:
        EmitRange ('\u0000', '\u007F', negate, false, reverse); break;
      case Category.UnicodeLatin1Supplement:
        EmitRange ('\u0080', '\u00FF', negate, false, reverse); break;
      case Category.UnicodeLatinExtendedA:
        EmitRange ('\u0100', '\u017F', negate, false, reverse); break;
      case Category.UnicodeLatinExtendedB:
        EmitRange ('\u0180', '\u024F', negate, false, reverse); break;
      case Category.UnicodeIPAExtensions:
        EmitRange ('\u0250', '\u02AF', negate, false, reverse); break;
      case Category.UnicodeSpacingModifierLetters:
        EmitRange ('\u02B0', '\u02FF', negate, false, reverse); break;
      case Category.UnicodeCombiningDiacriticalMarks:
        EmitRange ('\u0300', '\u036F', negate, false, reverse); break;
      case Category.UnicodeGreek:
        EmitRange ('\u0370', '\u03FF', negate, false, reverse); break;
      case Category.UnicodeCyrillic:
        EmitRange ('\u0400', '\u04FF', negate, false, reverse); break;
      case Category.UnicodeArmenian:
        EmitRange ('\u0530', '\u058F', negate, false, reverse); break;
      case Category.UnicodeHebrew:
        EmitRange ('\u0590', '\u05FF', negate, false, reverse); break;
      case Category.UnicodeArabic:
        EmitRange ('\u0600', '\u06FF', negate, false, reverse); break;
      case Category.UnicodeSyriac:
        EmitRange ('\u0700', '\u074F', negate, false, reverse); break;
      case Category.UnicodeThaana:
        EmitRange ('\u0780', '\u07BF', negate, false, reverse); break;
      case Category.UnicodeDevanagari:
        EmitRange ('\u0900', '\u097F', negate, false, reverse); break;
      case Category.UnicodeBengali:
        EmitRange ('\u0980', '\u09FF', negate, false, reverse); break;
      case Category.UnicodeGurmukhi:
        EmitRange ('\u0A00', '\u0A7F', negate, false, reverse); break;
      case Category.UnicodeGujarati:
        EmitRange ('\u0A80', '\u0AFF', negate, false, reverse); break;
      case Category.UnicodeOriya:
        EmitRange ('\u0B00', '\u0B7F', negate, false, reverse); break;
      case Category.UnicodeTamil:
        EmitRange ('\u0B80', '\u0BFF', negate, false, reverse); break;
      case Category.UnicodeTelugu:
        EmitRange ('\u0C00', '\u0C7F', negate, false, reverse); break;
      case Category.UnicodeKannada:
        EmitRange ('\u0C80', '\u0CFF', negate, false, reverse); break;
      case Category.UnicodeMalayalam:
        EmitRange ('\u0D00', '\u0D7F', negate, false, reverse); break;
      case Category.UnicodeSinhala:
        EmitRange ('\u0D80', '\u0DFF', negate, false, reverse); break;
      case Category.UnicodeThai:
        EmitRange ('\u0E00', '\u0E7F', negate, false, reverse); break;
      case Category.UnicodeLao:
        EmitRange ('\u0E80', '\u0EFF', negate, false, reverse); break;
      case Category.UnicodeTibetan:
        EmitRange ('\u0F00', '\u0FFF', negate, false, reverse); break;
      case Category.UnicodeMyanmar:
        EmitRange ('\u1000', '\u109F', negate, false, reverse); break;
      case Category.UnicodeGeorgian:
        EmitRange ('\u10A0', '\u10FF', negate, false, reverse); break;
      case Category.UnicodeHangulJamo:
        EmitRange ('\u1100', '\u11FF', negate, false, reverse); break;
      case Category.UnicodeEthiopic:
        EmitRange ('\u1200', '\u137F', negate, false, reverse); break;
      case Category.UnicodeCherokee:
        EmitRange ('\u13A0', '\u13FF', negate, false, reverse); break;
      case Category.UnicodeUnifiedCanadianAboriginalSyllabics:
        EmitRange ('\u1400', '\u167F', negate, false, reverse); break;
      case Category.UnicodeOgham:
        EmitRange ('\u1680', '\u169F', negate, false, reverse); break;
      case Category.UnicodeRunic:
        EmitRange ('\u16A0', '\u16FF', negate, false, reverse); break;
      case Category.UnicodeKhmer:
        EmitRange ('\u1780', '\u17FF', negate, false, reverse); break;
      case Category.UnicodeMongolian:
        EmitRange ('\u1800', '\u18AF', negate, false, reverse); break;
      case Category.UnicodeLatinExtendedAdditional:
        EmitRange ('\u1E00', '\u1EFF', negate, false, reverse); break;
      case Category.UnicodeGreekExtended:
        EmitRange ('\u1F00', '\u1FFF', negate, false, reverse); break;
      case Category.UnicodeGeneralPunctuation:
        EmitRange ('\u2000', '\u206F', negate, false, reverse); break;
      case Category.UnicodeSuperscriptsandSubscripts:
        EmitRange ('\u2070', '\u209F', negate, false, reverse); break;
      case Category.UnicodeCurrencySymbols:
        EmitRange ('\u20A0', '\u20CF', negate, false, reverse); break;
      case Category.UnicodeCombiningMarksforSymbols:
        EmitRange ('\u20D0', '\u20FF', negate, false, reverse); break;
      case Category.UnicodeLetterlikeSymbols:
        EmitRange ('\u2100', '\u214F', negate, false, reverse); break;
      case Category.UnicodeNumberForms:
        EmitRange ('\u2150', '\u218F', negate, false, reverse); break;
      case Category.UnicodeArrows:
        EmitRange ('\u2190', '\u21FF', negate, false, reverse); break;
      case Category.UnicodeMathematicalOperators:
        EmitRange ('\u2200', '\u22FF', negate, false, reverse); break;
      case Category.UnicodeMiscellaneousTechnical:
        EmitRange ('\u2300', '\u23FF', negate, false, reverse); break;
      case Category.UnicodeControlPictures:
        EmitRange ('\u2400', '\u243F', negate, false, reverse); break;
      case Category.UnicodeOpticalCharacterRecognition:
        EmitRange ('\u2440', '\u245F', negate, false, reverse); break;
      case Category.UnicodeEnclosedAlphanumerics:
        EmitRange ('\u2460', '\u24FF', negate, false, reverse); break;
      case Category.UnicodeBoxDrawing:
        EmitRange ('\u2500', '\u257F', negate, false, reverse); break;
      case Category.UnicodeBlockElements:
        EmitRange ('\u2580', '\u259F', negate, false, reverse); break;
      case Category.UnicodeGeometricShapes:
        EmitRange ('\u25A0', '\u25FF', negate, false, reverse); break;
      case Category.UnicodeMiscellaneousSymbols:
        EmitRange ('\u2600', '\u26FF', negate, false, reverse); break;
      case Category.UnicodeDingbats:
        EmitRange ('\u2700', '\u27BF', negate, false, reverse); break;
      case Category.UnicodeBraillePatterns:
        EmitRange ('\u2800', '\u28FF', negate, false, reverse); break;
      case Category.UnicodeCJKRadicalsSupplement:
        EmitRange ('\u2E80', '\u2EFF', negate, false, reverse); break;
      case Category.UnicodeKangxiRadicals:
        EmitRange ('\u2F00', '\u2FDF', negate, false, reverse); break;
      case Category.UnicodeIdeographicDescriptionCharacters:
        EmitRange ('\u2FF0', '\u2FFF', negate, false, reverse); break;
      case Category.UnicodeCJKSymbolsandPunctuation:
        EmitRange ('\u3000', '\u303F', negate, false, reverse); break;
      case Category.UnicodeHiragana:
        EmitRange ('\u3040', '\u309F', negate, false, reverse); break;
      case Category.UnicodeKatakana:
        EmitRange ('\u30A0', '\u30FF', negate, false, reverse); break;
      case Category.UnicodeBopomofo:
        EmitRange ('\u3100', '\u312F', negate, false, reverse); break;
      case Category.UnicodeHangulCompatibilityJamo:
        EmitRange ('\u3130', '\u318F', negate, false, reverse); break;
      case Category.UnicodeKanbun:
        EmitRange ('\u3190', '\u319F', negate, false, reverse); break;
      case Category.UnicodeBopomofoExtended:
        EmitRange ('\u31A0', '\u31BF', negate, false, reverse); break;
      case Category.UnicodeEnclosedCJKLettersandMonths:
        EmitRange ('\u3200', '\u32FF', negate, false, reverse); break;
      case Category.UnicodeCJKCompatibility:
        EmitRange ('\u3300', '\u33FF', negate, false, reverse); break;
      case Category.UnicodeCJKUnifiedIdeographsExtensionA:
        EmitRange ('\u3400', '\u4DB5', negate, false, reverse); break;
      case Category.UnicodeCJKUnifiedIdeographs:
        EmitRange ('\u4E00', '\u9FFF', negate, false, reverse); break;
      case Category.UnicodeYiSyllables:
        EmitRange ('\uA000', '\uA48F', negate, false, reverse); break;
      case Category.UnicodeYiRadicals:
        EmitRange ('\uA490', '\uA4CF', negate, false, reverse); break;
      case Category.UnicodeHangulSyllables:
        EmitRange ('\uAC00', '\uD7A3', negate, false, reverse); break;
      case Category.UnicodeHighSurrogates:
        EmitRange ('\uD800', '\uDB7F', negate, false, reverse); break;
      case Category.UnicodeHighPrivateUseSurrogates:
        EmitRange ('\uDB80', '\uDBFF', negate, false, reverse); break;
      case Category.UnicodeLowSurrogates:
        EmitRange ('\uDC00', '\uDFFF', negate, false, reverse); break;
      case Category.UnicodePrivateUse:
        EmitRange ('\uE000', '\uF8FF', negate, false, reverse); break;
      case Category.UnicodeCJKCompatibilityIdeographs:
        EmitRange ('\uF900', '\uFAFF', negate, false, reverse); break;
      case Category.UnicodeAlphabeticPresentationForms:
        EmitRange ('\uFB00', '\uFB4F', negate, false, reverse); break;
      case Category.UnicodeArabicPresentationFormsA:
        EmitRange ('\uFB50', '\uFDFF', negate, false, reverse); break;
      case Category.UnicodeCombiningHalfMarks:
        EmitRange ('\uFE20', '\uFE2F', negate, false, reverse); break;
      case Category.UnicodeCJKCompatibilityForms:
        EmitRange ('\uFE30', '\uFE4F', negate, false, reverse); break;
      case Category.UnicodeSmallFormVariants:
        EmitRange ('\uFE50', '\uFE6F', negate, false, reverse); break;
      case Category.UnicodeArabicPresentationFormsB:
        EmitRange ('\uFE70', '\uFEFE', negate, false, reverse); break;
      case Category.UnicodeHalfwidthandFullwidthForms:
        EmitRange ('\uFF00', '\uFFEF', negate, false, reverse); break;

        // Complex categories
        // These pass the Category value itself as the operand of CategoryGeneral.
      case Category.UnicodeL:
      case Category.UnicodeM:
      case Category.UnicodeN:
      case Category.UnicodeZ:
      case Category.UnicodeP:
      case Category.UnicodeS:
      case Category.UnicodeC:
        EmitCatGeneral (cat, negate, reverse); break;

      default:
        throw new NotImplementedException ("Missing category: " + cat);
      }
    }

    // Emits the complement of a category test: identical to EmitCategory with
    // the `negate` flag inverted.
    public void EmitNotCategory (Category cat, bool negate, bool reverse)
    {
      // not sure why the compiler needed this separate interface function
      EmitCategory (cat, !negate, reverse);
    }

    // Emits a character-range test for [lo, hi]. When both bounds are below
    // 256 the compact Range form with one-byte operands is used; otherwise
    // UnicodeRange with two-byte operands.
    public void EmitRange (char lo, char hi, bool negate, bool ignore, bool reverse)
    {
      bool fitsInBytes = lo < 256 && hi < 256;
      if (!fitsInBytes) {
        EmitOp (RxOp.UnicodeRange, negate, ignore, reverse);
        Emit ((ushort)lo);
        Emit ((ushort)hi);
        return;
      }
      EmitOp (RxOp.Range, negate, ignore, reverse);
      Emit ((byte)lo);
      Emit ((byte)hi);
    }

    // Emits a bitmap character-set test. The header is the opcode, the base
    // character `lo` and the bitmap length in bytes; it uses one-byte operands
    // (Bitmap) when both fit below 256 and two-byte operands (UnicodeBitmap)
    // otherwise. The bitmap follows, 8 set members per byte with member k of
    // each group stored in bit k; a trailing partial byte is zero-padded.
    public void EmitSet (char lo, BitArray set, bool negate, bool ignore, bool reverse)
    {
      int bitCount = set.Length;
      int byteCount = (bitCount + 0x7) >> 3;
      if (lo < 256 && byteCount < 256) {
        EmitOp (RxOp.Bitmap, negate, ignore, reverse);
        Emit ((byte)lo);
        Emit ((byte)byteCount);
      } else {
        EmitOp (RxOp.UnicodeBitmap, negate, ignore, reverse);
        Emit ((ushort)lo);
        Emit ((ushort)byteCount);
      }
      // emit the bitmap bytes
      for (int byteIndex = 0; byteIndex < byteCount; ++byteIndex) {
        int firstBit = byteIndex * 8;
        int packed = 0;
        for (int bit = 0; bit < 8 && firstBit + bit < bitCount; ++bit) {
          if (set [firstBit + bit])
            packed |= 1 << bit;
        }
        Emit ((byte)packed);
      }
    }

    // Emits a literal string match. With `ignore` set the string is
    // lower-cased first. Strings shorter than 256 characters whose characters
    // are all below 256 use the compact String form (one-byte length, one byte
    // per character); everything else uses UnicodeString (two-byte length,
    // two bytes per character).
    // Throws NotSupportedException when the string is too long for the
    // two-byte length field; nothing is emitted in that case.
    public void EmitString (string str, bool ignore, bool reverse)
    {
      if (ignore)
        str = str.ToLower ();

      int length = str.Length;
      bool islatin1 = length < 256;
      for (int i = 0; islatin1 && i < length; ++i) {
        if (str [i] >= 256)
          islatin1 = false;
      }

      if (islatin1) {
        EmitOpIgnoreReverse (RxOp.String, ignore, reverse);
        Emit ((byte)length);
        for (int i = 0; i < length; ++i)
          Emit ((byte)str [i]);
      } else {
        // Validate before emitting so a failure does not leave a dangling
        // opcode in the program.
        if (length > ushort.MaxValue)
          throw new NotSupportedException ();
        EmitOpIgnoreReverse (RxOp.UnicodeString, ignore, reverse);
        Emit ((ushort)length);
        for (int i = 0; i < length; ++i)
          Emit ((ushort)str [i]);
      }
    }

    // Emits the opcode that asserts the given position. Position.Start and
    // Position.StartOfString both map to RxOp.StartOfString.
    // Throws NotSupportedException for any position not listed.
    public void EmitPosition (Position pos)
    {
      RxOp op;
      switch (pos) {
      case Position.Any:
        op = RxOp.AnyPosition;
        break;
      case Position.Start:
      case Position.StartOfString:
        op = RxOp.StartOfString;
        break;
      case Position.StartOfLine:
        op = RxOp.StartOfLine;
        break;
      case Position.StartOfScan:
        op = RxOp.StartOfScan;
        break;
      case Position.End:
        op = RxOp.End;
        break;
      case Position.EndOfString:
        op = RxOp.EndOfString;
        break;
      case Position.EndOfLine:
        op = RxOp.EndOfLine;
        break;
      case Position.Boundary:
        op = RxOp.WordBoundary;
        break;
      case Position.NonBoundary:
        op = RxOp.NoWordBoundary;
        break;
      default:
        throw new NotSupportedException ();
      }
      Emit (op);
    }

    // Emits OpenGroup followed by the group id as a 16-bit operand.
    // Throws NotSupportedException when gid exceeds ushort.MaxValue.
    public void EmitOpen (int gid)
    {
      if (gid > ushort.MaxValue) {
        throw new NotSupportedException ();
      }
      ushort group = (ushort)gid;
      Emit (RxOp.OpenGroup);
      Emit (group);
    }

    // Emits CloseGroup followed by the group id as a 16-bit operand.
    // Throws NotSupportedException when gid exceeds ushort.MaxValue.
    public void EmitClose (int gid)
    {
      if (gid > ushort.MaxValue) {
        throw new NotSupportedException ();
      }
      ushort group = (ushort)gid;
      Emit (RxOp.CloseGroup);
      Emit (group);
    }

    // Emits BalanceStart with operands: gid (16 bits), balance (16 bits),
    // capture flag (one byte, 1 or 0) and a link placeholder for `tail`.
    // Throws NotSupportedException when gid or balance exceeds
    // ushort.MaxValue, matching the other group-emitting methods; the check
    // runs before anything is emitted so a failure leaves the program and the
    // link untouched.
    public void EmitBalanceStart(int gid, int balance, bool capture,  LinkRef tail)
    {
      if (gid > ushort.MaxValue || balance > ushort.MaxValue)
        throw new NotSupportedException ();
      BeginLink (tail);
      Emit (RxOp.BalanceStart);
      Emit ((ushort)gid);
      Emit ((ushort)balance);
      Emit ((byte)(capture ? 1 : 0));
      EmitLink (tail);
    }

    // Appends the RxOp.Balance opcode (no operands).
    public void EmitBalance ()
    {
      this.Emit (RxOp.Balance);
    }

    // Emits a Reference opcode (ignore/reverse variant selected by the flags)
    // followed by the group id as a 16-bit operand.
    // Throws NotSupportedException when gid exceeds ushort.MaxValue.
    public void EmitReference (int gid, bool ignore, bool reverse)
    {
      if (gid > ushort.MaxValue) {
        throw new NotSupportedException ();
      }
      ushort group = (ushort)gid;
      EmitOpIgnoreReverse (RxOp.Reference, ignore, reverse);
      Emit (group);
    }

    // Emits IfDefined, then a link placeholder for `tail`, then the group id
    // as a 16-bit operand.
    // Throws NotSupportedException (before emitting anything) when gid
    // exceeds ushort.MaxValue.
    public void EmitIfDefined (int gid, LinkRef tail)
    {
      if (gid > ushort.MaxValue) {
        throw new NotSupportedException ();
      }
      ushort group = (ushort)gid;
      BeginLink (tail);
      Emit (RxOp.IfDefined);
      EmitLink (tail);
      Emit (group);
    }

    // Emits SubExpression followed by a link placeholder for `tail`.
    public void EmitSub (LinkRef tail)
    {
      this.BeginLink (tail);
      this.Emit (RxOp.SubExpression);
      this.EmitLink (tail);
    }

    // Emits Test followed by two link placeholders, first for `yes` and then
    // for `tail`. Both links take the Test opcode's position as their
    // instruction base.
    public void EmitTest (LinkRef yes, LinkRef tail)
    {
      this.BeginLink (yes);
      this.BeginLink (tail);
      this.Emit (RxOp.Test);
      this.EmitLink (yes);
      this.EmitLink (tail);
    }

    // Emits Branch followed by a link placeholder for `next`.
    public void EmitBranch (LinkRef next)
    {
      this.BeginLink (next);
      this.Emit (RxOp.Branch);
      this.EmitLink (next);
    }

    // Emits Jump followed by a link placeholder for `target`.
    public void EmitJump (LinkRef target)
    {
      this.BeginLink (target);
      this.Emit (RxOp.Jump);
      this.EmitLink (target);
    }

    // Emits TestCharGroup followed by a link placeholder for `tail`.
    // Used for character groups such as [\dabcfh].
    public void EmitIn (LinkRef tail)
    {
      this.BeginLink (tail);
      this.Emit (RxOp.TestCharGroup);
      this.EmitLink (tail);
    }

    // Emits Repeat (or RepeatLazy when `lazy`), a link placeholder for
    // `until`, and then min and max as 32-bit operands.
    public void EmitRepeat (int min, int max, bool lazy, LinkRef until)
    {
      BeginLink (until);
      if (lazy)
        Emit (RxOp.RepeatLazy);
      else
        Emit (RxOp.Repeat);
      EmitLink (until);
      Emit (min);
      Emit (max);
    }

    // Patches every pending jump offset of `repeat` to point at the current
    // position, then emits the Until opcode there.
    public void EmitUntil (LinkRef repeat)
    {
      this.ResolveLink (repeat);
      this.Emit (RxOp.Until);
    }

    // Emits Info followed by count (16 bits), min (32 bits) and max (32 bits).
    // Throws NotSupportedException when count exceeds ushort.MaxValue; the
    // check runs before anything is emitted so a failure does not leave a
    // dangling Info opcode in the program.
    public void EmitInfo (int count, int min, int max)
    {
      if (count > ushort.MaxValue)
        throw new NotSupportedException ();
      Emit (RxOp.Info);
      Emit ((ushort)count);
      Emit (min);
      Emit (max);
    }

    // Emits FastRepeat (or FastRepeatLazy when `lazy`), a link placeholder for
    // `tail`, and then min and max as 32-bit operands.
    public void EmitFastRepeat (int min, int max, bool lazy, LinkRef tail)
    {
      BeginLink (tail);
      if (lazy)
        Emit (RxOp.FastRepeatLazy);
      else
        Emit (RxOp.FastRepeat);
      EmitLink (tail);
      Emit (min);
      Emit (max);
    }

    // Emits Anchor (or AnchorReverse when `reverse`), a link placeholder for
    // `tail`, and then offset as a 16-bit operand.
    // Throws NotSupportedException when offset exceeds ushort.MaxValue; the
    // check runs before anything is emitted so a failure leaves neither a
    // partial instruction in the program nor a pending entry in the link.
    public void EmitAnchor (bool reverse, int offset, LinkRef tail)
    {
      if (offset > ushort.MaxValue)
        throw new NotSupportedException ();
      BeginLink (tail);
      if (reverse)
        Emit (RxOp.AnchorReverse);
      else
        Emit (RxOp.Anchor);
      EmitLink (tail);
      Emit ((ushort)offset);
    }

    // event for the CILCompiler
    public void EmitBranchEnd ()
    {
      // Intentionally empty: this compiler emits nothing at the end of a branch.
    }

    public void EmitAlternationEnd ()
    {
      // Intentionally empty: this compiler emits nothing at the end of an alternation.
      // NOTE(review): presumably a notification hook for CILCompiler, like EmitBranchEnd - confirm.
    }

    // Creates a fresh, empty link for recording jump sites.
    public LinkRef NewLink ()
    {
      LinkRef fresh = new RxLinkRef ();
      return fresh;
    }

    // Patches every jump recorded on `link` so that it targets the current
    // write position. For each (instruction base, offset position) pair the
    // distance from the instruction base to curpos is stored as a 16-bit
    // value, low byte first, at the offset position.
    // Throws NotSupportedException when a distance exceeds ushort.MaxValue.
    // A direct cast is used so that a link of the wrong type fails with
    // InvalidCastException instead of a misleading NullReferenceException.
    public void ResolveLink (LinkRef link)
    {
      RxLinkRef l = (RxLinkRef)link;
      for (int i = 0; i < l.current; i += 2) {
        int offset = curpos - l.offsets [i];
        if (offset > ushort.MaxValue)
          throw new NotSupportedException ();
        int offsetpos = l.offsets [i + 1];
        program [offsetpos] = (byte)offset;
        program [offsetpos + 1] = (byte)(offset >> 8);
      }
    }

  }

  // Machine factory that hands out RxInterpreter instances sharing one
  // compiled program and evaluation delegate.
  class RxInterpreterFactory : IMachineFactory {
    private byte[] program;
    private EvalDelegate eval_del;
    private IDictionary mapping;
    private string[] namesMapping;
    private int gap;

    public RxInterpreterFactory (byte[] program, EvalDelegate eval_del) {
      this.eval_del = eval_del;
      this.program = program;
    }

    // Creates a new interpreter over the shared program.
    public IMachine NewInstance () {
      IMachine machine = new RxInterpreter (program, eval_del);
      return machine;
    }

    // 16-bit value read from program bytes 1 (low) and 2 (high).
    // NOTE(review): presumably the count operand written by EmitInfo, which
    // assumes the program starts with an Info instruction - confirm.
    public int GroupCount {
      get {
        int low = (int)program [1];
        int high = (int)program [2] << 8;
        return low | high;
      }
    }

    public int Gap {
      get { return this.gap; }
      set { this.gap = value; }
    }

    public IDictionary Mapping {
      get { return this.mapping; }
      set { this.mapping = value; }
    }

    public string [] NamesMapping {
      get { return this.namesMapping; }
      set { this.namesMapping = value; }
    }
  }

}

www.java2v.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.