interpreter.cs :  » 2.6.4-mono-.net-core » System.Text » System » Text » RegularExpressions » C# / CSharp Open Source

Home
C# / CSharp Open Source
1.2.6.4 mono .net core
2.2.6.4 mono core
3.Aspect Oriented Frameworks
4.Bloggers
5.Build Systems
6.Business Application
7.Charting Reporting Tools
8.Chat Servers
9.Code Coverage Tools
10.Content Management Systems CMS
11.CRM ERP
12.Database
13.Development
14.Email
15.Forum
16.Game
17.GIS
18.GUI
19.IDEs
20.Installers Generators
21.Inversion of Control Dependency Injection
22.Issue Tracking
23.Logging Tools
24.Message
25.Mobile
26.Network Clients
27.Network Servers
28.Office
29.PDF
30.Persistence Frameworks
31.Portals
32.Profilers
33.Project Management
34.RSS RDF
35.Rule Engines
36.Script
37.Search Engines
38.Sound Audio
39.Source Control
40.SQL Clients
41.Template Engines
42.Testing
43.UML
44.Web Frameworks
45.Web Service
46.Web Testing
47.Wiki Engines
48.Windows Presentation Foundation
49.Workflows
50.XML Parsers
C# / C Sharp
C# / C Sharp by API
C# / CSharp Tutorial
C# / CSharp Open Source » 2.6.4 mono .net core » System.Text 
System.Text » System » Text » RegularExpressions » interpreter.cs
//
// assembly:  System
// namespace:  System.Text.RegularExpressions
// file:  interpreter.cs
//
// author:  Dan Lewis (dlewis@gmx.co.uk)
//     (c) 2002

//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
// 
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
// 
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.Diagnostics;
using System.Globalization;

namespace System.Text.RegularExpressions{

  partial class Interpreter : BaseMachine {
    private int ReadProgramCount (int ptr)
    {
      int ret = program [ptr + 1];
      ret <<= 16;
      ret += program [ptr];
      return ret;
    }

    public Interpreter (ushort[] program) {
      this.program = program;
      this.qs = null;

      // process info block
      Debug.Assert ((OpCode)program[0] == OpCode.Info, "Regex", "Cant' find info block");
      this.group_count = ReadProgramCount (1) + 1;
      this.match_min = ReadProgramCount (3);
      //this.match_max = ReadProgramCount (5);

      // setup

      this.program_start = 7;
      this.groups = new int [group_count];
    }

    // IMachine implementation

    public override Match Scan (Regex regex, string text, int start, int end) {
      this.text = text;
      this.text_end = end;
      this.scan_ptr = start;

      if (Eval (Mode.Match, ref scan_ptr, program_start))
        return GenerateMatch (regex);

      return Match.Empty;
    }

    // private methods

    private void Reset () {
      ResetGroups ();
      fast = repeat = null;
    }

    private bool Eval (Mode mode, ref int ref_ptr, int pc) {
      int ptr = ref_ptr;
    Begin:
      for (;;) {
        ushort word = program[pc];
        OpCode op = (OpCode)(word & 0x00ff);
        OpFlags flags = (OpFlags)(word & 0xff00);

        switch (op) {
        case OpCode.Anchor: {
          int skip = program[pc + 1];
          
          int anch_offset = program[pc + 2];
          bool anch_reverse = (flags & OpFlags.RightToLeft) != 0;  
          int anch_ptr = anch_reverse ?  ptr - anch_offset  : ptr + anch_offset;
          int anch_end = text_end - match_min + anch_offset;  // maximum anchor position  
          
          
          int anch_begin =  0;


          // the general case for an anchoring expression is at the bottom, however we
          // do some checks for the common cases before to save processing time. the current
          // optimizer only outputs three types of anchoring expressions: fixed position,
          // fixed substring, and no anchor.

          OpCode anch_op = (OpCode)(program[pc + 3] & 0x00ff);          
          if (anch_op == OpCode.Position && skip == 6) {        // position anchor
            // Anchor
            //   Position
            //  True

            switch ((Position)program[pc + 4]) {
            case Position.StartOfString:
              if (anch_reverse || anch_offset == 0) {
                if (anch_reverse)
                  ptr = anch_offset;
                if (TryMatch (ref ptr, pc + skip))
                  goto Pass;
              }
              break;
            
            case Position.StartOfLine:
               if (anch_ptr == 0) {
                ptr = 0;
                if (TryMatch (ref ptr, pc + skip))
                  goto Pass;
                
                ++ anch_ptr;
              }

              while ((anch_reverse && anch_ptr >= 0) || (!anch_reverse && anch_ptr <= anch_end)) {  
                if (anch_ptr == 0 || text[anch_ptr - 1] == '\n') {
                  if (anch_reverse)
                    ptr = anch_ptr == anch_end ? anch_ptr : anch_ptr + anch_offset;
                  else
                    ptr = anch_ptr == 0 ? anch_ptr : anch_ptr - anch_offset;
                  if (TryMatch (ref ptr, pc + skip))
                    goto Pass;
                }
              
                if (anch_reverse)
                  -- anch_ptr;
                else
                  ++ anch_ptr;
              }
              break;
            
            case Position.StartOfScan:
              if (anch_ptr == scan_ptr) {              
                ptr = anch_reverse ? scan_ptr + anch_offset : scan_ptr - anch_offset;
                if (TryMatch (ref ptr, pc + skip))
                  goto Pass;
              }
              break;

            default:
              // FIXME
              break;
            }
          }
          else if (qs != null ||
            (anch_op == OpCode.String && skip == 6 + program[pc + 4])) {  // substring anchor
            // Anchor
            //  String
            //  True
         
            bool reverse = ((OpFlags)program[pc + 3] & OpFlags.RightToLeft) != 0;

            if (qs == null) {
              bool ignore = ((OpFlags)program[pc + 3] & OpFlags.IgnoreCase) != 0;
              string substring = GetString (pc + 3);
              qs = new QuickSearch (substring, ignore, reverse);
            }
            while ((anch_reverse && anch_ptr >= anch_begin) 
                   || (!anch_reverse && anch_ptr <= anch_end)) {

              if (reverse)   
              {
                anch_ptr = qs.Search (text, anch_ptr, anch_begin);
                if (anch_ptr != -1)
                  anch_ptr += qs.Length ;
                
              }
              else
                anch_ptr = qs.Search (text, anch_ptr, anch_end);
              if (anch_ptr < 0)
                break;

              ptr = reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
              if (TryMatch (ref ptr, pc + skip))
                goto Pass;

              if (reverse)
                anch_ptr -= 2;
              else 
                ++ anch_ptr;
            }
          }
          else if (anch_op == OpCode.True) {          // no anchor
            // Anchor
            //  True

          
            while ((anch_reverse && anch_ptr >= anch_begin) 
                   || (!anch_reverse && anch_ptr <= anch_end)) {

              ptr = anch_ptr;
              if (TryMatch (ref ptr, pc + skip))
                goto Pass;
              if (anch_reverse)
                -- anch_ptr;
              else 
                ++ anch_ptr;
            }
          }
          else {                  // general case
            // Anchor
            //  <expr>
            //  True

            while ((anch_reverse && anch_ptr >= anch_begin) 
                   || (!anch_reverse && anch_ptr <= anch_end)) {

              ptr = anch_ptr;
              if (Eval (Mode.Match, ref ptr, pc + 3)) {
                // anchor expression passed: try real expression at the correct offset

                ptr = anch_reverse ? anch_ptr + anch_offset : anch_ptr - anch_offset;
                if (TryMatch (ref ptr, pc + skip))
                  goto Pass;
              }

                if (anch_reverse)
                -- anch_ptr;
              else 
                ++ anch_ptr;
            }
          }

          goto Fail;
        }
        
        case OpCode.False: {
          goto Fail;
        }

        case OpCode.True: {
          goto Pass;
        }

        case OpCode.Position: {
          if (!IsPosition ((Position)program[pc + 1], ptr))
            goto Fail;
          pc += 2;
          break;
        }

        case OpCode.String: {
          bool reverse = (flags & OpFlags.RightToLeft) != 0;
          bool ignore = (flags & OpFlags.IgnoreCase) != 0;
          int len = program[pc + 1];

          if (reverse) {
            ptr -= len;
            if (ptr < 0)
              goto Fail;
          }
          else 
          if (ptr + len > text_end)
            goto Fail;

          pc += 2;
          for (int i = 0; i < len; ++ i) {
            char c = text[ptr + i];
            if (ignore)
              c = Char.ToLower (c);

            if (c != (char)program[pc ++])
              goto Fail;
          }

          if (!reverse)
            ptr += len;
          break;
        }

        case OpCode.Reference: {
          bool reverse = (flags & OpFlags.RightToLeft) != 0;
          bool ignore = (flags & OpFlags.IgnoreCase) != 0;
          int m = GetLastDefined (program [pc + 1]);
          if (m < 0)
            goto Fail;

          int str = marks [m].Index;
          int len = marks [m].Length;

          if (reverse) {
            ptr -= len;
            if (ptr < 0)
              goto Fail;
          }
          else if (ptr + len > text_end)
            goto Fail;

          pc += 2;
          if (ignore) {
            for (int i = 0; i < len; ++ i) {
              if (Char.ToLower (text[ptr + i]) != Char.ToLower (text[str + i]))
                goto Fail;
            }
          } else {
            for (int i = 0; i < len; ++ i) {
              if (text[ptr + i] != text[str + i])
                goto Fail;
            }
          }

          if (!reverse)
            ptr += len;
          break;
        }

        case OpCode.Character: case OpCode.Category: case OpCode.NotCategory:
        case OpCode.Range: case OpCode.Set: {
          if (!EvalChar (mode, ref ptr, ref pc, false))
            goto Fail;
          break;
        }

        case OpCode.In: {
          int target = pc + program[pc + 1];
          pc += 2;
          if (!EvalChar (mode, ref ptr, ref pc, true))
            goto Fail;

          pc = target;
          break;
        }

        case OpCode.Open: {
          Open (program[pc + 1], ptr);
          pc += 2;
          break;
        }

        case OpCode.Close: {
          Close (program[pc + 1], ptr);
          pc += 2;
          break;
        }

              case OpCode.BalanceStart: {

          int start = ptr; //point before the balancing group
          
          if (!Eval (Mode.Match, ref ptr, pc + 5))
            goto Fail;
          
          
          
          if(!Balance (program[pc + 1], program[pc + 2], (program[pc + 3] == 1 ? true : false) , start)) {
            goto Fail;
          }

          
          pc += program[pc + 4];
          break;
        }

        case OpCode.Balance: {
          goto Pass;
        }

        case OpCode.IfDefined: {
          int m = GetLastDefined (program [pc + 2]);
          if (m < 0)
            pc += program[pc + 1];
          else
            pc += 3;
          break;
        }

        case OpCode.Sub: {
          if (!Eval (Mode.Match, ref ptr, pc + 2))
            goto Fail;

          pc += program[pc + 1];
          break;
        }

        case OpCode.Test: {
          int cp = Checkpoint ();
          int test_ptr = ptr;
          if (Eval (Mode.Match, ref test_ptr, pc + 3))
            pc += program[pc + 1];
          else {
            Backtrack (cp);
            pc += program[pc + 2];
          }
          break;
        }

        case OpCode.Branch: {
          OpCode branch_op;
          do {
            int cp = Checkpoint ();
            if (Eval (Mode.Match, ref ptr, pc + 2))
              goto Pass;
            
            Backtrack (cp);
            
            pc += program[pc + 1];
            branch_op = (OpCode)(program[pc] & 0xff);
          } while (branch_op != OpCode.False);

          goto Fail;
        }

        case OpCode.Jump: {
          pc += program[pc + 1];
          break;
        }

        case OpCode.Repeat: {
          this.repeat = new RepeatContext (
            this.repeat,      // previous context
            ReadProgramCount (pc + 2),    // minimum
            ReadProgramCount (pc + 4),    // maximum
            (flags & OpFlags.Lazy) != 0,  // lazy
            pc + 6        // subexpression
          );

          if (Eval (Mode.Match, ref ptr, pc + program[pc + 1]))
            goto Pass;
          else {
            this.repeat = this.repeat.Previous;
            goto Fail;
          }
        }

        case OpCode.Until: {
          RepeatContext current = this.repeat;

          //
          // Can we avoid recursion?
          //
          // Backtracking can be forced in nested quantifiers from the tail of this quantifier.
          // Thus, we cannot, in general, use a simple loop on repeat.Expression to handle
          // quantifiers.
          //
          // If 'deep' was unmolested, that implies that there was no nested quantifiers.
          // Thus, we can safely avoid recursion.
          //
          if (deep == current)
            goto Pass;

          int start = current.Start;
          int start_count = current.Count;

          while (!current.IsMinimum) {
            ++ current.Count;
            current.Start = ptr;
            deep = current;
            if (!Eval (Mode.Match, ref ptr, current.Expression)) {
              current.Start = start;
              current.Count = start_count;
              goto Fail;
            }
            if (deep != current)  // recursive mode
              goto Pass;
          }

          if (ptr == current.Start) {
            // degenerate match ... match tail or fail
            this.repeat = current.Previous;
            deep = null;
            if (Eval (Mode.Match, ref ptr, pc + 1))
              goto Pass;
          
            this.repeat = current;
            goto Fail;
          }

          if (current.IsLazy) {
            for (;;) {
              // match tail first ...
              this.repeat = current.Previous;
              deep = null;
              int cp = Checkpoint ();
              if (Eval (Mode.Match, ref ptr, pc + 1))
                goto Pass;

              Backtrack (cp);

              // ... then match more
              this.repeat = current;
              if (current.IsMaximum)
                goto Fail;
              ++ current.Count;
              current.Start = ptr;
              deep = current;
              if (!Eval (Mode.Match, ref ptr, current.Expression)) {
                current.Start = start;
                current.Count = start_count;
                goto Fail;
              }
              if (deep != current)  // recursive mode
                goto Pass;
              // Degenerate match: ptr has not moved since the last (failed) tail match.
              // So, next and subsequent tail matches will fail.
              if (ptr == current.Start)
                goto Fail;
            }
          } else {
            int stack_size = stack.Count;

            // match greedily as much as possible
            while (!current.IsMaximum) {
              int cp = Checkpoint ();
              int old_ptr = ptr;
              int old_start = current.Start;

              ++ current.Count;
              current.Start = ptr;
              deep = current;
              if (!Eval (Mode.Match, ref ptr, current.Expression)) {
                -- current.Count;
                current.Start = old_start;
                Backtrack (cp);
                break;
              }
              if (deep != current) {
                // recursive mode: no more backtracking, truncate the stack
                stack.Count = stack_size;
                goto Pass;
              }
              stack.Push (cp);
              stack.Push (old_ptr);

              // Degenerate match: no point going on
              if (ptr == current.Start)
                break;
            }

            // then, match the tail, backtracking as necessary.
            this.repeat = current.Previous;
            for (;;) {
              deep = null;
              if (Eval (Mode.Match, ref ptr, pc + 1)) {
                stack.Count = stack_size;
                goto Pass;
              }
              if (stack.Count == stack_size) {
                this.repeat = current;
                goto Fail;
              }

              --current.Count;
              ptr = stack.Pop ();
              Backtrack (stack.Pop ());
            }
          }
        }

        case OpCode.FastRepeat: {
          this.fast = new RepeatContext (
            fast,
            ReadProgramCount (pc + 2),    // minimum
            ReadProgramCount (pc + 4),    // maximum
            (flags & OpFlags.Lazy) != 0,  // lazy
            pc + 6        // subexpression
          );

          fast.Start = ptr;

          int cp = Checkpoint ();

          pc += program[pc + 1];    // tail expression
          ushort tail_word = program[pc];

          int c1 = -1;    // first character of tail operator
          int c2 = -1;    // ... and the same character, in upper case if ignoring case
          int coff = 0;    // 0 or -1 depending on direction

          OpCode tail_op = (OpCode)(tail_word & 0xff);
          if (tail_op == OpCode.Character || tail_op == OpCode.String) {
            OpFlags tail_flags = (OpFlags)(tail_word & 0xff00);

            if ((tail_flags & OpFlags.Negate) != 0)
              goto skip;

            if (tail_op == OpCode.String)
            {
              int offset = 0;
            
              if ((tail_flags & OpFlags.RightToLeft) != 0)
              {
                offset = program[pc + 1] - 1 ;
              }
                
              c1 = program[pc + 2 + offset];        // first char of string
            }
            else
              c1 = program[pc + 1];        // character
            
            if ((tail_flags & OpFlags.IgnoreCase) != 0)
              c2 = Char.ToUpper ((char)c1);      // ignore case
            else
              c2 = c1;

            if ((tail_flags & OpFlags.RightToLeft) != 0)
              coff = -1;          // reverse
            else
              coff = 0;
          }

        skip:
          if (fast.IsLazy) {
            if (!fast.IsMinimum && !Eval (Mode.Count, ref ptr, fast.Expression)) {
              //Console.WriteLine ("lazy fast: failed mininum.");
              fast = fast.Previous;
              goto Fail;
            }
            
            while (true) {
              int p = ptr + coff;
              if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
                deep = null;
                if (Eval (Mode.Match, ref ptr, pc))
                  break;
              }

              if (fast.IsMaximum) {
                //Console.WriteLine ("lazy fast: failed with maximum.");
                fast = fast.Previous;
                goto Fail;
              }

              Backtrack (cp);
              if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
                //Console.WriteLine ("lazy fast: no more.");
                fast = fast.Previous;
                goto Fail;
              }
            }
            fast = fast.Previous;
            goto Pass;
          }
          else {
            if (!Eval (Mode.Count, ref ptr, fast.Expression)) {
              fast = fast.Previous;
              goto Fail;
            }
          
            int width;
            if (fast.Count > 0)
              width = (ptr - fast.Start) / fast.Count;
            else
              width = 0;

            while (true) {
              int p = ptr + coff;
              if (c1 < 0 || (p >= 0 && p < text_end && (c1 == text[p] || c2 == text[p]))) {
                deep = null;
                if (Eval (Mode.Match, ref ptr, pc))
                  break;
              }

              -- fast.Count;
              if (!fast.IsMinimum) {
                fast = fast.Previous;
                goto Fail;
              }

              ptr -= width;
              Backtrack (cp);
            }
            fast = fast.Previous;
            goto Pass;
          }
        }

        case OpCode.Info: {
          Debug.Assert (false, "Regex", "Info block found in pattern");
          goto Fail;
        }
        }
      }
    Pass:
      ref_ptr = ptr;

      switch (mode) {
      case Mode.Match:
        return true;

      case Mode.Count: {
        ++ fast.Count;
        if (fast.IsMaximum || (fast.IsLazy && fast.IsMinimum))
          return true;

        pc = fast.Expression;
        goto Begin;
      }
      }

    Fail:
      switch (mode) {
      case Mode.Match:
        return false;

      case Mode.Count: {
        if (!fast.IsLazy && fast.IsMinimum)
          return true;

        ref_ptr = fast.Start;
        return false;
      }
      }

      return false;
    }

    private bool EvalChar (Mode mode, ref int ptr, ref int pc, bool multi) {
      bool consumed = false;
      char c = '\0';
      bool negate;
      bool ignore;
    
      do {
        ushort word = program[pc];
        OpCode op = (OpCode)(word & 0x00ff);
        OpFlags flags = (OpFlags)(word & 0xff00);

        ++ pc;

        ignore = (flags & OpFlags.IgnoreCase) != 0;
        
        // consume character: the direction of an In construct is
        // determined by the direction of its first op

        if (!consumed) {
          if ((flags & OpFlags.RightToLeft) != 0) {
            if (ptr <= 0)
              return false;

            c = text[-- ptr];
          }
          else {
            if (ptr >= text_end)
              return false;

            c = text[ptr ++];
          }

          if (ignore)
            c = Char.ToLower (c);

          consumed = true;
        }

        // negate flag

        negate = (flags & OpFlags.Negate) != 0;

        // execute op
        
        switch (op) {
        case OpCode.True:
          return true;

        case OpCode.False:
          return false;
        
        case OpCode.Character: {
          if (c == (char)program[pc ++])
            return !negate;
          break;
        }

        case OpCode.Category: {
          if (CategoryUtils.IsCategory ((Category)program[pc ++], c))
            return !negate;
          break;
        }

        case OpCode.NotCategory: {
          if (!CategoryUtils.IsCategory ((Category)program[pc ++], c))
            return !negate;
          break;
        }
        
        case OpCode.Range: {
          int lo = (char)program[pc ++];
          int hi = (char)program[pc ++];
          if (lo <= c && c <= hi)
            return !negate;
          break;
        }

        case OpCode.Set: {
          int lo = (char)program[pc ++];
          int len = (char)program[pc ++];
          int bits = pc;
          pc += len;

          int i = (int)c - lo;
          if (i < 0 || i >= len << 4)
            break;
            

          if ((program[bits + (i >> 4)] & (1 << (i & 0xf))) != 0)
            return !negate;
          break;
        }
        }
      } while (multi);

      return negate;
    }

    private bool TryMatch (ref int ref_ptr, int pc) {
      Reset ();
      
      int ptr = ref_ptr;
      marks [groups [0]].Start = ptr;
      if (Eval (Mode.Match, ref ptr, pc)) {
        marks [groups [0]].End = ptr;
        ref_ptr = ptr;
        return true;
      }

      return false;
    }
    
    private bool IsPosition (Position pos, int ptr) {
      switch (pos) {
      case Position.Start: case Position.StartOfString:
        return ptr == 0;

      case Position.StartOfLine:
        return ptr == 0 || text[ptr - 1] == '\n';
        
      case Position.StartOfScan:
        return ptr == scan_ptr;
      
      case Position.End:
        return ptr == text_end ||
          (ptr == text_end - 1 && text[ptr] == '\n');

      case Position.EndOfLine:
        return ptr == text_end || text[ptr] == '\n';
        
      case Position.EndOfString:
        return ptr == text_end;
        
      case Position.Boundary:
        if (text_end == 0)
          return false;

        if (ptr == 0)
          return IsWordChar (text[ptr]);
        else if (ptr == text_end)
          return IsWordChar (text[ptr - 1]);
        else
          return IsWordChar (text[ptr]) != IsWordChar (text[ptr - 1]);

      case Position.NonBoundary:
        if (text_end == 0)
          return false;

        if (ptr == 0)
          return !IsWordChar (text[ptr]);
        else if (ptr == text_end)
          return !IsWordChar (text[ptr - 1]);
        else
          return IsWordChar (text[ptr]) == IsWordChar (text[ptr - 1]);
      
      default:
        return false;
      }
    }

    private bool IsWordChar (char c) {
      return CategoryUtils.IsCategory (Category.Word, c);
    }

    private string GetString (int pc) {
      int len = program[pc + 1];
      int str = pc + 2;

      char[] cs = new char[len];
      for (int i = 0; i < len; ++ i)
        cs[i] = (char)program[str ++];

      return new string (cs);
    }

    // capture management

    private void Open (int gid, int ptr) {
      int m = groups [gid];
      if (m < mark_start || marks [m].IsDefined) {
        m = CreateMark (m);
        groups [gid] = m;
      }

      marks [m].Start = ptr;
    }

    private void Close (int gid, int ptr) {
             marks [groups [gid]].End = ptr;
    }

    private bool Balance (int gid, int balance_gid, bool capture, int ptr) {
      int b = groups [balance_gid];

      if(b == -1 || marks[b].Index < 0) {
        //Group not previously matched
        return false;
      }
      Debug.Assert (marks [b].IsDefined, "Regex", "Balancng group not closed");
      if (gid > 0 && capture){ 
        Open (gid, marks [b].Index + marks [b].Length);
        Close (gid, ptr);
      }

      groups [balance_gid] = marks[b].Previous;

      return true;
    }

    private int Checkpoint () {
      mark_start = mark_end;
      return mark_start;
    }

    private void Backtrack (int cp) {
      Debug.Assert (cp > mark_start, "Regex", "Attempt to backtrack forwards");
      for (int i = 0; i < groups.Length; ++ i) {
        int m = groups [i];
        while (cp <= m)
          m = marks [m].Previous;

        groups [i] = m;
      }
    }

    private void ResetGroups () {
      int n = groups.Length;
      if (marks == null)
        marks = new Mark [n * 10];

      for (int i = 0; i < n; ++ i) {
        groups [i] = i;

        marks [i].Start = -1;
        marks [i].End = -1;
        marks [i].Previous = -1;
      }
      
      mark_start = 0;
      mark_end = n;
    }

    private int GetLastDefined (int gid) {
      int m = groups [gid];
      while (m >= 0 && !marks [m].IsDefined)
        m = marks [m].Previous;

      return m;
    }

    private int CreateMark (int previous) {
      if (mark_end == marks.Length) {
        Mark [] dest = new Mark [marks.Length * 2];
        marks.CopyTo (dest, 0);
        marks = dest;
      }

      int m = mark_end ++;
      marks [m].Start = marks [m].End = -1;
      marks [m].Previous = previous;

      return m;
    }

    private void GetGroupInfo (int gid, out int first_mark_index, out int n_caps)
    {
      first_mark_index = -1;
      n_caps = 0;
      for (int m = groups [gid]; m >= 0; m = marks [m].Previous) {
        if (!marks [m].IsDefined)
          continue;
        if (first_mark_index < 0)
          first_mark_index = m;
        ++n_caps;
      }
    }

    private void PopulateGroup (Group g, int first_mark_index, int n_caps)
    {
      int i = 1;
      for (int m = marks [first_mark_index].Previous; m >= 0; m = marks [m].Previous) {
        if (!marks [m].IsDefined)
          continue;
        Capture cap = new Capture (text, marks [m].Index, marks [m].Length);
        g.Captures.SetValue (cap, n_caps - 1 - i);
        ++i;
      }
    }

    private Match GenerateMatch (Regex regex)
    {
      int n_caps, first_mark_index;
      Group g;
      GetGroupInfo (0, out first_mark_index, out n_caps);

      // Avoid fully populating the Match instance if not needed
      if (!needs_groups_or_captures)
        return new Match (regex, this, text, text_end, 0, marks [first_mark_index].Index, marks [first_mark_index].Length);

      Match retval = new Match (regex, this, text, text_end, groups.Length, 
              marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
      PopulateGroup (retval, first_mark_index, n_caps);

      for (int gid = 1; gid < groups.Length; ++ gid) {
        GetGroupInfo (gid, out first_mark_index, out n_caps);
        if (first_mark_index < 0) {
          g = Group.Fail;
        } else {
          g = new Group (text, marks [first_mark_index].Index, marks [first_mark_index].Length, n_caps);
          PopulateGroup (g, first_mark_index, n_caps);
        }
        retval.Groups.SetValue (g, gid);
      }
      return retval;
    }

    // interpreter attributes

    private ushort[] program;    // regex program
    private int program_start;    // first instruction after info block
    private string text;      // input text
    private int text_end;      // end of input text (last character + 1)
    private int group_count;    // number of capturing groups
    private int match_min;//, match_max;  // match width information
    private QuickSearch qs;      // fast substring matcher

    // match state
    
    private int scan_ptr;      // start of scan

    private RepeatContext repeat;    // current repeat context
    private RepeatContext fast;    // fast repeat context

    // Repeat/Until handling
    private IntStack stack = new IntStack (); // utility stack
    private RepeatContext deep;    // points to the most-nested repeat context

    private Mark[] marks = null;    // mark stack
    private int mark_start;      // start of current checkpoint
    private int mark_end;      // end of checkpoint/next free mark

    private int[] groups;      // current group definitions

    // private classes

    private struct IntStack {
      int [] values;
      int count;
      public int Pop ()
      {
        return values [--count];
      }
      public void Push (int value)
      {
        if (values == null) {
          values = new int [8];
        } else if (count == values.Length) {
          int new_size = values.Length;
          new_size += new_size >> 1;
          int [] new_values = new int [new_size];
          for (int i = 0; i < count; ++i)
            new_values [i] = values [i];
          values = new_values;
        }
        values [count++] = value;
      }
      public int Top {
        get { return values [count - 1]; }
      }
      public int Count {
        get { return count; }
        set {
          if (value > count)
            throw new SystemException ("can only truncate the stack");
          count = value;
        }
      }
    }

    private class RepeatContext {
      public RepeatContext (RepeatContext previous, int min, int max, bool lazy, int expr_pc) {
        this.previous = previous;
        this.min = min;
        this.max = max;
        this.lazy = lazy;
        this.expr_pc = expr_pc;
        
        this.start = -1;
        this.count = 0;
      }

      public int Count {
        get { return count; }
        set { count = value; }
      }

      public int Start {
        get { return start; }
        set { start = value; }
      }

      public bool IsMinimum {
        get { return min <= count; }
      }

      public bool IsMaximum {
        get { return max <= count; }
      }

      public bool IsLazy {
        get { return lazy; }
      }

      public int Expression {
        get { return expr_pc; }
      }

      public RepeatContext Previous {
        get { return previous; }
      }
    
      private int start;
      private int min, max;
      private bool lazy;
      private int expr_pc;
      private RepeatContext previous;

      private int count;
    }

    private enum Mode {
      Search,
      Match,
      Count
    }
  }
}
www.java2v.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.