001: package java_cup;
002:
003: import java.util.Hashtable;
004: import java.util.Enumeration;
005:
006: /** This class represents a production in the grammar. It contains
007: * a LHS non terminal, and an array of RHS symbols. As various
008: * transformations are done on the RHS of the production, it may shrink.
009: * As a result a separate length is always maintained to indicate how much
010: * of the RHS array is still valid.<p>
011: *
012: * I addition to construction and manipulation operations, productions provide
013: * methods for factoring out actions (see remove_embedded_actions()), for
014: * computing the nullability of the production (i.e., can it derive the empty
015: * string, see check_nullable()), and operations for computing its first
016: * set (i.e., the set of terminals that could appear at the beginning of some
017: * string derived from the production, see check_first_set()).
018: *
019: * @see java_cup.production_part
020: * @see java_cup.symbol_part
021: * @see java_cup.action_part
022: * @version last updated: 7/3/96
023: * @author Frank Flannery
024: */
025:
026: public class production {
027:
028: /*-----------------------------------------------------------*/
029: /*--- Constructor(s) ----------------------------------------*/
030: /*-----------------------------------------------------------*/
031:
032: /** Full constructor. This constructor accepts a LHS non terminal,
033: * an array of RHS parts (including terminals, non terminals, and
034: * actions), and a string for a final reduce action. It does several
035: * manipulations in the process of creating a production object.
036: * After some validity checking it translates labels that appear in
037: * actions into code for accessing objects on the runtime parse stack.
038: * It them merges adjacent actions if they appear and moves any trailing
039: * action into the final reduce actions string. Next it removes any
040: * embedded actions by factoring them out with new action productions.
041: * Finally it assigns a unique index to the production.<p>
042: *
043: * Factoring out of actions is accomplished by creating new "hidden"
044: * non terminals. For example if the production was originally: <pre>
045: * A ::= B {action} C D
046: * </pre>
047: * then it is factored into two productions:<pre>
048: * A ::= B X C D
049: * X ::= {action}
050: * </pre>
051: * (where X is a unique new non terminal). This has the effect of placing
052: * all actions at the end where they can be handled as part of a reduce by
053: * the parser.
054: */
055: public production(non_terminal lhs_sym,
056: production_part rhs_parts[], int rhs_l, String action_str)
057: throws internal_error {
058: int i;
059: action_part tail_action;
060: String declare_str;
061: int rightlen = rhs_l;
062:
063: /* remember the length */
064: if (rhs_l >= 0)
065: _rhs_length = rhs_l;
066: else if (rhs_parts != null)
067: _rhs_length = rhs_parts.length;
068: else
069: _rhs_length = 0;
070:
071: /* make sure we have a valid left-hand-side */
072: if (lhs_sym == null)
073: throw new internal_error(
074: "Attempt to construct a production with a null LHS");
075:
076: /* I'm not translating labels anymore, I'm adding code to declare
077: labels as valid variables. This way, the users code string is
078: untouched
079: 6/96 frankf */
080:
081: /* check if the last part of the right hand side is an action. If
082: it is, it won't be on the stack, so we don't want to count it
083: in the rightlen. Then when we search down the stack for a
084: Symbol, we don't try to search past action */
085:
086: if (rhs_l > 0) {
087: if (rhs_parts[rhs_l - 1].is_action()) {
088: rightlen = rhs_l - 1;
089: } else {
090: rightlen = rhs_l;
091: }
092: }
093:
094: /* get the generated declaration code for the necessary labels. */
095: declare_str = declare_labels(rhs_parts, rightlen, action_str);
096:
097: if (action_str == null)
098: action_str = declare_str;
099: else
100: action_str = declare_str + action_str;
101:
102: /* count use of lhs */
103: lhs_sym.note_use();
104:
105: /* create the part for left-hand-side */
106: _lhs = new symbol_part(lhs_sym);
107:
108: /* merge adjacent actions (if any) */
109: _rhs_length = merge_adjacent_actions(rhs_parts, _rhs_length);
110:
111: /* strip off any trailing action */
112: tail_action = strip_trailing_action(rhs_parts, _rhs_length);
113: if (tail_action != null)
114: _rhs_length--;
115:
116: /* Why does this run through the right hand side happen
117: over and over? here a quick combination of two
118: prior runs plus one I wanted of my own
119: frankf 6/25/96 */
120: /* allocate and copy over the right-hand-side */
121: /* count use of each rhs symbol */
122: _rhs = new production_part[_rhs_length];
123: for (i = 0; i < _rhs_length; i++) {
124: _rhs[i] = rhs_parts[i];
125: if (!_rhs[i].is_action()) {
126: ((symbol_part) _rhs[i]).the_symbol().note_use();
127: if (((symbol_part) _rhs[i]).the_symbol() instanceof terminal) {
128: _rhs_prec = ((terminal) ((symbol_part) _rhs[i])
129: .the_symbol()).precedence_num();
130: _rhs_assoc = ((terminal) ((symbol_part) _rhs[i])
131: .the_symbol()).precedence_side();
132: }
133: }
134: }
135:
136: /*now action string is really declaration string, so put it in front!
137: 6/14/96 frankf */
138: if (action_str == null)
139: action_str = "";
140: if (tail_action != null && tail_action.code_string() != null)
141: action_str = action_str + "\t\t"
142: + tail_action.code_string();
143:
144: /* stash the action */
145: _action = new action_part(action_str);
146:
147: /* rewrite production to remove any embedded actions */
148: remove_embedded_actions();
149:
150: /* assign an index */
151: _index = next_index++;
152:
153: /* put us in the global collection of productions */
154: _all.put(new Integer(_index), this );
155:
156: /* put us in the production list of the lhs non terminal */
157: lhs_sym.add_production(this );
158: }
159:
160: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
161:
162: /** Constructor with no action string. */
163: public production(non_terminal lhs_sym,
164: production_part rhs_parts[], int rhs_l)
165: throws internal_error {
166: this (lhs_sym, rhs_parts, rhs_l, null);
167: }
168:
169: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
170:
171: /* Constructor with precedence and associativity of production
172: contextually define */
173: public production(non_terminal lhs_sym,
174: production_part rhs_parts[], int rhs_l, String action_str,
175: int prec_num, int prec_side) throws internal_error {
176: this (lhs_sym, rhs_parts, rhs_l, action_str);
177:
178: /* set the precedence */
179: set_precedence_num(prec_num);
180: set_precedence_side(prec_side);
181: }
182:
183: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
184:
185: /* Constructor w/ no action string and contextual precedence
186: defined */
187: public production(non_terminal lhs_sym,
188: production_part rhs_parts[], int rhs_l, int prec_num,
189: int prec_side) throws internal_error {
190: this (lhs_sym, rhs_parts, rhs_l, null);
191: /* set the precedence */
192: set_precedence_num(prec_num);
193: set_precedence_side(prec_side);
194: }
195:
196: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
197:
198: /*-----------------------------------------------------------*/
199: /*--- (Access to) Static (Class) Variables ------------------*/
200: /*-----------------------------------------------------------*/
201:
202: /** Table of all productions. Elements are stored using their index as
203: * the key.
204: */
205: protected static Hashtable _all = new Hashtable();
206:
207: /** Access to all productions. */
208: public static Enumeration all() {
209: return _all.elements();
210: }
211:
212: /** Lookup a production by index. */
213: public static production find(int indx) {
214: return (production) _all.get(new Integer(indx));
215: }
216:
217: //Hm Added clear to clear all static fields
218: public static void clear() {
219: _all.clear();
220: next_index = 0;
221: }
222:
223: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
224:
225: /** Total number of productions. */
226: public static int number() {
227: return _all.size();
228: }
229:
230: /** Static counter for assigning unique index numbers. */
231: protected static int next_index;
232:
233: /*-----------------------------------------------------------*/
234: /*--- (Access to) Instance Variables ------------------------*/
235: /*-----------------------------------------------------------*/
236:
237: /** The left hand side non-terminal. */
238: protected symbol_part _lhs;
239:
240: /** The left hand side non-terminal. */
241: public symbol_part lhs() {
242: return _lhs;
243: }
244:
245: /** The precedence of the rule */
246: protected int _rhs_prec = -1;
247: protected int _rhs_assoc = -1;
248:
249: /** Access to the precedence of the rule */
250: public int precedence_num() {
251: return _rhs_prec;
252: }
253:
254: public int precedence_side() {
255: return _rhs_assoc;
256: }
257:
258: /** Setting the precedence of a rule */
259: public void set_precedence_num(int prec_num) {
260: _rhs_prec = prec_num;
261: }
262:
263: public void set_precedence_side(int prec_side) {
264: _rhs_assoc = prec_side;
265: }
266:
267: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
268:
269: /** A collection of parts for the right hand side. */
270: protected production_part _rhs[];
271:
272: /** Access to the collection of parts for the right hand side. */
273: public production_part rhs(int indx) throws internal_error {
274: if (indx >= 0 && indx < _rhs_length)
275: return _rhs[indx];
276: else
277: throw new internal_error(
278: "Index out of range for right hand side of production");
279: }
280:
281: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
282:
283: /** How much of the right hand side array we are presently using. */
284: protected int _rhs_length;
285:
286: /** How much of the right hand side array we are presently using. */
287: public int rhs_length() {
288: return _rhs_length;
289: }
290:
291: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
292:
293: /** An action_part containing code for the action to be performed when we
294: * reduce with this production.
295: */
296: protected action_part _action;
297:
298: /** An action_part containing code for the action to be performed when we
299: * reduce with this production.
300: */
301: public action_part action() {
302: return _action;
303: }
304:
305: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
306:
307: /** Index number of the production. */
308: protected int _index;
309:
310: /** Index number of the production. */
311: public int index() {
312: return _index;
313: }
314:
315: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
316:
317: /** Count of number of reductions using this production. */
318: protected int _num_reductions = 0;
319:
320: /** Count of number of reductions using this production. */
321: public int num_reductions() {
322: return _num_reductions;
323: }
324:
325: /** Increment the count of reductions with this non-terminal */
326: public void note_reduction_use() {
327: _num_reductions++;
328: }
329:
330: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
331:
332: /** Is the nullability of the production known or unknown? */
333: protected boolean _nullable_known = false;
334:
335: /** Is the nullability of the production known or unknown? */
336: public boolean nullable_known() {
337: return _nullable_known;
338: }
339:
340: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
341:
342: /** Nullability of the production (can it derive the empty string). */
343: protected boolean _nullable = false;
344:
345: /** Nullability of the production (can it derive the empty string). */
346: public boolean nullable() {
347: return _nullable;
348: }
349:
350: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
351:
352: /** First set of the production. This is the set of terminals that
353: * could appear at the front of some string derived from this production.
354: */
355: protected terminal_set _first_set = new terminal_set();
356:
357: /** First set of the production. This is the set of terminals that
358: * could appear at the front of some string derived from this production.
359: */
360: public terminal_set first_set() {
361: return _first_set;
362: }
363:
364: /*-----------------------------------------------------------*/
365: /*--- Static Methods ----------------------------------------*/
366: /*-----------------------------------------------------------*/
367:
368: /** Determine if a given character can be a label id starter.
369: * @param c the character in question.
370: */
371: protected static boolean is_id_start(char c) {
372: return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
373: || (c == '_');
374:
375: //later need to handle non-8-bit chars here
376: }
377:
378: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
379:
380: /** Determine if a character can be in a label id.
381: * @param c the character in question.
382: */
383: protected static boolean is_id_char(char c) {
384: return is_id_start(c) || (c >= '0' && c <= '9');
385: }
386:
387: /*-----------------------------------------------------------*/
388: /*--- General Methods ---------------------------------------*/
389: /*-----------------------------------------------------------*/
390:
391: /** Return label declaration code
392: * @param labelname the label name
393: * @param stack_type the stack type of label?
394: * @author frankf
395: */
396: protected String make_declaration(String labelname,
397: String stack_type, int offset) {
398: String ret;
399:
400: /* Put in the left/right value labels */
401: if (emit.lr_values())
402: ret = "\t\tint "
403: + labelname
404: + "left = ((java_cup.runtime.Symbol)"
405: + emit.pre("stack")
406: +
407: // TUM 20050917
408: ((offset == 0) ? ".peek()" : (".elementAt("
409: + emit.pre("top") + "-" + offset + ")"))
410: + ").left;\n"
411: + "\t\tint "
412: + labelname
413: + "right = ((java_cup.runtime.Symbol)"
414: + emit.pre("stack")
415: +
416: // TUM 20050917
417: ((offset == 0) ? ".peek()" : (".elementAt("
418: + emit.pre("top") + "-" + offset + ")"))
419: + ").right;\n";
420: else
421: ret = "";
422:
423: /* otherwise, just declare label. */
424: return ret
425: + "\t\t"
426: + stack_type
427: + " "
428: + labelname
429: + " = ("
430: + stack_type
431: + ")(("
432: + "java_cup.runtime.Symbol) "
433: + emit.pre("stack")
434: +
435: // TUM 20050917
436: ((offset == 0) ? ".peek()" : (".elementAt("
437: + emit.pre("top") + "-" + offset + ")"))
438: + ").value;\n";
439:
440: }
441:
442: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
443:
444: /** Declare label names as valid variables within the action string
445: * @param rhs array of RHS parts.
446: * @param rhs_len how much of rhs to consider valid.
447: * @param final_action the final action string of the production.
448: * @param lhs_type the object type associated with the LHS symbol.
449: */
450: protected String declare_labels(production_part rhs[], int rhs_len,
451: String final_action) {
452: String declaration = "";
453:
454: symbol_part part;
455: action_part act_part;
456: int pos;
457:
458: /* walk down the parts and extract the labels */
459: for (pos = 0; pos < rhs_len; pos++) {
460: if (!rhs[pos].is_action()) {
461: part = (symbol_part) rhs[pos];
462:
463: /* if it has a label, make declaration! */
464: if (part.label() != null) {
465: declaration = declaration
466: + make_declaration(part.label(), part
467: .the_symbol().stack_type(), rhs_len
468: - pos - 1);
469: }
470: }
471: }
472: return declaration;
473: }
474:
475: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
476:
477: /** Helper routine to merge adjacent actions in a set of RHS parts
478: * @param rhs_parts array of RHS parts.
479: * @param len amount of that array that is valid.
480: * @return remaining valid length.
481: */
482: protected int merge_adjacent_actions(production_part rhs_parts[],
483: int len) {
484: int from_loc, to_loc, merge_cnt;
485:
486: /* bail out early if we have no work to do */
487: if (rhs_parts == null || len == 0)
488: return 0;
489:
490: merge_cnt = 0;
491: to_loc = -1;
492: for (from_loc = 0; from_loc < len; from_loc++) {
493: /* do we go in the current position or one further */
494: if (to_loc < 0 || !rhs_parts[to_loc].is_action()
495: || !rhs_parts[from_loc].is_action()) {
496: /* next one */
497: to_loc++;
498:
499: /* clear the way for it */
500: if (to_loc != from_loc)
501: rhs_parts[to_loc] = null;
502: }
503:
504: /* if this is not trivial? */
505: if (to_loc != from_loc) {
506: /* do we merge or copy? */
507: if (rhs_parts[to_loc] != null
508: && rhs_parts[to_loc].is_action()
509: && rhs_parts[from_loc].is_action()) {
510: /* merge */
511: rhs_parts[to_loc] = new action_part(
512: ((action_part) rhs_parts[to_loc])
513: .code_string()
514: + ((action_part) rhs_parts[from_loc])
515: .code_string());
516: merge_cnt++;
517: } else {
518: /* copy */
519: rhs_parts[to_loc] = rhs_parts[from_loc];
520: }
521: }
522: }
523:
524: /* return the used length */
525: return len - merge_cnt;
526: }
527:
528: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
529:
530: /** Helper routine to strip a trailing action off rhs and return it
531: * @param rhs_parts array of RHS parts.
532: * @param len how many of those are valid.
533: * @return the removed action part.
534: */
535: protected action_part strip_trailing_action(
536: production_part rhs_parts[], int len) {
537: action_part result;
538:
539: /* bail out early if we have nothing to do */
540: if (rhs_parts == null || len == 0)
541: return null;
542:
543: /* see if we have a trailing action */
544: if (rhs_parts[len - 1].is_action()) {
545: /* snip it out and return it */
546: result = (action_part) rhs_parts[len - 1];
547: rhs_parts[len - 1] = null;
548: return result;
549: } else
550: return null;
551: }
552:
553: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
554:
555: /** Remove all embedded actions from a production by factoring them
556: * out into individual action production using new non terminals.
557: * if the original production was: <pre>
558: * A ::= B {action1} C {action2} D
559: * </pre>
560: * then it will be factored into: <pre>
561: * A ::= B NT$1 C NT$2 D
562: * NT$1 ::= {action1}
563: * NT$2 ::= {action2}
564: * </pre>
565: * where NT$1 and NT$2 are new system created non terminals.
566: */
567:
568: /* the declarations added to the parent production are also passed along,
569: as they should be perfectly valid in this code string, since it
570: was originally a code string in the parent, not on its own.
571: frank 6/20/96 */
572: protected void remove_embedded_actions(
573:
574: ) throws internal_error {
575: non_terminal new_nt;
576: production new_prod;
577: String declare_str;
578: int lastLocation = -1;
579: /* walk over the production and process each action */
580: for (int act_loc = 0; act_loc < rhs_length(); act_loc++)
581: if (rhs(act_loc).is_action()) {
582:
583: declare_str = declare_labels(_rhs, act_loc, "");
584: /* create a new non terminal for the action production */
585: new_nt = non_terminal.create_new(null, lhs()
586: .the_symbol().stack_type()); // TUM 20060608 embedded actions patch
587: new_nt.is_embedded_action = true; /* 24-Mar-1998, CSA */
588:
589: /* create a new production with just the action */
590: new_prod = new action_production(this , new_nt, null, 0,
591: declare_str
592: + ((action_part) rhs(act_loc))
593: .code_string(),
594: (lastLocation == -1) ? -1
595: : (act_loc - lastLocation));
596:
597: /* replace the action with the generated non terminal */
598: _rhs[act_loc] = new symbol_part(new_nt);
599: lastLocation = act_loc;
600: }
601: }
602:
603: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
604:
605: /** Check to see if the production (now) appears to be nullable.
606: * A production is nullable if its RHS could derive the empty string.
607: * This results when the RHS is empty or contains only non terminals
608: * which themselves are nullable.
609: */
610: public boolean check_nullable() throws internal_error {
611: production_part part;
612: symbol sym;
613: int pos;
614:
615: /* if we already know bail out early */
616: if (nullable_known())
617: return nullable();
618:
619: /* if we have a zero size RHS we are directly nullable */
620: if (rhs_length() == 0) {
621: /* stash and return the result */
622: return set_nullable(true);
623: }
624:
625: /* otherwise we need to test all of our parts */
626: for (pos = 0; pos < rhs_length(); pos++) {
627: part = rhs(pos);
628:
629: /* only look at non-actions */
630: if (!part.is_action()) {
631: sym = ((symbol_part) part).the_symbol();
632:
633: /* if its a terminal we are definitely not nullable */
634: if (!sym.is_non_term())
635: return set_nullable(false);
636: /* its a non-term, is it marked nullable */
637: else if (!((non_terminal) sym).nullable())
638: /* this one not (yet) nullable, so we aren't */
639: return false;
640: }
641: }
642:
643: /* if we make it here all parts are nullable */
644: return set_nullable(true);
645: }
646:
647: /** set (and return) nullability */
648: boolean set_nullable(boolean v) {
649: _nullable_known = true;
650: _nullable = v;
651: return v;
652: }
653:
654: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
655:
656: /** Update (and return) the first set based on current NT firsts.
657: * This assumes that nullability has already been computed for all non
658: * terminals and productions.
659: */
660: public terminal_set check_first_set() throws internal_error {
661: int part;
662: symbol sym;
663:
664: /* walk down the right hand side till we get past all nullables */
665: for (part = 0; part < rhs_length(); part++) {
666: /* only look at non-actions */
667: if (!rhs(part).is_action()) {
668: sym = ((symbol_part) rhs(part)).the_symbol();
669:
670: /* is it a non-terminal?*/
671: if (sym.is_non_term()) {
672: /* add in current firsts from that NT */
673: _first_set.add(((non_terminal) sym).first_set());
674:
675: /* if its not nullable, we are done */
676: if (!((non_terminal) sym).nullable())
677: break;
678: } else {
679: /* its a terminal -- add that to the set */
680: _first_set.add((terminal) sym);
681:
682: /* we are done */
683: break;
684: }
685: }
686: }
687:
688: /* return our updated first set */
689: return first_set();
690: }
691:
692: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
693:
694: /** Equality comparison. */
695: public boolean equals(production other) {
696: if (other == null)
697: return false;
698: return other._index == _index;
699: }
700:
701: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
702:
703: /** Generic equality comparison. */
704: public boolean equals(Object other) {
705: if (!(other instanceof production))
706: return false;
707: else
708: return equals((production) other);
709: }
710:
711: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
712:
713: /** Produce a hash code. */
714: public int hashCode() {
715: /* just use a simple function of the index */
716: return _index * 13;
717: }
718:
719: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
720:
721: /** Convert to a string. */
722: public String toString() {
723: String result;
724:
725: /* catch any internal errors */
726: try {
727: result = "production [" + index() + "]: ";
728: result += ((lhs() != null) ? lhs().toString()
729: : "$$NULL-LHS$$");
730: result += " :: = ";
731: for (int i = 0; i < rhs_length(); i++)
732: result += rhs(i) + " ";
733: result += ";";
734: if (action() != null && action().code_string() != null)
735: result += " {" + action().code_string() + "}";
736:
737: if (nullable_known())
738: if (nullable())
739: result += "[NULLABLE]";
740: else
741: result += "[NOT NULLABLE]";
742: } catch (internal_error e) {
743: /* crash on internal error since we can't throw it from here (because
744: superclass does not throw anything. */
745: e.crash();
746: result = null;
747: }
748:
749: return result;
750: }
751:
752: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
753:
754: /** Convert to a simpler string. */
755: public String to_simple_string() throws internal_error {
756: String result;
757:
758: result = ((lhs() != null) ? lhs().the_symbol().name()
759: : "NULL_LHS");
760: result += " ::= ";
761: for (int i = 0; i < rhs_length(); i++)
762: if (!rhs(i).is_action())
763: result += ((symbol_part) rhs(i)).the_symbol().name()
764: + " ";
765:
766: return result;
767: }
768:
769: /*-----------------------------------------------------------*/
770:
771: }
|