Ruby constants for string types (should these be moved somewhere else?)
Additional context surrounding tokens that both the lexer and grammar use.
Stream of data that yylex examines.
Last token read via yylex.
What handles warnings
Value of last token which had a value associated with it.
# File lib/ruby_lexer.rb, line 204
#
# Sets up a fresh lexer: creates the :cond and :cmdarg state stacks, zeroes
# the nesting depth, starts with no buffered comments, and then calls #reset
# to clear the per-source scanning state.
def initialize
  self.cond   = StackState.new(:cond)
  self.cmdarg = StackState.new(:cmdarg)
  self.nest   = 0
  @comments   = []

  reset
end
How the parser advances to the next token.
@return true if not at end of file (EOF).
# File lib/ruby_lexer.rb, line 53
#
# Reads the next token via #yylex and stores it in +token+.
#
# Returns true while the token is not RubyLexer::EOF, false at EOF.
# Raises RuntimeError if #yylex returns nil (the token is still stored first).
def advance
  r = yylex
  self.token = r

  raise "yylex returned nil" unless r

  RubyLexer::EOF != r
end
# File lib/ruby_lexer.rb, line 62
#
# Reports (through #warning) that the first argument of a command call is
# ambiguous, e.g. a leading +/- or / after a space.
def arg_ambiguous
  warning("Ambiguous first argument. make sure.")
end
# File lib/ruby_lexer.rb, line 66
#
# Returns all comment text buffered in @comments as one String and empties
# the buffer, so each comment is handed out only once.
def comments
  c = @comments.join
  @comments.clear
  c
end
# File lib/ruby_lexer.rb, line 72
#
# Called when an opening bracket-like token is lexed: pushes +false+ onto
# both the cond and cmdarg stacks, switches to :expr_beg and records +val+
# as the token's value.
def expr_beg_push val
  cond.push false
  cmdarg.push false
  self.lex_state  = :expr_beg
  self.yacc_value = val
end
# File lib/ruby_lexer.rb, line 79
#
# Adjusts lex_state after an operator token: when the operator was lexed as a
# method name (:expr_fname) or after a dot (:expr_dot) the next thing is an
# argument (:expr_arg); otherwise an expression begins (:expr_beg).
def fix_arg_lex_state
  self.lex_state = if lex_state == :expr_fname || lex_state == :expr_dot
                     :expr_arg
                   else
                     :expr_beg
                   end
end
# File lib/ruby_lexer.rb, line 87
#
# Lexes the next piece of a here document.
#
# +here+ is the pending lex_strterm: [:heredoc, eos, func, last_line] where
# +eos+ is the terminator text, +func+ the STR_FUNC_* bit flags and
# +last_line+ the remainder of the line that introduced the heredoc.
#
# Returns :tSTRING_END when the terminator line is reached, :tSTRING_DVAR /
# :tSTRING_DBEG at an interpolation start, otherwise :tSTRING_CONTENT with
# the collected text (CRs removed) in yacc_value.
# Reports a compile error if the terminator cannot be found before EOF.
def heredoc here
  _, eos, func, last_line = here

  indent = (func & STR_FUNC_INDENT) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # The terminator is literal text, so escape it before building the pattern
  # (quoted identifiers may contain regexp metacharacters).
  eos_pat = Regexp.escape(eos)
  eos_re  = indent ? /[ \t]*#{eos_pat}(\r?\n|\z)/ : /#{eos_pat}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if src.eos?

  if src.beginning_of_line? && src.scan(eos_re)
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand
    case
    when src.scan(/#[$@]/)
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/)
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/)
      string_buffer << '#'
    end

    until src.scan(eos_re) do
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if c == RubyLexer::EOF

      if c != "\n"
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if src.eos?
    end

    # tack on a NL after the heredoc token - FIX NL should not be needed
    src.unread_many(eos + "\n") # TODO: remove this... stupid stupid stupid
  else
    # Non-expanding heredoc: take whole lines until the terminator is next.
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value  = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 152
#
# Called right after "<<" has been scanned: tries to read a heredoc
# identifier (optionally "-"-prefixed, optionally quoted with ', " or `).
#
# On success the rest of the current line is cut out of the source and saved
# in lex_strterm ([:heredoc, id, func, line]) so the heredoc body is lexed
# next. Returns :tXSTRING_BEG for a backtick heredoc, :tSTRING_BEG otherwise,
# or nil when what follows "<<" is not a heredoc identifier.
# Reports a compile error for an unterminated quoted identifier.
def heredoc_identifier
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/)
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= case term
            when "\'" then STR_SQUOTE
            when '"'  then STR_DQUOTE
            else           STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/)
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/)
    # A bare identifier behaves like a double-quoted one.
    term = '"'
    func |= STR_DQUOTE
    func |= STR_FUNC_INDENT unless src[1].empty?
    string_buffer << src[2]
  else
    return nil
  end

  if src.check(/.*\n/)
    # TODO: think about storing off the char range instead
    # Remove the rest of this line from the source (kept in +line+ so
    # #heredoc can push it back) and step over the newline left behind.
    line = src.string[src.pos, src.matched_size]
    src.string[src.pos, src.matched_size] = "\n"
    src.pos += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`'
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
# File lib/ruby_lexer.rb, line 213
#
# Converts the text just matched by +src+ into an Integer in the given
# +base+, stores it in yacc_value and returns :tINTEGER.
# Reports a compile error if the literal contains consecutive underscores.
def int_with_base base
  rb_compile_error "Invalid numeric format" if src.matched =~ /__/
  self.yacc_value = src.matched.to_i(base)
  return :tINTEGER
end
# File lib/ruby_lexer.rb, line 219
#
# Sets the lexer state. Only Symbols (e.g. :expr_beg) are accepted; anything
# else raises RuntimeError so a bad state is caught at the assignment.
def lex_state= o
  raise "lex_state must be a Symbol, got #{o.inspect}" unless Symbol === o
  @lex_state = o
end
# File lib/ruby_lexer.rb, line 225
#
# Current line number. The value is read from +src+ once and cached in
# @lineno until the cache is cleared (other methods assign lineno = nil).
def lineno
  @lineno ||= src.lineno
end
Parse a number from the input stream.
@param c The first character of the number. @return A constant which represents the token type (:tINTEGER or :tFLOAT).
# File lib/ruby_lexer.rb, line 235
#
# Lexes a numeric literal at the current scan position.
#
# Sets lex_state to :expr_end, stores the numeric value in yacc_value and
# returns :tINTEGER or :tFLOAT. Radix prefixes (0x, 0b, 0d, 0o) and the
# float exponent marker are matched case-insensitively, as Ruby itself
# accepts 0XFF, 0B101 and 1E3.
# Reports a compile error for malformed literals (bare prefix, illegal octal
# digit, trailing or doubled underscore, or anything unrecognised).
def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xbd]\b/i)
    # A radix prefix with no digits after it.
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i)
    int_with_base(16)
  when src.scan(/[+-]?0b[01_]+/i)
    int_with_base(2)
  when src.scan(/[+-]?0d[0-9_]+/i)
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/)
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/)
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/i)
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i)
    number = src.matched
    rb_compile_error "Invalid numeric format" if number =~ /__/
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/)
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/)
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
# File lib/ruby_lexer.rb, line 269
#
# Lexes the opener of a %-literal (the "%" itself has already been scanned):
# %Q %q %W %w %x %r %s in long-hand form, or %<delim> short-hand, which is
# treated as %Q.
#
# Stores [:strterm, string_type, closing_delim, opening_delim] in lex_strterm
# and returns the matching *_BEG token type. Reports a compile error for an
# unknown type letter or when the input ends right after the opener.
def parse_quote
  beg, nnd, short_hand, c = nil, nil, false, nil

  # Case-insensitive so that the upper-case types (%Q, %W) take the
  # long-hand path too.
  if src.scan(/[a-z0-9]{1,2}/i) # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                          # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? || c == RubyLexer::EOF || beg == RubyLexer::EOF
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q'
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q'
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W'
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_AWORDS]
                            when 'w'
                              src.scan(/\s*/)
                              [:tAWORDS_BEG,   STR_SQUOTE | STR_FUNC_AWORDS]
                            when 'x'
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r'
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's'
                              self.lex_state = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [Qqwxr\W], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
# File lib/ruby_lexer.rb, line 318
#
# Lexes the next piece of a quoted literal (string, regexp, xstring, symbol
# or word list).
#
# +quote+ is the pending lex_strterm: [:strterm, string_type, term, open].
# Returns :tSTRING_END / :tREGEXP_END at the terminator, :tSPACE between the
# words of a word list, :tSTRING_DVAR / :tSTRING_DBEG at an interpolation
# start, otherwise :tSTRING_CONTENT with the text in yacc_value.
# Reports a compile error if the input ends before the terminator.
def parse_string(quote)
  _, func, term, paren = quote

  # A nil string type is what the word-list branch below leaves behind
  # (quote[1] = nil) after consuming the terminator: the following call ends
  # the literal. Checked before any flag arithmetic on +func+.
  unless func
    self.lineno = nil
    return :tSTRING_END
  end

  term_re = Regexp.escape term

  awords = (func & STR_FUNC_AWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  space = awords && src.scan(/\s+/) ? true : false

  if nest == 0 && src.scan(/#{term_re}/)
    if awords
      quote[1] = nil
      return :tSPACE
    elsif regexp
      self.yacc_value = regx_options
      self.lineno = nil
      return :tREGEXP_END
    else
      self.yacc_value = term
      self.lineno = nil
      return :tSTRING_END
    end
  end

  return :tSPACE if space

  self.string_buffer = []

  if expand
    case
    when src.scan(/#(?=[$@])/)
      return :tSTRING_DVAR
    when src.scan(/#[{]/)
      return :tSTRING_DBEG
    when src.scan(/#/)
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  return :tSTRING_CONTENT
end
# File lib/ruby_lexer.rb, line 1232
#
# Classifies the identifier-like text currently held in +token+ (after
# absorbing a trailing ! or ? that is not followed by "=").
#
# +command_state+ is the command_start flag captured at the beginning of
# yylex; it selects :expr_cmdarg over :expr_arg for a leading identifier.
#
# Returns :tGVAR, :tCVAR, :tIVAR, :tFID, :tCONSTANT, :tIDENTIFIER, or a
# keyword token id, and updates lex_state and yacc_value accordingly.
def process_token(command_state)
  token << src.matched if token =~ /^\w/ && src.scan(/[\!\?](?!=)/)

  result = nil
  last_state = lex_state

  case token
  when /^\$/
    self.lex_state, result = :expr_end, :tGVAR
  when /^@@/
    self.lex_state, result = :expr_end, :tCVAR
  when /^@/
    self.lex_state, result = :expr_end, :tIVAR
  else
    if token =~ /[!?]$/
      result = :tFID
    else
      if lex_state == :expr_fname
        # ident=, not =~ => == or followed by =>
        # TODO test lexing of a=>b vs a==>b
        if src.scan(/=(?:(?![~>=])|(?==>))/)
          result = :tIDENTIFIER
          token << src.matched
        end
      end

      result ||= if token =~ /^[A-Z]/
                   :tCONSTANT
                 else
                   :tIDENTIFIER
                 end
    end

    unless lex_state == :expr_dot
      # See if it is a reserved word.
      keyword = Keyword.keyword token

      if keyword
        state           = lex_state
        self.lex_state  = keyword.state
        self.yacc_value = token

        if keyword.id0 == :kDO
          self.command_start = true
          return :kDO_COND  if cond.is_in_state
          return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
          return :kDO_BLOCK if state == :expr_endarg
          return :kDO
        end

        return keyword.id0 if state == :expr_beg

        # id1 is returned when the keyword does not start an expression
        # (presumably the modifier form -- confirm against Keyword).
        self.lex_state = :expr_beg if keyword.id0 != keyword.id1

        return keyword.id1
      end
    end

    if (lex_state == :expr_beg || lex_state == :expr_mid ||
        lex_state == :expr_dot || lex_state == :expr_arg ||
        lex_state == :expr_cmdarg)
      self.lex_state = command_state ? :expr_cmdarg : :expr_arg
    else
      self.lex_state = :expr_end
    end
  end

  self.yacc_value = token

  # A known local variable ends the expression unless it followed a dot.
  self.lex_state = :expr_end if
    last_state != :expr_dot && parser.env[token.to_sym] == :lvar

  return result
end
# File lib/ruby_lexer.rb, line 379
#
# Raises SyntaxError with +msg+ followed by the current line number and the
# remainder of the source line being scanned. The caller's string is not
# modified.
def rb_compile_error msg
  msg += ". near line #{lineno}: #{src.rest[/^.*/].inspect}"
  raise SyntaxError, msg
end
# File lib/ruby_lexer.rb, line 384
#
# Reads one escape sequence from +src+ (the leading backslash has already
# been consumed) and returns the character it denotes as a String.
#
# Handles the single-letter escapes, octal (\NNN) and hex (\xNN) constants,
# and the meta (\M-x) and control (\C-x, \cx) forms, including nested ones.
# Any other character is returned as-is. Reports a compile error for an
# incomplete M/C/c/x escape or when the input ends.
def read_escape
  case
  when src.scan(/\\/)                  # Backslash
    '\\'
  when src.scan(/n/)                   # newline
    "\n"
  when src.scan(/t/)                   # horizontal tab
    "\t"
  when src.scan(/r/)                   # carriage-return
    "\r"
  when src.scan(/f/)                   # form-feed
    "\f"
  when src.scan(/v/)                   # vertical tab
    "\13"
  when src.scan(/a/)                   # alarm(bell)
    "\007"
  when src.scan(/e/)                   # escape
    "\033"
  when src.scan(/b/)                   # backspace
    "\010"
  when src.scan(/s/)                   # space
    " "
  when src.scan(/[0-7]{1,3}/)          # octal constant
    src.matched.to_i(8).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/)
    src.scan(/M-\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord | 0x80).chr # meta: set the high bit
    c
  when src.scan(/M-(.)/)
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/)
    src.scan(/(C-|c)\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord & 0x9f).chr # control: clear bits 5 and 6
    c
  when src.scan(/C-\?|c\?/)
    127.chr
  when src.scan(/(C-|c)(.)/)
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos?
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
# File lib/ruby_lexer.rb, line 437
#
# Scans the option letters that follow a regexp's closing delimiter.
#
# Returns the recognised letters (any of i x m o n e s u) joined into one
# String, "" when there are none. Reports a compile error listing every
# unrecognised lower-case letter.
def regx_options
  good, bad = [], []

  if src.scan(/[a-z]+/)
    good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
  end

  unless bad.empty?
    rb_compile_error("unknown regexp option%s - %s" %
                     [(bad.size > 1 ? "s" : ""), bad.join.inspect])
  end

  return good.join
end
# File lib/ruby_lexer.rb, line 452
#
# Clears the per-source lexing state: the next token starts a command, no
# string terminator is pending, and token, yacc_value, the source scanner
# and lex_state are all cleared to nil.
def reset
  self.command_start = true
  self.lex_strterm   = nil
  self.token         = nil
  self.yacc_value    = nil

  # Assigned directly: the writers for these reject nil.
  @src       = nil
  @lex_state = nil
end
# File lib/ruby_lexer.rb, line 462
#
# Sets the text to lex. +src+ must be a String; it is wrapped in an
# RPStringScanner. Raises RuntimeError for any other kind of object.
def src= src
  raise "bad src: #{src.inspect}" unless String === src
  @src = RPStringScanner.new(src)
end
# File lib/ruby_lexer.rb, line 467
#
# Copies one backslash escape from +src+ into string_buffer verbatim (used
# for regexp bodies, where escapes are kept as written). A backslash-newline
# pair is dropped. Nested \M- / \C- / \c prefixes are followed recursively.
# +term+ is only passed along to the recursive call.
# Reports a compile error for an incomplete escape.
def tokadd_escape term
  case
  when src.scan(/\\\n/)
    # just ignore
  when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/)
    string_buffer << src.matched
  when src.scan(/\\([MC]-|c)(?=\\)/)
    string_buffer << src.matched
    tokadd_escape term
  when src.scan(/\\([MC]-|c)(.)/)
    string_buffer << src.matched
  when src.scan(/\\[McCx]/)
    rb_compile_error "Invalid escape character syntax"
  when src.scan(/\\(.)/)
    string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
# File lib/ruby_lexer.rb, line 487
#
# Appends literal string content from +src+ to string_buffer until something
# the caller must handle is reached: the closing terminator, whitespace in a
# word list, or the start of an interpolation.
#
# +func+ is the STR_FUNC_* flag set, +term+ the closing delimiter and
# +paren+ the opening delimiter (nil when there is none). Nested
# open/close delimiter pairs are tracked in +nest+.
#
# Returns the last chunk scanned, or RubyLexer::EOF if the input ran out.
def tokadd_string(func, term, paren)
  awords = (func & STR_FUNC_AWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil
    handled = true

    case
    when nest == 0 && src.scan(term_re)
      # Leave the terminator for the caller.
      src.pos -= 1
      break
    when paren_re && src.scan(paren_re)
      self.nest += 1
    when src.scan(term_re)
      self.nest -= 1
    when awords && src.scan(/\s/)
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/)
      # Interpolation start: leave the '#' for the caller.
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/)
      # do nothing
    when src.check(/\\/)
      case
      when awords && src.scan(/\\\n/)
        string_buffer << "\n"
        next
      when awords && src.scan(/\\\s/)
        c = ' '
      when expand && src.scan(/\\\n/)
        next
      when regexp && src.check(/\\/)
        tokadd_escape term
        next
      when expand && src.scan(/\\/)
        c = read_escape
      when src.scan(/\\\n/)
        # do nothing
      when src.scan(/\\\\/)
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/)
        # Keep the backslash unless it escapes a delimiter.
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re)
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled
      # Take a run of ordinary characters, or failing that any single one.
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if awords
             /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?

  return c
end
# File lib/ruby_lexer.rb, line 573
#
# Translates the body of an escape sequence (the text after the backslash)
# into the character it denotes.
#
# Single-letter escapes, "\\" and "\n" (line continuation, which yields "")
# come from a lookup table; octal, hex, meta (M-x) and control (C-x, cx)
# forms are computed. Unrecognised text is returned unchanged.
# Reports a compile error for an incomplete M/C/c/x or digit escape.
def unescape s
  r = {
    "a"    => "\007",
    "b"    => "\010",
    "e"    => "\033",
    "f"    => "\f",
    "n"    => "\n",
    "r"    => "\r",
    "s"    => " ",
    "t"    => "\t",
    "v"    => "\13",
    "\\"   => '\\',
    "\n"   => "",
    "C-\?" => 127.chr,
    "c\?"  => 127.chr,
  }[s]

  return r if r

  case s
  when /^[0-7]{1,3}/
    $&.to_i(8).chr
  when /^x([0-9a-fA-F]{1,2})/
    $1.to_i(16).chr
  when /^M-(.)/
    ($1[0].ord | 0x80).chr
  when /^(C-|c)(.)/
    ($2[0].ord & 0x9f).chr
  when /^[McCx0-9]/
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
# File lib/ruby_lexer.rb, line 609
#
# Hook for lexer warnings. Currently a no-op: the message +s+ is discarded
# and nil is returned.
def warning s
  # do nothing for now
end
Returns the next token. Also sets yacc_value if needed.
@return The type of the next token, or RubyLexer::EOF at end of input.
# Main tokenizer entry point: scans the next token from the source scanner.
#
# Clears token and yacc_value, then delegates to yylex_string while a string
# terminator (lex_strterm) is pending. Otherwise it captures and resets
# command_start, remembers the incoming lex_state, and loops: whitespace is
# skipped (setting space_seen), and the next character selects a branch that
# sets yacc_value and lex_state and returns a token type. Identifier-like
# text is handed to process_token. Returns RubyLexer::EOF at end of input,
# at a line-initial __END__, or on ^D / ^Z / NUL.
#
# NOTE(review): this listing was flattened by extraction and several literals
# look mangled (regexp openers shown as "%r", escapes such as "\00"" and
# "\113""); confirm against lib/ruby_lexer.rb before relying on any pattern.
# File lib/ruby_lexer.rb, line 618 def yylex # 826 lines c = '' space_seen = false command_state = false src = self.src self.token = nil self.yacc_value = nil return yylex_string if lex_strterm command_state = self.command_start self.command_start = false last_state = lex_state loop do # START OF CASE if src.scan(%r\ |\t|\r|\f|\113//) then # white spaces, 13 = '\v space_seen = true next elsif src.check(%r[^a-zA-Z]/) then if src.scan(%r\n|#/) then self.lineno = nil c = src.matched if c == '#' then src.unread c # ok while src.scan(%r\s*#.*(\n+|\z)/) do @comments << src.matched.gsub(%r^ +#/, '#').gsub(%r^ +$/, '') end if src.eos? then return RubyLexer::EOF end end # Replace a string of newlines with a single one src.scan(%r\n+/) if [:expr_beg, :expr_fname, :expr_dot, :expr_class].include? lex_state then next end self.command_start = true self.lex_state = :expr_beg return :tNL elsif src.scan(%r[\]\)\}]/) then cond.lexpop cmdarg.lexpop self.lex_state = :expr_end self.yacc_value = src.matched result = { ")" => :tRPAREN, "]" => :tRBRACK, "}" => :tRCURLY }[src.matched] return result elsif src.check(%r\./) then if src.scan(%r\.\.\./) then self.lex_state = :expr_beg self.yacc_value = "..." return :tDOT3 elsif src.scan(%r\.\./) then self.lex_state = :expr_beg self.yacc_value = ".." return :tDOT2 elsif src.scan(%r\.\d/) then rb_compile_error "no .<digit> floating literal anymore put 0 before dot" elsif src.scan(%r\./) then self.lex_state = :expr_dot self.yacc_value = "." 
return :tDOT end elsif src.scan(%r\,/) then self.lex_state = :expr_beg self.yacc_value = "," return :tCOMMA elsif src.scan(%r\(/) then result = :tLPAREN2 self.command_start = true if lex_state == :expr_beg || lex_state == :expr_mid then result = :tLPAREN elsif space_seen then if lex_state == :expr_cmdarg then result = :tLPAREN_ARG elsif lex_state == :expr_arg then warning("don't put space before argument parentheses") result = :tLPAREN2 end end self.expr_beg_push "(" return result elsif src.check(%r\=/) then if src.scan(%r\=\=\=/) then self.fix_arg_lex_state self.yacc_value = "===" return :tEQQ elsif src.scan(%r\=\=/) then self.fix_arg_lex_state self.yacc_value = "==" return :tEQ elsif src.scan(%r\=~/) then self.fix_arg_lex_state self.yacc_value = "=~" return :tMATCH elsif src.scan(%r\=>/) then self.fix_arg_lex_state self.yacc_value = "=>" return :tASSOC elsif src.scan(%r\=/) then if src.was_begin_of_line and src.scan(%rbegin(?=\s)/) then @comments << '=' << src.matched unless src.scan(%r.*?\n=end\s*(\n|\z)/) then @comments.clear rb_compile_error("embedded document meets end of file") end @comments << src.matched next else self.fix_arg_lex_state self.yacc_value = '=' return :tEQL end end elsif src.scan(%r\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 } self.lex_state = :expr_end return :tSTRING elsif src.scan(%r\"/) then # FALLBACK self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\00""] # TODO: question this self.yacc_value = "\"" return :tSTRING_BEG elsif src.scan(%r\@\@?\w*/) then self.token = src.matched rb_compile_error "`#{token}` is not allowed as a variable name" if token =~ %r\@\d/ return process_token(command_state) elsif src.scan(%r\:\:/) then if (lex_state == :expr_beg || lex_state == :expr_mid || lex_state == :expr_class || (lex_state.is_argument && space_seen)) then self.lex_state = :expr_beg self.yacc_value = "::" return :tCOLON3 end self.lex_state = :expr_dot 
self.yacc_value = "::" return :tCOLON2 elsif lex_state != :expr_end && lex_state != :expr_endarg && src.scan(%r:([a-zA-Z_]\w*(?:[?!]|=(?!>))?)/) then self.yacc_value = src[1] self.lex_state = :expr_end return :tSYMBOL elsif src.scan(%r\:/) then # ?: / then / when if (lex_state == :expr_end || lex_state == :expr_endarg|| src.check(%r\s/)) then self.lex_state = :expr_beg self.yacc_value = ":" return :tCOLON end case when src.scan(%r\'/) then self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\00""] when src.scan(%r\"/) then self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\00""] end self.lex_state = :expr_fname self.yacc_value = ":" return :tSYMBEG elsif src.check(%r[0-9]/) then return parse_number elsif src.scan(%r\[/) then result = src.matched if lex_state == :expr_fname || lex_state == :expr_dot then self.lex_state = :expr_arg case when src.scan(%r\]\=/) then self.yacc_value = "[]=" return :tASET when src.scan(%r\]/) then self.yacc_value = "[]" return :tAREF else rb_compile_error "unexpected '['" end elsif lex_state == :expr_beg || lex_state == :expr_mid then result = :tLBRACK elsif lex_state.is_argument && space_seen then result = :tLBRACK end self.expr_beg_push "[" return result elsif src.scan(%r\'(\\.|[^\'])*\'/) then self.yacc_value = src.matched[1..-2].gsub(%r\\\\/, "\\").gsub(%r\\'/, "'") self.lex_state = :expr_end return :tSTRING elsif src.check(%r\|/) then if src.scan(%r\|\|\=/) then self.lex_state = :expr_beg self.yacc_value = "||" return :tOP_ASGN elsif src.scan(%r\|\|/) then self.lex_state = :expr_beg self.yacc_value = "||" return :tOROP elsif src.scan(%r\|\=/) then self.lex_state = :expr_beg self.yacc_value = "|" return :tOP_ASGN elsif src.scan(%r\|/) then self.fix_arg_lex_state self.yacc_value = "|" return :tPIPE end elsif src.scan(%r\{/) then result = if lex_state.is_argument || lex_state == :expr_end then :tLCURLY # block (primary) elsif lex_state == :expr_endarg then :tLBRACE_ARG # block (expr) else :tLBRACE # hash end self.expr_beg_push 
"{" return result elsif src.scan(%r[+-]/) then sign = src.matched utype, type = if sign == "+" then [:tUPLUS, :tPLUS] else [:tUMINUS, :tMINUS] end if lex_state == :expr_fname || lex_state == :expr_dot then self.lex_state = :expr_arg if src.scan(%r@/) then self.yacc_value = "#{sign}@" return utype else self.yacc_value = sign return type end end if src.scan(%r\=/) then self.lex_state = :expr_beg self.yacc_value = sign return :tOP_ASGN end if (lex_state == :expr_beg || lex_state == :expr_mid || (lex_state.is_argument && space_seen && !src.check(%r\s/))) then if lex_state.is_argument then arg_ambiguous end self.lex_state = :expr_beg self.yacc_value = sign if src.check(%r\d/) then if utype == :tUPLUS then return self.parse_number else return :tUMINUS_NUM end end return utype end self.lex_state = :expr_beg self.yacc_value = sign return type elsif src.check(%r\*/) then if src.scan(%r\*\*=/) then self.lex_state = :expr_beg self.yacc_value = "**" return :tOP_ASGN elsif src.scan(%r\*\*/) then self.yacc_value = "**" self.fix_arg_lex_state return :tPOW elsif src.scan(%r\*\=/) then self.lex_state = :expr_beg self.yacc_value = "*" return :tOP_ASGN elsif src.scan(%r\*/) then result = if lex_state.is_argument && space_seen && src.check(%r\S/) then warning("`*' interpreted as argument prefix") :tSTAR elsif lex_state == :expr_beg || lex_state == :expr_mid then :tSTAR else :tSTAR2 end self.yacc_value = "*" self.fix_arg_lex_state return result end elsif src.check(%r\!/) then if src.scan(%r\!\=/) then self.lex_state = :expr_beg self.yacc_value = "!=" return :tNEQ elsif src.scan(%r\!~/) then self.lex_state = :expr_beg self.yacc_value = "!~" return :tNMATCH elsif src.scan(%r\!/) then self.lex_state = :expr_beg self.yacc_value = "!" 
return :tBANG end elsif src.check(%r\</) then if src.scan(%r\<\=\>/) then self.fix_arg_lex_state self.yacc_value = "<=>" return :tCMP elsif src.scan(%r\<\=/) then self.fix_arg_lex_state self.yacc_value = "<=" return :tLEQ elsif src.scan(%r\<\<\=/) then self.fix_arg_lex_state self.lex_state = :expr_beg self.yacc_value = "\<\<" return :tOP_ASGN elsif src.scan(%r\<\</) then if (! [:expr_end, :expr_dot, :expr_endarg, :expr_class].include?(lex_state) && (!lex_state.is_argument || space_seen)) then tok = self.heredoc_identifier if tok then return tok end end self.fix_arg_lex_state self.yacc_value = "\<\<" return :tLSHFT elsif src.scan(%r\</) then self.fix_arg_lex_state self.yacc_value = "<" return :tLT end elsif src.check(%r\>/) then if src.scan(%r\>\=/) then self.fix_arg_lex_state self.yacc_value = ">=" return :tGEQ elsif src.scan(%r\>\>=/) then self.fix_arg_lex_state self.lex_state = :expr_beg self.yacc_value = ">>" return :tOP_ASGN elsif src.scan(%r\>\>/) then self.fix_arg_lex_state self.yacc_value = ">>" return :tRSHFT elsif src.scan(%r\>/) then self.fix_arg_lex_state self.yacc_value = ">" return :tGT end elsif src.scan(%r\`/) then self.yacc_value = "`" case lex_state when :expr_fname then self.lex_state = :expr_end return :tBACK_REF2 when :expr_dot then self.lex_state = if command_state then :expr_cmdarg else :expr_arg end return :tBACK_REF2 end self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\00""] return :tXSTRING_BEG elsif src.scan(%r\?/) then if lex_state == :expr_end || lex_state == :expr_endarg then self.lex_state = :expr_beg self.yacc_value = "?" return :tEH end if src.eos? then rb_compile_error "incomplete character syntax" end if src.check(%r\s|\v/) then unless lex_state.is_argument then c2 = { " " => 's', "\n" => 'n', "\t" => 't', "\v" => 'v', "\r" => 'r', "\f" => 'f' }[src.matched] if c2 then warning("invalid character syntax; use ?\\" + c2) end end # ternary self.lex_state = :expr_beg self.yacc_value = "?" 
return :tEH elsif src.check(%r\w(?=\w)/) then # ternary, also self.lex_state = :expr_beg self.yacc_value = "?" return :tEH end c = if src.scan(%r\\/) then self.read_escape else src.getch end self.lex_state = :expr_end self.yacc_value = c[0].ord & 0xff return :tINTEGER elsif src.check(%r\&/) then if src.scan(%r\&\&\=/) then self.yacc_value = "&&" self.lex_state = :expr_beg return :tOP_ASGN elsif src.scan(%r\&\&/) then self.lex_state = :expr_beg self.yacc_value = "&&" return :tANDOP elsif src.scan(%r\&\=/) then self.yacc_value = "&" self.lex_state = :expr_beg return :tOP_ASGN elsif src.scan(%r&/) then result = if lex_state.is_argument && space_seen && !src.check(%r\s/) then warning("`&' interpreted as argument prefix") :tAMPER elsif lex_state == :expr_beg || lex_state == :expr_mid then :tAMPER else :tAMPER2 end self.fix_arg_lex_state self.yacc_value = "&" return result end elsif src.scan(%r\//) then if lex_state == :expr_beg || lex_state == :expr_mid then self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""] self.yacc_value = "/" return :tREGEXP_BEG end if src.scan(%r\=/) then self.yacc_value = "/" self.lex_state = :expr_beg return :tOP_ASGN end if lex_state.is_argument && space_seen then unless src.scan(%r\s/) then arg_ambiguous self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""] self.yacc_value = "/" return :tREGEXP_BEG end end self.fix_arg_lex_state self.yacc_value = "/" return :tDIVIDE elsif src.scan(%r\^=/) then self.lex_state = :expr_beg self.yacc_value = "^" return :tOP_ASGN elsif src.scan(%r\^/) then self.fix_arg_lex_state self.yacc_value = "^" return :tCARET elsif src.scan(%r\;/) then self.command_start = true self.lex_state = :expr_beg self.yacc_value = ";" return :tSEMI elsif src.scan(%r\~/) then if lex_state == :expr_fname || lex_state == :expr_dot then src.scan(%r@/) end self.fix_arg_lex_state self.yacc_value = "~" return :tTILDE elsif src.scan(%r\\/) then if src.scan(%r\n/) then self.lineno = nil space_seen = true next end rb_compile_error "bare 
backslash only allowed before newline" elsif src.scan(%r\%/) then if lex_state == :expr_beg || lex_state == :expr_mid then return parse_quote end if src.scan(%r\=/) then self.lex_state = :expr_beg self.yacc_value = "%" return :tOP_ASGN end if lex_state.is_argument && space_seen && ! src.check(%r\s/) then return parse_quote end self.fix_arg_lex_state self.yacc_value = "%" return :tPERCENT elsif src.check(%r\$/) then if src.scan(%r(\$_)(\w+)/) then self.lex_state = :expr_end self.token = src.matched return process_token(command_state) elsif src.scan(%r\$_/) then self.lex_state = :expr_end self.token = src.matched self.yacc_value = src.matched return :tGVAR elsif src.scan(%r\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then self.lex_state = :expr_end self.yacc_value = src.matched return :tGVAR elsif src.scan(%r\$([\&\`\'\+])/) then self.lex_state = :expr_end # Explicit reference to these vars as symbols... if last_state == :expr_fname then self.yacc_value = src.matched return :tGVAR else self.yacc_value = src[1].to_sym return :tBACK_REF end elsif src.scan(%r\$([1-9]\d*)/) then self.lex_state = :expr_end if last_state == :expr_fname then self.yacc_value = src.matched return :tGVAR else self.yacc_value = src[1].to_i return :tNTH_REF end elsif src.scan(%r\$0/) then self.lex_state = :expr_end self.token = src.matched return process_token(command_state) elsif src.scan(%r\$\W|\$\z/) then # TODO: remove? self.lex_state = :expr_end self.yacc_value = "$" return "$" elsif src.scan(%r\$\w+/) self.lex_state = :expr_end self.token = src.matched return process_token(command_state) end elsif src.check(%r\_/) then if src.beginning_of_line? && src.scan(%r\__END__(\n|\Z)/) then self.lineno = nil return RubyLexer::EOF elsif src.scan(%r\_\w*/) then self.token = src.matched return process_token(command_state) end end end # END OF CASE if src.scan(%r\0004|\0032|\0000/) || src.eos? 
then # ^D, ^Z, EOF return RubyLexer::EOF else # alpha check if src.scan(%r\W/) then rb_compile_error "Invalid char #{src.matched.inspect} in expression" end end self.token = src.matched if self.src.scan(%r\w+/) return process_token(command_state) end end
# File lib/ruby_lexer.rb, line 1314
#
# Lexes the next token while a string-like literal is pending
# (lex_strterm is set): dispatches to #heredoc for heredocs and to
# #parse_string for everything else.
#
# When the literal ends (:tSTRING_END or :tREGEXP_END) the pending
# terminator and the cached line number are cleared and lex_state becomes
# :expr_end. Returns the token type produced by the delegate.
def yylex_string
  token = if lex_strterm[0] == :heredoc
            heredoc(lex_strterm)
          else
            parse_string(lex_strterm)
          end

  if token == :tSTRING_END || token == :tREGEXP_END
    self.lineno      = nil
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  return token
end