A tokenizer for the Ruby language. It recognizes all common syntax (and some less common syntax), but because it is not a true lexer, it will make mistakes on some ambiguous cases.
The list of all identifiers recognized as keywords.
Perform Ruby-specific setup.
# File lib/syntax/lang/ruby.rb, line 18
#
# Perform Ruby-specific setup by resetting the tokenizer's state flags.
#
# * @selector       - true right after a single "." has been scanned, so the
#                     next word is reported as an identifier rather than a
#                     keyword or constant.
# * @allow_operator - true when the previous token could end an expression;
#                     it decides whether "/" is a division operator or the
#                     start of a regex literal.
# * @heredocs       - queue of pending heredoc descriptors
#                     ([float_right, type, delim]) that are consumed when the
#                     next line break is reached.
def setup
  @selector = false
  @allow_operator = false
  @heredocs = []
end
Step through a single iteration of the tokenization process.
# File lib/syntax/lang/ruby.rb, line 25
#
# Step through a single iteration of the tokenization process.
#
# Looks at the text at the current scan position, consumes one token (or a
# small fixed sequence of tokens, e.g. "def " followed by the method name)
# and reports it through start_group / append / scan_delimited_region /
# scan_heredoc. Along the way it maintains three pieces of state:
#
# * @selector       - set after a single ".", cleared after the next word;
#                     while set, words are reported as :ident.
# * @allow_operator - when true a following "/" is punctuation (division),
#                     otherwise it opens a regex literal.
# * @heredocs       - heredoc descriptors queued by "<<" and consumed at the
#                     next line break.
#
# The return value is not meaningful; the method may return early from the
# heredoc branch.
def step
  case
  when bol? && check(/=begin/)
    # Block comment: everything up to and including the closing "=end" line.
    start_group(:comment, scan_until(/^=end#{EOL}/))
  when bol? && check(/__END__#{EOL}/)
    # Everything after __END__ is reported as a comment.
    start_group(:comment, scan_until(/\Z/))
  else
    case
    when check(/def\s+/)
      start_group :keyword, scan(/def\s+/)
      start_group :method, scan_until(/(?=[;(\s]|#{EOL})/)
    when check(/class\s+/)
      start_group :keyword, scan(/class\s+/)
      start_group :class, scan_until(/(?=[;\s<]|#{EOL})/)
    when check(/module\s+/)
      start_group :keyword, scan(/module\s+/)
      start_group :module, scan_until(/(?=[;\s]|#{EOL})/)
    when check(/::/)
      start_group :punct, scan(/::/)
    when check(/:"/)
      # Quoted symbol with interpolation, e.g. :"foo#{bar}".
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check(/:'/)
      # Quoted symbol without interpolation, e.g. :'foo bar'.
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan(/:[_a-zA-Z@$][$@\w]*[=!?]?/)
      start_group :symbol, matched
      @allow_operator = true
    when scan(/\?(\\[^\n\r]|[^\\\n\r\s])/)
      # Character literal such as ?a or ?\n.
      start_group :char, matched
      @allow_operator = true
    when check(/(__FILE__|__LINE__|true|false|nil|self)(?!\w)[?!]?/)
      # (?!\w) keeps words that merely start with a pseudo-constant
      # ("selfish", "nilable", ...) out of this branch; they fall through
      # to the ordinary identifier branch below.
      suffix = matched[-1, 1]
      if @selector || suffix == "?" || suffix == "!"
        # Used as a method name (after "." or with a ?/! suffix, e.g. nil?).
        start_group :ident, scan(/(__FILE__|__LINE__|true|false|nil|self)(?!\w)[?!]?/)
      else
        start_group :constant, scan(/(__FILE__|__LINE__|true|false|nil|self)(?!\w)/)
      end
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      # Integer literal with an explicit radix prefix.
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan(/../), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan(/../), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan(/../), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan(/../), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        # Bare "%" followed by a delimiter, e.g. %(string).
        scan_delimited_region :punct, :string, scan(/./), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word is treated as the operator, not as the
        # start of a heredoc.
        saw_word = (chunk[-1, 1] =~ /[\w!?]/)
        start_group :punct, scan(/<</)
        if saw_word
          @allow_operator = false
          return
        end

        float_right = scan(/-/)
        append "-" if float_right
        if (type = scan(/['"]/))
          append type
          delim = scan_until(/(?=#{type})/)
          if delim.nil?
            # Unterminated quoted delimiter: swallow the rest of the input.
            append scan_until(/\Z/)
            return
          end
        else
          delim = scan(/\w+/)
          return unless delim
        end
        start_group :constant, delim
        start_group :punct, scan(/#{type}/) if type
        # The heredoc body is scanned later, at the next line break.
        @heredocs << [float_right, type, delim]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          if @heredocs.empty?
            start_group :normal, scan(/\s+/)
          else
            scan_heredoc(*@heredocs.shift)
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan(/\s+/)
        when "#"
          start_group :comment, scan(/#[^\n\r]*/)
        when /[A-Z]/
          start_group((@selector ? :ident : :constant), scan(/\w+/))
          @allow_operator = true
        when /[a-z_]/
          word = scan(/\w+[?!]?/)
          if !@selector && KEYWORDS.include?(word)
            start_group :keyword, word
            @allow_operator = false
          else
            # Plain `else`: the state update must not depend on whatever
            # start_group happens to return.
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          start_group :number, scan(/[\d_]+(\.[\d_]+)?([eE][\d_]+)?/)
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when "/"
          if @allow_operator
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan(/\.{1,3}/)
          start_group :punct, dots
          # Only a single dot is a method selector; ".." and "..." are ranges.
          @selector = (dots.length == 1)
        when /[@]/
          start_group :attribute, scan(/@{1,2}\w*/)
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan(/\w+|./) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end