A simple implementation of a YAML lexer. It handles most cases. It is not a validating lexer.
Step through a single iteration of the tokenization process. This will yield (potentially) many tokens, and possibly zero tokens.
# File lib/syntax/lang/yaml.rb, line 11
#
# Performs one iteration of the tokenization process. Depending on the text
# at the scan position, a single call may open several token groups or only
# append characters to the current one.
#
# The method relies on scanner helpers provided by the enclosing tokenizer
# (+bol?+, +eos?+, +scan+, +scan_until+, +check+, +check_until+, +getch+,
# +matched+, +subgroup+, +start_group+, +append+ and +scan_string+); they are
# not defined in this method.
def step
  if bol?
    # Constructs that are only recognised at the beginning of a line.
    case
    when scan(/---(\s*.+)?$/)
      start_group :document, matched
    when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
      # "key:" -- whitespace, the key itself, whitespace, then the colon.
      start_group :normal, subgroup(1)
      start_group :key, subgroup(2)
      start_group :normal, subgroup(3)
      start_group :punct, ":"
    when scan(/(\s*)-/)
      # "-" sequence-entry marker, possibly indented.
      start_group :normal, subgroup(1)
      start_group :punct, "-"
    when scan(/\s*$/)
      start_group :normal, matched
    when scan(/#.*$/)
      start_group :comment, matched
    else
      # Nothing recognised: move forward by one character.
      append getch
    end
  else
    # Constructs recognised anywhere after the beginning of a line.
    case
    when scan(/[\n\r]+/)
      start_group :normal, matched
    when scan(/[ \t]+/)
      start_group :normal, matched
    when scan(/!+(.*?^)?\S+/)
      start_group :type, matched
    when scan(/&\S+/)
      start_group :anchor, matched
    when scan(/\*\S+/)
      start_group :ref, matched
    when scan(/\d\d:\d\d:\d\d/)
      start_group :time, matched
    when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
      start_group :date, matched
    when scan(/['"]/)
      # Opening quote; the rest of the string is handled by scan_string.
      start_group :punct, matched
      scan_string matched
    when scan(/:\w+/)
      start_group :symbol, matched
    when scan(/[:]/)
      start_group :punct, matched
    when scan(/#.*$/)
      start_group :comment, matched
    when scan(/>-?/)
      # ">" or ">-" block indicator followed by an indented block of text.
      start_group :punct, matched
      start_group :normal, scan(/.*$/)

      # Consume the line terminator(s) so the scanner sits at the start of
      # the first line of the block.
      append getch until eos? || bol?
      return if eos?

      # Leading spaces of the first block line; a later non-blank line with
      # less indentation ends the block.
      indent = check(/ */)
      start_group :string
      loop do
        # At end of input check_until still succeeds with an empty,
        # zero-width \Z match, so the nil test below never fires and the
        # loop would spin forever without this guard.
        break if eos?

        line = check_until(/[\n\r]|\Z/)
        break if line.nil?

        content = line.chomp
        unless content.empty?
          this_indent = content.match(/^\s*/)[0]
          break if this_indent.length < indent.length
        end

        append scan_until(/[\n\r]|\Z/)
      end
    else
      # Plain text up to the end of the line or the start of a comment.
      start_group :normal, scan_until(/(?=$|#)/)
    end
  end
end