# Tokenizes the scanner's input and appends the result to +tokens+.
#
# Judging by the patterns below (`---`/`...` markers, `%` lines, `key: value`
# pairs, `|`/`>` block indicators), the input is presumably YAML -- confirm
# against the enclosing class.
#
# tokens  - receiver of `<<`; gets [text, kind] pairs, plus [:open, kind] and
#           [:close, kind] markers around grouped tokens.
# options - not used anywhere in this method body.
#
# Returns +tokens+.
#
# The loop is driven by a small state machine:
#   :initial - start of a logical line; an unquoted key or a leading '-' may follow
#   :colon   - a key was just scanned; waiting for the ':' operator
#   :value   - after ':' (or '-'); string/scalar patterns are tried first
#
# eos?, bol?, scan, check, matched, getch, pos, column, line and raise_inspect
# are not defined in this block; NOTE(review): presumably provided by a
# StringScanner-based superclass -- verify.
14: def scan_tokens tokens, options
15:
16: state = :initial
17: key_indent = 0
18:
19: until eos?
20:
# kind/match are reset for each token. Branches that emit their own tokens
# leave via `next`; all others fall through to the shared emission at the
# bottom of the loop.
21: kind = nil
22: match = nil
# Forget the previous line's key indentation; it is set again only when a
# key is scanned on the current line.
23: key_indent = nil if bol?
24:
# One or more spaces, optionally followed by further tabs/spaces.
25: if match = scan(/ +[\t ]*/)
26: kind = :space
27:
# Newlines end the current entry. The match consists only of "\n", so the
# index test below is always truthy and the state always returns to :initial.
28: elsif match = scan(/\n+/)
29: kind = :space
30: state = :initial if match.index(?\n)
31:
# '#' up to the end of the line.
32: elsif match = scan(/#.*/)
33: kind = :comment
34:
# Only at the beginning of a line. The subject-less `case` evaluates each
# `when` expression in order; if none is truthy the case yields nil and the
# elsif chain continues with the next branch.
35: elsif bol? and case
# `---` or `...` marker, wrapped in an open/close :head group.
36: when match = scan(/---|\.\.\./)
37: tokens << [:open, :head]
38: tokens << [match, :head]
39: tokens << [:close, :head]
40: next
# A line starting with '%' is emitted whole as :doctype.
41: when match = scan(/%.*/)
42: tokens << [match, :doctype]
43: next
44: end
45:
# Value position: try the string-like forms first.
46: elsif state == :value and case
# Double-quoted string. The `check` lookahead (which does not consume input)
# excludes a quoted text that is followed by ": " or ":" at end of line, so
# that the quoted-key branch further down can handle it.
47: when !check(/(?:"[^"]*")(?=: |:$)/) && scan(/"/)
48: tokens << [:open, :string]
49: tokens << [matched, :delimiter]
# Content: runs of non-quote, non-backslash characters and backslash-escaped
# characters (/x ignores the spaces in the pattern, /m lets `.` match "\n").
50: tokens << [matched, :content] if scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
# The closing delimiter is emitted only if present; the group is closed
# either way.
51: tokens << [matched, :delimiter] if scan(/"/)
52: tokens << [:close, :string]
53: next
# '|' or '>' with an optional '-' or '+', followed by an indented block.
54: when match = scan(/[|>][-+]?/)
55: tokens << [:open, :string]
56: tokens << [match, :delimiter]
# Reference indentation: the key's indent if a key was scanned on this line,
# otherwise derived from the position just before the indicator.
# NOTE(review): exact semantics of `column` are defined elsewhere -- confirm.
57: string_indent = key_indent || column(pos - match.size - 1)
# Consume every following line (after one or more newlines) that starts with
# at least string_indent + 1 spaces.
58: tokens << [matched, :content] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
59: tokens << [:close, :string]
60: next
# Unquoted value: does not start with ! " * or &, and runs (non-greedily) up
# to the end of the line or to whitespace followed by '#'.
61: when match = scan(/(?![!"*&]).+?(?=$|\s+#)/)
62: tokens << [match, :string]
63: string_indent = key_indent || column(pos - match.size - 1)
# More deeply indented continuation lines are emitted as a second :string token.
64: tokens << [matched, :string] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
65: next
66: end
67:
# Patterns tried in any state. Order matters: earlier `when`s win.
68: elsif case
# '-' or ':' followed by a space or the end of the line.
69: when match = scan(/[-:](?= |$)/)
# After a key, the operator switches to :value (match can only be ':' or
# '-' here, so the parenthesised test always holds).
70: state = :value if state == :colon && (match == ':' || match == '-')
# A '-' in :initial state also switches to :value.
71: state = :value if state == :initial && match == '-'
72: kind = :operator
# One of , { } [ ]
73: when match = scan(/[,{}\[\]]/)
74: kind = :operator
# Unquoted key, only in :initial state: word characters, dots, parentheses
# and spaces, ending in a non-space, followed by ": " or ":" at end of line.
75: when state == :initial && match = scan(/[\w.() ]*\S(?=: |:$)/)
76: kind = :key
# Remember where the key started; used as the indentation reference for
# multi-line values scanned later on this line.
77: key_indent = column(pos - match.size - 1)
78:
79: state = :colon
# Single- or double-quoted key on one line, followed by ": " or ":" at end
# of line. Emitted as a :key group: delimiter, content, delimiter.
80: when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?=: |:$)/)
81: tokens << [:open, :key]
82: tokens << [match[0,1], :delimiter]
83: tokens << [match[1..-2], :content]
84: tokens << [match[-1,1], :delimiter]
85: tokens << [:close, :key]
86: key_indent = column(pos - match.size - 1)
87:
88: state = :colon
89: next
# '!' followed by word characters or slashes, optionally ':' and a second
# name. self[n] reads the n-th capture group of the scan just performed.
90: when scan(/(![\w\/]+)(:([\w:]+))?/)
91: tokens << [self[1], :type]
92: if self[2]
93: tokens << [':', :operator]
94: tokens << [self[3], :class]
95: end
96: next
# The remaining branches call scan without assigning `match`; the text is
# recovered afterwards through `match ||= matched`.
# '&' followed by non-whitespace.
97: when scan(/&\S+/)
98: kind = :variable
# '*' followed by word characters.
99: when scan(/\*\w+/)
100: kind = :global_variable
# Literal '<<'.
101: when scan(/<</)
102: kind = :class_variable
# dd:dd:dd (time-like digits).
103: when scan(/\d\d:\d\d:\d\d/)
104: kind = :oct
# dddd-dd-dd, whitespace, dd:dd:dd, optional fraction, then a space and a
# signed dd:dd offset.
105: when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
106: kind = :oct
# ':' followed by word characters.
107: when scan(/:\w+/)
108: kind = :symbol
# Anything else is an error token. The first form also swallows the rest of
# the line when a space follows; the second takes only the word itself.
109: when scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/)
110: kind = :error
111: when scan(/[^:\s]+(:(?! |$)[^:\s]*)*/)
112: kind = :error
113: end
114:
# Nothing matched at all: take a single character via getch and flag it as
# an error. NOTE(review): presumably this guarantees the loop always
# advances -- getch is defined outside this block.
115: else
116: getch
117: kind = :error
118:
119: end
120:
# Branches that did not assign `match` fall back to the scanner's last match.
121: match ||= matched
122:
# In debug mode, a token without a kind is reported via raise_inspect.
123: if $CODERAY_DEBUG and not kind
124: raise_inspect 'Error token %p in line %d' %
125: [[match, kind], line], tokens, state
126: end
# A branch that produced no text at all is always reported.
127: raise_inspect 'Empty token', tokens, state unless match
128:
# Shared emission for every branch that did not leave the iteration via `next`.
129: tokens << [match, kind]
130:
131: end
132:
133: tokens
134: end