Ninja
lexer.in.cc
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "lexer.h"
16
17#include <stdio.h>
18
19#include "eval_env.h"
20#include "util.h"
21
22using namespace std;
23
24bool Lexer::Error(const string& message, string* err) {
25 // Compute line/column.
26 int line = 1;
27 const char* line_start = input_.str_;
28 for (const char* p = input_.str_; p < last_token_; ++p) {
29 if (*p == '\n') {
30 ++line;
31 line_start = p + 1;
32 }
33 }
34 int col = last_token_ ? (int)(last_token_ - line_start) : 0;
35
36 char buf[1024];
37 snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
38 *err = buf;
39 *err += message + "\n";
40
41 // Add some context to the message.
42 const int kTruncateColumn = 72;
43 if (col > 0 && col < kTruncateColumn) {
44 int len;
45 bool truncated = true;
46 for (len = 0; len < kTruncateColumn; ++len) {
47 if (line_start[len] == 0 || line_start[len] == '\n') {
48 truncated = false;
49 break;
50 }
51 }
52 *err += string(line_start, len);
53 if (truncated)
54 *err += "...";
55 *err += "\n";
56 *err += string(col, ' ');
57 *err += "^ near here";
58 }
59
60 return false;
61}
62
63Lexer::Lexer(const char* input) {
64 Start("input", input);
65}
66
67void Lexer::Start(StringPiece filename, StringPiece input) {
68 filename_ = filename;
69 input_ = input;
70 ofs_ = input_.str_;
71 last_token_ = NULL;
72}
73
74const char* Lexer::TokenName(Token t) {
75 switch (t) {
76 case ERROR: return "lexing error";
77 case BUILD: return "'build'";
78 case COLON: return "':'";
79 case DEFAULT: return "'default'";
80 case EQUALS: return "'='";
81 case IDENT: return "identifier";
82 case INCLUDE: return "'include'";
83 case INDENT: return "indent";
84 case NEWLINE: return "newline";
85 case PIPE2: return "'||'";
86 case PIPE: return "'|'";
87 case PIPEAT: return "'|@'";
88 case POOL: return "'pool'";
89 case RULE: return "'rule'";
90 case SUBNINJA: return "'subninja'";
91 case TEOF: return "eof";
92 }
93 return NULL; // not reached
94}
95
96const char* Lexer::TokenErrorHint(Token expected) {
97 switch (expected) {
98 case COLON:
99 return " ($ also escapes ':')";
100 default:
101 return "";
102 }
103}
104
106 if (last_token_) {
107 switch (last_token_[0]) {
108 case '\t':
109 return "tabs are not allowed, use spaces";
110 }
111 }
112 return "lexing error";
113}
114
115void Lexer::UnreadToken() {
117}
118
120 const char* p = ofs_;
121 const char* q;
122 const char* start;
123 Lexer::Token token;
124 for (;;) {
125 start = p;
126 /*!re2c
127 re2c:define:YYCTYPE = "unsigned char";
128 re2c:define:YYCURSOR = p;
129 re2c:define:YYMARKER = q;
130 re2c:yyfill:enable = 0;
131
132 nul = "\000";
133 simple_varname = [a-zA-Z0-9_-]+;
134 varname = [a-zA-Z0-9_.-]+;
135
136 [ ]*"#"[^\000\n]*"\n" { continue; }
137 [ ]*"\r\n" { token = NEWLINE; break; }
138 [ ]*"\n" { token = NEWLINE; break; }
139 [ ]+ { token = INDENT; break; }
140 "build" { token = BUILD; break; }
141 "pool" { token = POOL; break; }
142 "rule" { token = RULE; break; }
143 "default" { token = DEFAULT; break; }
144 "=" { token = EQUALS; break; }
145 ":" { token = COLON; break; }
146 "|@" { token = PIPEAT; break; }
147 "||" { token = PIPE2; break; }
148 "|" { token = PIPE; break; }
149 "include" { token = INCLUDE; break; }
150 "subninja" { token = SUBNINJA; break; }
151 varname { token = IDENT; break; }
152 nul { token = TEOF; break; }
153 [^] { token = ERROR; break; }
154 */
155 }
156
157 last_token_ = start;
158 ofs_ = p;
159 if (token != NEWLINE && token != TEOF)
161 return token;
162}
163
164bool Lexer::PeekToken(Token token) {
165 Token t = ReadToken();
166 if (t == token)
167 return true;
168 UnreadToken();
169 return false;
170}
171
173 const char* p = ofs_;
174 const char* q;
175 for (;;) {
176 ofs_ = p;
177 /*!re2c
178 [ ]+ { continue; }
179 "$\r\n" { continue; }
180 "$\n" { continue; }
181 nul { break; }
182 [^] { break; }
183 */
184 }
185}
186
187bool Lexer::ReadIdent(string* out) {
188 const char* p = ofs_;
189 const char* start;
190 for (;;) {
191 start = p;
192 /*!re2c
193 varname {
194 out->assign(start, p - start);
195 break;
196 }
197 [^] {
198 last_token_ = start;
199 return false;
200 }
201 */
202 }
203 last_token_ = start;
204 ofs_ = p;
206 return true;
207}
208
209bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
210 const char* p = ofs_;
211 const char* q;
212 const char* start;
213 for (;;) {
214 start = p;
215 /*!re2c
216 [^$ :\r\n|\000]+ {
217 eval->AddText(StringPiece(start, p - start));
218 continue;
219 }
220 "\r\n" {
221 if (path)
222 p = start;
223 break;
224 }
225 [ :|\n] {
226 if (path) {
227 p = start;
228 break;
229 } else {
230 if (*start == '\n')
231 break;
232 eval->AddText(StringPiece(start, 1));
233 continue;
234 }
235 }
236 "$$" {
237 eval->AddText(StringPiece("$", 1));
238 continue;
239 }
240 "$ " {
241 eval->AddText(StringPiece(" ", 1));
242 continue;
243 }
244 "$\r\n"[ ]* {
245 continue;
246 }
247 "$\n"[ ]* {
248 continue;
249 }
250 "${"varname"}" {
251 eval->AddSpecial(StringPiece(start + 2, p - start - 3));
252 continue;
253 }
254 "$"simple_varname {
255 eval->AddSpecial(StringPiece(start + 1, p - start - 1));
256 continue;
257 }
258 "$:" {
259 eval->AddText(StringPiece(":", 1));
260 continue;
261 }
262 "$". {
263 last_token_ = start;
264 return Error("bad $-escape (literal $ must be written as $$)", err);
265 }
266 nul {
267 last_token_ = start;
268 return Error("unexpected EOF", err);
269 }
270 [^] {
271 last_token_ = start;
272 return Error(DescribeLastError(), err);
273 }
274 */
275 }
276 last_token_ = start;
277 ofs_ = p;
278 if (path)
280 // Non-path strings end in newlines, so there's no whitespace to eat.
281 return true;
282}
Definition hash_map.h:26
A tokenized string that contains variable references.
Definition eval_env.h:35
static const char * TokenName(Token t)
Return a human-readable form of a token, used in error messages.
Definition lexer.cc:75
Token
Definition lexer.h:32
@ TEOF
Definition lexer.h:48
@ COLON
Definition lexer.h:35
@ NEWLINE
Definition lexer.h:41
@ POOL
Definition lexer.h:45
@ DEFAULT
Definition lexer.h:36
@ RULE
Definition lexer.h:46
@ INDENT
Definition lexer.h:40
@ ERROR
Definition lexer.h:33
@ INCLUDE
Definition lexer.h:39
@ PIPE
Definition lexer.h:42
@ EQUALS
Definition lexer.h:37
@ BUILD
Definition lexer.h:34
@ IDENT
Definition lexer.h:38
@ PIPE2
Definition lexer.h:43
@ SUBNINJA
Definition lexer.h:47
@ PIPEAT
Definition lexer.h:44
bool PeekToken(Token token)
If the next token is token, read it and return true.
Definition lexer.cc:463
Lexer()
Definition lexer.h:28
std::string DescribeLastError()
If the last token read was an ERROR token, provide more info or the empty string.
Definition lexer.cc:106
const char * ofs_
Definition lexer.h:102
void UnreadToken()
Rewind to the last read Token.
Definition lexer.cc:116
bool ReadEvalString(EvalString *eval, bool path, std::string *err)
Read a $-escaped string.
Definition lexer.cc:623
const char * last_token_
Definition lexer.h:103
void Start(StringPiece filename, StringPiece input)
Start parsing some input.
Definition lexer.cc:68
Token ReadToken()
Read a Token from the Token enum.
Definition lexer.cc:120
static const char * TokenErrorHint(Token expected)
Return a human-readable token hint, used in error messages.
Definition lexer.cc:97
StringPiece filename_
Definition lexer.h:100
void EatWhitespace()
Skip past whitespace (called after each read token/ident/etc.).
Definition lexer.cc:471
bool ReadIdent(std::string *out)
Read a simple identifier (a rule or variable name).
Definition lexer.cc:554
StringPiece input_
Definition lexer.h:101
bool Error(const std::string &message, std::string *err)
Construct an error message with context.
Definition lexer.cc:25
StringPiece represents a slice of a string whose memory is managed externally.