Ninja
depfile_parser.cc
Go to the documentation of this file.
1/* Generated by re2c */
2// Copyright 2011 Google Inc. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include "depfile_parser.h"
17#include "util.h"
18
19#include <algorithm>
20
21using namespace std;
22
27
28// A note on backslashes in Makefiles, from reading the docs:
29// Backslash-newline is the line continuation character.
30// Backslash-# escapes a # (otherwise meaningful as a comment start).
31// Backslash-% escapes a % (otherwise meaningful as a special).
32// Finally, quoting the GNU manual, "Backslashes that are not in danger
33// of quoting ‘%’ characters go unmolested."
34// How do you end a line with a backslash? The netbsd Make docs suggest
35// reading the result of a shell command echoing a backslash!
36//
37// Rather than implement all of the above, we follow what GCC/Clang produces:
38// Backslashes escape a space or hash sign.
38// When a space is preceded by 2N+1 backslashes, it represents N backslashes
40// followed by space.
41// When a space is preceded by 2N backslashes, it represents 2N backslashes at
42// the end of a filename.
43// A hash sign is escaped by a single backslash. All other backslashes remain
44// unchanged.
45//
46// If anyone actually has depfiles that rely on the more complicated
47// behavior we can adjust this.
// Parses Makefile-style dependency text ("targets: dependencies") in place.
//
// content: buffer holding the depfile text. It is modified: escape sequences
//          are collapsed by overwriting bytes inside this same buffer, and
//          every StringPiece stored in outs_ / ins_ points into it, so
//          presumably the caller must keep *content alive and unmodified
//          for as long as those members are used (confirm against callers).
// err:     receives a message when parsing fails.
//
// Returns true on success (including input that yields no filenames at
// all). Returns false with *err set to either
// "inputs may not also have inputs" or "expected ':' in depfile".
//
// NOTE(review): the scanner below (generated by re2c) never compares `in`
// against `end` while matching; it appears to rely on the byte at `end`
// being NUL (std::string's terminator), which is not flagged in yybm and
// therefore stops every scanning loop -- confirm.
48bool DepfileParser::Parse(string* content, string* err) {
49 // in: current parser input point.
50 // end: end of input.
51 // parsing_targets: whether we are parsing targets or dependencies.
52 char* in = &(*content)[0];
53 char* end = in + content->size();
 // have_target: set once any filename ending in ':' has been seen.
54 bool have_target = false;
55 bool parsing_targets = true;
 // poisoned_input: set when a name already recorded in ins_ shows up on the
 // target side of the current rule; see the bookkeeping after the scanner.
56 bool poisoned_input = false;
 // is_empty: stays true until at least one non-empty filename is produced.
57 bool is_empty = true;
 // Each iteration of this outer loop produces at most one filename.
58 while (in < end) {
59 bool have_newline = false;
60 // out: current output point (typically same as in, but can fall behind
61 // as we de-escape backslashes).
62 char* out = in;
63 // filename: start of the current parsed filename.
64 char* filename = out;
 // Inner loop: each pass matches one token. Actions that `continue` keep
 // extending the current filename; actions that `break` terminate it.
65 for (;;) {
66 // start: beginning of the current parsed span.
67 const char* start = in;
 // yymarker: backtracking point, restored in yy12 when a backslash + '\r'
 // turns out not to be followed by '\n'.
68 char* yymarker = NULL;
69
70 {
71 unsigned char yych;
 // yybm: per-byte class table indexed by the unsigned byte value. Bit 128
 // marks bytes that may continue a plain-text span (state yy5). Bytes
 // without the bit: 0x00-0x20 (controls and space), '#', '$', '*', ';',
 // '<', '>', '\\', '^', '`', '|' and 0x7F.
72 static const unsigned char yybm[] = {
73 0, 0, 0, 0, 0, 0, 0, 0,
74 0, 0, 0, 0, 0, 0, 0, 0,
75 0, 0, 0, 0, 0, 0, 0, 0,
76 0, 0, 0, 0, 0, 0, 0, 0,
77 0, 128, 128, 0, 0, 128, 128, 128,
78 128, 128, 0, 128, 128, 128, 128, 128,
79 128, 128, 128, 128, 128, 128, 128, 128,
80 128, 128, 128, 0, 0, 128, 0, 128,
81 128, 128, 128, 128, 128, 128, 128, 128,
82 128, 128, 128, 128, 128, 128, 128, 128,
83 128, 128, 128, 128, 128, 128, 128, 128,
84 128, 128, 128, 128, 0, 128, 0, 128,
85 0, 128, 128, 128, 128, 128, 128, 128,
86 128, 128, 128, 128, 128, 128, 128, 128,
87 128, 128, 128, 128, 128, 128, 128, 128,
88 128, 128, 128, 128, 0, 128, 128, 0,
89 128, 128, 128, 128, 128, 128, 128, 128,
90 128, 128, 128, 128, 128, 128, 128, 128,
91 128, 128, 128, 128, 128, 128, 128, 128,
92 128, 128, 128, 128, 128, 128, 128, 128,
93 128, 128, 128, 128, 128, 128, 128, 128,
94 128, 128, 128, 128, 128, 128, 128, 128,
95 128, 128, 128, 128, 128, 128, 128, 128,
96 128, 128, 128, 128, 128, 128, 128, 128,
97 128, 128, 128, 128, 128, 128, 128, 128,
98 128, 128, 128, 128, 128, 128, 128, 128,
99 128, 128, 128, 128, 128, 128, 128, 128,
100 128, 128, 128, 128, 128, 128, 128, 128,
101 128, 128, 128, 128, 128, 128, 128, 128,
102 128, 128, 128, 128, 128, 128, 128, 128,
103 128, 128, 128, 128, 128, 128, 128, 128,
104 128, 128, 128, 128, 128, 128, 128, 128,
105 };
 // Initial state: dispatch on the first byte of the token.
106 yych = *in;
107 if (yybm[0+yych] & 128) {
108 goto yy5;
109 }
 // Not a plain-text byte: '\n' -> yy3, '\r' -> yy4, '$' -> yy7,
 // '\\' -> yy8, NUL falls through to the action just below, and every
 // other unflagged byte goes to yy1.
110 if (yych <= '\r') {
111 if (yych <= '\t') {
112 if (yych >= 0x01) goto yy1;
113 } else {
114 if (yych <= '\n') goto yy3;
115 if (yych <= '\f') goto yy1;
116 goto yy4;
117 }
118 } else {
119 if (yych <= '$') {
120 if (yych <= '#') goto yy1;
121 goto yy7;
122 } else {
123 if (yych <= '>') goto yy1;
124 if (yych <= '\\') goto yy8;
125 goto yy1;
126 }
127 }
 // NUL byte: step over it and end the current filename.
128 ++in;
129 {
130 break;
131 }
132yy1:
133 ++in;
134yy2:
135 {
136 // For any other character (e.g. whitespace), swallow it here,
137 // allowing the outer logic to loop around again.
138 break;
139 }
140yy3:
141 ++in;
142 {
143 // A newline ends the current file name and the current rule.
144 have_newline = true;
145 break;
146 }
 // yy4: saw '\r'. "\r\n" is treated as a newline (yy3); a lone '\r' is
 // swallowed like any other separator (yy2).
147yy4:
148 yych = *++in;
149 if (yych == '\n') goto yy3;
150 goto yy2;
 // yy5: consume a maximal run of plain-text bytes (bit 128 in yybm).
151yy5:
152 yych = *++in;
153 if (yybm[0+yych] & 128) {
154 goto yy5;
155 }
156yy6:
157 {
158 // Got a span of plain text.
159 int len = (int)(in - start);
160 // Need to shift it over if we're overwriting backslashes.
161 if (out < start)
162 memmove(out, start, len);
163 out += len;
164 continue;
165 }
 // yy7: saw '$'. "$$" de-escapes to a single '$' (yy9); a lone '$' is
 // swallowed via yy2.
166yy7:
167 yych = *++in;
168 if (yych == '$') goto yy9;
169 goto yy2;
 // yy8: saw one backslash; decide by the byte that follows it. yymarker
 // remembers the position just after the backslash for yy12.
 //   NUL  -> yy2  (swallow the backslash)
 //   '\n' -> yy11 (line continuation)
 //   '\r' -> yy12 (possible "\r\n" continuation)
 //   ' '  -> yy13 (escaped space)
 //   '#'  -> yy14 (escaped hash)
 //   ':'  -> yy15 (possibly escaped colon)
 //   '\\' -> yy17 (two backslashes so far)
 //   else -> yy10 (backslash and the next byte are kept as plain text)
170yy8:
171 yych = *(yymarker = ++in);
172 if (yych <= ' ') {
173 if (yych <= '\n') {
174 if (yych <= 0x00) goto yy2;
175 if (yych <= '\t') goto yy10;
176 goto yy11;
177 } else {
178 if (yych == '\r') goto yy12;
179 if (yych <= 0x1F) goto yy10;
180 goto yy13;
181 }
182 } else {
183 if (yych <= '9') {
184 if (yych == '#') goto yy14;
185 goto yy10;
186 } else {
187 if (yych <= ':') goto yy15;
188 if (yych == '\\') goto yy17;
189 goto yy10;
190 }
191 }
192yy9:
193 ++in;
194 {
195 // De-escape dollar character.
196 *out++ = '$';
197 continue;
198 }
 // yy10: the byte after the backslash run is ordinary; include it and emit
 // the whole match unchanged through the plain-text action (yy6).
199yy10:
200 ++in;
201 goto yy6;
202yy11:
203 ++in;
204 {
205 // A line continuation ends the current file name.
206 break;
207 }
 // yy12: backslash followed by '\r'. With a following '\n' this is a line
 // continuation (yy11); otherwise rewind to just after the backslash and
 // swallow only the backslash (yy2).
208yy12:
209 yych = *++in;
210 if (yych == '\n') goto yy11;
211 in = yymarker;
212 goto yy2;
213yy13:
214 ++in;
215 {
216 // 2N+1 backslashes plus space -> N backslashes plus space.
 // len counts the backslashes plus the space (2N+2), so n below is N.
217 int len = (int)(in - start);
218 int n = len / 2 - 1;
 // When out == start the first n bytes are already backslashes, so the
 // fill is only needed after earlier de-escaping moved out behind start.
219 if (out < start)
220 memset(out, '\\', n);
221 out += n;
222 *out++ = ' ';
223 continue;
224 }
225yy14:
226 ++in;
227 {
228 // De-escape hash sign, but preserve other leading backslashes.
 // len counts the backslashes plus '#'; len - 2 backslashes are kept.
229 int len = (int)(in - start);
230 if (len > 2 && out < start)
231 memset(out, '\\', len - 2);
232 out += len - 2;
233 *out++ = '#';
234 continue;
235 }
 // yy15: backslash run followed by ':'. Peek at the next byte without
 // consuming it: NUL, '\t', '\n', '\r' or ' ' -> yy18 (the colon ends a
 // target); any other byte falls into yy16 (escaped colon in a filename).
236yy15:
237 yych = *++in;
238 if (yych <= '\f') {
239 if (yych <= 0x00) goto yy18;
240 if (yych <= 0x08) goto yy16;
241 if (yych <= '\n') goto yy18;
242 } else {
243 if (yych <= '\r') goto yy18;
244 if (yych == ' ') goto yy18;
245 }
246yy16:
247 {
248 // De-escape colon sign, but preserve other leading backslashes.
249 // Regular expression uses lookahead to make sure that no whitespace
250 // nor EOF follows. In that case it'd be the : at the end of a target
 // Here `in` points at the lookahead byte, which is not consumed, so len
 // counts the backslashes plus ':' only.
251 int len = (int)(in - start);
252 if (len > 2 && out < start)
253 memset(out, '\\', len - 2);
254 out += len - 2;
255 *out++ = ':';
256 continue;
257 }
 // yy17: an even number of backslashes has been read so far. A following
 // space goes to yy19; NUL, '\n' and '\r' end the match as plain text (yy6)
 // without being consumed; '#', ':' and '\\' continue as in yy8.
258yy17:
259 yych = *++in;
260 if (yych <= ' ') {
261 if (yych <= '\n') {
262 if (yych <= 0x00) goto yy6;
263 if (yych <= '\t') goto yy10;
264 goto yy6;
265 } else {
266 if (yych == '\r') goto yy6;
267 if (yych <= 0x1F) goto yy10;
268 goto yy19;
269 }
270 } else {
271 if (yych <= '9') {
272 if (yych == '#') goto yy14;
273 goto yy10;
274 } else {
275 if (yych <= ':') goto yy15;
276 if (yych == '\\') goto yy20;
277 goto yy10;
278 }
279 }
280yy18:
281 ++in;
282 {
283 // Backslash followed by : and whitespace.
284 // It is therefore normal text and not an escaped colon
 // The trailing whitespace/NUL byte was consumed by ++in above but is
 // excluded from the copied text, hence the "- 1".
285 int len = (int)(in - start - 1);
286 // Need to shift it over if we're overwriting backslashes.
287 if (out < start)
288 memmove(out, start, len);
289 out += len;
 // If the consumed terminator was a newline, it also ends the rule.
290 if (*(in - 1) == '\n')
291 have_newline = true;
292 break;
293 }
294yy19:
295 ++in;
296 {
297 // 2N backslashes plus space -> 2N backslashes, end of filename.
 // len counts the backslashes plus the space; the space is dropped.
298 int len = (int)(in - start);
299 if (out < start)
300 memset(out, '\\', len - 1);
301 out += len - 1;
302 break;
303 }
 // yy20: an odd number (three or more) of backslashes has been read so far.
 // Same dispatch as yy17 except that a following space is the escaped-space
 // case (yy13) and a further backslash returns to the even state (yy17).
304yy20:
305 yych = *++in;
306 if (yych <= ' ') {
307 if (yych <= '\n') {
308 if (yych <= 0x00) goto yy6;
309 if (yych <= '\t') goto yy10;
310 goto yy6;
311 } else {
312 if (yych == '\r') goto yy6;
313 if (yych <= 0x1F) goto yy10;
314 goto yy13;
315 }
316 } else {
317 if (yych <= '9') {
318 if (yych == '#') goto yy14;
319 goto yy10;
320 } else {
321 if (yych <= ':') goto yy15;
322 if (yych == '\\') goto yy17;
323 goto yy10;
324 }
325 }
326 }
327
328 }
329
 // The de-escaped filename now occupies [filename, out).
330 int len = (int)(out - filename);
 // Captured before parsing_targets is flipped below, so the token that
 // carries the trailing ':' is itself still classified as a target.
331 const bool is_dependency = !parsing_targets;
332 if (len > 0 && filename[len - 1] == ':') {
333 len--; // Strip off trailing colon, if any.
334 parsing_targets = false;
335 have_target = true;
336 }
337
338 if (len > 0) {
339 is_empty = false;
 // piece refers to bytes inside *content; nothing is copied here.
340 StringPiece piece = StringPiece(filename, len);
341 // If we've seen this as an input before, skip it.
342 std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
343 if (pos == ins_.end()) {
344 if (is_dependency) {
345 if (poisoned_input) {
346 *err = "inputs may not also have inputs";
347 return false;
348 }
349 // New input.
350 ins_.push_back(piece);
351 } else {
352 // Check for a new output.
353 if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
354 outs_.push_back(piece);
355 }
356 } else if (!is_dependency) {
357 // We've passed an input on the left side; reject new inputs.
358 poisoned_input = true;
359 }
360 }
361
362 if (have_newline) {
363 // A newline ends a rule so the next filename will be a new target.
364 parsing_targets = true;
365 poisoned_input = false;
366 }
367 }
 // At least one filename was seen but none of them ended in ':'.
368 if (!have_target && !is_empty) {
369 *err = "expected ':' in depfile";
370 return false;
371 }
372 return true;
373}
Definition hash_map.h:26
bool Parse(std::string *content, std::string *err)
Parse an input file.
DepfileParser(DepfileParserOptions options=DepfileParserOptions())
std::vector< StringPiece > outs_
DepfileParserOptions options_
std::vector< StringPiece > ins_
StringPiece represents a slice of a string whose memory is managed externally.