Ninja
depfile_parser.cc
Go to the documentation of this file.
1 /* Generated by re2c 1.3 */
2 // Copyright 2011 Google Inc. All Rights Reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include "depfile_parser.h"
17 #include "util.h"
18 
19 #include <algorithm>
20 
21 using namespace std;
22 
24  : options_(options)
25 {
26 }
27 
28 // A note on backslashes in Makefiles, from reading the docs:
29 // Backslash-newline is the line continuation character.
30 // Backslash-# escapes a # (otherwise meaningful as a comment start).
31 // Backslash-% escapes a % (otherwise meaningful as a special).
32 // Finally, quoting the GNU manual, "Backslashes that are not in danger
33 // of quoting ‘%’ characters go unmolested."
34 // How do you end a line with a backslash? The netbsd Make docs suggest
35 // reading the result of a shell command echoing a backslash!
36 //
37 // Rather than implement all of the above, we follow what GCC/Clang produces:
38 // Backslashes escape a space or hash sign.
39 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
40 // followed by space.
41 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
42 // the end of a filename.
43 // A hash sign is escaped by a single backslash. All other backslashes remain
44 // unchanged.
45 //
46 // If anyone actually has depfiles that rely on the more complicated
47 // behavior we can adjust this.
//
// Parse() scans the depfile text held in |content| and records the target
// names in outs_ and the dependency names in ins_.
//
// Escapes are removed in place: the scanner writes the de-escaped bytes back
// into |content|'s own buffer, and the StringPieces pushed to outs_/ins_ point
// into that buffer (they are not NUL-terminated copies).
// NOTE(review): this presumably means |content| must outlive, and not be
// modified while, outs_/ins_ are in use -- confirm against callers.
//
// The generated scanner reads *in without comparing against |end|; only the
// outer loop checks |end|. NOTE(review): this appears to rely on the NUL byte
// that terminates the string's buffer -- confirm.
//
// Returns true on success. Returns false and sets *err when
//  - a name that was already recorded as an input appears as a target and the
//    same rule then introduces a new input ("inputs may not also have
//    inputs"), or
//  - no filename ending in ':' was seen at all ("expected ':' in depfile").
48 bool DepfileParser::Parse(string* content, string* err) {
49  // in: current parser input point.
50  // end: end of input.
51  // parsing_targets: whether we are parsing targets or dependencies.
52  char* in = &(*content)[0];
53  char* end = in + content->size();
    // have_target: set once any filename with a trailing ':' has been seen.
54  bool have_target = false;
55  bool parsing_targets = true;
    // poisoned_input: set when a target of the current rule is already known
    // as an input; while set, any new input is an error.
56  bool poisoned_input = false;
    // Each iteration of this loop scans exactly one filename (possibly empty).
57  while (in < end) {
58  bool have_newline = false;
59  // out: current output point (typically same as in, but can fall behind
60  // as we de-escape backslashes).
61  char* out = in;
62  // filename: start of the current parsed filename.
63  char* filename = out;
    // Each iteration of this loop scans one token. `continue` appends the
    // token to the current filename; `break` ends the current filename.
64  for (;;) {
65  // start: beginning of the current parsed span.
66  const char* start = in;
    // yymarker: backtracking point, set in yy13 and restored in yy19.
67  char* yymarker = NULL;
68 
    // ---- Begin re2c-generated scanner (see the "Generated by re2c" note at
    // the top of the file). ----
69  {
70  unsigned char yych;
    // Lookup table indexed by byte value. 128 marks bytes that can be part
    // of a plain-text span (consumed in state yy9). 0 marks bytes that are
    // dispatched individually below: control characters, space, '#', '$',
    // '\\' and a few other punctuation characters.
71  static const unsigned char yybm[] = {
72  0, 0, 0, 0, 0, 0, 0, 0,
73  0, 0, 0, 0, 0, 0, 0, 0,
74  0, 0, 0, 0, 0, 0, 0, 0,
75  0, 0, 0, 0, 0, 0, 0, 0,
76  0, 128, 0, 0, 0, 128, 0, 0,
77  128, 128, 0, 128, 128, 128, 128, 128,
78  128, 128, 128, 128, 128, 128, 128, 128,
79  128, 128, 128, 0, 0, 128, 0, 0,
80  128, 128, 128, 128, 128, 128, 128, 128,
81  128, 128, 128, 128, 128, 128, 128, 128,
82  128, 128, 128, 128, 128, 128, 128, 128,
83  128, 128, 128, 128, 0, 128, 0, 128,
84  0, 128, 128, 128, 128, 128, 128, 128,
85  128, 128, 128, 128, 128, 128, 128, 128,
86  128, 128, 128, 128, 128, 128, 128, 128,
87  128, 128, 128, 128, 0, 128, 128, 0,
88  128, 128, 128, 128, 128, 128, 128, 128,
89  128, 128, 128, 128, 128, 128, 128, 128,
90  128, 128, 128, 128, 128, 128, 128, 128,
91  128, 128, 128, 128, 128, 128, 128, 128,
92  128, 128, 128, 128, 128, 128, 128, 128,
93  128, 128, 128, 128, 128, 128, 128, 128,
94  128, 128, 128, 128, 128, 128, 128, 128,
95  128, 128, 128, 128, 128, 128, 128, 128,
96  128, 128, 128, 128, 128, 128, 128, 128,
97  128, 128, 128, 128, 128, 128, 128, 128,
98  128, 128, 128, 128, 128, 128, 128, 128,
99  128, 128, 128, 128, 128, 128, 128, 128,
100  128, 128, 128, 128, 128, 128, 128, 128,
101  128, 128, 128, 128, 128, 128, 128, 128,
102  128, 128, 128, 128, 128, 128, 128, 128,
103  128, 128, 128, 128, 128, 128, 128, 128,
104  };
    // Dispatch on the first byte of the token.
105  yych = *in;
106  if (yybm[0+yych] & 128) {
107  goto yy9;
108  }
    // Not a plain-text byte: '\n' -> yy6, '\r' -> yy8, '$' -> yy12,
    // '\\' -> yy13, any other non-NUL byte -> yy4. A NUL byte matches none of
    // the gotos and falls through to the `++in; break;` after this if/else.
109  if (yych <= '\r') {
110  if (yych <= '\t') {
111  if (yych >= 0x01) goto yy4;
112  } else {
113  if (yych <= '\n') goto yy6;
114  if (yych <= '\f') goto yy4;
115  goto yy8;
116  }
117  } else {
118  if (yych <= '$') {
119  if (yych <= '#') goto yy4;
120  goto yy12;
121  } else {
122  if (yych <= '?') goto yy4;
123  if (yych <= '\\') goto yy13;
124  goto yy4;
125  }
126  }
    // NUL byte: consume it and end the current filename.
127  ++in;
128  {
129  break;
130  }
    // yy4: consume one byte that has no special handling.
131 yy4:
132  ++in;
133 yy5:
134  {
135  // For any other character (e.g. whitespace), swallow it here,
136  // allowing the outer logic to loop around again.
137  break;
138  }
139 yy6:
140  ++in;
141  {
142  // A newline ends the current file name and the current rule.
143  have_newline = true;
144  break;
145  }
    // yy8: '\r' counts as a newline only when '\n' follows; otherwise it is
    // swallowed like any other byte.
146 yy8:
147  yych = *++in;
148  if (yych == '\n') goto yy6;
149  goto yy5;
    // yy9: extend the plain-text span while the next byte has its yybm bit set.
150 yy9:
151  yych = *++in;
152  if (yybm[0+yych] & 128) {
153  goto yy9;
154  }
155 yy11:
156  {
157  // Got a span of plain text.
158  int len = (int)(in - start);
159  // Need to shift it over if we're overwriting backslashes.
160  if (out < start)
161  memmove(out, start, len);
162  out += len;
163  continue;
164  }
    // yy12: '$' seen. "$$" is an escaped dollar (yy14); a lone '$' is
    // swallowed and ends the current filename (yy5).
165 yy12:
166  yych = *++in;
167  if (yych == '$') goto yy14;
168  goto yy5;
    // yy13: one backslash seen. Remember the position just after it in
    // yymarker so that yy19 can back up, then dispatch on the next byte:
    // NUL -> yy5, '\n' -> yy17, '\r' -> yy19, ' ' -> yy21, '#' -> yy23,
    // ':' -> yy25, '\\' -> yy27, anything else -> yy16.
169 yy13:
170  yych = *(yymarker = ++in);
171  if (yych <= ' ') {
172  if (yych <= '\n') {
173  if (yych <= 0x00) goto yy5;
174  if (yych <= '\t') goto yy16;
175  goto yy17;
176  } else {
177  if (yych == '\r') goto yy19;
178  if (yych <= 0x1F) goto yy16;
179  goto yy21;
180  }
181  } else {
182  if (yych <= '9') {
183  if (yych == '#') goto yy23;
184  goto yy16;
185  } else {
186  if (yych <= ':') goto yy25;
187  if (yych == '\\') goto yy27;
188  goto yy16;
189  }
190  }
191 yy14:
192  ++in;
193  {
194  // De-escape dollar character.
195  *out++ = '$';
196  continue;
197  }
    // yy16: backslash(es) followed by an ordinary byte: consume that byte and
    // emit the whole span unchanged as plain text.
198 yy16:
199  ++in;
200  goto yy11;
201 yy17:
202  ++in;
203  {
204  // A line continuation ends the current file name.
205  break;
206  }
    // yy19: backslash followed by '\r'. This is a line continuation only if
    // '\n' follows; otherwise rewind to just after the backslash and swallow
    // the backslash via yy5.
207 yy19:
208  yych = *++in;
209  if (yych == '\n') goto yy17;
210  in = yymarker;
211  goto yy5;
212 yy21:
213  ++in;
214  {
215  // 2N+1 backslashes plus space -> N backslashes plus space.
    // len counts the backslashes and the space, i.e. 2N+2, so n == N.
216  int len = (int)(in - start);
217  int n = len / 2 - 1;
    // When out == start the leading bytes of the span are already
    // backslashes, so nothing needs to be written.
218  if (out < start)
219  memset(out, '\\', n);
220  out += n;
221  *out++ = ' ';
222  continue;
223  }
224 yy23:
225  ++in;
226  {
227  // De-escape hash sign, but preserve other leading backslashes.
    // len counts the backslashes and the '#'; len - 2 drops one backslash.
228  int len = (int)(in - start);
229  if (len > 2 && out < start)
230  memset(out, '\\', len - 2);
231  out += len - 2;
232  *out++ = '#';
233  continue;
234  }
    // yy25: backslash(es) followed by ':'. Peek at the byte after the colon:
    // NUL, '\t', '\n', '\r' or ' ' -> yy28 (not an escape); anything else
    // falls through to yy26 (escaped colon) without consuming the peeked byte.
235 yy25:
236  yych = *++in;
237  if (yych <= '\f') {
238  if (yych <= 0x00) goto yy28;
239  if (yych <= 0x08) goto yy26;
240  if (yych <= '\n') goto yy28;
241  } else {
242  if (yych <= '\r') goto yy28;
243  if (yych == ' ') goto yy28;
244  }
245 yy26:
246  {
247  // De-escape colon sign, but preserve other leading backslashes.
248  // Regular expression uses lookahead to make sure that no whitespace
249  // nor EOF follows. In that case it'd be the : at the end of a target
250  int len = (int)(in - start);
251  if (len > 2 && out < start)
252  memset(out, '\\', len - 2);
253  out += len - 2;
254  *out++ = ':';
255  continue;
256  }
    // yy27: an even number of backslashes seen so far. On NUL, '\n' or '\r'
    // the backslashes are emitted as plain text (yy11) and the terminating
    // byte is left for the next token. ' ' -> yy30, '#' -> yy23, ':' -> yy25,
    // '\\' -> yy32, anything else -> yy16.
257 yy27:
258  yych = *++in;
259  if (yych <= ' ') {
260  if (yych <= '\n') {
261  if (yych <= 0x00) goto yy11;
262  if (yych <= '\t') goto yy16;
263  goto yy11;
264  } else {
265  if (yych == '\r') goto yy11;
266  if (yych <= 0x1F) goto yy16;
267  goto yy30;
268  }
269  } else {
270  if (yych <= '9') {
271  if (yych == '#') goto yy23;
272  goto yy16;
273  } else {
274  if (yych <= ':') goto yy25;
275  if (yych == '\\') goto yy32;
276  goto yy16;
277  }
278  }
279 yy28:
280  ++in;
281  {
282  // Backslash followed by : and whitespace.
283  // It is therefore normal text and not an escaped colon
    // The trailing whitespace byte was consumed above; the -1 keeps it out
    // of the copied span.
284  int len = (int)(in - start - 1);
285  // Need to shift it over if we're overwriting backslashes.
286  if (out < start)
287  memmove(out, start, len);
288  out += len;
    // If the consumed whitespace byte was a newline, the rule ends here too.
289  if (*(in - 1) == '\n')
290  have_newline = true;
291  break;
292  }
293 yy30:
294  ++in;
295  {
296  // 2N backslashes plus space -> 2N backslashes, end of filename.
    // len - 1 excludes the space, which is consumed but not emitted.
297  int len = (int)(in - start);
298  if (out < start)
299  memset(out, '\\', len - 1);
300  out += len - 1;
301  break;
302  }
    // yy32: an odd number (three or more) of backslashes seen so far. Same
    // dispatch as yy27, except ' ' -> yy21 (escaped space) and '\\' -> yy27.
303 yy32:
304  yych = *++in;
305  if (yych <= ' ') {
306  if (yych <= '\n') {
307  if (yych <= 0x00) goto yy11;
308  if (yych <= '\t') goto yy16;
309  goto yy11;
310  } else {
311  if (yych == '\r') goto yy11;
312  if (yych <= 0x1F) goto yy16;
313  goto yy21;
314  }
315  } else {
316  if (yych <= '9') {
317  if (yych == '#') goto yy23;
318  goto yy16;
319  } else {
320  if (yych <= ':') goto yy25;
321  if (yych == '\\') goto yy27;
322  goto yy16;
323  }
324  }
325  }
    // ---- End re2c-generated scanner. ----
326 
327  }
328 
    // The de-escaped filename now occupies [filename, out).
329  int len = (int)(out - filename);
    // Capture the role of this filename before a trailing ':' below switches
    // the parser over to dependencies.
330  const bool is_dependency = !parsing_targets;
331  if (len > 0 && filename[len - 1] == ':') {
332  len--; // Strip off trailing colon, if any.
333  parsing_targets = false;
334  have_target = true;
335  }
336 
    // Empty filenames (e.g. from swallowed whitespace) are ignored.
337  if (len > 0) {
338  StringPiece piece = StringPiece(filename, len);
339  // If we've seen this as an input before, skip it.
340  std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
341  if (pos == ins_.end()) {
342  if (is_dependency) {
343  if (poisoned_input) {
344  *err = "inputs may not also have inputs";
345  return false;
346  }
347  // New input.
348  ins_.push_back(piece);
349  } else {
350  // Check for a new output.
351  if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
352  outs_.push_back(piece);
353  }
354  } else if (!is_dependency) {
355  // We've passed an input on the left side; reject new inputs.
356  poisoned_input = true;
357  }
358  }
359 
360  if (have_newline) {
361  // A newline ends a rule so the next filename will be a new target.
362  parsing_targets = true;
363  poisoned_input = false;
364  }
365  }
    // A depfile in which no filename ended with ':' is rejected.
366  if (!have_target) {
367  *err = "expected ':' in depfile";
368  return false;
369  }
370  return true;
371 }
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25
bool Parse(std::string *content, std::string *err)
Parse an input file.
std::vector< StringPiece > outs_
std::vector< StringPiece > ins_
DepfileParser(DepfileParserOptions options=DepfileParserOptions())