Ninja
depfile_parser.in.cc
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "depfile_parser.h"
16 #include "util.h"
17 
18 #include <algorithm>
19 
20 using namespace std;
21 
23  : options_(options)
24 {
25 }
26 
27 // A note on backslashes in Makefiles, from reading the docs:
28 // Backslash-newline is the line continuation character.
29 // Backslash-# escapes a # (otherwise meaningful as a comment start).
30 // Backslash-% escapes a % (otherwise meaningful as a special).
31 // Finally, quoting the GNU manual, "Backslashes that are not in danger
32 // of quoting ‘%’ characters go unmolested."
33 // How do you end a line with a backslash? The netbsd Make docs suggest
34 // reading the result of a shell command echoing a backslash!
35 //
36 // Rather than implement all of the above, we follow what GCC/Clang produces:
37 // Backslashes escape a space or hash sign.
38 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
39 // followed by space.
40 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
41 // the end of a filename.
42 // A hash sign is escaped by a single backslash. All other backslashes remain
43 // unchanged.
44 //
45 // If anyone actually has depfiles that rely on the more complicated
46 // behavior we can adjust this.
// Parses the Makefile-style dependency text held in |*content|.
//
// The buffer is scanned and de-escaped in place: de-escaped bytes are written
// back through the |out| pointer, so |*content| is modified by this call.
// Every parsed name is recorded as a StringPiece built from a pointer into
// that same buffer. A name parsed in target position (before the ':' of a
// rule) is appended to outs_ unless it is already in outs_ or ins_; a name
// parsed after the ':' is appended to ins_ unless it is already there.
//
// Returns true on success. Returns false and sets |*err| when a rule names a
// new input after one of its targets was found in ins_, or when no name
// ending in ':' was seen in the whole input.
//
// NOTE(review): since ins_/outs_ point into |*content|, presumably the caller
// must keep that string alive and unmodified while they are used - confirm.
// NOTE(review): memset/memmove are presumably declared via util.h - confirm.
47 bool DepfileParser::Parse(string* content, string* err) {
48  // in: current parser input point.
49  // end: end of input.
50  // parsing_targets: whether we are parsing targets or dependencies.
51  char* in = &(*content)[0];
52  char* end = in + content->size();
 // have_target: set once any name ending in ':' has been seen.
53  bool have_target = false;
54  bool parsing_targets = true;
 // poisoned_input: set when a target of the current rule was already in ins_.
55  bool poisoned_input = false;
 // Each iteration of this loop scans one name (possibly empty) and files it.
56  while (in < end) {
57  bool have_newline = false;
58  // out: current output point (typically same as in, but can fall behind
59  // as we de-escape backslashes).
60  char* out = in;
61  // filename: start of the current parsed filename.
62  char* filename = out;
 // Each pass matches one lexer rule at |in|. Rules that end with continue
 // extend the current name; rules that end with break terminate it.
63  for (;;) {
64  // start: beginning of the current parsed span.
65  const char* start = in;
66  char* yymarker = NULL;
67  /*!re2c
68  re2c:define:YYCTYPE = "unsigned char";
69  re2c:define:YYCURSOR = in;
70  re2c:define:YYLIMIT = end;
71  re2c:define:YYMARKER = yymarker;
72 
73  re2c:yyfill:enable = 0;
74 
75  re2c:indent:top = 2;
76  re2c:indent:string = " ";
77 
78  nul = "\000";
79  newline = '\r'?'\n';
80 
81  '\\\\'* '\\ ' {
82  // 2N+1 backslashes plus space -> N backslashes plus space.
83  int len = (int)(in - start);
 // len counts the 2N+1 backslashes and the space, so len / 2 - 1 equals N.
84  int n = len / 2 - 1;
 // When out == start the first n bytes are already backslashes, so they
 // only need rewriting if earlier de-escaping left out behind start.
85  if (out < start)
86  memset(out, '\\', n);
87  out += n;
88  *out++ = ' ';
89  continue;
90  }
91  '\\\\'+ ' ' {
92  // 2N backslashes plus space -> 2N backslashes, end of filename.
93  int len = (int)(in - start);
 // len - 1 drops the space, which is consumed but not copied to the name.
94  if (out < start)
95  memset(out, '\\', len - 1);
96  out += len - 1;
97  break;
98  }
99  '\\'+ '#' {
100  // De-escape hash sign, but preserve other leading backslashes.
101  int len = (int)(in - start);
 // len - 2 is the backslash count minus the one that escaped the hash.
102  if (len > 2 && out < start)
103  memset(out, '\\', len - 2);
104  out += len - 2;
105  *out++ = '#';
106  continue;
107  }
108  '\\'+ ':' [\x00\x20\r\n\t] {
109  // Backslash followed by : and whitespace.
110  // It is therefore normal text and not an escaped colon
 // len covers the backslashes and the colon but not the trailing
 // whitespace byte; the colon is kept so the check after the scan
 // loop can strip it and switch to parsing dependencies.
111  int len = (int)(in - start - 1);
112  // Need to shift it over if we're overwriting backslashes.
113  if (out < start)
114  memmove(out, start, len);
115  out += len;
 // The whitespace byte was consumed by this rule, so a newline has to be
 // noted here; the plain newline rule will not see it.
116  if (*(in - 1) == '\n')
117  have_newline = true;
118  break;
119  }
120  '\\'+ ':' {
121  // De-escape colon sign, but preserve other leading backslashes.
122  // Regular expression uses lookahead to make sure that no whitespace
123  // nor EOF follows. In that case it'd be the : at the end of a target
124  int len = (int)(in - start);
125  if (len > 2 && out < start)
126  memset(out, '\\', len - 2);
127  out += len - 2;
128  *out++ = ':';
129  continue;
130  }
131  '$$' {
132  // De-escape dollar character.
133  *out++ = '$';
134  continue;
135  }
136  '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
137  // Got a span of plain text.
138  int len = (int)(in - start);
139  // Need to shift it over if we're overwriting backslashes.
140  if (out < start)
141  memmove(out, start, len);
142  out += len;
143  continue;
144  }
145  nul {
 // A NUL byte ends the current name; the byte itself is not copied.
146  break;
147  }
148  '\\' newline {
149  // A line continuation ends the current file name.
150  break;
151  }
152  newline {
153  // A newline ends the current file name and the current rule.
154  have_newline = true;
155  break;
156  }
157  [^] {
158  // For any other character (e.g. whitespace), swallow it here,
159  // allowing the outer logic to loop around again.
160  break;
161  }
162  */
163  }
164 
 // The de-escaped name now occupies [filename, out).
165  int len = (int)(out - filename);
 // Captured before the colon check below clears parsing_targets, so the name
 // that carries the colon is still classified as a target.
166  const bool is_dependency = !parsing_targets;
167  if (len > 0 && filename[len - 1] == ':') {
168  len--; // Strip off trailing colon, if any.
169  parsing_targets = false;
170  have_target = true;
171  }
172 
 // len is 0 when only a separator was consumed or the name was a lone colon.
173  if (len > 0) {
174  StringPiece piece = StringPiece(filename, len);
175  // If we've seen this as an input before, skip it.
176  std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
177  if (pos == ins_.end()) {
178  if (is_dependency) {
179  if (poisoned_input) {
180  *err = "inputs may not also have inputs";
181  return false;
182  }
183  // New input.
184  ins_.push_back(piece);
185  } else {
186  // Check for a new output.
187  if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
188  outs_.push_back(piece);
189  }
190  } else if (!is_dependency) {
191  // We've passed an input on the left side; reject new inputs.
192  poisoned_input = true;
193  }
194  }
195 
196  if (have_newline) {
197  // A newline ends a rule so the next filename will be a new target.
198  parsing_targets = true;
199  poisoned_input = false;
200  }
201  }
202  if (!have_target) {
203  *err = "expected ':' in depfile";
204  return false;
205  }
206  return true;
207 }
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25
bool Parse(std::string *content, std::string *err)
Parse an input file.
std::vector< StringPiece > outs_
std::vector< StringPiece > ins_
DepfileParser(DepfileParserOptions options=DepfileParserOptions())