Ninja
lexer.in.cc
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "lexer.h"
16 
17 #include <stdio.h>
18 
19 #include "eval_env.h"
20 #include "util.h"
21 
22 using namespace std;
23 
24 bool Lexer::Error(const string& message, string* err) {
25  // Compute line/column.
26  int line = 1;
27  const char* line_start = input_.str_;
28  for (const char* p = input_.str_; p < last_token_; ++p) {
29  if (*p == '\n') {
30  ++line;
31  line_start = p + 1;
32  }
33  }
34  int col = last_token_ ? (int)(last_token_ - line_start) : 0;
35 
36  char buf[1024];
37  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
38  *err = buf;
39  *err += message + "\n";
40 
41  // Add some context to the message.
42  const int kTruncateColumn = 72;
43  if (col > 0 && col < kTruncateColumn) {
44  int len;
45  bool truncated = true;
46  for (len = 0; len < kTruncateColumn; ++len) {
47  if (line_start[len] == 0 || line_start[len] == '\n') {
48  truncated = false;
49  break;
50  }
51  }
52  *err += string(line_start, len);
53  if (truncated)
54  *err += "...";
55  *err += "\n";
56  *err += string(col, ' ');
57  *err += "^ near here";
58  }
59 
60  return false;
61 }
62 
// Convenience constructor over a NUL-terminated buffer; reports errors
// against the placeholder filename "input" (presumably a helper for
// tests/embedders with no real file — confirm against lexer.h).
Lexer::Lexer(const char* input) {
  Start("input", input);
}
66 
// Begin lexing |input|, reporting errors against |filename|.
// The input must stay alive for the lexer's lifetime (only the pointer is
// kept) and must be NUL-terminated: the re2c rules treat "\000" as EOF.
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;        // read cursor starts at the beginning
  last_token_ = NULL;        // no token read yet
}
73 
74 const char* Lexer::TokenName(Token t) {
75  switch (t) {
76  case ERROR: return "lexing error";
77  case BUILD: return "'build'";
78  case COLON: return "':'";
79  case DEFAULT: return "'default'";
80  case EQUALS: return "'='";
81  case IDENT: return "identifier";
82  case INCLUDE: return "'include'";
83  case INDENT: return "indent";
84  case NEWLINE: return "newline";
85  case PIPE2: return "'||'";
86  case PIPE: return "'|'";
87  case POOL: return "'pool'";
88  case RULE: return "'rule'";
89  case SUBNINJA: return "'subninja'";
90  case TEOF: return "eof";
91  }
92  return NULL; // not reached
93 }
94 
95 const char* Lexer::TokenErrorHint(Token expected) {
96  switch (expected) {
97  case COLON:
98  return " ($ also escapes ':')";
99  default:
100  return "";
101  }
102 }
103 
104 string Lexer::DescribeLastError() {
105  if (last_token_) {
106  switch (last_token_[0]) {
107  case '\t':
108  return "tabs are not allowed, use spaces";
109  }
110  }
111  return "lexing error";
112 }
113 
// Rewind the read cursor to the start of the most recently read token,
// so the next ReadToken() returns it again.
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}
117 
119  const char* p = ofs_;
120  const char* q;
121  const char* start;
122  Lexer::Token token;
123  for (;;) {
124  start = p;
125  /*!re2c
126  re2c:define:YYCTYPE = "unsigned char";
127  re2c:define:YYCURSOR = p;
128  re2c:define:YYMARKER = q;
129  re2c:yyfill:enable = 0;
130 
131  nul = "\000";
132  simple_varname = [a-zA-Z0-9_-]+;
133  varname = [a-zA-Z0-9_.-]+;
134 
135  [ ]*"#"[^\000\n]*"\n" { continue; }
136  [ ]*"\r\n" { token = NEWLINE; break; }
137  [ ]*"\n" { token = NEWLINE; break; }
138  [ ]+ { token = INDENT; break; }
139  "build" { token = BUILD; break; }
140  "pool" { token = POOL; break; }
141  "rule" { token = RULE; break; }
142  "default" { token = DEFAULT; break; }
143  "=" { token = EQUALS; break; }
144  ":" { token = COLON; break; }
145  "||" { token = PIPE2; break; }
146  "|" { token = PIPE; break; }
147  "include" { token = INCLUDE; break; }
148  "subninja" { token = SUBNINJA; break; }
149  varname { token = IDENT; break; }
150  nul { token = TEOF; break; }
151  [^] { token = ERROR; break; }
152  */
153  }
154 
155  last_token_ = start;
156  ofs_ = p;
157  if (token != NEWLINE && token != TEOF)
158  EatWhitespace();
159  return token;
160 }
161 
162 bool Lexer::PeekToken(Token token) {
163  Token t = ReadToken();
164  if (t == token)
165  return true;
166  UnreadToken();
167  return false;
168 }
169 
// Skip spaces and "$"-escaped line continuations after a token.  The
// /*!re2c ... */ block is a generator spec; it reuses the YYCURSOR/YYMARKER
// definitions and the `nul` rule declared in ReadToken()'s spec.
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  const char* q;  // YYMARKER backtrack pointer, written by generated code
  for (;;) {
    // Commit the cursor before each match so ofs_ always points at the
    // first non-whitespace character when we break out.
    ofs_ = p;
    /*!re2c
    [ ]+ { continue; }
    "$\r\n" { continue; }
    "$\n" { continue; }
    nul { break; }
    [^] { break; }
    */
  }
}
184 
// Read a simple identifier (rule or variable name) into |out|.
// Returns false (with last_token_ set for error reporting) if the next
// character cannot start a varname.  The /*!re2c ... */ block is a
// generator spec using the `varname` definition from ReadToken()'s spec.
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] {
      last_token_ = start;
      return false;
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  EatWhitespace();
  return true;
}
206 
// Read a $-escaped string into |eval| as a sequence of literal-text and
// variable-reference pieces.  When |path| is true the string is a path:
// it additionally terminates (without consuming) at space, ':', and '|',
// and trailing whitespace is eaten.  Returns false and fills |err| on a
// bad escape, unexpected EOF, or other lexing error.
// The /*!re2c ... */ block is a generator spec reusing definitions from
// ReadToken()'s spec; it must be kept intact for re2c.
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      // Plain run of literal characters.
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    "\r\n" {
      // End of line.  Paths leave the newline for the caller to consume.
      if (path)
        p = start;
      break;
    }
    [ :|\n] {
      if (path) {
        // Path separators terminate the string; rewind so the caller
        // sees the separator token.
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        // In non-path strings these characters are ordinary text.
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      // "$$" is a literal dollar sign.
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      // "$ " is a literal (escaped) space.
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\r\n"[ ]* {
      // Line continuation: swallow the newline and leading indent.
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      // Braced variable reference: strip "${" and "}".
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      // Bare variable reference: strip the "$".
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      // "$:" is a literal (escaped) colon.
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}
const char * str_
Definition: string_piece.h:66
void UnreadToken()
Rewind to the last read Token.
Definition: lexer.cc:115
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:25
static const char * TokenErrorHint(Token expected)
Return a human-readable token hint, used in error messages.
Definition: lexer.cc:96
void EatWhitespace()
Skip past whitespace (called after each read token/ident/etc.).
Definition: lexer.cc:466
bool PeekToken(Token token)
If the next token is token, read it and return true.
Definition: lexer.cc:458
Token ReadToken()
Read a Token from the Token enum.
Definition: lexer.cc:119
bool Error(const std::string &message, std::string *err)
Construct an error message with context.
Definition: lexer.cc:25
std::string DescribeLastError()
If the last token read was an ERROR token, provide more detail when possible, otherwise a generic "lexing error" message.
Definition: lexer.cc:105
Token
Definition: lexer.h:32
static const char * TokenName(Token t)
Return a human-readable form of a token, used in error messages.
Definition: lexer.cc:75
Lexer()
Definition: lexer.h:28
bool ReadIdent(std::string *out)
Read a simple identifier (a rule or variable name).
Definition: lexer.cc:549
bool ReadEvalString(EvalString *eval, bool path, std::string *err)
Read a $-escaped string.
Definition: lexer.cc:618
A tokenized string that contains variable references.
Definition: eval_env.h:34
void Start(StringPiece filename, StringPiece input)
Start parsing some input.
Definition: lexer.cc:68