Ninja
lexer.in.cc
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "lexer.h"
16 
17 #include <stdio.h>
18 
19 #include "eval_env.h"
20 #include "util.h"
21 
22 bool Lexer::Error(const string& message, string* err) {
23  // Compute line/column.
24  int line = 1;
25  const char* line_start = input_.str_;
26  for (const char* p = input_.str_; p < last_token_; ++p) {
27  if (*p == '\n') {
28  ++line;
29  line_start = p + 1;
30  }
31  }
32  int col = last_token_ ? (int)(last_token_ - line_start) : 0;
33 
34  char buf[1024];
35  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
36  *err = buf;
37  *err += message + "\n";
38 
39  // Add some context to the message.
40  const int kTruncateColumn = 72;
41  if (col > 0 && col < kTruncateColumn) {
42  int len;
43  bool truncated = true;
44  for (len = 0; len < kTruncateColumn; ++len) {
45  if (line_start[len] == 0 || line_start[len] == '\n') {
46  truncated = false;
47  break;
48  }
49  }
50  *err += string(line_start, len);
51  if (truncated)
52  *err += "...";
53  *err += "\n";
54  *err += string(col, ' ');
55  *err += "^ near here";
56  }
57 
58  return false;
59 }
60 
// Construct a lexer over `input`, using the placeholder filename "input"
// for error messages.
Lexer::Lexer(const char* input) {
  Start("input", input);
}
64 
// Begin lexing `input`; `filename` is used only for error messages.
// Note: input_ must be assigned before ofs_, which points into it.
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;  // Current read position.
  last_token_ = NULL;  // No token has been read yet.
}
71 
// Return a human-readable name for token `t`, used in error messages.
const char* Lexer::TokenName(Token t) {
  switch (t) {
  case ERROR: return "lexing error";
  case BUILD: return "'build'";
  case COLON: return "':'";
  case DEFAULT: return "'default'";
  case EQUALS: return "'='";
  case IDENT: return "identifier";
  case INCLUDE: return "'include'";
  case INDENT: return "indent";
  case NEWLINE: return "newline";
  case PIPE2: return "'||'";
  case PIPE: return "'|'";
  case POOL: return "'pool'";
  case RULE: return "'rule'";
  case SUBNINJA: return "'subninja'";
  case TEOF: return "eof";
  }
  return NULL;  // not reached (the switch above covers every Token value)
}
92 
93 const char* Lexer::TokenErrorHint(Token expected) {
94  switch (expected) {
95  case COLON:
96  return " ($ also escapes ':')";
97  default:
98  return "";
99  }
100 }
101 
102 string Lexer::DescribeLastError() {
103  if (last_token_) {
104  switch (last_token_[0]) {
105  case '\t':
106  return "tabs are not allowed, use spaces";
107  }
108  }
109  return "lexing error";
110 }
111 
// Rewind so that the most recently read token will be returned again by
// the next ReadToken() call.
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}
115 
117  const char* p = ofs_;
118  const char* q;
119  const char* start;
120  Lexer::Token token;
121  for (;;) {
122  start = p;
123  /*!re2c
124  re2c:define:YYCTYPE = "unsigned char";
125  re2c:define:YYCURSOR = p;
126  re2c:define:YYMARKER = q;
127  re2c:yyfill:enable = 0;
128 
129  nul = "\000";
130  simple_varname = [a-zA-Z0-9_-]+;
131  varname = [a-zA-Z0-9_.-]+;
132 
133  [ ]*"#"[^\000\n]*"\n" { continue; }
134  [ ]*"\r\n" { token = NEWLINE; break; }
135  [ ]*"\n" { token = NEWLINE; break; }
136  [ ]+ { token = INDENT; break; }
137  "build" { token = BUILD; break; }
138  "pool" { token = POOL; break; }
139  "rule" { token = RULE; break; }
140  "default" { token = DEFAULT; break; }
141  "=" { token = EQUALS; break; }
142  ":" { token = COLON; break; }
143  "||" { token = PIPE2; break; }
144  "|" { token = PIPE; break; }
145  "include" { token = INCLUDE; break; }
146  "subninja" { token = SUBNINJA; break; }
147  varname { token = IDENT; break; }
148  nul { token = TEOF; break; }
149  [^] { token = ERROR; break; }
150  */
151  }
152 
153  last_token_ = start;
154  ofs_ = p;
155  if (token != NEWLINE && token != TEOF)
156  EatWhitespace();
157  return token;
158 }
159 
160 bool Lexer::PeekToken(Token token) {
161  Token t = ReadToken();
162  if (t == token)
163  return true;
164  UnreadToken();
165  return false;
166 }
167 
// Skip past spaces and "$"-escaped line continuations ("$\n" / "$\r\n");
// called after most reads so the next token starts on a real character.
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  const char* q;
  for (;;) {
    ofs_ = p;  // Commit progress before each re2c match.
    /*!re2c
    [ ]+ { continue; }
    "$\r\n" { continue; }
    "$\n" { continue; }
    nul { break; }
    [^] { break; }
    */
  }
}
182 
// Read a simple identifier (a rule or variable name) into *out.
// Returns false without advancing ofs_ if the next characters do not
// form a varname; on success, trailing whitespace is also consumed.
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] {
      last_token_ = start;
      return false;
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  EatWhitespace();
  return true;
}
204 
// Read a $-escaped string into *eval, expanding escapes as literal text
// and recording "$var" / "${var}" as variable references.
//
// When `path` is true the string is a path: it ends at (and does not
// consume) an unescaped space, ':', '|', or newline.  When false it is a
// variable value: it ends at (and consumes) the newline, while spaces,
// ':' and '|' are taken literally.
//
// Returns false (with *err set via Error()) on a bad $-escape, EOF, or
// other lexing error.
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      // A run of plain characters: copy verbatim.
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    "\r\n" {
      // CRLF ends the string; for paths, leave it unconsumed.
      if (path)
        p = start;
      break;
    }
    [ :|\n] {
      if (path) {
        // Terminators for a path; leave them for the caller.
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        // In value context these characters are ordinary text.
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->AddText(StringPiece("$", 1));
      continue;
    }
    "$ " {
      eval->AddText(StringPiece(" ", 1));
      continue;
    }
    "$\r\n"[ ]* {
      // Escaped newline: line continuation, swallowing leading indent.
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      // Braced variable reference; strip "${" and "}".
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      // Bare variable reference; strip the leading "$".
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      eval->AddText(StringPiece(":", 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}
const char * last_token_
Definition: lexer.h:102
const char * str_
Definition: string_piece.h:67
void UnreadToken()
Rewind to the last read Token.
Definition: lexer.cc:113
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:27
static const char * TokenErrorHint(Token expected)
Return a human-readable token hint, used in error messages.
Definition: lexer.cc:94
void EatWhitespace()
Skip past whitespace (called after each read token/ident/etc.).
Definition: lexer.cc:467
bool PeekToken(Token token)
If the next token is token, read it and return true.
Definition: lexer.cc:459
bool Error(const string &message, string *err)
Construct an error message with context.
Definition: lexer.cc:23
StringPiece filename_
Definition: lexer.h:99
StringPiece input_
Definition: lexer.h:100
Token ReadToken()
Read a Token from the Token enum.
Definition: lexer.cc:117
string DescribeLastError()
If the last token read was an ERROR token, provide more info or the empty string. ...
Definition: lexer.cc:103
Token
Definition: lexer.h:32
bool ReadIdent(string *out)
Read a simple identifier (a rule or variable name).
Definition: lexer.cc:551
bool ReadEvalString(EvalString *eval, bool path, string *err)
Read a $-escaped string.
Definition: lexer.cc:621
static const char * TokenName(Token t)
Return a human-readable form of a token, used in error messages.
Definition: lexer.cc:73
Lexer()
Definition: lexer.h:28
A tokenized string that contains variable references.
Definition: eval_env.h:35
const char * ofs_
Definition: lexer.h:101
string AsString() const
Convert the slice into a full-fledged std::string, copying the data into a new string.
Definition: string_piece.h:47
void Start(StringPiece filename, StringPiece input)
Start parsing some input.
Definition: lexer.cc:66