aboutsummaryrefslogtreecommitdiff
path: root/py/lexer.h
diff options
context:
space:
mode:
authorDamien <damien.p.george@gmail.com>2013-10-04 19:53:11 +0100
committerDamien <damien.p.george@gmail.com>2013-10-04 19:53:11 +0100
commit429d71943d6b94c7dc3c40a39ff1a09742c77dc2 (patch)
treeb0fd643076254656c358806d7e47c18f796a54f3 /py/lexer.h
Initial commit.
Diffstat (limited to 'py/lexer.h')
-rw-r--r--py/lexer.h141
1 file changed, 141 insertions, 0 deletions
diff --git a/py/lexer.h b/py/lexer.h
new file mode 100644
index 000000000..32ab48a08
--- /dev/null
+++ b/py/lexer.h
@@ -0,0 +1,141 @@
+/* lexer.h -- simple tokeniser for Python implementation
+ */
+
+#ifndef INCLUDED_LEXER_H
+#define INCLUDED_LEXER_H
+
+/* uses (byte) length instead of null termination
+ * tokens are the same - UTF-8 with (byte) length
+ */
+
+// Kinds of token produced by the lexer.  Values are contiguous from 0,
+// in declaration order; the "// N" comments are checkpoint values
+// (verified against the declaration order).  Do not reorder: downstream
+// code may rely on the numeric values and on the keyword run being
+// contiguous and alphabetical.
+typedef enum _py_token_kind_t {
+    PY_TOKEN_END, // 0
+
+    // error tokens
+    PY_TOKEN_INVALID,
+    PY_TOKEN_LONELY_STRING_OPEN, // presumably an unterminated string literal -- confirm in lexer.c
+
+    // whitespace-derived tokens (Python is indentation sensitive)
+    PY_TOKEN_NEWLINE, // 3
+    PY_TOKEN_INDENT, // 4
+    PY_TOKEN_DEDENT, // 5
+
+    // literals and identifiers
+    PY_TOKEN_NAME, // 6
+    PY_TOKEN_NUMBER,
+    PY_TOKEN_STRING,
+    PY_TOKEN_BYTES,
+
+    PY_TOKEN_ELLIPSES, // the "..." token (NOTE(review): standard spelling is ELLIPSIS)
+
+    // keywords, in alphabetical order (capitalised constants first)
+    PY_TOKEN_KW_FALSE, // 11
+    PY_TOKEN_KW_NONE,
+    PY_TOKEN_KW_TRUE,
+    PY_TOKEN_KW_AND,
+    PY_TOKEN_KW_AS,
+    PY_TOKEN_KW_ASSERT,
+    PY_TOKEN_KW_BREAK,
+    PY_TOKEN_KW_CLASS,
+    PY_TOKEN_KW_CONTINUE,
+    PY_TOKEN_KW_DEF, // 20
+    PY_TOKEN_KW_DEL,
+    PY_TOKEN_KW_ELIF,
+    PY_TOKEN_KW_ELSE,
+    PY_TOKEN_KW_EXCEPT,
+    PY_TOKEN_KW_FINALLY,
+    PY_TOKEN_KW_FOR,
+    PY_TOKEN_KW_FROM,
+    PY_TOKEN_KW_GLOBAL,
+    PY_TOKEN_KW_IF,
+    PY_TOKEN_KW_IMPORT, // 30
+    PY_TOKEN_KW_IN,
+    PY_TOKEN_KW_IS,
+    PY_TOKEN_KW_LAMBDA,
+    PY_TOKEN_KW_NONLOCAL,
+    PY_TOKEN_KW_NOT,
+    PY_TOKEN_KW_OR,
+    PY_TOKEN_KW_PASS,
+    PY_TOKEN_KW_RAISE,
+    PY_TOKEN_KW_RETURN,
+    PY_TOKEN_KW_TRY, // 40
+    PY_TOKEN_KW_WHILE,
+    PY_TOKEN_KW_WITH,
+    PY_TOKEN_KW_YIELD,
+
+    // operators (can appear in expressions)
+    PY_TOKEN_OP_PLUS, // 44
+    PY_TOKEN_OP_MINUS,
+    PY_TOKEN_OP_STAR,
+    PY_TOKEN_OP_DBL_STAR, // **
+    PY_TOKEN_OP_SLASH,
+    PY_TOKEN_OP_DBL_SLASH, // //
+    PY_TOKEN_OP_PERCENT,
+    PY_TOKEN_OP_LESS,
+    PY_TOKEN_OP_DBL_LESS, // <<
+    PY_TOKEN_OP_MORE,
+    PY_TOKEN_OP_DBL_MORE, // 54; >>
+    PY_TOKEN_OP_AMPERSAND,
+    PY_TOKEN_OP_PIPE,
+    PY_TOKEN_OP_CARET,
+    PY_TOKEN_OP_TILDE,
+    PY_TOKEN_OP_LESS_EQUAL,
+    PY_TOKEN_OP_MORE_EQUAL,
+    PY_TOKEN_OP_DBL_EQUAL, // ==
+    PY_TOKEN_OP_NOT_EQUAL, // !=
+
+    // delimiters (brackets, punctuation, and augmented assignment)
+    PY_TOKEN_DEL_PAREN_OPEN, // 63
+    PY_TOKEN_DEL_PAREN_CLOSE,
+    PY_TOKEN_DEL_BRACKET_OPEN,
+    PY_TOKEN_DEL_BRACKET_CLOSE,
+    PY_TOKEN_DEL_BRACE_OPEN,
+    PY_TOKEN_DEL_BRACE_CLOSE,
+    PY_TOKEN_DEL_COMMA,
+    PY_TOKEN_DEL_COLON,
+    PY_TOKEN_DEL_PERIOD,
+    PY_TOKEN_DEL_SEMICOLON,
+    PY_TOKEN_DEL_AT, // 73; decorator '@'
+    PY_TOKEN_DEL_EQUAL,
+    PY_TOKEN_DEL_PLUS_EQUAL,
+    PY_TOKEN_DEL_MINUS_EQUAL,
+    PY_TOKEN_DEL_STAR_EQUAL,
+    PY_TOKEN_DEL_SLASH_EQUAL,
+    PY_TOKEN_DEL_DBL_SLASH_EQUAL, // //=
+    PY_TOKEN_DEL_PERCENT_EQUAL,
+    PY_TOKEN_DEL_AMPERSAND_EQUAL,
+    PY_TOKEN_DEL_PIPE_EQUAL,
+    PY_TOKEN_DEL_CARET_EQUAL, // 83
+    PY_TOKEN_DEL_DBL_MORE_EQUAL, // >>=
+    PY_TOKEN_DEL_DBL_LESS_EQUAL, // <<=
+    PY_TOKEN_DEL_DBL_STAR_EQUAL, // **=
+    PY_TOKEN_DEL_MINUS_MORE, // '->' (function return annotation)
+} py_token_kind_t;
+
+// A single lexed token: its kind, its location in the source, and the
+// slice of source text it covers.  Strings are NOT null-terminated --
+// they are (pointer, byte length) pairs (see file header comment).
+// NOTE(review): 'str' presumably points into the lexer's source buffer,
+// so its lifetime is tied to the lexer -- confirm in lexer.c.
+typedef struct _py_token_t {
+    const char *src_name; // (file) name of source
+    uint src_line; // actual source line
+    uint src_column; // actual source column
+
+    py_token_kind_t kind; // kind of token
+    uint cont_line; // token belongs to this line in a continued line
+    const char *str; // string of token
+    uint len; // (byte) length of string of token
+} py_token_t;
+
+// Opaque lexer handle; the struct definition is private to lexer.c.
+// NOTE(review): this header uses 'uint' and 'bool' but includes nothing --
+// it relies on the includer providing them (e.g. a project misc/config
+// header) before this file.  TODO: confirm the intended include order.
+typedef struct _py_lexer_t py_lexer_t;
+
+// Debug/diagnostic output for a token.  Exact format is defined in lexer.c.
+void py_token_show(const py_token_t *tok);
+// Print the "file:line:" style location prefix for an error message --
+// presumably built from tok->src_name/src_line; confirm in lexer.c.
+void py_token_show_error_prefix(const py_token_t *tok);
+// Report msg as an error at tok's location; return value semantics are
+// defined in lexer.c (NOTE(review): likely always false, for use as
+// 'return py_token_show_error(...)' -- confirm).
+bool py_token_show_error(const py_token_t *tok, const char *msg);
+
+// Constructors: returns NULL semantics on failure are defined in lexer.c.
+py_lexer_t *py_lexer_from_file(const char *filename);
+// Lex from an in-memory buffer of len bytes (not null-terminated);
+// src_name is used for error reporting.  If free_str, the lexer
+// presumably takes ownership of str and frees it -- confirm in lexer.c.
+py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str);
+void py_lexer_free(py_lexer_t *lex);
+// Advance to the next token; inspect it with py_lexer_cur/py_lexer_is_kind.
+void py_lexer_to_next(py_lexer_t *lex);
+// Current token; pointer presumably remains valid only until the next
+// py_lexer_to_next call -- confirm in lexer.c.
+const py_token_t *py_lexer_cur(const py_lexer_t *lex);
+bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
+/* unused
+bool py_lexer_is_str(py_lexer_t *lex, const char *str);
+bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_is_next_str(py_lexer_t *lex, const char *str);
+bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
+bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
+*/
+// Report msg as an error at the current token's location.
+bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
+
+#endif /* INCLUDED_LEXER_H */