aboutsummaryrefslogtreecommitdiff
path: root/py/lexer.h
diff options
context:
space:
mode:
author Damien <damien.p.george@gmail.com> 2013-12-21 18:17:45 +0000
committer Damien <damien.p.george@gmail.com> 2013-12-21 18:17:45 +0000
commit d99b05282d14ceb0163cbcd059aa37bdb415af43 (patch)
tree 978135f9fe83d3c4d5b3c95f84cb104c0092936a /py/lexer.h
parent e2880aa2fdc75298df487df7519d483acb03959c (diff)
Change object representation from 1 big union to individual structs.
This is a big change. Micro Python objects are now allocated as individual structs, with the first element being a pointer to the type information (which is itself an object). This scheme follows CPython. It is much more flexible, not necessarily slower, uses the same heap memory, and allows objects to be allocated statically. Also changes the name prefix from py_ to mp_ (mp for Micro Python).
Diffstat (limited to 'py/lexer.h')
-rw-r--r-- py/lexer.h 262
1 files changed, 128 insertions, 134 deletions
diff --git a/py/lexer.h b/py/lexer.h
index 347237060..f58a38e92 100644
--- a/py/lexer.h
+++ b/py/lexer.h
@@ -1,146 +1,140 @@
-/* lexer.h -- simple tokeniser for Python implementation
+/* lexer.h -- simple tokeniser for Micro Python
+ *
+ * Uses (byte) length instead of null termination.
+ * Tokens are the same - UTF-8 with (byte) length.
*/
-#ifndef INCLUDED_LEXER_H
-#define INCLUDED_LEXER_H
-
-/* uses (byte) length instead of null termination
- * tokens are the same - UTF-8 with (byte) length
- */
-
-typedef enum _py_token_kind_t {
- PY_TOKEN_END, // 0
-
- PY_TOKEN_INVALID,
- PY_TOKEN_DEDENT_MISMATCH,
- PY_TOKEN_LONELY_STRING_OPEN,
-
- PY_TOKEN_NEWLINE, // 4
- PY_TOKEN_INDENT, // 5
- PY_TOKEN_DEDENT, // 6
-
- PY_TOKEN_NAME, // 7
- PY_TOKEN_NUMBER,
- PY_TOKEN_STRING,
- PY_TOKEN_BYTES,
-
- PY_TOKEN_ELLIPSES,
-
- PY_TOKEN_KW_FALSE, // 12
- PY_TOKEN_KW_NONE,
- PY_TOKEN_KW_TRUE,
- PY_TOKEN_KW_AND,
- PY_TOKEN_KW_AS,
- PY_TOKEN_KW_ASSERT,
- PY_TOKEN_KW_BREAK,
- PY_TOKEN_KW_CLASS,
- PY_TOKEN_KW_CONTINUE,
- PY_TOKEN_KW_DEF, // 21
- PY_TOKEN_KW_DEL,
- PY_TOKEN_KW_ELIF,
- PY_TOKEN_KW_ELSE,
- PY_TOKEN_KW_EXCEPT,
- PY_TOKEN_KW_FINALLY,
- PY_TOKEN_KW_FOR,
- PY_TOKEN_KW_FROM,
- PY_TOKEN_KW_GLOBAL,
- PY_TOKEN_KW_IF,
- PY_TOKEN_KW_IMPORT, // 31
- PY_TOKEN_KW_IN,
- PY_TOKEN_KW_IS,
- PY_TOKEN_KW_LAMBDA,
- PY_TOKEN_KW_NONLOCAL,
- PY_TOKEN_KW_NOT,
- PY_TOKEN_KW_OR,
- PY_TOKEN_KW_PASS,
- PY_TOKEN_KW_RAISE,
- PY_TOKEN_KW_RETURN,
- PY_TOKEN_KW_TRY, // 41
- PY_TOKEN_KW_WHILE,
- PY_TOKEN_KW_WITH,
- PY_TOKEN_KW_YIELD,
-
- PY_TOKEN_OP_PLUS, // 45
- PY_TOKEN_OP_MINUS,
- PY_TOKEN_OP_STAR,
- PY_TOKEN_OP_DBL_STAR,
- PY_TOKEN_OP_SLASH,
- PY_TOKEN_OP_DBL_SLASH,
- PY_TOKEN_OP_PERCENT,
- PY_TOKEN_OP_LESS,
- PY_TOKEN_OP_DBL_LESS,
- PY_TOKEN_OP_MORE,
- PY_TOKEN_OP_DBL_MORE, // 55
- PY_TOKEN_OP_AMPERSAND,
- PY_TOKEN_OP_PIPE,
- PY_TOKEN_OP_CARET,
- PY_TOKEN_OP_TILDE,
- PY_TOKEN_OP_LESS_EQUAL,
- PY_TOKEN_OP_MORE_EQUAL,
- PY_TOKEN_OP_DBL_EQUAL,
- PY_TOKEN_OP_NOT_EQUAL,
-
- PY_TOKEN_DEL_PAREN_OPEN, // 64
- PY_TOKEN_DEL_PAREN_CLOSE,
- PY_TOKEN_DEL_BRACKET_OPEN,
- PY_TOKEN_DEL_BRACKET_CLOSE,
- PY_TOKEN_DEL_BRACE_OPEN,
- PY_TOKEN_DEL_BRACE_CLOSE,
- PY_TOKEN_DEL_COMMA,
- PY_TOKEN_DEL_COLON,
- PY_TOKEN_DEL_PERIOD,
- PY_TOKEN_DEL_SEMICOLON,
- PY_TOKEN_DEL_AT, // 74
- PY_TOKEN_DEL_EQUAL,
- PY_TOKEN_DEL_PLUS_EQUAL,
- PY_TOKEN_DEL_MINUS_EQUAL,
- PY_TOKEN_DEL_STAR_EQUAL,
- PY_TOKEN_DEL_SLASH_EQUAL,
- PY_TOKEN_DEL_DBL_SLASH_EQUAL,
- PY_TOKEN_DEL_PERCENT_EQUAL,
- PY_TOKEN_DEL_AMPERSAND_EQUAL,
- PY_TOKEN_DEL_PIPE_EQUAL,
- PY_TOKEN_DEL_CARET_EQUAL, // 84
- PY_TOKEN_DEL_DBL_MORE_EQUAL,
- PY_TOKEN_DEL_DBL_LESS_EQUAL,
- PY_TOKEN_DEL_DBL_STAR_EQUAL,
- PY_TOKEN_DEL_MINUS_MORE,
-} py_token_kind_t;
-
-typedef struct _py_token_t {
+typedef enum _mp_token_kind_t {
+ MP_TOKEN_END, // 0
+
+ MP_TOKEN_INVALID,
+ MP_TOKEN_DEDENT_MISMATCH,
+ MP_TOKEN_LONELY_STRING_OPEN,
+
+ MP_TOKEN_NEWLINE, // 4
+ MP_TOKEN_INDENT, // 5
+ MP_TOKEN_DEDENT, // 6
+
+ MP_TOKEN_NAME, // 7
+ MP_TOKEN_NUMBER,
+ MP_TOKEN_STRING,
+ MP_TOKEN_BYTES,
+
+ MP_TOKEN_ELLIPSES,
+
+ MP_TOKEN_KW_FALSE, // 12
+ MP_TOKEN_KW_NONE,
+ MP_TOKEN_KW_TRUE,
+ MP_TOKEN_KW_AND,
+ MP_TOKEN_KW_AS,
+ MP_TOKEN_KW_ASSERT,
+ MP_TOKEN_KW_BREAK,
+ MP_TOKEN_KW_CLASS,
+ MP_TOKEN_KW_CONTINUE,
+ MP_TOKEN_KW_DEF, // 21
+ MP_TOKEN_KW_DEL,
+ MP_TOKEN_KW_ELIF,
+ MP_TOKEN_KW_ELSE,
+ MP_TOKEN_KW_EXCEPT,
+ MP_TOKEN_KW_FINALLY,
+ MP_TOKEN_KW_FOR,
+ MP_TOKEN_KW_FROM,
+ MP_TOKEN_KW_GLOBAL,
+ MP_TOKEN_KW_IF,
+ MP_TOKEN_KW_IMPORT, // 31
+ MP_TOKEN_KW_IN,
+ MP_TOKEN_KW_IS,
+ MP_TOKEN_KW_LAMBDA,
+ MP_TOKEN_KW_NONLOCAL,
+ MP_TOKEN_KW_NOT,
+ MP_TOKEN_KW_OR,
+ MP_TOKEN_KW_PASS,
+ MP_TOKEN_KW_RAISE,
+ MP_TOKEN_KW_RETURN,
+ MP_TOKEN_KW_TRY, // 41
+ MP_TOKEN_KW_WHILE,
+ MP_TOKEN_KW_WITH,
+ MP_TOKEN_KW_YIELD,
+
+ MP_TOKEN_OP_PLUS, // 45
+ MP_TOKEN_OP_MINUS,
+ MP_TOKEN_OP_STAR,
+ MP_TOKEN_OP_DBL_STAR,
+ MP_TOKEN_OP_SLASH,
+ MP_TOKEN_OP_DBL_SLASH,
+ MP_TOKEN_OP_PERCENT,
+ MP_TOKEN_OP_LESS,
+ MP_TOKEN_OP_DBL_LESS,
+ MP_TOKEN_OP_MORE,
+ MP_TOKEN_OP_DBL_MORE, // 55
+ MP_TOKEN_OP_AMPERSAND,
+ MP_TOKEN_OP_PIPE,
+ MP_TOKEN_OP_CARET,
+ MP_TOKEN_OP_TILDE,
+ MP_TOKEN_OP_LESS_EQUAL,
+ MP_TOKEN_OP_MORE_EQUAL,
+ MP_TOKEN_OP_DBL_EQUAL,
+ MP_TOKEN_OP_NOT_EQUAL,
+
+ MP_TOKEN_DEL_PAREN_OPEN, // 64
+ MP_TOKEN_DEL_PAREN_CLOSE,
+ MP_TOKEN_DEL_BRACKET_OPEN,
+ MP_TOKEN_DEL_BRACKET_CLOSE,
+ MP_TOKEN_DEL_BRACE_OPEN,
+ MP_TOKEN_DEL_BRACE_CLOSE,
+ MP_TOKEN_DEL_COMMA,
+ MP_TOKEN_DEL_COLON,
+ MP_TOKEN_DEL_PERIOD,
+ MP_TOKEN_DEL_SEMICOLON,
+ MP_TOKEN_DEL_AT, // 74
+ MP_TOKEN_DEL_EQUAL,
+ MP_TOKEN_DEL_PLUS_EQUAL,
+ MP_TOKEN_DEL_MINUS_EQUAL,
+ MP_TOKEN_DEL_STAR_EQUAL,
+ MP_TOKEN_DEL_SLASH_EQUAL,
+ MP_TOKEN_DEL_DBL_SLASH_EQUAL,
+ MP_TOKEN_DEL_PERCENT_EQUAL,
+ MP_TOKEN_DEL_AMPERSAND_EQUAL,
+ MP_TOKEN_DEL_PIPE_EQUAL,
+ MP_TOKEN_DEL_CARET_EQUAL, // 84
+ MP_TOKEN_DEL_DBL_MORE_EQUAL,
+ MP_TOKEN_DEL_DBL_LESS_EQUAL,
+ MP_TOKEN_DEL_DBL_STAR_EQUAL,
+ MP_TOKEN_DEL_MINUS_MORE,
+} mp_token_kind_t;
+
+typedef struct _mp_token_t {
const char *src_name; // name of source
uint src_line; // source line
uint src_column; // source column
- py_token_kind_t kind; // kind of token
+ mp_token_kind_t kind; // kind of token
const char *str; // string of token (valid only while this token is current token)
uint len; // (byte) length of string of token
-} py_token_t;
+} mp_token_t;
// the next-char function must return the next character in the stream
-// it must return PY_LEXER_CHAR_EOF if end of stream
-// it can be called again after returning PY_LEXER_CHAR_EOF, and in that case must return PY_LEXER_CHAR_EOF
-#define PY_LEXER_CHAR_EOF (-1)
-typedef unichar (*py_lexer_stream_next_char_t)(void*);
-typedef void (*py_lexer_stream_close_t)(void*);
-
-typedef struct _py_lexer_t py_lexer_t;
-
-void py_token_show(const py_token_t *tok);
-void py_token_show_error_prefix(const py_token_t *tok);
-bool py_token_show_error(const py_token_t *tok, const char *msg);
-
-py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close);
-void py_lexer_free(py_lexer_t *lex);
-void py_lexer_to_next(py_lexer_t *lex);
-const py_token_t *py_lexer_cur(const py_lexer_t *lex);
-bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
+// it must return MP_LEXER_CHAR_EOF if end of stream
+// it can be called again after returning MP_LEXER_CHAR_EOF, and in that case must return MP_LEXER_CHAR_EOF
+#define MP_LEXER_CHAR_EOF (-1)
+typedef unichar (*mp_lexer_stream_next_char_t)(void*);
+typedef void (*mp_lexer_stream_close_t)(void*);
+
+typedef struct _mp_lexer_t mp_lexer_t;
+
+void mp_token_show(const mp_token_t *tok);
+void mp_token_show_error_prefix(const mp_token_t *tok);
+bool mp_token_show_error(const mp_token_t *tok, const char *msg);
+
+mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
+void mp_lexer_free(mp_lexer_t *lex);
+void mp_lexer_to_next(mp_lexer_t *lex);
+const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex);
+bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
/* unused
-bool py_lexer_is_str(py_lexer_t *lex, const char *str);
-bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
-bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
+bool mp_lexer_is_str(mp_lexer_t *lex, const char *str);
+bool mp_lexer_opt_kind(mp_lexer_t *lex, mp_token_kind_t kind);
+bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str);
*/
-bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
-bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg);
-
-#endif /* INCLUDED_LEXER_H */
+bool mp_lexer_show_error(mp_lexer_t *lex, const char *msg);
+bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);