#include "Python.h"
#include "errcode.h"

#include "helpers.h"
#include "../lexer/state.h"

/* Underflow callback for tokenizers reading from an in-memory string.
 *
 * Moves tok->inp forward over one more line of the buffer: up to and
 * including the next '\n', or up to the terminating NUL when no newline
 * is left.
 *
 * Returns 1 when tok->inp was advanced.  Returns 0 and sets tok->done to
 * E_EOF when tok->inp already points at the terminating NUL.
 */
static int
tok_underflow_string(struct tok_state *tok)
{
    char *line_end = strchr(tok->inp, '\n');

    if (line_end == NULL) {
        /* No newline left: either the input is exhausted or the final
           line is not newline-terminated. */
        if (*tok->inp == '\0') {
            tok->done = E_EOF;
            return 0;
        }
        line_end = strchr(tok->inp, '\0');
    }
    else {
        /* Step past the newline so it belongs to the line being exposed. */
        line_end++;
    }

    if (tok->start == NULL) {
        tok->buf = tok->cur;
    }
    tok->line_start = tok->cur;
    /* Macro defined outside this file; presumably updates the line
       bookkeeping on tok -- confirm against its definition. */
    ADVANCE_LINENO();
    tok->inp = line_end;
    return 1;
}

/* Set up tokenizer for UTF-8 string.
 *
 * str, exec_input and preserve_crlf are forwarded unchanged to
 * _PyTokenizer_translate_newlines(); the string it returns becomes the
 * tokenizer's input buffer.
 *
 * Returns the new tokenizer state, or NULL on failure.  On any failure
 * after the state has been created, it is released with
 * _PyTokenizer_Free() before returning.
 */
struct tok_state *
_PyTokenizer_FromUTF8(const char *str, int exec_input, int preserve_crlf)
{
    struct tok_state *tok = _PyTokenizer_tok_new();
    if (tok == NULL) {
        return NULL;
    }

    char *source = _PyTokenizer_translate_newlines(str, exec_input,
                                                   preserve_crlf, tok);
    /* Stored before the NULL check, so the state passed to the cleanup
       path always carries the result of the translation call. */
    tok->input = source;
    if (source == NULL) {
        goto fail;
    }

    tok->decoding_state = STATE_NORMAL;
    tok->enc = NULL;
    tok->str = source;
    /* 5 is the number of characters in "utf-8". */
    tok->encoding = _PyTokenizer_new_string("utf-8", 5, tok);
    if (!tok->encoding) {
        goto fail;
    }

    /* Every cursor starts at the beginning of the translated text;
       tok_underflow_string() advances tok->inp one line per call. */
    tok->buf = source;
    tok->cur = source;
    tok->inp = source;
    tok->end = source;
    tok->underflow = &tok_underflow_string;
    return tok;

fail:
    _PyTokenizer_Free(tok);
    return NULL;
}