1991-02-19 08:39:46 -04:00
|
|
|
|
1990-10-14 09:07:46 -03:00
|
|
|
/* Parse tree node interface */
|
|
|
|
|
2000-07-08 20:37:28 -03:00
|
|
|
#ifndef Py_NODE_H
|
|
|
|
#define Py_NODE_H
|
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
1990-10-14 09:07:46 -03:00
|
|
|
/* A parse tree node.
   Fields (see the accessor macros further down in this header):
     n_type                          node type code, read via TYPE()
     n_str                           string attached to the node, read via STR()
     n_lineno, n_col_offset          line / column position of the node
     n_nchildren                     number of children, read via NCH()
     n_child                         array of child nodes, indexed via CHILD()
     n_end_lineno, n_end_col_offset  end position of the node */
typedef struct _node {
|
2017-11-28 11:56:10 -04:00
|
|
|
short n_type;
|
|
|
|
char *n_str;
|
|
|
|
int n_lineno;
|
|
|
|
int n_col_offset;
|
|
|
|
int n_nchildren;
|
|
|
|
struct _node *n_child;
|
bpo-33416: Add end positions to Python AST (GH-11605)
The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points:
* It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
```python
class C:
    pass
pass
```
the end line and end column for the class definition is (2, 8).
* For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing.
An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 07:18:22 -04:00
|
|
|
int n_end_lineno;
|
|
|
|
int n_end_col_offset;
|
1990-10-14 09:07:46 -03:00
|
|
|
} node;
|
|
|
|
|
2002-08-12 04:21:58 -03:00
|
|
|
/* Return a pointer to a new node for the given type code.
   NOTE(review): failure behavior and ownership are not visible in this
   header -- presumably the result is released with PyNode_Free(); confirm
   against the implementation. */
PyAPI_FUNC(node *) PyNode_New(int type);
|
|
|
|
/* Add a child to node n, described by its type code, its string, its
   line/column position and its end line/column position.
   NOTE(review): the meaning of the int result and the ownership of str are
   not visible in this header -- confirm against the implementation. */
PyAPI_FUNC(int) PyNode_AddChild(node *n, int type,
|
bpo-33416: Add end positions to Python AST (GH-11605)
The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points:
* It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
```python
class C:
    pass
pass
```
the end line and end column for the class definition is (2, 8).
* For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing.
An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 07:18:22 -04:00
|
|
|
char *str, int lineno, int col_offset,
|
|
|
|
int end_lineno, int end_col_offset);
|
2002-08-12 04:21:58 -03:00
|
|
|
/* Release node n.
   NOTE(review): whether children and strings are released as well is not
   visible in this header -- confirm against the implementation. */
PyAPI_FUNC(void) PyNode_Free(node *n);
|
2012-08-03 09:28:37 -03:00
|
|
|
#ifndef Py_LIMITED_API
|
2014-11-18 11:30:15 -04:00
|
|
|
/* Private helper, hidden when Py_LIMITED_API is defined: returns a size for
   node n as a Py_ssize_t.
   NOTE(review): presumably the memory footprint in bytes -- confirm what is
   counted against the implementation. */
PyAPI_FUNC(Py_ssize_t) _PyNode_SizeOf(node *n);
|
2012-08-03 09:28:37 -03:00
|
|
|
#endif
|
1990-10-14 09:07:46 -03:00
|
|
|
|
|
|
|
/* Node access functions */
|
2017-11-28 11:56:10 -04:00
|
|
|
/* Number of children of node n (its n_nchildren field). */
#define NCH(n) ((n)->n_nchildren)
|
2015-03-21 04:25:53 -03:00
|
|
|
|
2017-11-28 11:56:10 -04:00
|
|
|
/* Pointer to the i-th entry of n's child array (0-based).
   No bounds check is performed. */
#define CHILD(n, i) (&(n)->n_child[i])
|
|
|
|
/* Pointer to a child of n counted from the end of its child array:
   RCHILD(n, -1) is the last child, RCHILD(n, -2) the one before it.
   The index argument is parenthesized so that an expression argument
   (for example a conditional) is added to NCH(n) as a single operand.
   No bounds check is performed; n is evaluated more than once. */
#define RCHILD(n, i) (CHILD(n, NCH(n) + (i)))
|
|
|
|
/* Type code of node n (its n_type field). */
#define TYPE(n) ((n)->n_type)
|
|
|
|
/* String attached to node n (its n_str field). */
#define STR(n) ((n)->n_str)
|
2012-01-16 18:29:05 -04:00
|
|
|
/* Line number recorded on node n (its n_lineno field). */
#define LINENO(n) ((n)->n_lineno)
|
1990-10-14 09:07:46 -03:00
|
|
|
|
|
|
|
/* Assert that the type of a node is what we expect */
|
2001-10-15 14:23:13 -03:00
|
|
|
/* Expands to a standard assert(), so the check disappears when NDEBUG is
   defined and must not be relied on for run-time validation.  This header
   shows no include of <assert.h>; the including file has to provide it. */
#define REQ(n, type) assert(TYPE(n) == (type))
|
1990-12-20 11:06:42 -04:00
|
|
|
|
2002-08-12 04:21:58 -03:00
|
|
|
/* Takes the node n of a parse tree and returns nothing.
   NOTE(review): presumably lists/prints the tree rooted at n; the output
   destination and format are not visible in this header -- confirm against
   the implementation. */
PyAPI_FUNC(void) PyNode_ListTree(node *n);
|
bpo-33416: Add end positions to Python AST (GH-11605)
The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points:
* It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST. Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
```python
class C:
pass
    pass
```
the end line and end column for the class definition is (2, 8).
* For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing.
An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 07:18:22 -04:00
|
|
|
/* Private helper for end positions: the end position of a non-leaf node is
   not yet known while the next token is being added, so it has to be
   finalized in a separate step.
   NOTE(review): exactly which nodes under n are updated is not visible in
   this header -- confirm against the implementation. */
void _PyNode_FinalizeEndPos(node *n); /* helper also used in parsetok.c */
|
1993-07-28 06:05:47 -03:00
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif /* !Py_NODE_H */
|