1.1 --- a/compiler/transformer.py Sun Jan 08 00:27:02 2017 +0100 1.2 +++ b/compiler/transformer.py Sun Jan 08 20:20:39 2017 +0100 1.3 @@ -26,8 +26,8 @@ 1.4 # and replace OWNER, ORGANIZATION, and YEAR as appropriate. 1.5 1.6 from compiler.ast import * 1.7 -import parser 1.8 -import symbol 1.9 +import pyparser.pyparse as parser 1.10 +from pyparser.pygram import syms as symbol 1.11 import token 1.12 1.13 class WalkerError(StandardError):
2.1 --- a/docs/COPYING.txt Sun Jan 08 00:27:02 2017 +0100 2.2 +++ b/docs/COPYING.txt Sun Jan 08 20:20:39 2017 +0100 2.3 @@ -16,3 +16,25 @@ 2.4 2.5 You should have received a copy of the GNU General Public License along with 2.6 this program. If not, see <http://www.gnu.org/licenses/>. 2.7 + 2.8 +Licence Details for compiler 2.9 +---------------------------- 2.10 + 2.11 +See LICENCE-Python.txt for the licensing details applying to the compiler 2.12 +package. 2.13 + 2.14 +The compiler package has been modified to only provide essential abstract 2.15 +syntax tree support for Lichen. The following applies to these modifications: 2.16 + 2.17 +Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 2.18 + 2.19 +Licence Details for pyparser 2.20 +---------------------------- 2.21 + 2.22 +See LICENSE-PyPy.txt for the licensing details applying to the pyparser 2.23 +package. 2.24 + 2.25 +The pyparser package has been modified to work with the modified compiler 2.26 +package. The following applies to these modifications: 2.27 + 2.28 +Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/docs/LICENCE-Python.txt Sun Jan 08 20:20:39 2017 +0100 3.3 @@ -0,0 +1,254 @@ 3.4 +A. HISTORY OF THE SOFTWARE 3.5 +========================== 3.6 + 3.7 +Python was created in the early 1990s by Guido van Rossum at Stichting 3.8 +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands 3.9 +as a successor of a language called ABC. Guido remains Python's 3.10 +principal author, although it includes many contributions from others. 3.11 + 3.12 +In 1995, Guido continued his work on Python at the Corporation for 3.13 +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) 3.14 +in Reston, Virginia where he released several versions of the 3.15 +software. 3.16 + 3.17 +In May 2000, Guido and the Python core development team moved to 3.18 +BeOpen.com to form the BeOpen PythonLabs team. In October of the same 3.19 +year, the PythonLabs team moved to Digital Creations (now Zope 3.20 +Corporation, see http://www.zope.com). In 2001, the Python Software 3.21 +Foundation (PSF, see http://www.python.org/psf/) was formed, a 3.22 +non-profit organization created specifically to own Python-related 3.23 +Intellectual Property. Zope Corporation is a sponsoring member of 3.24 +the PSF. 3.25 + 3.26 +All Python releases are Open Source (see http://www.opensource.org for 3.27 +the Open Source Definition). Historically, most, but not all, Python 3.28 +releases have also been GPL-compatible; the table below summarizes 3.29 +the various releases. 3.30 + 3.31 + Release Derived Year Owner GPL- 3.32 + from compatible? (1) 3.33 + 3.34 + 0.9.0 thru 1.2 1991-1995 CWI yes 3.35 + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes 3.36 + 1.6 1.5.2 2000 CNRI no 3.37 + 2.0 1.6 2000 BeOpen.com no 3.38 + 1.6.1 1.6 2001 CNRI yes (2) 3.39 + 2.1 2.0+1.6.1 2001 PSF no 3.40 + 2.0.1 2.0+1.6.1 2001 PSF yes 3.41 + 2.1.1 2.1+2.0.1 2001 PSF yes 3.42 + 2.1.2 2.1.1 2002 PSF yes 3.43 + 2.1.3 2.1.2 2002 PSF yes 3.44 + 2.2 and above 2.1.1 2001-now PSF yes 3.45 + 3.46 +Footnotes: 3.47 + 3.48 +(1) GPL-compatible doesn't mean that we're distributing Python under 3.49 + the GPL. All Python licenses, unlike the GPL, let you distribute 3.50 + a modified version without making your changes open source. The 3.51 + GPL-compatible licenses make it possible to combine Python with 3.52 + other software that is released under the GPL; the others don't. 3.53 + 3.54 +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, 3.55 + because its license has a choice of law clause. According to 3.56 + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 3.57 + is "not incompatible" with the GPL. 3.58 + 3.59 +Thanks to the many outside volunteers who have worked under Guido's 3.60 +direction to make these releases possible. 3.61 + 3.62 + 3.63 +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON 3.64 +=============================================================== 3.65 + 3.66 +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 3.67 +-------------------------------------------- 3.68 + 3.69 +1. This LICENSE AGREEMENT is between the Python Software Foundation 3.70 +("PSF"), and the Individual or Organization ("Licensee") accessing and 3.71 +otherwise using this software ("Python") in source or binary form and 3.72 +its associated documentation. 3.73 + 3.74 +2. 
Subject to the terms and conditions of this License Agreement, PSF hereby 3.75 +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, 3.76 +analyze, test, perform and/or display publicly, prepare derivative works, 3.77 +distribute, and otherwise use Python alone or in any derivative version, 3.78 +provided, however, that PSF's License Agreement and PSF's notice of copyright, 3.79 +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 3.80 +2011, 2012, 2013, 2014 Python Software Foundation; All Rights Reserved" are retained 3.81 +in Python alone or in any derivative version prepared by Licensee. 3.82 + 3.83 +3. In the event Licensee prepares a derivative work that is based on 3.84 +or incorporates Python or any part thereof, and wants to make 3.85 +the derivative work available to others as provided herein, then 3.86 +Licensee hereby agrees to include in any such work a brief summary of 3.87 +the changes made to Python. 3.88 + 3.89 +4. PSF is making Python available to Licensee on an "AS IS" 3.90 +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 3.91 +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND 3.92 +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 3.93 +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT 3.94 +INFRINGE ANY THIRD PARTY RIGHTS. 3.95 + 3.96 +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.97 +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 3.98 +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, 3.99 +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 3.100 + 3.101 +6. This License Agreement will automatically terminate upon a material 3.102 +breach of its terms and conditions. 3.103 + 3.104 +7. Nothing in this License Agreement shall be deemed to create any 3.105 +relationship of agency, partnership, or joint venture between PSF and 3.106 +Licensee. This License Agreement does not grant permission to use PSF 3.107 +trademarks or trade name in a trademark sense to endorse or promote 3.108 +products or services of Licensee, or any third party. 3.109 + 3.110 +8. By copying, installing or otherwise using Python, Licensee 3.111 +agrees to be bound by the terms and conditions of this License 3.112 +Agreement. 3.113 + 3.114 + 3.115 +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 3.116 +------------------------------------------- 3.117 + 3.118 +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 3.119 + 3.120 +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an 3.121 +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the 3.122 +Individual or Organization ("Licensee") accessing and otherwise using 3.123 +this software in source or binary form and its associated 3.124 +documentation ("the Software"). 3.125 + 3.126 +2. Subject to the terms and conditions of this BeOpen Python License 3.127 +Agreement, BeOpen hereby grants Licensee a non-exclusive, 3.128 +royalty-free, world-wide license to reproduce, analyze, test, perform 3.129 +and/or display publicly, prepare derivative works, distribute, and 3.130 +otherwise use the Software alone or in any derivative version, 3.131 +provided, however, that the BeOpen Python License is retained in the 3.132 +Software, alone or in any derivative version prepared by Licensee. 3.133 + 3.134 +3. BeOpen is making the Software available to Licensee on an "AS IS" 3.135 +basis. 
BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 3.136 +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND 3.137 +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 3.138 +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT 3.139 +INFRINGE ANY THIRD PARTY RIGHTS. 3.140 + 3.141 +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE 3.142 +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS 3.143 +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY 3.144 +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 3.145 + 3.146 +5. This License Agreement will automatically terminate upon a material 3.147 +breach of its terms and conditions. 3.148 + 3.149 +6. This License Agreement shall be governed by and interpreted in all 3.150 +respects by the law of the State of California, excluding conflict of 3.151 +law provisions. Nothing in this License Agreement shall be deemed to 3.152 +create any relationship of agency, partnership, or joint venture 3.153 +between BeOpen and Licensee. This License Agreement does not grant 3.154 +permission to use BeOpen trademarks or trade names in a trademark 3.155 +sense to endorse or promote products or services of Licensee, or any 3.156 +third party. As an exception, the "BeOpen Python" logos available at 3.157 +http://www.pythonlabs.com/logos.html may be used according to the 3.158 +permissions granted on that web page. 3.159 + 3.160 +7. By copying, installing or otherwise using the software, Licensee 3.161 +agrees to be bound by the terms and conditions of this License 3.162 +Agreement. 3.163 + 3.164 + 3.165 +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 3.166 +--------------------------------------- 3.167 + 3.168 +1. This LICENSE AGREEMENT is between the Corporation for National 3.169 +Research Initiatives, having an office at 1895 Preston White Drive, 3.170 +Reston, VA 20191 ("CNRI"), and the Individual or Organization 3.171 +("Licensee") accessing and otherwise using Python 1.6.1 software in 3.172 +source or binary form and its associated documentation. 3.173 + 3.174 +2. Subject to the terms and conditions of this License Agreement, CNRI 3.175 +hereby grants Licensee a nonexclusive, royalty-free, world-wide 3.176 +license to reproduce, analyze, test, perform and/or display publicly, 3.177 +prepare derivative works, distribute, and otherwise use Python 1.6.1 3.178 +alone or in any derivative version, provided, however, that CNRI's 3.179 +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) 3.180 +1995-2001 Corporation for National Research Initiatives; All Rights 3.181 +Reserved" are retained in Python 1.6.1 alone or in any derivative 3.182 +version prepared by Licensee. Alternately, in lieu of CNRI's License 3.183 +Agreement, Licensee may substitute the following text (omitting the 3.184 +quotes): "Python 1.6.1 is made available subject to the terms and 3.185 +conditions in CNRI's License Agreement. This Agreement together with 3.186 +Python 1.6.1 may be located on the Internet using the following 3.187 +unique, persistent identifier (known as a handle): 1895.22/1013. This 3.188 +Agreement may also be obtained from a proxy server on the Internet 3.189 +using the following URL: http://hdl.handle.net/1895.22/1013". 3.190 + 3.191 +3. 
In the event Licensee prepares a derivative work that is based on 3.192 +or incorporates Python 1.6.1 or any part thereof, and wants to make 3.193 +the derivative work available to others as provided herein, then 3.194 +Licensee hereby agrees to include in any such work a brief summary of 3.195 +the changes made to Python 1.6.1. 3.196 + 3.197 +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" 3.198 +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR 3.199 +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND 3.200 +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS 3.201 +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT 3.202 +INFRINGE ANY THIRD PARTY RIGHTS. 3.203 + 3.204 +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 3.205 +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS 3.206 +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, 3.207 +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 3.208 + 3.209 +6. This License Agreement will automatically terminate upon a material 3.210 +breach of its terms and conditions. 3.211 + 3.212 +7. This License Agreement shall be governed by the federal 3.213 +intellectual property law of the United States, including without 3.214 +limitation the federal copyright law, and, to the extent such 3.215 +U.S. federal law does not apply, by the law of the Commonwealth of 3.216 +Virginia, excluding Virginia's conflict of law provisions. 3.217 +Notwithstanding the foregoing, with regard to derivative works based 3.218 +on Python 1.6.1 that incorporate non-separable material that was 3.219 +previously distributed under the GNU General Public License (GPL), the 3.220 +law of the Commonwealth of Virginia shall govern this License 3.221 +Agreement only as to issues arising under or with respect to 3.222 +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this 3.223 +License Agreement shall be deemed to create any relationship of 3.224 +agency, partnership, or joint venture between CNRI and Licensee. This 3.225 +License Agreement does not grant permission to use CNRI trademarks or 3.226 +trade name in a trademark sense to endorse or promote products or 3.227 +services of Licensee, or any third party. 3.228 + 3.229 +8. By clicking on the "ACCEPT" button where indicated, or by copying, 3.230 +installing or otherwise using Python 1.6.1, Licensee agrees to be 3.231 +bound by the terms and conditions of this License Agreement. 3.232 + 3.233 + ACCEPT 3.234 + 3.235 + 3.236 +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 3.237 +-------------------------------------------------- 3.238 + 3.239 +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, 3.240 +The Netherlands. All rights reserved. 3.241 + 3.242 +Permission to use, copy, modify, and distribute this software and its 3.243 +documentation for any purpose and without fee is hereby granted, 3.244 +provided that the above copyright notice appear in all copies and that 3.245 +both that copyright notice and this permission notice appear in 3.246 +supporting documentation, and that the name of Stichting Mathematisch 3.247 +Centrum or CWI not be used in advertising or publicity pertaining to 3.248 +distribution of the software without specific, written prior 3.249 +permission. 
3.250 + 3.251 +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO 3.252 +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 3.253 +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE 3.254 +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 3.255 +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 3.256 +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 3.257 +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/docs/LICENSE-PyPy.txt Sun Jan 08 20:20:39 2017 +0100 4.3 @@ -0,0 +1,479 @@ 4.4 +License 4.5 +======= 4.6 + 4.7 +Except when otherwise stated (look for LICENSE files in directories or 4.8 +information at the beginning of each file) all software and documentation in 4.9 +the 'rpython', 'pypy', 'ctype_configure', 'dotviewer', 'demo', 'lib_pypy', 4.10 +'py', and '_pytest' directories is licensed as follows: 4.11 + 4.12 + The MIT License 4.13 + 4.14 + Permission is hereby granted, free of charge, to any person 4.15 + obtaining a copy of this software and associated documentation 4.16 + files (the "Software"), to deal in the Software without 4.17 + restriction, including without limitation the rights to use, 4.18 + copy, modify, merge, publish, distribute, sublicense, and/or 4.19 + sell copies of the Software, and to permit persons to whom the 4.20 + Software is furnished to do so, subject to the following conditions: 4.21 + 4.22 + The above copyright notice and this permission notice shall be included 4.23 + in all copies or substantial portions of the Software. 4.24 + 4.25 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 4.26 + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 4.27 + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 4.28 + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 4.29 + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 4.30 + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 4.31 + DEALINGS IN THE SOFTWARE. 4.32 + 4.33 + 4.34 +PyPy Copyright holders 2003-2017 4.35 +----------------------------------- 4.36 + 4.37 +Except when otherwise stated (look for LICENSE files or information at 4.38 +the beginning of each file) the files in the 'pypy' directory are each 4.39 +copyrighted by one or more of the following people and organizations: 4.40 + 4.41 + Armin Rigo 4.42 + Maciej Fijalkowski 4.43 + Carl Friedrich Bolz 4.44 + Amaury Forgeot d'Arc 4.45 + Antonio Cuni 4.46 + Samuele Pedroni 4.47 + Matti Picus 4.48 + Alex Gaynor 4.49 + Philip Jenvey 4.50 + Ronan Lamy 4.51 + Brian Kearns 4.52 + Richard Plangger 4.53 + Michael Hudson 4.54 + Manuel Jacob 4.55 + David Schneider 4.56 + Holger Krekel 4.57 + Christian Tismer 4.58 + Hakan Ardo 4.59 + Benjamin Peterson 4.60 + Anders Chrigstrom 4.61 + Eric van Riet Paap 4.62 + Wim Lavrijsen 4.63 + Richard Emslie 4.64 + Alexander Schremmer 4.65 + Dan Villiom Podlaski Christiansen 4.66 + Remi Meier 4.67 + Lukas Diekmann 4.68 + Sven Hager 4.69 + Anders Lehmann 4.70 + Aurelien Campeas 4.71 + Niklaus Haldimann 4.72 + Camillo Bruni 4.73 + Laura Creighton 4.74 + Romain Guillebert 4.75 + Toon Verwaest 4.76 + Leonardo Santagada 4.77 + Seo Sanghyeon 4.78 + Ronny Pfannschmidt 4.79 + Justin Peel 4.80 + Raffael Tfirst 4.81 + David Edelsohn 4.82 + Anders Hammarquist 4.83 + Jakub Gustak 4.84 + Gregor Wegberg 4.85 + Guido Wesdorp 4.86 + Lawrence Oluyede 4.87 + Bartosz Skowron 4.88 + Daniel Roberts 4.89 + Niko Matsakis 4.90 + Adrien Di Mascio 4.91 + Alexander Hesse 4.92 + Ludovic Aubry 4.93 + Jacob Hallen 4.94 + Jason Creighton 4.95 + Mark Young 4.96 + Alex Martelli 4.97 + Spenser Bauman 4.98 + Michal Bendowski 4.99 + stian 4.100 + Jan de Mooij 4.101 + Tyler Wade 4.102 + Vincent Legoll 4.103 + Michael Foord 4.104 + Stephan Diehl 4.105 + Stefan Schwarzer 4.106 + Valentino Volonghi 4.107 + Tomek Meka 4.108 + Stefano Rivera 4.109 + Patrick Maupin 4.110 + Devin 
Jeanpierre 4.111 + Bob Ippolito 4.112 + Bruno Gola 4.113 + David Malcolm 4.114 + Jean-Paul Calderone 4.115 + Timo Paulssen 4.116 + Edd Barrett 4.117 + Squeaky 4.118 + Marius Gedminas 4.119 + Alexandre Fayolle 4.120 + Simon Burton 4.121 + Martin Matusiak 4.122 + Nicolas Truessel 4.123 + Konstantin Lopuhin 4.124 + Wenzhu Man 4.125 + John Witulski 4.126 + Laurence Tratt 4.127 + Ivan Sichmann Freitas 4.128 + Greg Price 4.129 + Dario Bertini 4.130 + Mark Pearse 4.131 + Simon Cross 4.132 + Jeremy Thurgood 4.133 + Andreas Stührk 4.134 + Tobias Pape 4.135 + Jean-Philippe St. Pierre 4.136 + Guido van Rossum 4.137 + Pavel Vinogradov 4.138 + Paweł Piotr Przeradowski 4.139 + Paul deGrandis 4.140 + Ilya Osadchiy 4.141 + marky1991 4.142 + Tobias Oberstein 4.143 + Adrian Kuhn 4.144 + Boris Feigin 4.145 + tav 4.146 + Taavi Burns 4.147 + Georg Brandl 4.148 + Bert Freudenberg 4.149 + Stian Andreassen 4.150 + Wanja Saatkamp 4.151 + Gerald Klix 4.152 + Mike Blume 4.153 + Oscar Nierstrasz 4.154 + Stefan H. Muller 4.155 + Rami Chowdhury 4.156 + Eugene Oden 4.157 + Henry Mason 4.158 + Vasily Kuznetsov 4.159 + Preston Timmons 4.160 + David Ripton 4.161 + Jeff Terrace 4.162 + Tim Felgentreff 4.163 + Dusty Phillips 4.164 + Lukas Renggli 4.165 + Guenter Jantzen 4.166 + William Leslie 4.167 + Ned Batchelder 4.168 + Anton Gulenko 4.169 + Amit Regmi 4.170 + Ben Young 4.171 + Jasper Schulz 4.172 + Nicolas Chauvat 4.173 + Andrew Durdin 4.174 + Andrew Chambers 4.175 + Sergey Matyunin 4.176 + Michael Schneider 4.177 + Nicholas Riley 4.178 + Jason Chu 4.179 + Igor Trindade Oliveira 4.180 + Yichao Yu 4.181 + Rocco Moretti 4.182 + Gintautas Miliauskas 4.183 + Michael Twomey 4.184 + Lucian Branescu Mihaila 4.185 + anatoly techtonik 4.186 + Gabriel Lavoie 4.187 + Olivier Dormond 4.188 + Jared Grubb 4.189 + Karl Bartel 4.190 + Wouter van Heyst 4.191 + Brian Dorsey 4.192 + Victor Stinner 4.193 + Andrews Medina 4.194 + Sebastian Pawluś 4.195 + Stuart Williams 4.196 + Daniel Patrick 4.197 + Aaron Iles 4.198 + Toby Watson 4.199 + Antoine Pitrou 4.200 + Christian Hudon 4.201 + Michael Cheng 4.202 + Justas Sadzevicius 4.203 + Gasper Zejn 4.204 + Neil Shepperd 4.205 + Stanislaw Halik 4.206 + Mikael Schönenberg 4.207 + Berkin Ilbeyi 4.208 + Faye Zhao 4.209 + Elmo Mäntynen 4.210 + Jonathan David Riehl 4.211 + Anders Qvist 4.212 + Corbin Simpson 4.213 + Chirag Jadwani 4.214 + Beatrice During 4.215 + Alex Perry 4.216 + Vaibhav Sood 4.217 + Alan McIntyre 4.218 + Reuben Cummings 4.219 + Alexander Sedov 4.220 + p_zieschang@yahoo.de 4.221 + Attila Gobi 4.222 + Christopher Pope 4.223 + Aaron Gallagher 4.224 + Florin Papa 4.225 + Christian Tismer 4.226 + Marc Abramowitz 4.227 + Dan Stromberg 4.228 + Arjun Naik 4.229 + Valentina Mukhamedzhanova 4.230 + Stefano Parmesan 4.231 + touilleMan 4.232 + Alexis Daboville 4.233 + Jens-Uwe Mager 4.234 + Carl Meyer 4.235 + Karl Ramm 4.236 + Pieter Zieschang 4.237 + Gabriel 4.238 + Lukas Vacek 4.239 + Kunal Grover 4.240 + Andrew Dalke 4.241 + Sylvain Thenault 4.242 + Jakub Stasiak 4.243 + Nathan Taylor 4.244 + Vladimir Kryachko 4.245 + Omer Katz 4.246 + Mark Williams 4.247 + Jacek Generowicz 4.248 + Alejandro J.
Cura 4.249 + Jacob Oscarson 4.250 + Travis Francis Athougies 4.251 + Ryan Gonzalez 4.252 + Ian Foote 4.253 + Kristjan Valur Jonsson 4.254 + David Lievens 4.255 + Neil Blakey-Milner 4.256 + Lutz Paelike 4.257 + Lucio Torre 4.258 + Lars Wassermann 4.259 + Philipp Rustemeuer 4.260 + Henrik Vendelbo 4.261 + Richard Lancaster 4.262 + Yasir Suhail 4.263 + Dan Buch 4.264 + Miguel de Val Borro 4.265 + Artur Lisiecki 4.266 + Sergey Kishchenko 4.267 + Ignas Mikalajunas 4.268 + Alecsandru Patrascu 4.269 + Christoph Gerum 4.270 + Martin Blais 4.271 + Lene Wagner 4.272 + Catalin Gabriel Manciu 4.273 + Tomo Cocoa 4.274 + Kim Jin Su 4.275 + rafalgalczynski@gmail.com 4.276 + Toni Mattis 4.277 + Amber Brown 4.278 + Lucas Stadler 4.279 + Julian Berman 4.280 + Markus Holtermann 4.281 + roberto@goyle 4.282 + Yury V. Zaytsev 4.283 + Anna Katrina Dominguez 4.284 + Bobby Impollonia 4.285 + Vasantha Ganesh K 4.286 + Andrew Thompson 4.287 + florinpapa 4.288 + Yusei Tahara 4.289 + Aaron Tubbs 4.290 + Ben Darnell 4.291 + Roberto De Ioris 4.292 + Logan Chien 4.293 + Juan Francisco Cantero Hurtado 4.294 + Ruochen Huang 4.295 + Jeong YunWon 4.296 + Godefroid Chappelle 4.297 + Joshua Gilbert 4.298 + Dan Colish 4.299 + Christopher Armstrong 4.300 + Michael Hudson-Doyle 4.301 + Anders Sigfridsson 4.302 + Nikolay Zinov 4.303 + Jason Michalski 4.304 + Floris Bruynooghe 4.305 + Laurens Van Houtven 4.306 + Akira Li 4.307 + Gustavo Niemeyer 4.308 + Stephan Busemann 4.309 + Rafał Gałczyński 4.310 + Matt Bogosian 4.311 + timo 4.312 + Christian Muirhead 4.313 + Berker Peksag 4.314 + James Lan 4.315 + Volodymyr Vladymyrov 4.316 + shoma hosaka 4.317 + Ben Mather 4.318 + Niclas Olofsson 4.319 + Matthew Miller 4.320 + Rodrigo Araújo 4.321 + halgari 4.322 + Boglarka Vezer 4.323 + Chris Pressey 4.324 + Buck Golemon 4.325 + Diana Popa 4.326 + Konrad Delong 4.327 + Dinu Gherman 4.328 + Chris Lambacher 4.329 + coolbutuseless@gmail.com 4.330 + Daniil Yarancev 4.331 + Jim Baker 4.332 + Dan Crosta 4.333 + Nikolaos-Digenis Karagiannis 4.334 + James Robert 4.335 + Armin Ronacher 4.336 + Brett Cannon 4.337 + Donald Stufft 4.338 + yrttyr 4.339 + aliceinwire 4.340 + OlivierBlanvillain 4.341 + Dan Sanders 4.342 + Zooko Wilcox-O Hearn 4.343 + Tomer Chachamu 4.344 + Christopher Groskopf 4.345 + Asmo Soinio 4.346 + jiaaro 4.347 + Mads Kiilerich 4.348 + Antony Lee 4.349 + Jason Madden 4.350 + Daniel Neuhäuser 4.351 + reubano@gmail.com 4.352 + Yaroslav Fedevych 4.353 + Jim Hunziker 4.354 + Markus Unterwaditzer 4.355 + Even Wiik Thomassen 4.356 + jbs 4.357 + squeaky 4.358 + soareschen 4.359 + Jonas Pfannschmidt 4.360 + Kurt Griffiths 4.361 + Mike Bayer 4.362 + Stefan Marr 4.363 + Flavio Percoco 4.364 + Kristoffer Kleine 4.365 + Michael Chermside 4.366 + Anna Ravencroft 4.367 + pizi 4.368 + remarkablerocket 4.369 + Andrey Churin 4.370 + Zearin 4.371 + Eli Stevens 4.372 + Tobias Diaz 4.373 + Julien Phalip 4.374 + Roman Podoliaka 4.375 + Dan Loewenherz 4.376 + werat 4.377 + 4.378 + Heinrich-Heine University, Germany 4.379 + Open End AB (formerly AB Strakt), Sweden 4.380 + merlinux GmbH, Germany 4.381 + tismerysoft GmbH, Germany 4.382 + Logilab Paris, France 4.383 + DFKI GmbH, Germany 4.384 + Impara, Germany 4.385 + Change Maker, Sweden 4.386 + University of California Berkeley, USA 4.387 + Google Inc. 4.388 + King's College London 4.389 + 4.390 +The PyPy Logo as used by http://speed.pypy.org and others was created 4.391 +by Samuel Reis and is distributed on terms of Creative Commons Share Alike 4.392 +License.
4.393 + 4.394 +License for 'lib-python/2.7' 4.395 +============================ 4.396 + 4.397 +Except when otherwise stated (look for LICENSE files or copyright/license 4.398 +information at the beginning of each file) the files in the 'lib-python/2.7' 4.399 +directory are all copyrighted by the Python Software Foundation and licensed 4.400 +under the terms that you can find here: https://docs.python.org/2/license.html 4.401 + 4.402 +License for 'pypy/module/unicodedata/' 4.403 +====================================== 4.404 + 4.405 +The following files are from the website of The Unicode Consortium 4.406 +at http://www.unicode.org/. For the terms of use of these files, see 4.407 +http://www.unicode.org/terms_of_use.html . Or they are derived from 4.408 +files from the above website, and the same terms of use apply. 4.409 + 4.410 + CompositionExclusions-*.txt 4.411 + EastAsianWidth-*.txt 4.412 + LineBreak-*.txt 4.413 + UnicodeData-*.txt 4.414 + UnihanNumeric-*.txt 4.415 + 4.416 +License for 'dotviewer/font/' 4.417 +============================= 4.418 + 4.419 +Copyright (C) 2008 The Android Open Source Project 4.420 + 4.421 +Licensed under the Apache License, Version 2.0 (the "License"); 4.422 +you may not use this file except in compliance with the License. 4.423 +You may obtain a copy of the License at 4.424 + 4.425 + http://www.apache.org/licenses/LICENSE-2.0 4.426 + 4.427 +Unless required by applicable law or agreed to in writing, software 4.428 +distributed under the License is distributed on an "AS IS" BASIS, 4.429 +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 4.430 +See the License for the specific language governing permissions and 4.431 +limitations under the License. 4.432 + 4.433 +Detailed license information is contained in the NOTICE file in the 4.434 +directory. 4.435 + 4.436 + 4.437 +Licenses and Acknowledgements for Incorporated Software 4.438 +======================================================= 4.439 + 4.440 +This section is an incomplete, but growing list of licenses and 4.441 +acknowledgements for third-party software incorporated in the PyPy 4.442 +distribution. 4.443 + 4.444 +License for 'Tcl/Tk' 4.445 +-------------------- 4.446 + 4.447 +This copy of PyPy contains library code that may, when used, result in 4.448 +the Tcl/Tk library to be loaded. PyPy also includes code that may be 4.449 +regarded as being a copy of some parts of the Tcl/Tk header files. 4.450 +You may see a copy of the License for Tcl/Tk in the file 4.451 +`lib_pypy/_tkinter/license.terms` included here. 4.452 + 4.453 +License for 'bzip2' 4.454 +------------------- 4.455 + 4.456 +This copy of PyPy may be linked (dynamically or statically) with the 4.457 +bzip2 library. You may see a copy of the License for bzip2/libbzip2 at 4.458 + 4.459 + http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html 4.460 + 4.461 +License for 'openssl' 4.462 +--------------------- 4.463 + 4.464 +This copy of PyPy may be linked (dynamically or statically) with the 4.465 +openssl library. You may see a copy of the License for OpenSSL at 4.466 + 4.467 + https://www.openssl.org/source/license.html 4.468 + 4.469 +License for 'gdbm' 4.470 +------------------ 4.471 + 4.472 +The gdbm module includes code from gdbm.h, which is distributed under 4.473 +the terms of the GPL license version 2 or any later version. Thus the 4.474 +gdbm module, provided in the file lib_pypy/gdbm.py, is redistributed 4.475 +under the terms of the GPL license as well. 
4.476 + 4.477 +License for 'rpython/rlib/rvmprof/src' 4.478 +-------------------------------------- 4.479 + 4.480 +The code is based on gperftools. You may see a copy of the License for it at 4.481 + 4.482 + https://github.com/gperftools/gperftools/blob/master/COPYING
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/pyparser/__init__.py Sun Jan 08 20:20:39 2017 +0100 5.3 @@ -0,0 +1,1 @@ 5.4 +# empty
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/pyparser/automata.py Sun Jan 08 20:20:39 2017 +0100 6.3 @@ -0,0 +1,120 @@ 6.4 +# ______________________________________________________________________ 6.5 +"""Module automata 6.6 + 6.7 +THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED 6.8 +TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists 6.9 +instead of a unique nested one) 6.10 + 6.11 +$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $ 6.12 +""" 6.13 +# ______________________________________________________________________ 6.14 +# Module level definitions 6.15 + 6.16 +# PYPY Modification: removed the EMPTY class as it's not needed here 6.17 + 6.18 + 6.19 +# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython 6.20 +# dicts (see pytokenize.py). Then DFA.__init__() turns these dicts into 6.21 +# more compact strings. 6.22 +DEFAULT = object() 6.23 + 6.24 +# PYPY Modification : removed all automata functions (any, maybe, 6.25 +# newArcPair, etc.) 6.26 + 6.27 +ERROR_STATE = chr(255) 6.28 + 6.29 +class DFA: 6.30 + # ____________________________________________________________ 6.31 + def __init__(self, states, accepts, start = 0): 6.32 + """ NOT_RPYTHON """ 6.33 + assert len(states) < 255 # no support for huge amounts of states 6.34 + # construct string for looking up state transitions 6.35 + string_states = [] * len(states) 6.36 + # compute maximum 6.37 + maximum = 0 6.38 + for state in states: 6.39 + for key in state: 6.40 + if key == DEFAULT: 6.41 + continue 6.42 + maximum = max(ord(key), maximum) 6.43 + self.max_char = maximum + 1 6.44 + 6.45 + defaults = [] 6.46 + for i, state in enumerate(states): 6.47 + default = ERROR_STATE 6.48 + if DEFAULT in state: 6.49 + default = chr(state[DEFAULT]) 6.50 + defaults.append(default) 6.51 + string_state = [default] * self.max_char 6.52 + for key, value in state.iteritems(): 6.53 + if key == DEFAULT: 6.54 + continue 6.55 + assert len(key) == 1 6.56 + assert ord(key) < self.max_char 6.57 + string_state[ord(key)] = chr(value) 6.58 + string_states.extend(string_state) 6.59 + self.states = "".join(string_states) 6.60 + self.defaults = "".join(defaults) 6.61 + self.accepts = accepts 6.62 + self.start = start 6.63 + 6.64 + # ____________________________________________________________ 6.65 + 6.66 + def _next_state(self, item, crntState): 6.67 + if ord(item) >= self.max_char: 6.68 + return self.defaults[crntState] 6.69 + else: 6.70 + return self.states[crntState * self.max_char + ord(item)] 6.71 + 6.72 + def recognize(self, inVec, pos = 0): 6.73 + crntState = self.start 6.74 + lastAccept = False 6.75 + i = pos 6.76 + for i in range(pos, len(inVec)): 6.77 + item = inVec[i] 6.78 + accept = self.accepts[crntState] 6.79 + crntState = self._next_state(item, crntState) 6.80 + if crntState != ERROR_STATE: 6.81 + pass 6.82 + elif accept: 6.83 + return i 6.84 + elif lastAccept: 6.85 + # This is now needed b/c of exception cases where there are 6.86 + # transitions to dead states 6.87 + return i - 1 6.88 + else: 6.89 + return -1 6.90 + crntState = ord(crntState) 6.91 + lastAccept = accept 6.92 + # if self.states[crntState][1]: 6.93 + if self.accepts[crntState]: 6.94 + return i + 1 6.95 + elif lastAccept: 6.96 + return i 6.97 + else: 6.98 + return -1 6.99 + 6.100 +# ______________________________________________________________________ 6.101 + 6.102 +class NonGreedyDFA (DFA): 6.103 + 6.104 + def recognize(self, inVec, pos = 0): 6.105 + crntState = self.start 6.106 + i = pos 6.107 + for i in range(pos, 
len(inVec)): 6.108 + item = inVec[i] 6.109 + accept = self.accepts[crntState] 6.110 + if accept: 6.111 + return i 6.112 + crntState = self._next_state(item, crntState) 6.113 + if crntState == ERROR_STATE: 6.114 + return -1 6.115 + crntState = ord(crntState) 6.116 + i += 1 6.117 + if self.accepts[crntState]: 6.118 + return i 6.119 + else: 6.120 + return -1 6.121 + 6.122 +# ______________________________________________________________________ 6.123 +# End of automata.py
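
To make the compact state encoding above concrete, here is a minimal sketch (Python 2, like the package; the two-state table is a made-up example, not part of the imported code) of a DFA accepting one or more decimal digits. recognize() returns the offset just past the longest match, or -1 when nothing matches from the given position:

    from pyparser.automata import DFA

    # Hypothetical machine: state 0 (start, not accepting) moves to
    # state 1 on a digit; state 1 (accepting) loops on further digits.
    digits = dict((c, 1) for c in "0123456789")
    states = [digits, digits.copy()]
    accepts = [False, True]
    number = DFA(states, accepts)

    print number.recognize("123abc")   # -> 3, offset just past "123"
    print number.recognize("abc")      # -> -1, no match at position 0
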
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/pyparser/consts.py Sun Jan 08 20:20:39 2017 +0100 7.3 @@ -0,0 +1,8 @@ 7.4 +""" 7.5 +Various flags used during the compilation process. 7.6 +""" 7.7 + 7.8 +PyCF_SOURCE_IS_UTF8 = 0x0100 7.9 +PyCF_DONT_IMPLY_DEDENT = 0x0200 7.10 +PyCF_ONLY_AST = 0x0400 7.11 +PyCF_ACCEPT_NULL_BYTES = 0x10000000 # PyPy only, for compile()
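
Since each constant occupies a distinct bit, callers presumably combine them with bitwise OR and test them with bitwise AND; a minimal sketch:

    from pyparser.consts import (PyCF_SOURCE_IS_UTF8,
                                 PyCF_DONT_IMPLY_DEDENT, PyCF_ONLY_AST)

    # Combine two independent flags; each remains individually testable.
    flags = PyCF_SOURCE_IS_UTF8 | PyCF_DONT_IMPLY_DEDENT
    assert flags & PyCF_DONT_IMPLY_DEDENT
    assert not (flags & PyCF_ONLY_AST)
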
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/pyparser/data/Grammar2.5 Sun Jan 08 20:20:39 2017 +0100 8.3 @@ -0,0 +1,148 @@ 8.4 +# Grammar for Python 8.5 + 8.6 +# Note: Changing the grammar specified in this file will most likely 8.7 +# require corresponding changes in the parser module 8.8 +# (../Modules/parsermodule.c). If you can't make the changes to 8.9 +# that module yourself, please co-ordinate the required changes 8.10 +# with someone who can; ask around on python-dev for help. Fred 8.11 +# Drake <fdrake@acm.org> will probably be listening there. 8.12 + 8.13 +# NOTE WELL: You should also follow all the steps listed in PEP 306, 8.14 +# "How to Change Python's Grammar" 8.15 + 8.16 +# Commands for Kees Blom's railroad program 8.17 +#diagram:token NAME 8.18 +#diagram:token NUMBER 8.19 +#diagram:token STRING 8.20 +#diagram:token NEWLINE 8.21 +#diagram:token ENDMARKER 8.22 +#diagram:token INDENT 8.23 +#diagram:output\input python.bla 8.24 +#diagram:token DEDENT 8.25 +#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm 8.26 +#diagram:rules 8.27 + 8.28 +# Start symbols for the grammar: 8.29 +# single_input is a single interactive statement; 8.30 +# file_input is a module or sequence of commands read from an input file; 8.31 +# eval_input is the input for the eval() and input() functions. 8.32 +# NB: compound_stmt in single_input is followed by extra NEWLINE! 8.33 +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 8.34 +file_input: (NEWLINE | stmt)* ENDMARKER 8.35 +eval_input: testlist NEWLINE* ENDMARKER 8.36 + 8.37 +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 8.38 +decorators: decorator+ 8.39 +funcdef: [decorators] 'def' NAME parameters ':' suite 8.40 +parameters: '(' [varargslist] ')' 8.41 +varargslist: ((fpdef ['=' test] ',')* 8.42 + ('*' NAME [',' '**' NAME] | '**' NAME) | 8.43 + fpdef ['=' test] (',' fpdef ['=' test])* [',']) 8.44 +fpdef: NAME | '(' fplist ')' 8.45 +fplist: fpdef (',' fpdef)* [','] 8.46 + 8.47 +stmt: simple_stmt | compound_stmt 8.48 +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 8.49 +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | 8.50 + import_stmt | global_stmt | exec_stmt | assert_stmt) 8.51 +expr_stmt: testlist (augassign (yield_expr|testlist) | 8.52 + ('=' (yield_expr|testlist))*) 8.53 +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | 8.54 + '<<=' | '>>=' | '**=' | '//=') 8.55 +# For normal assignments, additional restrictions enforced by the interpreter 8.56 +print_stmt: 'print' ( [ test (',' test)* [','] ] | 8.57 + '>>' test [ (',' test)+ [','] ] ) 8.58 +del_stmt: 'del' exprlist 8.59 +pass_stmt: 'pass' 8.60 +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 8.61 +break_stmt: 'break' 8.62 +continue_stmt: 'continue' 8.63 +return_stmt: 'return' [testlist] 8.64 +yield_stmt: yield_expr 8.65 +raise_stmt: 'raise' [test [',' test [',' test]]] 8.66 +import_stmt: import_name | import_from 8.67 +import_name: 'import' dotted_as_names 8.68 +import_from: ('from' ('.'* dotted_name | '.'+) 8.69 + 'import' ('*' | '(' import_as_names ')' | import_as_names)) 8.70 +import_as_name: NAME [('as' | NAME) NAME] 8.71 +dotted_as_name: dotted_name [('as' | NAME) NAME] 8.72 +import_as_names: import_as_name (',' import_as_name)* [','] 8.73 +dotted_as_names: dotted_as_name (',' dotted_as_name)* 8.74 +dotted_name: NAME ('.' 
NAME)* 8.75 +global_stmt: 'global' NAME (',' NAME)* 8.76 +exec_stmt: 'exec' expr ['in' test [',' test]] 8.77 +assert_stmt: 'assert' test [',' test] 8.78 + 8.79 +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef 8.80 +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 8.81 +while_stmt: 'while' test ':' suite ['else' ':' suite] 8.82 +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 8.83 +try_stmt: ('try' ':' suite 8.84 + ((except_clause ':' suite)+ 8.85 + ['else' ':' suite] 8.86 + ['finally' ':' suite] | 8.87 + 'finally' ':' suite)) 8.88 +with_stmt: 'with' test [ with_var ] ':' suite 8.89 +with_var: ('as' | NAME) expr 8.90 +# NB compile.c makes sure that the default except clause is last 8.91 +except_clause: 'except' [test [',' test]] 8.92 +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 8.93 + 8.94 +# Backward compatibility cruft to support: 8.95 +# [ x for x in lambda: True, lambda: False if x() ] 8.96 +# even while also allowing: 8.97 +# lambda x: 5 if x else 2 8.98 +# (But not a mix of the two) 8.99 +testlist_safe: old_test [(',' old_test)+ [',']] 8.100 +old_test: or_test | old_lambdef 8.101 +old_lambdef: 'lambda' [varargslist] ':' old_test 8.102 + 8.103 +test: or_test ['if' or_test 'else' test] | lambdef 8.104 +or_test: and_test ('or' and_test)* 8.105 +and_test: not_test ('and' not_test)* 8.106 +not_test: 'not' not_test | comparison 8.107 +comparison: expr (comp_op expr)* 8.108 +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 8.109 +expr: xor_expr ('|' xor_expr)* 8.110 +xor_expr: and_expr ('^' and_expr)* 8.111 +and_expr: shift_expr ('&' shift_expr)* 8.112 +shift_expr: arith_expr (('<<'|'>>') arith_expr)* 8.113 +arith_expr: term (('+'|'-') term)* 8.114 +term: factor (('*'|'/'|'%'|'//') factor)* 8.115 +factor: ('+'|'-'|'~') factor | power 8.116 +power: atom trailer* ['**' factor] 8.117 +atom: ('(' [yield_expr|testlist_gexp] ')' | 8.118 + '[' [listmaker] ']' | 8.119 + '{' [dictmaker] '}' | 8.120 + '`' testlist1 '`' | 8.121 + NAME | NUMBER | STRING+) 8.122 +listmaker: test ( list_for | (',' test)* [','] ) 8.123 +testlist_gexp: test ( gen_for | (',' test)* [','] ) 8.124 +lambdef: 'lambda' [varargslist] ':' test 8.125 +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 8.126 +subscriptlist: subscript (',' subscript)* [','] 8.127 +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] 8.128 +sliceop: ':' [test] 8.129 +exprlist: expr (',' expr)* [','] 8.130 +testlist: test (',' test)* [','] 8.131 +dictmaker: test ':' test (',' test ':' test)* [','] 8.132 + 8.133 +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite 8.134 + 8.135 +arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) 8.136 +argument: test [gen_for] | test '=' test # Really [keyword '='] test 8.137 + 8.138 +list_iter: list_for | list_if 8.139 +list_for: 'for' exprlist 'in' testlist_safe [list_iter] 8.140 +list_if: 'if' old_test [list_iter] 8.141 + 8.142 +gen_iter: gen_for | gen_if 8.143 +gen_for: 'for' exprlist 'in' or_test [gen_iter] 8.144 +gen_if: 'if' old_test [gen_iter] 8.145 + 8.146 +testlist1: test (',' test)* 8.147 + 8.148 +# not used in grammar, but may appear in "node" passed from Parser to Compiler 8.149 +encoding_decl: NAME 8.150 + 8.151 +yield_expr: 'yield' [testlist]
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/pyparser/data/Grammar2.7 Sun Jan 08 20:20:39 2017 +0100 9.3 @@ -0,0 +1,143 @@ 9.4 +# Grammar for Python 9.5 + 9.6 +# Note: Changing the grammar specified in this file will most likely 9.7 +# require corresponding changes in the parser module 9.8 +# (../Modules/parsermodule.c). If you can't make the changes to 9.9 +# that module yourself, please co-ordinate the required changes 9.10 +# with someone who can; ask around on python-dev for help. Fred 9.11 +# Drake <fdrake@acm.org> will probably be listening there. 9.12 + 9.13 +# NOTE WELL: You should also follow all the steps listed in PEP 306, 9.14 +# "How to Change Python's Grammar" 9.15 + 9.16 +# Start symbols for the grammar: 9.17 +# single_input is a single interactive statement; 9.18 +# file_input is a module or sequence of commands read from an input file; 9.19 +# eval_input is the input for the eval() and input() functions. 9.20 +# NB: compound_stmt in single_input is followed by extra NEWLINE! 9.21 +single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 9.22 +file_input: (NEWLINE | stmt)* ENDMARKER 9.23 +eval_input: testlist NEWLINE* ENDMARKER 9.24 + 9.25 +decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 9.26 +decorators: decorator+ 9.27 +decorated: decorators (classdef | funcdef) 9.28 +funcdef: 'def' NAME parameters ':' suite 9.29 +parameters: '(' [varargslist] ')' 9.30 +varargslist: ((fpdef ['=' test] ',')* 9.31 + ('*' NAME [',' '**' NAME] | '**' NAME) | 9.32 + fpdef ['=' test] (',' fpdef ['=' test])* [',']) 9.33 +fpdef: NAME | '(' fplist ')' 9.34 +fplist: fpdef (',' fpdef)* [','] 9.35 + 9.36 +stmt: simple_stmt | compound_stmt 9.37 +simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 9.38 +small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | 9.39 + import_stmt | global_stmt | exec_stmt | assert_stmt) 9.40 +expr_stmt: testlist (augassign (yield_expr|testlist) | 9.41 + ('=' (yield_expr|testlist))*) 9.42 +augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | 9.43 + '<<=' | '>>=' | '**=' | '//=') 9.44 +# For normal assignments, additional restrictions enforced by the interpreter 9.45 +print_stmt: 'print' ( [ test (',' test)* [','] ] | 9.46 + '>>' test [ (',' test)+ [','] ] ) 9.47 +del_stmt: 'del' exprlist 9.48 +pass_stmt: 'pass' 9.49 +flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 9.50 +break_stmt: 'break' 9.51 +continue_stmt: 'continue' 9.52 +return_stmt: 'return' [testlist] 9.53 +yield_stmt: yield_expr 9.54 +raise_stmt: 'raise' [test [',' test [',' test]]] 9.55 +import_stmt: import_name | import_from 9.56 +import_name: 'import' dotted_as_names 9.57 +import_from: ('from' ('.'* dotted_name | '.'+) 9.58 + 'import' ('*' | '(' import_as_names ')' | import_as_names)) 9.59 +import_as_name: NAME ['as' NAME] 9.60 +dotted_as_name: dotted_name ['as' NAME] 9.61 +import_as_names: import_as_name (',' import_as_name)* [','] 9.62 +dotted_as_names: dotted_as_name (',' dotted_as_name)* 9.63 +dotted_name: NAME ('.' 
NAME)* 9.64 +global_stmt: 'global' NAME (',' NAME)* 9.65 +exec_stmt: 'exec' expr ['in' test [',' test]] 9.66 +assert_stmt: 'assert' test [',' test] 9.67 + 9.68 +compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated 9.69 +if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 9.70 +while_stmt: 'while' test ':' suite ['else' ':' suite] 9.71 +for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 9.72 +try_stmt: ('try' ':' suite 9.73 + ((except_clause ':' suite)+ 9.74 + ['else' ':' suite] 9.75 + ['finally' ':' suite] | 9.76 + 'finally' ':' suite)) 9.77 +with_stmt: 'with' with_item (',' with_item)* ':' suite 9.78 +with_item: test ['as' expr] 9.79 +# NB compile.c makes sure that the default except clause is last 9.80 +except_clause: 'except' [test [('as' | ',') test]] 9.81 +suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 9.82 + 9.83 +# Backward compatibility cruft to support: 9.84 +# [ x for x in lambda: True, lambda: False if x() ] 9.85 +# even while also allowing: 9.86 +# lambda x: 5 if x else 2 9.87 +# (But not a mix of the two) 9.88 +testlist_safe: old_test [(',' old_test)+ [',']] 9.89 +old_test: or_test | old_lambdef 9.90 +old_lambdef: 'lambda' [varargslist] ':' old_test 9.91 + 9.92 +test: or_test ['if' or_test 'else' test] | lambdef 9.93 +or_test: and_test ('or' and_test)* 9.94 +and_test: not_test ('and' not_test)* 9.95 +not_test: 'not' not_test | comparison 9.96 +comparison: expr (comp_op expr)* 9.97 +comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 9.98 +expr: xor_expr ('|' xor_expr)* 9.99 +xor_expr: and_expr ('^' and_expr)* 9.100 +and_expr: shift_expr ('&' shift_expr)* 9.101 +shift_expr: arith_expr (('<<'|'>>') arith_expr)* 9.102 +arith_expr: term (('+'|'-') term)* 9.103 +term: factor (('*'|'/'|'%'|'//') factor)* 9.104 +factor: ('+'|'-'|'~') factor | power 9.105 +power: atom trailer* ['**' factor] 9.106 +atom: ('(' [yield_expr|testlist_comp] ')' | 9.107 + '[' [listmaker] ']' | 9.108 + '{' [dictorsetmaker] '}' | 9.109 + '`' testlist1 '`' | 9.110 + NAME | NUMBER | STRING+) 9.111 +listmaker: test ( list_for | (',' test)* [','] ) 9.112 +testlist_comp: test ( comp_for | (',' test)* [','] ) 9.113 +lambdef: 'lambda' [varargslist] ':' test 9.114 +trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 9.115 +subscriptlist: subscript (',' subscript)* [','] 9.116 +subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] 9.117 +sliceop: ':' [test] 9.118 +exprlist: expr (',' expr)* [','] 9.119 +testlist: test (',' test)* [','] 9.120 +dictmaker: test ':' test (',' test ':' test)* [','] 9.121 +dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | 9.122 + (test (comp_for | (',' test)* [','])) ) 9.123 + 9.124 +classdef: 'class' NAME ['(' [testlist] ')'] ':' suite 9.125 + 9.126 +arglist: (argument ',')* (argument [','] 9.127 + |'*' test (',' argument)* [',' '**' test] 9.128 + |'**' test) 9.129 +# The reason that keywords are test nodes instead of NAME is that using NAME 9.130 +# results in an ambiguity. ast.c makes sure it's a NAME. 
9.131 +argument: test [comp_for] | test '=' test 9.132 + 9.133 +list_iter: list_for | list_if 9.134 +list_for: 'for' exprlist 'in' testlist_safe [list_iter] 9.135 +list_if: 'if' old_test [list_iter] 9.136 + 9.137 +comp_iter: comp_for | comp_if 9.138 +comp_for: 'for' exprlist 'in' or_test [comp_iter] 9.139 +comp_if: 'if' old_test [comp_iter] 9.140 + 9.141 +testlist1: test (',' test)* 9.142 + 9.143 +# not used in grammar, but may appear in "node" passed from Parser to Compiler 9.144 +encoding_decl: NAME 9.145 + 9.146 +yield_expr: 'yield' [testlist]
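
One visible difference between the two grammar files: Grammar2.5's with_stmt allows only a single context manager (via with_var), while the with_item rule above admits a comma-separated list. So the following has a derivation under the 2.7 grammar but not under the 2.5 one:

    # Accepted by the Grammar2.7 rule
    #   with_stmt: 'with' with_item (',' with_item)* ':' suite
    # but not by Grammar2.5's single "with_var" form.
    with open("a.txt") as f, open("b.txt") as g:
        print f.read(), g.read()
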
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/pyparser/error.py Sun Jan 08 20:20:39 2017 +0100 10.3 @@ -0,0 +1,40 @@ 10.4 + 10.5 +class SyntaxError(Exception): 10.6 + """Base class for exceptions raised by the parser.""" 10.7 + 10.8 + def __init__(self, msg, lineno=0, offset=0, text=None, filename=None, 10.9 + lastlineno=0): 10.10 + self.msg = msg 10.11 + self.lineno = lineno 10.12 + self.offset = offset 10.13 + self.text = text 10.14 + self.filename = filename 10.15 + self.lastlineno = lastlineno 10.16 + 10.17 + def __str__(self): 10.18 + return "%s at pos (%d, %d) in %r" % (self.__class__.__name__, 10.19 + self.lineno, 10.20 + self.offset, 10.21 + self.text) 10.22 + 10.23 +class IndentationError(SyntaxError): 10.24 + pass 10.25 + 10.26 +class ASTError(Exception): 10.27 + def __init__(self, msg, ast_node ): 10.28 + self.msg = msg 10.29 + self.ast_node = ast_node 10.30 + 10.31 + 10.32 +class TokenError(SyntaxError): 10.33 + 10.34 + def __init__(self, msg, line, lineno, column, tokens, lastlineno=0): 10.35 + SyntaxError.__init__(self, msg, lineno, column, line, 10.36 + lastlineno=lastlineno) 10.37 + self.tokens = tokens 10.38 + 10.39 +class TokenIndentationError(IndentationError): 10.40 + 10.41 + def __init__(self, msg, line, lineno, column, tokens): 10.42 + SyntaxError.__init__(self, msg, lineno, column, line) 10.43 + self.tokens = tokens
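
As a usage sketch (the message, position and source text below are invented), the base class simply records where the error occurred and renders that via __str__:

    from pyparser.error import SyntaxError

    # Hypothetical report of a problem at line 3, column 7.
    err = SyntaxError("invalid syntax", lineno=3, offset=7,
                      text="foo bar\n", filename="example.py")
    print str(err)   # -> SyntaxError at pos (3, 7) in 'foo bar\n'
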
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/pyparser/genpytokenize.py Sun Jan 08 20:20:39 2017 +0100 11.3 @@ -0,0 +1,340 @@ 11.4 +#! /usr/bin/env python 11.5 +"""Module genPytokenize 11.6 + 11.7 +Generates finite state automata for recognizing Python tokens. These are hand 11.8 +coded versions of the regular expressions originally appearing in Ping's 11.9 +tokenize module in the Python standard library. 11.10 + 11.11 +When run from the command line, this should pretty print the DFA machinery. 11.12 + 11.13 +$Id: genPytokenize.py,v 1.1 2003/10/02 17:37:17 jriehl Exp $ 11.14 +""" 11.15 + 11.16 +from pyparser.pylexer import * 11.17 +from pyparser.automata import NonGreedyDFA, DFA, DEFAULT 11.18 + 11.19 +def makePyPseudoDFA (): 11.20 + import string 11.21 + states = [] 11.22 + def makeEOL(): 11.23 + return group(states, 11.24 + newArcPair(states, "\n"), 11.25 + chain(states, 11.26 + newArcPair(states, "\r"), 11.27 + maybe(states, newArcPair(states, "\n")))) 11.28 + # ____________________________________________________________ 11.29 + def makeLineCont (): 11.30 + return chain(states, 11.31 + newArcPair(states, "\\"), 11.32 + makeEOL()) 11.33 + # ____________________________________________________________ 11.34 + # Ignore stuff 11.35 + def makeWhitespace (): 11.36 + return any(states, groupStr(states, " \f\t")) 11.37 + # ____________________________________________________________ 11.38 + def makeComment (): 11.39 + return chain(states, 11.40 + newArcPair(states, "#"), 11.41 + any(states, notGroupStr(states, "\r\n"))) 11.42 + # ____________________________________________________________ 11.43 + #ignore = chain(states, 11.44 + # makeWhitespace(), 11.45 + # any(states, chain(states, 11.46 + # makeLineCont(), 11.47 + # makeWhitespace())), 11.48 + # maybe(states, makeComment())) 11.49 + # ____________________________________________________________ 11.50 + # Names 11.51 + name = chain(states, 11.52 + groupStr(states, string.letters + "_"), 11.53 + any(states, groupStr(states, 11.54 + string.letters + string.digits + "_"))) 11.55 + # ____________________________________________________________ 11.56 + # Digits 11.57 + def makeDigits (): 11.58 + return groupStr(states, "0123456789") 11.59 + # ____________________________________________________________ 11.60 + # Integer numbers 11.61 + hexNumber = chain(states, 11.62 + newArcPair(states, "0"), 11.63 + groupStr(states, "xX"), 11.64 + atleastonce(states, 11.65 + groupStr(states, "0123456789abcdefABCDEF")), 11.66 + maybe(states, groupStr(states, "lL"))) 11.67 + octNumber = chain(states, 11.68 + newArcPair(states, "0"), 11.69 + maybe(states, 11.70 + chain(states, 11.71 + groupStr(states, "oO"), 11.72 + groupStr(states, "01234567"))), 11.73 + any(states, groupStr(states, "01234567")), 11.74 + maybe(states, groupStr(states, "lL"))) 11.75 + binNumber = chain(states, 11.76 + newArcPair(states, "0"), 11.77 + groupStr(states, "bB"), 11.78 + atleastonce(states, groupStr(states, "01")), 11.79 + maybe(states, groupStr(states, "lL"))) 11.80 + decNumber = chain(states, 11.81 + groupStr(states, "123456789"), 11.82 + any(states, makeDigits()), 11.83 + maybe(states, groupStr(states, "lL"))) 11.84 + intNumber = group(states, hexNumber, octNumber, binNumber, decNumber) 11.85 + # ____________________________________________________________ 11.86 + # Exponents 11.87 + def makeExp (): 11.88 + return chain(states, 11.89 + groupStr(states, "eE"), 11.90 + maybe(states, groupStr(states, "+-")), 11.91 + atleastonce(states, makeDigits())) 11.92 + # 
____________________________________________________________ 11.93 + # Floating point numbers 11.94 + def makeFloat (): 11.95 + pointFloat = chain(states, 11.96 + group(states, 11.97 + chain(states, 11.98 + atleastonce(states, makeDigits()), 11.99 + newArcPair(states, "."), 11.100 + any(states, makeDigits())), 11.101 + chain(states, 11.102 + newArcPair(states, "."), 11.103 + atleastonce(states, makeDigits()))), 11.104 + maybe(states, makeExp())) 11.105 + expFloat = chain(states, 11.106 + atleastonce(states, makeDigits()), 11.107 + makeExp()) 11.108 + return group(states, pointFloat, expFloat) 11.109 + # ____________________________________________________________ 11.110 + # Imaginary numbers 11.111 + imagNumber = group(states, 11.112 + chain(states, 11.113 + atleastonce(states, makeDigits()), 11.114 + groupStr(states, "jJ")), 11.115 + chain(states, 11.116 + makeFloat(), 11.117 + groupStr(states, "jJ"))) 11.118 + # ____________________________________________________________ 11.119 + # Any old number. 11.120 + number = group(states, imagNumber, makeFloat(), intNumber) 11.121 + # ____________________________________________________________ 11.122 + # Funny 11.123 + operator = group(states, 11.124 + chain(states, 11.125 + chainStr(states, "**"), 11.126 + maybe(states, newArcPair(states, "="))), 11.127 + chain(states, 11.128 + chainStr(states, ">>"), 11.129 + maybe(states, newArcPair(states, "="))), 11.130 + chain(states, 11.131 + chainStr(states, "<<"), 11.132 + maybe(states, newArcPair(states, "="))), 11.133 + chainStr(states, "<>"), 11.134 + chainStr(states, "!="), 11.135 + chain(states, 11.136 + chainStr(states, "//"), 11.137 + maybe(states, newArcPair(states, "="))), 11.138 + chain(states, 11.139 + groupStr(states, "+-*/%&|^=<>"), 11.140 + maybe(states, newArcPair(states, "="))), 11.141 + newArcPair(states, "~")) 11.142 + bracket = groupStr(states, "[](){}") 11.143 + special = group(states, 11.144 + makeEOL(), 11.145 + groupStr(states, "@:;.,`")) 11.146 + funny = group(states, operator, bracket, special) 11.147 + # ____________________________________________________________ 11.148 + def makeStrPrefix (): 11.149 + return chain(states, 11.150 + maybe(states, groupStr(states, "uUbB")), 11.151 + maybe(states, groupStr(states, "rR"))) 11.152 + # ____________________________________________________________ 11.153 + contStr = group(states, 11.154 + chain(states, 11.155 + makeStrPrefix(), 11.156 + newArcPair(states, "'"), 11.157 + any(states, 11.158 + notGroupStr(states, "\r\n'\\")), 11.159 + any(states, 11.160 + chain(states, 11.161 + newArcPair(states, "\\"), 11.162 + newArcPair(states, DEFAULT), 11.163 + any(states, 11.164 + notGroupStr(states, "\r\n'\\")))), 11.165 + group(states, 11.166 + newArcPair(states, "'"), 11.167 + makeLineCont())), 11.168 + chain(states, 11.169 + makeStrPrefix(), 11.170 + newArcPair(states, '"'), 11.171 + any(states, 11.172 + notGroupStr(states, '\r\n"\\')), 11.173 + any(states, 11.174 + chain(states, 11.175 + newArcPair(states, "\\"), 11.176 + newArcPair(states, DEFAULT), 11.177 + any(states, 11.178 + notGroupStr(states, '\r\n"\\')))), 11.179 + group(states, 11.180 + newArcPair(states, '"'), 11.181 + makeLineCont()))) 11.182 + triple = chain(states, 11.183 + makeStrPrefix(), 11.184 + group(states, 11.185 + chainStr(states, "'''"), 11.186 + chainStr(states, '"""'))) 11.187 + pseudoExtras = group(states, 11.188 + makeLineCont(), 11.189 + makeComment(), 11.190 + triple) 11.191 + pseudoToken = chain(states, 11.192 + makeWhitespace(), 11.193 + group(states, 11.194 + 
newArcPair(states, EMPTY), 11.195 + pseudoExtras, number, funny, contStr, name)) 11.196 + dfaStates, dfaAccepts = nfaToDfa(states, *pseudoToken) 11.197 + return DFA(dfaStates, dfaAccepts), dfaStates 11.198 + 11.199 +# ______________________________________________________________________ 11.200 + 11.201 +def makePyEndDFAMap (): 11.202 + states = [] 11.203 + single = chain(states, 11.204 + any(states, notGroupStr(states, "'\\")), 11.205 + any(states, 11.206 + chain(states, 11.207 + newArcPair(states, "\\"), 11.208 + newArcPair(states, DEFAULT), 11.209 + any(states, notGroupStr(states, "'\\")))), 11.210 + newArcPair(states, "'")) 11.211 + states, accepts = nfaToDfa(states, *single) 11.212 + singleDFA = DFA(states, accepts) 11.213 + states_singleDFA = states 11.214 + states = [] 11.215 + double = chain(states, 11.216 + any(states, notGroupStr(states, '"\\')), 11.217 + any(states, 11.218 + chain(states, 11.219 + newArcPair(states, "\\"), 11.220 + newArcPair(states, DEFAULT), 11.221 + any(states, notGroupStr(states, '"\\')))), 11.222 + newArcPair(states, '"')) 11.223 + states, accepts = nfaToDfa(states, *double) 11.224 + doubleDFA = DFA(states, accepts) 11.225 + states_doubleDFA = states 11.226 + states = [] 11.227 + single3 = chain(states, 11.228 + any(states, notGroupStr(states, "'\\")), 11.229 + any(states, 11.230 + chain(states, 11.231 + group(states, 11.232 + chain(states, 11.233 + newArcPair(states, "\\"), 11.234 + newArcPair(states, DEFAULT)), 11.235 + chain(states, 11.236 + newArcPair(states, "'"), 11.237 + notChainStr(states, "''"))), 11.238 + any(states, notGroupStr(states, "'\\")))), 11.239 + chainStr(states, "'''")) 11.240 + states, accepts = nfaToDfa(states, *single3) 11.241 + single3DFA = NonGreedyDFA(states, accepts) 11.242 + states_single3DFA = states 11.243 + states = [] 11.244 + double3 = chain(states, 11.245 + any(states, notGroupStr(states, '"\\')), 11.246 + any(states, 11.247 + chain(states, 11.248 + group(states, 11.249 + chain(states, 11.250 + newArcPair(states, "\\"), 11.251 + newArcPair(states, DEFAULT)), 11.252 + chain(states, 11.253 + newArcPair(states, '"'), 11.254 + notChainStr(states, '""'))), 11.255 + any(states, notGroupStr(states, '"\\')))), 11.256 + chainStr(states, '"""')) 11.257 + states, accepts = nfaToDfa(states, *double3) 11.258 + double3DFA = NonGreedyDFA(states, accepts) 11.259 + states_double3DFA = states 11.260 + map = {"'" : (singleDFA, states_singleDFA), 11.261 + '"' : (doubleDFA, states_doubleDFA), 11.262 + "r" : None, 11.263 + "R" : None, 11.264 + "u" : None, 11.265 + "U" : None, 11.266 + "b" : None, 11.267 + "B" : None} 11.268 + for uniPrefix in ("", "u", "U", "b", "B", ): 11.269 + for rawPrefix in ("", "r", "R"): 11.270 + prefix = uniPrefix + rawPrefix 11.271 + map[prefix + "'''"] = (single3DFA, states_single3DFA) 11.272 + map[prefix + '"""'] = (double3DFA, states_double3DFA) 11.273 + return map 11.274 + 11.275 +# ______________________________________________________________________ 11.276 + 11.277 +def output(name, dfa_class, dfa, states): 11.278 + import textwrap 11.279 + lines = [] 11.280 + i = 0 11.281 + for line in textwrap.wrap(repr(dfa.accepts), width = 50): 11.282 + if i == 0: 11.283 + lines.append("accepts = ") 11.284 + else: 11.285 + lines.append(" ") 11.286 + lines.append(line) 11.287 + lines.append("\n") 11.288 + i += 1 11.289 + import StringIO 11.290 + lines.append("states = [\n") 11.291 + for numstate, state in enumerate(states): 11.292 + lines.append(" # ") 11.293 + lines.append(str(numstate)) 11.294 + lines.append('\n') 11.295 + 
s = StringIO.StringIO() 11.296 + i = 0 11.297 + for k, v in sorted(state.items()): 11.298 + i += 1 11.299 + if k == DEFAULT: 11.300 + k = "automata.DEFAULT" 11.301 + else: 11.302 + k = repr(k) 11.303 + s.write(k) 11.304 + s.write('::') 11.305 + s.write(repr(v)) 11.306 + if i < len(state): 11.307 + s.write(', ') 11.308 + s.write('},') 11.309 + i = 0 11.310 + if len(state) <= 4: 11.311 + text = [s.getvalue()] 11.312 + else: 11.313 + text = textwrap.wrap(s.getvalue(), width=36) 11.314 + for line in text: 11.315 + line = line.replace('::', ': ') 11.316 + if i == 0: 11.317 + lines.append(' {') 11.318 + else: 11.319 + lines.append(' ') 11.320 + lines.append(line) 11.321 + lines.append('\n') 11.322 + i += 1 11.323 + lines.append(" ]\n") 11.324 + lines.append("%s = automata.%s(states, accepts)\n" % (name, dfa_class)) 11.325 + return ''.join(lines) 11.326 + 11.327 +def main (): 11.328 + pseudoDFA, states_pseudoDFA = makePyPseudoDFA() 11.329 + print output("pseudoDFA", "DFA", pseudoDFA, states_pseudoDFA) 11.330 + endDFAMap = makePyEndDFAMap() 11.331 + dfa, states = endDFAMap['"""'] 11.332 + print output("double3DFA", "NonGreedyDFA", dfa, states) 11.333 + dfa, states = endDFAMap["'''"] 11.334 + print output("single3DFA", "NonGreedyDFA", dfa, states) 11.335 + dfa, states = endDFAMap["'"] 11.336 + print output("singleDFA", "DFA", dfa, states) 11.337 + dfa, states = endDFAMap["\""] 11.338 + print output("doubleDFA", "DFA", dfa, states) 11.339 + 11.340 +# ______________________________________________________________________ 11.341 + 11.342 +if __name__ == "__main__": 11.343 + main()
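The main() routine above simply prints the generated tables. A minimal sketch of driving the generator by hand for one table (assuming the file is importable as pyparser.genpytokenize, under Python 2 since it uses print statements and StringIO):

    # Mirrors the body of main() for the single-quoted-string DFA only.
    from pyparser.genpytokenize import makePyEndDFAMap, output

    endDFAMap = makePyEndDFAMap()
    dfa, states = endDFAMap["'"]
    print output("singleDFA", "DFA", dfa, states)

The printed text is exactly the kind of "automatically generated DFA's" block that appears in pyparser/pytokenize.py later in this changeset.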
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/pyparser/metaparser.py Sun Jan 08 20:20:39 2017 +0100 12.3 @@ -0,0 +1,357 @@ 12.4 +""" 12.5 +Makes a parser from a grammar source. 12.6 + 12.7 +Inspired by Guido van Rossum's pgen2. 12.8 +""" 12.9 + 12.10 +import StringIO 12.11 +import tokenize 12.12 +import token 12.13 + 12.14 +from pyparser import parser 12.15 + 12.16 + 12.17 +class PgenError(Exception): 12.18 + 12.19 + def __init__(self, msg, location=None): 12.20 + Exception.__init__(self, msg) 12.21 + self.location = location 12.22 + 12.23 + 12.24 +class NFA(object): 12.25 + 12.26 + def __init__(self): 12.27 + self.arcs = [] 12.28 + 12.29 + def arc(self, to_state, label=None): 12.30 + self.arcs.append((label, to_state)) 12.31 + 12.32 + def find_unlabeled_states(self, into): 12.33 + if self in into: 12.34 + return 12.35 + into.add(self) 12.36 + for label, state in self.arcs: 12.37 + if label is None: 12.38 + state.find_unlabeled_states(into) 12.39 + 12.40 + 12.41 +class DFA(object): 12.42 + 12.43 + def __init__(self, nfa_set, final_state): 12.44 + self.nfas = nfa_set 12.45 + self.is_final = final_state in nfa_set 12.46 + self.arcs = {} 12.47 + 12.48 + def arc(self, next, label): 12.49 + self.arcs[label] = next 12.50 + 12.51 + def unify_state(self, old, new): 12.52 + for label, state in self.arcs.iteritems(): 12.53 + if state is old: 12.54 + self.arcs[label] = new 12.55 + 12.56 + def __repr__(self): 12.57 + return "<DFA arcs=%r>" % self.arcs 12.58 + 12.59 + def __eq__(self, other): 12.60 + if not isinstance(other, DFA): 12.61 + # This shouldn't really happen. 12.62 + return NotImplemented 12.63 + if other.is_final != self.is_final: 12.64 + return False 12.65 + if len(self.arcs) != len(other.arcs): 12.66 + return False 12.67 + for label, state in self.arcs.iteritems(): 12.68 + try: 12.69 + other_state = other.arcs[label] 12.70 + except KeyError: 12.71 + return False 12.72 + else: 12.73 + if other_state is not state: 12.74 + return False 12.75 + return True 12.76 + 12.77 + 12.78 +def nfa_to_dfa(start, end): 12.79 + """Convert an NFA to a DFA(s) 12.80 + 12.81 + Each DFA is initially a set of NFA states without labels. We start with the 12.82 + DFA for the start NFA. Then we add labeled arcs to it pointing to another 12.83 + set of NFAs (the next state). Finally, we do the same thing to every DFA 12.84 + that is found and return the list of states. 
12.85 + """ 12.86 + base_nfas = set() 12.87 + start.find_unlabeled_states(base_nfas) 12.88 + state_stack = [DFA(base_nfas, end)] 12.89 + for state in state_stack: 12.90 + arcs = {} 12.91 + for nfa in state.nfas: 12.92 + for label, sub_nfa in nfa.arcs: 12.93 + if label is not None: 12.94 + sub_nfa.find_unlabeled_states(arcs.setdefault(label, set())) 12.95 + for label, nfa_set in arcs.iteritems(): 12.96 + for st in state_stack: 12.97 + if st.nfas == nfa_set: 12.98 + break 12.99 + else: 12.100 + st = DFA(nfa_set, end) 12.101 + state_stack.append(st) 12.102 + state.arc(st, label) 12.103 + return state_stack 12.104 + 12.105 +def simplify_dfa(dfa): 12.106 + changed = True 12.107 + while changed: 12.108 + changed = False 12.109 + for i, state in enumerate(dfa): 12.110 + for j in xrange(i + 1, len(dfa)): 12.111 + other_state = dfa[j] 12.112 + if state == other_state: 12.113 + del dfa[j] 12.114 + for sub_state in dfa: 12.115 + sub_state.unify_state(other_state, state) 12.116 + changed = True 12.117 + break 12.118 + 12.119 + 12.120 +class ParserGenerator(object): 12.121 + """NOT_RPYTHON""" 12.122 + 12.123 + def __init__(self, grammar_source): 12.124 + self.start_symbol = None 12.125 + self.dfas = {} 12.126 + stream = StringIO.StringIO(grammar_source) 12.127 + self.token_stream = tokenize.generate_tokens(stream.readline) 12.128 + self.parse() 12.129 + self.first = {} 12.130 + self.add_first_sets() 12.131 + 12.132 + def build_grammar(self, grammar_cls): 12.133 + gram = grammar_cls() 12.134 + gram.start = self.start_symbol 12.135 + names = self.dfas.keys() 12.136 + names.sort() 12.137 + names.remove(self.start_symbol) 12.138 + names.insert(0, self.start_symbol) 12.139 + # First, build symbol and id mappings. 12.140 + for name in names: 12.141 + i = 256 + len(gram.symbol_ids) 12.142 + gram.symbol_ids[name] = i 12.143 + gram.symbol_names[i] = name 12.144 + # Then, iterate through again and finalize labels. 12.145 + for name in names: 12.146 + dfa = self.dfas[name] 12.147 + states = [] 12.148 + for state in dfa: 12.149 + arcs = [] 12.150 + for label, next in state.arcs.iteritems(): 12.151 + arcs.append((self.make_label(gram, label), dfa.index(next))) 12.152 + states.append((arcs, state.is_final)) 12.153 + gram.dfas.append((states, self.make_first(gram, name))) 12.154 + assert len(gram.dfas) - 1 == gram.symbol_ids[name] - 256 12.155 + gram.start = gram.symbol_ids[self.start_symbol] 12.156 + return gram 12.157 + 12.158 + def make_label(self, gram, label): 12.159 + label_index = len(gram.labels) 12.160 + if label[0].isalpha(): 12.161 + # Either a symbol or a token. 12.162 + if label in gram.symbol_ids: 12.163 + if label in gram.symbol_to_label: 12.164 + return gram.symbol_to_label[label] 12.165 + else: 12.166 + gram.labels.append(gram.symbol_ids[label]) 12.167 + gram.symbol_to_label[label] = label_index 12.168 + return label_index 12.169 + elif label.isupper(): 12.170 + token_index = gram.TOKENS[label] 12.171 + if token_index in gram.token_ids: 12.172 + return gram.token_ids[token_index] 12.173 + else: 12.174 + gram.labels.append(token_index) 12.175 + gram.token_ids[token_index] = label_index 12.176 + return label_index 12.177 + else: 12.178 + # Probably a rule without a definition. 12.179 + raise PgenError("no such rule: %r" % (label,)) 12.180 + else: 12.181 + # A keyword or operator. 
12.182 + value = label.strip("\"'") 12.183 + if value[0].isalpha(): 12.184 + if value in gram.keyword_ids: 12.185 + return gram.keyword_ids[value] 12.186 + else: 12.187 + gram.labels.append(gram.KEYWORD_TOKEN) 12.188 + gram.keyword_ids[value] = label_index 12.189 + return label_index 12.190 + else: 12.191 + try: 12.192 + token_index = gram.OPERATOR_MAP[value] 12.193 + except KeyError: 12.194 + raise PgenError("no such operator: %r" % (value,)) 12.195 + if token_index in gram.token_ids: 12.196 + return gram.token_ids[token_index] 12.197 + else: 12.198 + gram.labels.append(token_index) 12.199 + gram.token_ids[token_index] = label_index 12.200 + return label_index 12.201 + 12.202 + def make_first(self, gram, name): 12.203 + original_firsts = self.first[name] 12.204 + firsts = dict() 12.205 + for label in original_firsts: 12.206 + firsts[self.make_label(gram, label)] = None 12.207 + return firsts 12.208 + 12.209 + def add_first_sets(self): 12.210 + for name, dfa in self.dfas.iteritems(): 12.211 + if name not in self.first: 12.212 + self.get_first(name, dfa) 12.213 + 12.214 + def get_first(self, name, dfa): 12.215 + self.first[name] = None 12.216 + state = dfa[0] 12.217 + all_labels = set() 12.218 + overlap_check = {} 12.219 + for label, sub_state in state.arcs.iteritems(): 12.220 + if label in self.dfas: 12.221 + if label in self.first: 12.222 + new_labels = self.first[label] 12.223 + if new_labels is None: 12.224 + raise PgenError("recursion in rule: %r" % (name,)) 12.225 + else: 12.226 + new_labels = self.get_first(label, self.dfas[label]) 12.227 + all_labels.update(new_labels) 12.228 + overlap_check[label] = new_labels 12.229 + else: 12.230 + all_labels.add(label) 12.231 + overlap_check[label] = set((label,)) 12.232 + inverse = {} 12.233 + for label, their_first in overlap_check.iteritems(): 12.234 + for sub_label in their_first: 12.235 + if sub_label in inverse: 12.236 + raise PgenError("ambiguous symbol with label %s" 12.237 + % (label,)) 12.238 + inverse[sub_label] = label 12.239 + self.first[name] = all_labels 12.240 + return all_labels 12.241 + 12.242 + def expect(self, token_type, value=None): 12.243 + if token_type != self.type: 12.244 + expected = token.tok_name[token_type] 12.245 + got = token.tok_name[self.type] 12.246 + raise PgenError("expected token %s but got %s" % (expected, got), 12.247 + self.location) 12.248 + current_value = self.value 12.249 + if value is not None: 12.250 + if value != current_value: 12.251 + msg = "expected %r but got %r" % (value, current_value) 12.252 + raise PgenError(msg,self.location) 12.253 + self.advance_token() 12.254 + return current_value 12.255 + 12.256 + def test_token(self, token_type, value): 12.257 + if self.type == token_type and self.value == value: 12.258 + return True 12.259 + return False 12.260 + 12.261 + def advance_token(self): 12.262 + data = self.token_stream.next() 12.263 + # Ignore comments and non-logical newlines. 12.264 + while data[0] in (tokenize.NL, tokenize.COMMENT): 12.265 + data = self.token_stream.next() 12.266 + self.type, self.value = data[:2] 12.267 + self.location = data[2:] 12.268 + 12.269 + def parse(self): 12.270 + self.advance_token() 12.271 + while self.type != token.ENDMARKER: 12.272 + # Skip over whitespace. 
12.273 +            while self.type == token.NEWLINE:
12.274 +                self.advance_token()
12.275 +            name, start_state, end_state = self.parse_rule()
12.276 +            dfa = nfa_to_dfa(start_state, end_state)
12.277 +            simplify_dfa(dfa)
12.278 +            self.dfas[name] = dfa
12.279 +            if self.start_symbol is None:
12.280 +                self.start_symbol = name
12.281 +
12.282 +    def parse_rule(self):
12.283 +        # RULE: NAME ':' ALTERNATIVES
12.284 +        name = self.expect(token.NAME)
12.285 +        self.expect(token.OP, ":")
12.286 +        start_state, end_state = self.parse_alternatives()
12.287 +        self.expect(token.NEWLINE)
12.288 +        return name, start_state, end_state
12.289 +
12.290 +    def parse_alternatives(self):
12.291 +        # ALTERNATIVES: ITEMS ('|' ITEMS)*
12.292 +        first_state, end_state = self.parse_items()
12.293 +        if self.test_token(token.OP, "|"):
12.294 +            # Link all alternatives into an enclosing set of states.
12.295 +            enclosing_start_state = NFA()
12.296 +            enclosing_end_state = NFA()
12.297 +            enclosing_start_state.arc(first_state)
12.298 +            end_state.arc(enclosing_end_state)
12.299 +            while self.test_token(token.OP, "|"):
12.300 +                self.advance_token()
12.301 +                sub_start_state, sub_end_state = self.parse_items()
12.302 +                enclosing_start_state.arc(sub_start_state)
12.303 +                sub_end_state.arc(enclosing_end_state)
12.304 +            first_state = enclosing_start_state
12.305 +            end_state = enclosing_end_state
12.306 +        return first_state, end_state
12.307 +
12.308 +    def parse_items(self):
12.309 +        # ITEMS: ITEM+
12.310 +        first_state, end_state = self.parse_item()
12.311 +        while self.type in (token.STRING, token.NAME) or \
12.312 +               self.test_token(token.OP, "(") or \
12.313 +               self.test_token(token.OP, "["):
12.314 +            sub_first_state, new_end_state = self.parse_item()
12.315 +            end_state.arc(sub_first_state)
12.316 +            end_state = new_end_state
12.317 +        return first_state, end_state
12.318 +
12.319 +    def parse_item(self):
12.320 +        # ITEM: '[' ALTERNATIVES ']' | ATOM ['+' | '*']
12.321 +        if self.test_token(token.OP, "["):
12.322 +            self.advance_token()
12.323 +            start_state, end_state = self.parse_alternatives()
12.324 +            self.expect(token.OP, "]")
12.325 +            # Bypass the rule if this is optional.
12.326 +            start_state.arc(end_state)
12.327 +            return start_state, end_state
12.328 +        else:
12.329 +            atom_state, next_state = self.parse_atom()
12.330 +            # Check for a repeater.
12.331 +            if self.type == token.OP and self.value in ("+", "*"):
12.332 +                next_state.arc(atom_state)
12.333 +                repeat = self.value
12.334 +                self.advance_token()
12.335 +                if repeat == "*":
12.336 +                    # Optionally repeated
12.337 +                    return atom_state, atom_state
12.338 +                else:
12.339 +                    # Required
12.340 +                    return atom_state, next_state
12.341 +            else:
12.342 +                return atom_state, next_state
12.343 +
12.344 +    def parse_atom(self):
12.345 +        # ATOM: '(' ALTERNATIVES ')' | NAME | STRING
12.346 +        if self.test_token(token.OP, "("):
12.347 +            self.advance_token()
12.348 +            rule = self.parse_alternatives()
12.349 +            self.expect(token.OP, ")")
12.350 +            return rule
12.351 +        elif self.type in (token.NAME, token.STRING):
12.352 +            atom_state = NFA()
12.353 +            next_state = NFA()
12.354 +            atom_state.arc(next_state, self.value)
12.355 +            self.advance_token()
12.356 +            return atom_state, next_state
12.357 +        else:
12.358 +            invalid = token.tok_name[self.type]
12.359 +            raise PgenError("unexpected token: %s" % (invalid,),
12.360 +                            self.location)
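ParserGenerator needs only a grammar source string plus a Grammar subclass supplying KEYWORD_TOKEN, TOKENS and OPERATOR_MAP, as PythonGrammar does in pygram.py later in this changeset. A minimal sketch with a hypothetical one-rule grammar, borrowing stdlib token ids in place of pytoken's (Python 2, like the rest of the package):

    import token as stdtoken
    from pyparser import parser
    from pyparser.metaparser import ParserGenerator

    class ToyGrammar(parser.Grammar):
        KEYWORD_TOKEN = stdtoken.NAME
        TOKENS = {"NUMBER": stdtoken.NUMBER, "NEWLINE": stdtoken.NEWLINE}
        OPERATOR_MAP = {"+": stdtoken.PLUS}

    # One rule: a sum of numbers terminated by a newline.
    pgen = ParserGenerator("expr: NUMBER ('+' NUMBER)* NEWLINE\n")
    gram = pgen.build_grammar(ToyGrammar)
    print gram.symbol_ids    # {'expr': 256}: symbol ids start above token ids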
13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
13.2 +++ b/pyparser/parser.py	Sun Jan 08 20:20:39 2017 +0100
13.3 @@ -0,0 +1,287 @@
13.4 +"""
13.5 +A CPython inspired RPython parser.
13.6 +"""
13.7 +
13.8 +
13.9 +class Grammar(object):
13.10 +    """
13.11 +    Base Grammar object.
13.12 +
13.13 +    Pass this to ParserGenerator.build_grammar to fill it with useful values for
13.14 +    the Parser.
13.15 +    """
13.16 +
13.17 +    def __init__(self):
13.18 +        self.symbol_ids = {}
13.19 +        self.symbol_names = {}
13.20 +        self.symbol_to_label = {}
13.21 +        self.keyword_ids = {}
13.22 +        self.dfas = []
13.23 +        self.labels = [0]
13.24 +        self.token_ids = {}
13.25 +        self.start = -1
13.26 +
13.27 +    def shared_copy(self):
13.28 +        new = self.__class__()
13.29 +        new.symbol_ids = self.symbol_ids
13.30 +        new.symbol_names = self.symbol_names
13.31 +        new.keyword_ids = self.keyword_ids
13.32 +        new.dfas = self.dfas
13.33 +        new.labels = self.labels
13.34 +        new.token_ids = self.token_ids
13.35 +        return new
13.36 +
13.37 +    def _freeze_(self):
13.38 +        # Remove some attributes not used in parsing.
13.39 +        try:
13.40 +            del self.symbol_to_label
13.41 +            del self.symbol_names
13.42 +            del self.symbol_ids
13.43 +        except AttributeError:
13.44 +            pass
13.45 +        return True
13.46 +
13.47 +
13.48 +class Node(object):
13.49 +
13.50 +    __slots__ = ("type", )
13.51 +
13.52 +    def __init__(self, type):
13.53 +        self.type = type
13.54 +
13.55 +    def __eq__(self, other):
13.56 +        raise NotImplementedError("abstract base class")
13.57 +
13.58 +    def __ne__(self, other):
13.59 +        return not self == other
13.60 +
13.61 +    def get_value(self):
13.62 +        return None
13.63 +
13.64 +    def get_child(self, i):
13.65 +        raise NotImplementedError("abstract base class")
13.66 +
13.67 +    def num_children(self):
13.68 +        return 0
13.69 +
13.70 +    def append_child(self, child):
13.71 +        raise NotImplementedError("abstract base class")
13.72 +
13.73 +    def get_lineno(self):
13.74 +        raise NotImplementedError("abstract base class")
13.75 +
13.76 +    def get_column(self):
13.77 +        raise NotImplementedError("abstract base class")
13.78 +
13.79 +
13.80 +class Terminal(Node):
13.81 +    __slots__ = ("value", "lineno", "column")
13.82 +    def __init__(self, type, value, lineno, column):
13.83 +        Node.__init__(self, type)
13.84 +        self.value = value
13.85 +        self.lineno = lineno
13.86 +        self.column = column
13.87 +
13.88 +    def __repr__(self):
13.89 +        return "Terminal(type=%s, value=%r)" % (self.type, self.value)
13.90 +
13.91 +    def __eq__(self, other):
13.92 +        # For tests.
13.93 +        return (type(self) == type(other) and
13.94 +                self.type == other.type and
13.95 +                self.value == other.value)
13.96 +
13.97 +    def get_value(self):
13.98 +        return self.value
13.99 +
13.100 +    def get_lineno(self):
13.101 +        return self.lineno
13.102 +
13.103 +    def get_column(self):
13.104 +        return self.column
13.105 +
13.106 +
13.107 +class AbstractNonterminal(Node):
13.108 +    __slots__ = ()
13.109 +
13.110 +    def get_lineno(self):
13.111 +        return self.get_child(0).get_lineno()
13.112 +
13.113 +    def get_column(self):
13.114 +        return self.get_child(0).get_column()
13.115 +
13.116 +    def __eq__(self, other):
13.117 +        # For tests.
13.118 +        # grumble, annoying
13.119 +        if not isinstance(other, AbstractNonterminal):
13.120 +            return False
13.121 +        if self.type != other.type:
13.122 +            return False
13.123 +        if self.num_children() != other.num_children():
13.124 +            return False
13.125 +        for i in range(self.num_children()):
13.126 +            if self.get_child(i) != other.get_child(i):
13.127 +                return False
13.128 +        return True
13.129 +
13.130 +
13.131 +class Nonterminal(AbstractNonterminal):
13.132 +    __slots__ = ("_children", )
13.133 +    def __init__(self, type, children):
13.134 +        Node.__init__(self, type)
13.135 +        self._children = children
13.136 +
13.137 +    def __repr__(self):
13.138 +        return "Nonterminal(type=%s, children=%r)" % (self.type, self._children)
13.139 +
13.140 +    def get_child(self, i):
13.141 +        return self._children[i]
13.142 +
13.143 +    def num_children(self):
13.144 +        return len(self._children)
13.145 +
13.146 +    def append_child(self, child):
13.147 +        self._children.append(child)
13.148 +
13.149 +
13.150 +class Nonterminal1(AbstractNonterminal):
13.151 +    __slots__ = ("_child", )
13.152 +    def __init__(self, type, child):
13.153 +        Node.__init__(self, type)
13.154 +        self._child = child
13.155 +
13.156 +    def __repr__(self):
13.157 +        return "Nonterminal(type=%s, children=[%r])" % (self.type, self._child)
13.158 +
13.159 +    def get_child(self, i):
13.160 +        assert i == 0 or i == -1
13.161 +        return self._child
13.162 +
13.163 +    def num_children(self):
13.164 +        return 1
13.165 +
13.166 +    def append_child(self, child):
13.167 +        assert 0, "should be unreachable"
13.168 +
13.169 +
13.170 +
13.171 +class ParseError(Exception):
13.172 +
13.173 +    def __init__(self, msg, token_type, value, lineno, column, line,
13.174 +                 expected=-1):
13.175 +        self.msg = msg
13.176 +        self.token_type = token_type
13.177 +        self.value = value
13.178 +        self.lineno = lineno
13.179 +        self.column = column
13.180 +        self.line = line
13.181 +        self.expected = expected
13.182 +
13.183 +    def __str__(self):
13.184 +        return "ParseError(%s, %r)" % (self.token_type, self.value)
13.185 +
13.186 +
13.187 +class Parser(object):
13.188 +
13.189 +    def __init__(self, grammar):
13.190 +        self.grammar = grammar
13.191 +        self.root = None
13.192 +        self.stack = None
13.193 +
13.194 +    def prepare(self, start=-1):
13.195 +        """Set up the parser for parsing.
13.196 +
13.197 +        Takes the starting symbol as an argument.
13.198 +        """
13.199 +        if start == -1:
13.200 +            start = self.grammar.start
13.201 +        self.root = None
13.202 +        current_node = Nonterminal(start, [])
13.203 +        self.stack = []
13.204 +        self.stack.append((self.grammar.dfas[start - 256], 0, current_node))
13.205 +
13.206 +    def add_token(self, token_type, value, lineno, column, line):
13.207 +        label_index = self.classify(token_type, value, lineno, column, line)
13.208 +        sym_id = 0 # for the annotator
13.209 +        while True:
13.210 +            dfa, state_index, node = self.stack[-1]
13.211 +            states, first = dfa
13.212 +            arcs, is_accepting = states[state_index]
13.213 +            for i, next_state in arcs:
13.214 +                sym_id = self.grammar.labels[i]
13.215 +                if label_index == i:
13.216 +                    # We matched a terminal.
13.217 +                    self.shift(next_state, token_type, value, lineno, column)
13.218 +                    state = states[next_state]
13.219 +                    # While the only possible action is to accept, pop nodes off
13.220 +                    # the stack.
13.221 +                    while state[1] and not state[0]:
13.222 +                        self.pop()
13.223 +                        if not self.stack:
13.224 +                            # Parsing is done.
13.225 +                            return True
13.226 +                        dfa, state_index, node = self.stack[-1]
13.227 +                        state = dfa[0][state_index]
13.228 +                    return False
13.229 +                elif sym_id >= 256:
13.230 +                    sub_node_dfa = self.grammar.dfas[sym_id - 256]
13.231 +                    # Check if this token can start a child node.
13.232 +                    if label_index in sub_node_dfa[1]:
13.233 +                        self.push(sub_node_dfa, next_state, sym_id, lineno,
13.234 +                                  column)
13.235 +                        break
13.236 +            else:
13.237 +                # We failed to find any arcs to another state, so unless this
13.238 +                # state is accepting, it's invalid input.
13.239 +                if is_accepting:
13.240 +                    self.pop()
13.241 +                    if not self.stack:
13.242 +                        raise ParseError("too much input", token_type, value,
13.243 +                                         lineno, column, line)
13.244 +                else:
13.245 +                    # If only one possible input would satisfy, attach it to the
13.246 +                    # error.
13.247 +                    if len(arcs) == 1:
13.248 +                        expected = sym_id
13.249 +                    else:
13.250 +                        expected = -1
13.251 +                    raise ParseError("bad input", token_type, value, lineno,
13.252 +                                     column, line, expected)
13.253 +
13.254 +    def classify(self, token_type, value, lineno, column, line):
13.255 +        """Find the label for a token."""
13.256 +        if token_type == self.grammar.KEYWORD_TOKEN:
13.257 +            label_index = self.grammar.keyword_ids.get(value, -1)
13.258 +            if label_index != -1:
13.259 +                return label_index
13.260 +        label_index = self.grammar.token_ids.get(token_type, -1)
13.261 +        if label_index == -1:
13.262 +            raise ParseError("invalid token", token_type, value, lineno, column,
13.263 +                             line)
13.264 +        return label_index
13.265 +
13.266 +    def shift(self, next_state, token_type, value, lineno, column):
13.267 +        """Shift a terminal and prepare for the next state."""
13.268 +        dfa, state, node = self.stack[-1]
13.269 +        new_node = Terminal(token_type, value, lineno, column)
13.270 +        node.append_child(new_node)
13.271 +        self.stack[-1] = (dfa, next_state, node)
13.272 +
13.273 +    def push(self, next_dfa, next_state, node_type, lineno, column):
13.274 +        """Push a nonterminal and adjust the current state."""
13.275 +        dfa, state, node = self.stack[-1]
13.276 +        new_node = Nonterminal(node_type, [])
13.277 +        self.stack[-1] = (dfa, next_state, node)
13.278 +        self.stack.append((next_dfa, 0, new_node))
13.279 +
13.280 +    def pop(self):
13.281 +        """Pop an entry off the stack and make its node a child of the last."""
13.282 +        dfa, state, node = self.stack.pop()
13.283 +        if self.stack:
13.284 +            # we are now done with node, so we can store it more efficiently if
13.285 +            # it has just one child
13.286 +            if node.num_children() == 1:
13.287 +                node = Nonterminal1(node.type, node.get_child(0))
13.288 +            self.stack[-1][2].append_child(node)
13.289 +        else:
13.290 +            self.root = node
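Parser.add_token() drives the grammar DFAs and leaves the finished tree in self.root. The Node classes above expose a deliberately small API; as an illustration, a hypothetical helper that flattens a finished tree back into (type, value) pairs using only that API:

    def flatten_terminals(node, out=None):
        # Collect (type, value) pairs for every Terminal under node, relying
        # only on num_children(), get_child() and get_value().
        if out is None:
            out = []
        if node.num_children() == 0:
            out.append((node.type, node.get_value()))
        else:
            for i in range(node.num_children()):
                flatten_terminals(node.get_child(i), out)
        return out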
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 14.2 +++ b/pyparser/pygram.py Sun Jan 08 20:20:39 2017 +0100 14.3 @@ -0,0 +1,43 @@ 14.4 +import os 14.5 +from pyparser import parser, pytoken, metaparser 14.6 + 14.7 +class PythonGrammar(parser.Grammar): 14.8 + 14.9 + KEYWORD_TOKEN = pytoken.python_tokens["NAME"] 14.10 + TOKENS = pytoken.python_tokens 14.11 + OPERATOR_MAP = pytoken.python_opmap 14.12 + 14.13 +def _get_python_grammar(): 14.14 + here = os.path.dirname(__file__) 14.15 + fp = open(os.path.join(here, "data", "Grammar2.7")) 14.16 + try: 14.17 + gram_source = fp.read() 14.18 + finally: 14.19 + fp.close() 14.20 + pgen = metaparser.ParserGenerator(gram_source) 14.21 + return pgen.build_grammar(PythonGrammar) 14.22 + 14.23 + 14.24 +python_grammar = _get_python_grammar() 14.25 +python_grammar_no_print = python_grammar.shared_copy() 14.26 +python_grammar_no_print.keyword_ids = python_grammar_no_print.keyword_ids.copy() 14.27 +del python_grammar_no_print.keyword_ids["print"] 14.28 + 14.29 +class _Tokens(object): 14.30 + pass 14.31 + 14.32 +for tok_name, idx in pytoken.python_tokens.iteritems(): 14.33 + setattr(_Tokens, tok_name, idx) 14.34 +tokens = _Tokens() 14.35 + 14.36 +class _Symbols(object): 14.37 + pass 14.38 +rev_lookup = {} 14.39 +for sym_name, idx in python_grammar.symbol_ids.iteritems(): 14.40 + setattr(_Symbols, sym_name, idx) 14.41 + rev_lookup[idx] = sym_name 14.42 +syms = _Symbols() 14.43 +syms._rev_lookup = rev_lookup # for debugging 14.44 +syms.sym_name = rev_lookup # for symbol module compatibility 14.45 + 14.46 +del _get_python_grammar, _Tokens, tok_name, sym_name, idx
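pygram builds the Python grammar once at import time from the bundled data/Grammar2.7 file and exposes the resulting ids as attributes. A short sketch of the lookups the rest of the package performs (assuming the data file is installed alongside the package):

    from pyparser import pygram

    start = pygram.syms.file_input       # start symbol used for 'exec' mode
    print pygram.syms.sym_name[start]    # 'file_input' (symbol-module style)
    print pygram.tokens.NAME             # small integer id from pytoken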
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 15.2 +++ b/pyparser/pylexer.py Sun Jan 08 20:20:39 2017 +0100 15.3 @@ -0,0 +1,240 @@ 15.4 +# Used by genpytokenize.py to generate the parser in pytokenize.py 15.5 +from pyparser.automata import DFA, DEFAULT 15.6 + 15.7 +class EMPTY: pass 15.8 + 15.9 +def newArcPair (states, transitionLabel): 15.10 + s1Index = len(states) 15.11 + s2Index = s1Index + 1 15.12 + states.append([(transitionLabel, s2Index)]) 15.13 + states.append([]) 15.14 + return s1Index, s2Index 15.15 + 15.16 +# ______________________________________________________________________ 15.17 + 15.18 +def chain (states, *stateIndexPairs): 15.19 + if len(stateIndexPairs) > 1: 15.20 + start, lastFinish = stateIndexPairs[0] 15.21 + for nStart, nFinish in stateIndexPairs[1:]: 15.22 + states[lastFinish].append((EMPTY, nStart)) 15.23 + lastFinish = nFinish 15.24 + return start, nFinish 15.25 + else: 15.26 + return stateIndexPairs[0] 15.27 + 15.28 + 15.29 +# ______________________________________________________________________ 15.30 + 15.31 +def chainStr (states, str): 15.32 + return chain(states, *map(lambda x : newArcPair(states, x), str)) 15.33 + 15.34 +# ______________________________________________________________________ 15.35 + 15.36 +def notChainStr (states, str): 15.37 + """XXX I'm not sure this is how it should be done, but I'm going to 15.38 + try it anyway. Note that for this case, I require only single character 15.39 + arcs, since I would have to basically invert all accepting states and 15.40 + non-accepting states of any sub-NFA's. 15.41 + """ 15.42 + assert len(str) > 0 15.43 + arcs = map(lambda x : newArcPair(states, x), str) 15.44 + finish = len(states) 15.45 + states.append([]) 15.46 + start, lastFinish = arcs[0] 15.47 + states[start].append((EMPTY, finish)) 15.48 + for crntStart, crntFinish in arcs[1:]: 15.49 + states[lastFinish].append((EMPTY, crntStart)) 15.50 + states[crntStart].append((EMPTY, finish)) 15.51 + return start, finish 15.52 + 15.53 +# ______________________________________________________________________ 15.54 + 15.55 +def group (states, *stateIndexPairs): 15.56 + if len(stateIndexPairs) > 1: 15.57 + start = len(states) 15.58 + finish = start + 1 15.59 + startList = [] 15.60 + states.append(startList) 15.61 + states.append([]) 15.62 + for eStart, eFinish in stateIndexPairs: 15.63 + startList.append((EMPTY, eStart)) 15.64 + states[eFinish].append((EMPTY, finish)) 15.65 + return start, finish 15.66 + else: 15.67 + return stateIndexPairs[0] 15.68 + 15.69 +# ______________________________________________________________________ 15.70 + 15.71 +def groupStr (states, str): 15.72 + return group(states, *map(lambda x : newArcPair(states, x), str)) 15.73 + 15.74 +# ______________________________________________________________________ 15.75 + 15.76 +def notGroup (states, *stateIndexPairs): 15.77 + """Like group, but will add a DEFAULT transition to a new end state, 15.78 + causing anything in the group to not match by going to a dead state. 15.79 + XXX I think this is right... 
15.80 + """ 15.81 + start, dead = group(states, *stateIndexPairs) 15.82 + finish = len(states) 15.83 + states.append([]) 15.84 + states[start].append((DEFAULT, finish)) 15.85 + return start, finish 15.86 + 15.87 +# ______________________________________________________________________ 15.88 + 15.89 +def notGroupStr (states, str): 15.90 + return notGroup(states, *map(lambda x : newArcPair(states, x), str)) 15.91 +# ______________________________________________________________________ 15.92 + 15.93 +def any (states, *stateIndexPairs): 15.94 + start, finish = group(states, *stateIndexPairs) 15.95 + states[finish].append((EMPTY, start)) 15.96 + return start, start 15.97 + 15.98 +# ______________________________________________________________________ 15.99 + 15.100 +def maybe (states, *stateIndexPairs): 15.101 + start, finish = group(states, *stateIndexPairs) 15.102 + states[start].append((EMPTY, finish)) 15.103 + return start, finish 15.104 + 15.105 +# ______________________________________________________________________ 15.106 + 15.107 +def atleastonce (states, *stateIndexPairs): 15.108 + start, finish = group(states, *stateIndexPairs) 15.109 + states[finish].append((EMPTY, start)) 15.110 + return start, finish 15.111 + 15.112 +# ______________________________________________________________________ 15.113 + 15.114 +def closure (states, start, result = 0L): 15.115 + if None == result: 15.116 + result = 0L 15.117 + if 0 == (result & (1L << start)): 15.118 + result |= (1L << start) 15.119 + for label, arrow in states[start]: 15.120 + if label == EMPTY: 15.121 + result |= closure(states, arrow, result) 15.122 + return result 15.123 + 15.124 +# ______________________________________________________________________ 15.125 + 15.126 +def nfaToDfa (states, start, finish): 15.127 + tempStates = [] 15.128 + startClosure = closure(states, start) 15.129 + crntTempState = [startClosure, [], 0 != (startClosure & (1L << finish))] 15.130 + tempStates.append(crntTempState) 15.131 + index = 0 15.132 + while index < len(tempStates): 15.133 + crntTempState = tempStates[index] 15.134 + crntClosure, crntArcs, crntAccept = crntTempState 15.135 + for index2 in range(0, len(states)): 15.136 + if 0 != (crntClosure & (1L << index2)): 15.137 + for label, nfaArrow in states[index2]: 15.138 + if label == EMPTY: 15.139 + continue 15.140 + foundTempArc = False 15.141 + for tempArc in crntArcs: 15.142 + if tempArc[0] == label: 15.143 + foundTempArc = True 15.144 + break 15.145 + if not foundTempArc: 15.146 + tempArc = [label, -1, 0L] 15.147 + crntArcs.append(tempArc) 15.148 + tempArc[2] = closure(states, nfaArrow, tempArc[2]) 15.149 + for arcIndex in range(0, len(crntArcs)): 15.150 + label, arrow, targetStates = crntArcs[arcIndex] 15.151 + targetFound = False 15.152 + arrow = 0 15.153 + for destTempState in tempStates: 15.154 + if destTempState[0] == targetStates: 15.155 + targetFound = True 15.156 + break 15.157 + arrow += 1 15.158 + if not targetFound: 15.159 + assert arrow == len(tempStates) 15.160 + newState = [targetStates, [], 0 != (targetStates & 15.161 + (1L << finish))] 15.162 + tempStates.append(newState) 15.163 + crntArcs[arcIndex][1] = arrow 15.164 + index += 1 15.165 + tempStates = simplifyTempDfa(tempStates) 15.166 + states = finalizeTempDfa(tempStates) 15.167 + return states 15.168 + 15.169 +# ______________________________________________________________________ 15.170 + 15.171 +def sameState (s1, s2): 15.172 + """sameState(s1, s2) 15.173 + Note: 15.174 + state := [ nfaclosure : Long, [ arc ], accept : 
Boolean ] 15.175 + arc := [ label, arrow : Int, nfaClosure : Long ] 15.176 + """ 15.177 + if (len(s1[1]) != len(s2[1])) or (s1[2] != s2[2]): 15.178 + return False 15.179 + for arcIndex in range(0, len(s1[1])): 15.180 + arc1 = s1[1][arcIndex] 15.181 + arc2 = s2[1][arcIndex] 15.182 + if arc1[:-1] != arc2[:-1]: 15.183 + return False 15.184 + return True 15.185 + 15.186 +# ______________________________________________________________________ 15.187 + 15.188 +def simplifyTempDfa (tempStates): 15.189 + """simplifyTempDfa (tempStates) 15.190 + """ 15.191 + changes = True 15.192 + deletedStates = [] 15.193 + while changes: 15.194 + changes = False 15.195 + for i in range(1, len(tempStates)): 15.196 + if i in deletedStates: 15.197 + continue 15.198 + for j in range(0, i): 15.199 + if j in deletedStates: 15.200 + continue 15.201 + if sameState(tempStates[i], tempStates[j]): 15.202 + deletedStates.append(i) 15.203 + for k in range(0, len(tempStates)): 15.204 + if k in deletedStates: 15.205 + continue 15.206 + for arc in tempStates[k][1]: 15.207 + if arc[1] == i: 15.208 + arc[1] = j 15.209 + changes = True 15.210 + break 15.211 + for stateIndex in deletedStates: 15.212 + tempStates[stateIndex] = None 15.213 + return tempStates 15.214 +# ______________________________________________________________________ 15.215 + 15.216 +def finalizeTempDfa (tempStates): 15.217 + """finalizeTempDfa (tempStates) 15.218 + 15.219 + Input domain: 15.220 + tempState := [ nfaClosure : Long, [ tempArc ], accept : Boolean ] 15.221 + tempArc := [ label, arrow, nfaClosure ] 15.222 + 15.223 + Output domain: 15.224 + state := [ arcMap, accept : Boolean ] 15.225 + """ 15.226 + states = [] 15.227 + accepts = [] 15.228 + stateMap = {} 15.229 + tempIndex = 0 15.230 + for tempIndex in range(0, len(tempStates)): 15.231 + tempState = tempStates[tempIndex] 15.232 + if None != tempState: 15.233 + stateMap[tempIndex] = len(states) 15.234 + states.append({}) 15.235 + accepts.append(tempState[2]) 15.236 + for tempIndex in stateMap.keys(): 15.237 + stateBitset, tempArcs, accepting = tempStates[tempIndex] 15.238 + newIndex = stateMap[tempIndex] 15.239 + arcMap = states[newIndex] 15.240 + for tempArc in tempArcs: 15.241 + arcMap[tempArc[0]] = stateMap[tempArc[1]] 15.242 + return states, accepts 15.243 +
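The builders above grow an NFA as a flat list of arc lists, and nfaToDfa() converts it into the dict-based states and accept flags that automata.DFA consumes. A toy sketch (the pattern is hypothetical, and recognize() is assumed to return the offset just past the longest match or -1, matching how pytokenizer.py later in this changeset uses pseudoDFA.recognize):

    from pyparser.pylexer import groupStr, atleastonce, nfaToDfa
    from pyparser.automata import DFA

    # NFA for one or more of the letters a, b, c.
    states = []
    start, finish = atleastonce(states, groupStr(states, "abc"))
    dfaStates, dfaAccepts = nfaToDfa(states, start, finish)
    letters = DFA(dfaStates, dfaAccepts)

    print letters.recognize("abba!")    # 4: stops just before the '!'
    print letters.recognize("!")        # -1: no match at position 0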
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 16.2 +++ b/pyparser/pyparse.py Sun Jan 08 20:20:39 2017 +0100 16.3 @@ -0,0 +1,200 @@ 16.4 +from pyparser import parser, pytokenizer, pygram, error 16.5 +from pyparser import consts 16.6 + 16.7 +def recode_to_utf8(bytes, encoding): 16.8 + text = bytes.decode(encoding) 16.9 + if not isinstance(text, unicode): 16.10 + raise error.SyntaxError("codec did not return a unicode object") 16.11 + recoded = text.encode("utf-8") 16.12 + return recoded 16.13 + 16.14 +def _normalize_encoding(encoding): 16.15 + """returns normalized name for <encoding> 16.16 + 16.17 + see dist/src/Parser/tokenizer.c 'get_normal_name()' 16.18 + for implementation details / reference 16.19 + 16.20 + NOTE: for now, parser.suite() raises a MemoryError when 16.21 + a bad encoding is used. (SF bug #979739) 16.22 + """ 16.23 + if encoding is None: 16.24 + return None 16.25 + # lower() + '_' / '-' conversion 16.26 + encoding = encoding.replace('_', '-').lower() 16.27 + if encoding == 'utf-8' or encoding.startswith('utf-8-'): 16.28 + return 'utf-8' 16.29 + for variant in ['latin-1', 'iso-latin-1', 'iso-8859-1']: 16.30 + if (encoding == variant or 16.31 + encoding.startswith(variant + '-')): 16.32 + return 'iso-8859-1' 16.33 + return encoding 16.34 + 16.35 +def _check_for_encoding(s): 16.36 + eol = s.find('\n') 16.37 + if eol < 0: 16.38 + return _check_line_for_encoding(s)[0] 16.39 + enc, again = _check_line_for_encoding(s[:eol]) 16.40 + if enc or not again: 16.41 + return enc 16.42 + eol2 = s.find('\n', eol + 1) 16.43 + if eol2 < 0: 16.44 + return _check_line_for_encoding(s[eol + 1:])[0] 16.45 + return _check_line_for_encoding(s[eol + 1:eol2])[0] 16.46 + 16.47 + 16.48 +def _check_line_for_encoding(line): 16.49 + """returns the declared encoding or None""" 16.50 + i = 0 16.51 + for i in range(len(line)): 16.52 + if line[i] == '#': 16.53 + break 16.54 + if line[i] not in ' \t\014': 16.55 + return None, False # Not a comment, don't read the second line. 16.56 + return pytokenizer.match_encoding_declaration(line[i:]), True 16.57 + 16.58 + 16.59 +class CompileInfo(object): 16.60 + """Stores information about the source being compiled. 16.61 + 16.62 + * filename: The filename of the source. 16.63 + * mode: The parse mode to use. ('exec', 'eval', or 'single') 16.64 + * flags: Parser and compiler flags. 16.65 + * encoding: The source encoding. 16.66 + """ 16.67 + 16.68 + def __init__(self, filename, mode="exec", flags=0): 16.69 + self.filename = filename 16.70 + self.mode = mode 16.71 + self.encoding = None 16.72 + self.flags = flags 16.73 + 16.74 + 16.75 +_targets = { 16.76 +'eval' : pygram.syms.eval_input, 16.77 +'single' : pygram.syms.single_input, 16.78 +'exec' : pygram.syms.file_input, 16.79 +} 16.80 + 16.81 +class PythonParser(parser.Parser): 16.82 + 16.83 + def __init__(self, grammar=pygram.python_grammar): 16.84 + parser.Parser.__init__(self, grammar) 16.85 + 16.86 + def parse_source(self, textsrc, compile_info): 16.87 + """Main entry point for parsing Python source. 16.88 + 16.89 + Everything from decoding the source to tokenizing to building the parse 16.90 + tree is handled here. 16.91 + """ 16.92 + # Detect source encoding. 16.93 + enc = None 16.94 + if textsrc.startswith("\xEF\xBB\xBF"): 16.95 + textsrc = textsrc[3:] 16.96 + enc = 'utf-8' 16.97 + # If an encoding is explicitly given check that it is utf-8. 
16.98 + decl_enc = _check_for_encoding(textsrc) 16.99 + if decl_enc and decl_enc != "utf-8": 16.100 + raise error.SyntaxError("UTF-8 BOM with %s coding cookie" % decl_enc, 16.101 + filename=compile_info.filename) 16.102 + elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8: 16.103 + enc = 'utf-8' 16.104 + if _check_for_encoding(textsrc) is not None: 16.105 + raise error.SyntaxError("coding declaration in unicode string", 16.106 + filename=compile_info.filename) 16.107 + else: 16.108 + enc = _normalize_encoding(_check_for_encoding(textsrc)) 16.109 + if enc is not None and enc not in ('utf-8', 'iso-8859-1'): 16.110 + try: 16.111 + textsrc = recode_to_utf8(textsrc, enc) 16.112 + except LookupError as e: 16.113 + # if the codec is not found, LookupError is raised. 16.114 + raise error.SyntaxError("Unknown encoding: %s" % enc, 16.115 + filename=compile_info.filename) 16.116 + # Transform unicode errors into SyntaxError 16.117 + except UnicodeDecodeError as e: 16.118 + message = str(e) 16.119 + raise error.SyntaxError(message) 16.120 + 16.121 + flags = compile_info.flags 16.122 + 16.123 + # The tokenizer is very picky about how it wants its input. 16.124 + source_lines = textsrc.splitlines(True) 16.125 + if source_lines and not source_lines[-1].endswith("\n"): 16.126 + source_lines[-1] += '\n' 16.127 + if textsrc and textsrc[-1] == "\n": 16.128 + flags &= ~consts.PyCF_DONT_IMPLY_DEDENT 16.129 + 16.130 + self.prepare(_targets[compile_info.mode]) 16.131 + tp = 0 16.132 + try: 16.133 + try: 16.134 + # Note: we no longer pass the CO_FUTURE_* to the tokenizer, 16.135 + # which is expected to work independently of them. It's 16.136 + # certainly the case for all futures in Python <= 2.7. 16.137 + tokens = pytokenizer.generate_tokens(source_lines, flags) 16.138 + 16.139 + self.grammar = pygram.python_grammar 16.140 + 16.141 + for tp, value, lineno, column, line in tokens: 16.142 + if self.add_token(tp, value, lineno, column, line): 16.143 + break 16.144 + except error.TokenError as e: 16.145 + e.filename = compile_info.filename 16.146 + raise 16.147 + except parser.ParseError as e: 16.148 + # Catch parse errors, pretty them up and reraise them as a 16.149 + # SyntaxError. 16.150 + new_err = error.IndentationError 16.151 + if tp == pygram.tokens.INDENT: 16.152 + msg = "unexpected indent" 16.153 + elif e.expected == pygram.tokens.INDENT: 16.154 + msg = "expected an indented block" 16.155 + else: 16.156 + new_err = error.SyntaxError 16.157 + msg = "invalid syntax" 16.158 + raise new_err(msg, e.lineno, e.column, e.line, 16.159 + compile_info.filename) 16.160 + else: 16.161 + tree = self.root 16.162 + finally: 16.163 + # Avoid hanging onto the tree. 
16.164 + self.root = None 16.165 + if enc is not None: 16.166 + compile_info.encoding = enc 16.167 + return tree 16.168 + 16.169 +def parse(filename): 16.170 + """returns the parsed contents of <filename>""" 16.171 + info = CompileInfo(filename) 16.172 + f = open(filename) 16.173 + try: 16.174 + return PythonParser().parse_source(f.read(), info) 16.175 + finally: 16.176 + f.close() 16.177 + 16.178 +def suite(text): 16.179 + """returns the parsed form of the given program <text>""" 16.180 + info = CompileInfo("<stdin>") 16.181 + return PythonParser().parse_source(text, info) 16.182 + 16.183 +def expr(text): 16.184 + """returns the parsed form of the given expression <text>""" 16.185 + info = CompileInfo("<stdin>", "single") 16.186 + return PythonParser().parse_source(text, info) 16.187 + 16.188 +def st2tuple(tree, line_info=True, col_info=False): 16.189 + """returns <tree> in tuple form for the compiler package""" 16.190 + if isinstance(tree, parser.AbstractNonterminal): 16.191 + l = [tree.type] 16.192 + for i in range(0, tree.num_children()): 16.193 + l.append(st2tuple(tree.get_child(i))) 16.194 + return tuple(l) 16.195 + elif isinstance(tree, parser.Terminal): 16.196 + l = [tree.type, tree.value] 16.197 + if line_info: 16.198 + l.append(tree.get_lineno()) 16.199 + if col_info: 16.200 + l.append(tree.get_column()) 16.201 + return tuple(l) 16.202 + else: 16.203 + raise TypeError, tree
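parse(), suite() and expr() are the public entry points here, and st2tuple() converts the resulting tree into the nested tuples that the modified compiler package expects. A quick usage sketch (Python 2, with the grammar data installed):

    from pyparser import pyparse, pygram

    tree = pyparse.suite("x = 1\n")
    tup = pyparse.st2tuple(tree)
    # tup nests (id, ...) tuples with values and line numbers on the
    # terminals; its head is the start symbol's id.
    print tup[0] == pygram.syms.file_input    # True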
17.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 17.2 +++ b/pyparser/pytoken.py Sun Jan 08 20:20:39 2017 +0100 17.3 @@ -0,0 +1,71 @@ 17.4 +"""Python token definitions.""" 17.5 + 17.6 +python_tokens = {} 17.7 +python_opmap = {} 17.8 + 17.9 +def _add_tok(name, *values): 17.10 + index = len(python_tokens) 17.11 + assert index < 256 17.12 + python_tokens[name] = index 17.13 + for value in values: 17.14 + python_opmap[value] = index 17.15 + 17.16 +_add_tok('ENDMARKER') 17.17 +_add_tok('NAME') 17.18 +_add_tok('NUMBER') 17.19 +_add_tok('STRING') 17.20 +_add_tok('NEWLINE') 17.21 +_add_tok('INDENT') 17.22 +_add_tok('DEDENT') 17.23 +_add_tok('LPAR', "(") 17.24 +_add_tok('RPAR', ")") 17.25 +_add_tok('LSQB', "[") 17.26 +_add_tok('RSQB', "]") 17.27 +_add_tok('COLON', ":") 17.28 +_add_tok('COMMA', "," ) 17.29 +_add_tok('SEMI', ";" ) 17.30 +_add_tok('PLUS', "+" ) 17.31 +_add_tok('MINUS', "-" ) 17.32 +_add_tok('STAR', "*" ) 17.33 +_add_tok('SLASH', "/" ) 17.34 +_add_tok('VBAR', "|" ) 17.35 +_add_tok('AMPER', "&" ) 17.36 +_add_tok('LESS', "<" ) 17.37 +_add_tok('GREATER', ">" ) 17.38 +_add_tok('EQUAL', "=" ) 17.39 +_add_tok('DOT', "." ) 17.40 +_add_tok('PERCENT', "%" ) 17.41 +_add_tok('BACKQUOTE', "`" ) 17.42 +_add_tok('LBRACE', "{" ) 17.43 +_add_tok('RBRACE', "}" ) 17.44 +_add_tok('EQEQUAL', "==" ) 17.45 +_add_tok('NOTEQUAL', "!=", "<>" ) 17.46 +_add_tok('LESSEQUAL', "<=" ) 17.47 +_add_tok('GREATEREQUAL', ">=" ) 17.48 +_add_tok('TILDE', "~" ) 17.49 +_add_tok('CIRCUMFLEX', "^" ) 17.50 +_add_tok('LEFTSHIFT', "<<" ) 17.51 +_add_tok('RIGHTSHIFT', ">>" ) 17.52 +_add_tok('DOUBLESTAR', "**" ) 17.53 +_add_tok('PLUSEQUAL', "+=" ) 17.54 +_add_tok('MINEQUAL', "-=" ) 17.55 +_add_tok('STAREQUAL', "*=" ) 17.56 +_add_tok('SLASHEQUAL', "/=" ) 17.57 +_add_tok('PERCENTEQUAL', "%=" ) 17.58 +_add_tok('AMPEREQUAL', "&=" ) 17.59 +_add_tok('VBAREQUAL', "|=" ) 17.60 +_add_tok('CIRCUMFLEXEQUAL', "^=" ) 17.61 +_add_tok('LEFTSHIFTEQUAL', "<<=" ) 17.62 +_add_tok('RIGHTSHIFTEQUAL', ">>=" ) 17.63 +_add_tok('DOUBLESTAREQUAL', "**=" ) 17.64 +_add_tok('DOUBLESLASH', "//" ) 17.65 +_add_tok('DOUBLESLASHEQUAL',"//=" ) 17.66 +_add_tok('AT', "@" ) 17.67 +_add_tok('OP') 17.68 +_add_tok('ERRORTOKEN') 17.69 + 17.70 +# extra PyPy-specific tokens 17.71 +_add_tok("COMMENT") 17.72 +_add_tok("NL") 17.73 + 17.74 +del _add_tok
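The two tables give every token a small integer id (the assert keeps them below 256, leaving ids from 256 upwards free for grammar symbols, which pygram.py relies on) and map each operator spelling onto its id. For example:

    from pyparser.pytoken import python_tokens, python_opmap

    print python_tokens["ENDMARKER"]                  # 0: ids follow insertion order
    print python_opmap["!="] == python_opmap["<>"]    # True: one shared NOTEQUAL id
    print python_opmap["+"] == python_tokens["PLUS"]  # True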
18.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 18.2 +++ b/pyparser/pytokenize.py Sun Jan 08 20:20:39 2017 +0100 18.3 @@ -0,0 +1,375 @@ 18.4 +# ______________________________________________________________________ 18.5 +"""Module pytokenize 18.6 + 18.7 +THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED 18.8 +TO BE ANNOTABLE (Mainly made lists homogeneous) 18.9 + 18.10 +This is a modified version of Ka-Ping Yee's tokenize module found in the 18.11 +Python standard library. 18.12 + 18.13 +The primary modification is the removal of the tokenizer's dependence on the 18.14 +standard Python regular expression module, which is written in C. The regular 18.15 +expressions have been replaced with hand built DFA's using the 18.16 +basil.util.automata module. 18.17 + 18.18 +$Id: pytokenize.py,v 1.3 2003/10/03 16:31:53 jriehl Exp $ 18.19 +""" 18.20 +# ______________________________________________________________________ 18.21 + 18.22 +from pyparser import automata 18.23 + 18.24 +__all__ = [ "tokenize" ] 18.25 + 18.26 +# ______________________________________________________________________ 18.27 +# Automatically generated DFA's 18.28 + 18.29 +accepts = [True, True, True, True, True, True, True, True, 18.30 + True, True, False, True, True, True, True, False, 18.31 + False, False, True, False, False, True, False, 18.32 + False, True, False, True, False, True, False, 18.33 + False, True, False, False, True, True, True, 18.34 + False, False, True, False, False, False, True] 18.35 +states = [ 18.36 + # 0 18.37 + {'\t': 0, '\n': 13, '\x0c': 0, 18.38 + '\r': 14, ' ': 0, '!': 10, '"': 16, 18.39 + '#': 18, '%': 12, '&': 12, "'": 15, 18.40 + '(': 13, ')': 13, '*': 7, '+': 12, 18.41 + ',': 13, '-': 12, '.': 6, '/': 11, 18.42 + '0': 4, '1': 5, '2': 5, '3': 5, 18.43 + '4': 5, '5': 5, '6': 5, '7': 5, 18.44 + '8': 5, '9': 5, ':': 13, ';': 13, 18.45 + '<': 9, '=': 12, '>': 8, '@': 13, 18.46 + 'A': 1, 'B': 2, 'C': 1, 'D': 1, 18.47 + 'E': 1, 'F': 1, 'G': 1, 'H': 1, 18.48 + 'I': 1, 'J': 1, 'K': 1, 'L': 1, 18.49 + 'M': 1, 'N': 1, 'O': 1, 'P': 1, 18.50 + 'Q': 1, 'R': 3, 'S': 1, 'T': 1, 18.51 + 'U': 2, 'V': 1, 'W': 1, 'X': 1, 18.52 + 'Y': 1, 'Z': 1, '[': 13, '\\': 17, 18.53 + ']': 13, '^': 12, '_': 1, '`': 13, 18.54 + 'a': 1, 'b': 2, 'c': 1, 'd': 1, 18.55 + 'e': 1, 'f': 1, 'g': 1, 'h': 1, 18.56 + 'i': 1, 'j': 1, 'k': 1, 'l': 1, 18.57 + 'm': 1, 'n': 1, 'o': 1, 'p': 1, 18.58 + 'q': 1, 'r': 3, 's': 1, 't': 1, 18.59 + 'u': 2, 'v': 1, 'w': 1, 'x': 1, 18.60 + 'y': 1, 'z': 1, '{': 13, '|': 12, 18.61 + '}': 13, '~': 13}, 18.62 + # 1 18.63 + {'0': 1, '1': 1, '2': 1, '3': 1, 18.64 + '4': 1, '5': 1, '6': 1, '7': 1, 18.65 + '8': 1, '9': 1, 'A': 1, 'B': 1, 18.66 + 'C': 1, 'D': 1, 'E': 1, 'F': 1, 18.67 + 'G': 1, 'H': 1, 'I': 1, 'J': 1, 18.68 + 'K': 1, 'L': 1, 'M': 1, 'N': 1, 18.69 + 'O': 1, 'P': 1, 'Q': 1, 'R': 1, 18.70 + 'S': 1, 'T': 1, 'U': 1, 'V': 1, 18.71 + 'W': 1, 'X': 1, 'Y': 1, 'Z': 1, 18.72 + '_': 1, 'a': 1, 'b': 1, 'c': 1, 18.73 + 'd': 1, 'e': 1, 'f': 1, 'g': 1, 18.74 + 'h': 1, 'i': 1, 'j': 1, 'k': 1, 18.75 + 'l': 1, 'm': 1, 'n': 1, 'o': 1, 18.76 + 'p': 1, 'q': 1, 'r': 1, 's': 1, 18.77 + 't': 1, 'u': 1, 'v': 1, 'w': 1, 18.78 + 'x': 1, 'y': 1, 'z': 1}, 18.79 + # 2 18.80 + {'"': 16, "'": 15, '0': 1, '1': 1, 18.81 + '2': 1, '3': 1, '4': 1, '5': 1, 18.82 + '6': 1, '7': 1, '8': 1, '9': 1, 18.83 + 'A': 1, 'B': 1, 'C': 1, 'D': 1, 18.84 + 'E': 1, 'F': 1, 'G': 1, 'H': 1, 18.85 + 'I': 1, 'J': 1, 'K': 1, 'L': 1, 18.86 + 'M': 1, 'N': 1, 'O': 1, 'P': 1, 18.87 + 'Q': 1, 'R': 3, 'S': 1, 'T': 1, 18.88 + 'U': 1, 'V': 1, 
'W': 1, 'X': 1, 18.89 + 'Y': 1, 'Z': 1, '_': 1, 'a': 1, 18.90 + 'b': 1, 'c': 1, 'd': 1, 'e': 1, 18.91 + 'f': 1, 'g': 1, 'h': 1, 'i': 1, 18.92 + 'j': 1, 'k': 1, 'l': 1, 'm': 1, 18.93 + 'n': 1, 'o': 1, 'p': 1, 'q': 1, 18.94 + 'r': 3, 's': 1, 't': 1, 'u': 1, 18.95 + 'v': 1, 'w': 1, 'x': 1, 'y': 1, 18.96 + 'z': 1}, 18.97 + # 3 18.98 + {'"': 16, "'": 15, '0': 1, '1': 1, 18.99 + '2': 1, '3': 1, '4': 1, '5': 1, 18.100 + '6': 1, '7': 1, '8': 1, '9': 1, 18.101 + 'A': 1, 'B': 1, 'C': 1, 'D': 1, 18.102 + 'E': 1, 'F': 1, 'G': 1, 'H': 1, 18.103 + 'I': 1, 'J': 1, 'K': 1, 'L': 1, 18.104 + 'M': 1, 'N': 1, 'O': 1, 'P': 1, 18.105 + 'Q': 1, 'R': 1, 'S': 1, 'T': 1, 18.106 + 'U': 1, 'V': 1, 'W': 1, 'X': 1, 18.107 + 'Y': 1, 'Z': 1, '_': 1, 'a': 1, 18.108 + 'b': 1, 'c': 1, 'd': 1, 'e': 1, 18.109 + 'f': 1, 'g': 1, 'h': 1, 'i': 1, 18.110 + 'j': 1, 'k': 1, 'l': 1, 'm': 1, 18.111 + 'n': 1, 'o': 1, 'p': 1, 'q': 1, 18.112 + 'r': 1, 's': 1, 't': 1, 'u': 1, 18.113 + 'v': 1, 'w': 1, 'x': 1, 'y': 1, 18.114 + 'z': 1}, 18.115 + # 4 18.116 + {'.': 24, '0': 21, '1': 21, '2': 21, 18.117 + '3': 21, '4': 21, '5': 21, '6': 21, 18.118 + '7': 21, '8': 23, '9': 23, 'B': 22, 18.119 + 'E': 25, 'J': 13, 'L': 13, 'O': 20, 18.120 + 'X': 19, 'b': 22, 'e': 25, 'j': 13, 18.121 + 'l': 13, 'o': 20, 'x': 19}, 18.122 + # 5 18.123 + {'.': 24, '0': 5, '1': 5, '2': 5, 18.124 + '3': 5, '4': 5, '5': 5, '6': 5, 18.125 + '7': 5, '8': 5, '9': 5, 'E': 25, 18.126 + 'J': 13, 'L': 13, 'e': 25, 'j': 13, 18.127 + 'l': 13}, 18.128 + # 6 18.129 + {'0': 26, '1': 26, '2': 26, '3': 26, 18.130 + '4': 26, '5': 26, '6': 26, '7': 26, 18.131 + '8': 26, '9': 26}, 18.132 + # 7 18.133 + {'*': 12, '=': 13}, 18.134 + # 8 18.135 + {'=': 13, '>': 12}, 18.136 + # 9 18.137 + {'<': 12, '=': 13, '>': 13}, 18.138 + # 10 18.139 + {'=': 13}, 18.140 + # 11 18.141 + {'/': 12, '=': 13}, 18.142 + # 12 18.143 + {'=': 13}, 18.144 + # 13 18.145 + {}, 18.146 + # 14 18.147 + {'\n': 13}, 18.148 + # 15 18.149 + {automata.DEFAULT: 30, '\n': 27, 18.150 + '\r': 27, "'": 28, '\\': 29}, 18.151 + # 16 18.152 + {automata.DEFAULT: 33, '\n': 27, 18.153 + '\r': 27, '"': 31, '\\': 32}, 18.154 + # 17 18.155 + {'\n': 13, '\r': 14}, 18.156 + # 18 18.157 + {automata.DEFAULT: 18, '\n': 27, '\r': 27}, 18.158 + # 19 18.159 + {'0': 34, '1': 34, '2': 34, '3': 34, 18.160 + '4': 34, '5': 34, '6': 34, '7': 34, 18.161 + '8': 34, '9': 34, 'A': 34, 'B': 34, 18.162 + 'C': 34, 'D': 34, 'E': 34, 'F': 34, 18.163 + 'a': 34, 'b': 34, 'c': 34, 'd': 34, 18.164 + 'e': 34, 'f': 34}, 18.165 + # 20 18.166 + {'0': 35, '1': 35, '2': 35, '3': 35, 18.167 + '4': 35, '5': 35, '6': 35, '7': 35}, 18.168 + # 21 18.169 + {'.': 24, '0': 21, '1': 21, '2': 21, 18.170 + '3': 21, '4': 21, '5': 21, '6': 21, 18.171 + '7': 21, '8': 23, '9': 23, 'E': 25, 18.172 + 'J': 13, 'L': 13, 'e': 25, 'j': 13, 18.173 + 'l': 13}, 18.174 + # 22 18.175 + {'0': 36, '1': 36}, 18.176 + # 23 18.177 + {'.': 24, '0': 23, '1': 23, '2': 23, 18.178 + '3': 23, '4': 23, '5': 23, '6': 23, 18.179 + '7': 23, '8': 23, '9': 23, 'E': 25, 18.180 + 'J': 13, 'e': 25, 'j': 13}, 18.181 + # 24 18.182 + {'0': 24, '1': 24, '2': 24, '3': 24, 18.183 + '4': 24, '5': 24, '6': 24, '7': 24, 18.184 + '8': 24, '9': 24, 'E': 37, 'J': 13, 18.185 + 'e': 37, 'j': 13}, 18.186 + # 25 18.187 + {'+': 38, '-': 38, '0': 39, '1': 39, 18.188 + '2': 39, '3': 39, '4': 39, '5': 39, 18.189 + '6': 39, '7': 39, '8': 39, '9': 39}, 18.190 + # 26 18.191 + {'0': 26, '1': 26, '2': 26, '3': 26, 18.192 + '4': 26, '5': 26, '6': 26, '7': 26, 18.193 + '8': 26, '9': 26, 'E': 37, 'J': 13, 18.194 + 'e': 37, 'j': 13}, 18.195 + 
# 27 18.196 + {}, 18.197 + # 28 18.198 + {"'": 13}, 18.199 + # 29 18.200 + {automata.DEFAULT: 40, '\n': 13, '\r': 14}, 18.201 + # 30 18.202 + {automata.DEFAULT: 30, '\n': 27, 18.203 + '\r': 27, "'": 13, '\\': 29}, 18.204 + # 31 18.205 + {'"': 13}, 18.206 + # 32 18.207 + {automata.DEFAULT: 41, '\n': 13, '\r': 14}, 18.208 + # 33 18.209 + {automata.DEFAULT: 33, '\n': 27, 18.210 + '\r': 27, '"': 13, '\\': 32}, 18.211 + # 34 18.212 + {'0': 34, '1': 34, '2': 34, '3': 34, 18.213 + '4': 34, '5': 34, '6': 34, '7': 34, 18.214 + '8': 34, '9': 34, 'A': 34, 'B': 34, 18.215 + 'C': 34, 'D': 34, 'E': 34, 'F': 34, 18.216 + 'L': 13, 'a': 34, 'b': 34, 'c': 34, 18.217 + 'd': 34, 'e': 34, 'f': 34, 'l': 13}, 18.218 + # 35 18.219 + {'0': 35, '1': 35, '2': 35, '3': 35, 18.220 + '4': 35, '5': 35, '6': 35, '7': 35, 18.221 + 'L': 13, 'l': 13}, 18.222 + # 36 18.223 + {'0': 36, '1': 36, 'L': 13, 'l': 13}, 18.224 + # 37 18.225 + {'+': 42, '-': 42, '0': 43, '1': 43, 18.226 + '2': 43, '3': 43, '4': 43, '5': 43, 18.227 + '6': 43, '7': 43, '8': 43, '9': 43}, 18.228 + # 38 18.229 + {'0': 39, '1': 39, '2': 39, '3': 39, 18.230 + '4': 39, '5': 39, '6': 39, '7': 39, 18.231 + '8': 39, '9': 39}, 18.232 + # 39 18.233 + {'0': 39, '1': 39, '2': 39, '3': 39, 18.234 + '4': 39, '5': 39, '6': 39, '7': 39, 18.235 + '8': 39, '9': 39, 'J': 13, 'j': 13}, 18.236 + # 40 18.237 + {automata.DEFAULT: 40, '\n': 27, 18.238 + '\r': 27, "'": 13, '\\': 29}, 18.239 + # 41 18.240 + {automata.DEFAULT: 41, '\n': 27, 18.241 + '\r': 27, '"': 13, '\\': 32}, 18.242 + # 42 18.243 + {'0': 43, '1': 43, '2': 43, '3': 43, 18.244 + '4': 43, '5': 43, '6': 43, '7': 43, 18.245 + '8': 43, '9': 43}, 18.246 + # 43 18.247 + {'0': 43, '1': 43, '2': 43, '3': 43, 18.248 + '4': 43, '5': 43, '6': 43, '7': 43, 18.249 + '8': 43, '9': 43, 'J': 13, 'j': 13}, 18.250 + ] 18.251 +pseudoDFA = automata.DFA(states, accepts) 18.252 + 18.253 +accepts = [False, False, False, False, False, True] 18.254 +states = [ 18.255 + # 0 18.256 + {automata.DEFAULT: 0, '"': 1, '\\': 2}, 18.257 + # 1 18.258 + {automata.DEFAULT: 4, '"': 3, '\\': 2}, 18.259 + # 2 18.260 + {automata.DEFAULT: 4}, 18.261 + # 3 18.262 + {automata.DEFAULT: 4, '"': 5, '\\': 2}, 18.263 + # 4 18.264 + {automata.DEFAULT: 4, '"': 1, '\\': 2}, 18.265 + # 5 18.266 + {automata.DEFAULT: 4, '"': 5, '\\': 2}, 18.267 + ] 18.268 +double3DFA = automata.NonGreedyDFA(states, accepts) 18.269 + 18.270 +accepts = [False, False, False, False, False, True] 18.271 +states = [ 18.272 + # 0 18.273 + {automata.DEFAULT: 0, "'": 1, '\\': 2}, 18.274 + # 1 18.275 + {automata.DEFAULT: 4, "'": 3, '\\': 2}, 18.276 + # 2 18.277 + {automata.DEFAULT: 4}, 18.278 + # 3 18.279 + {automata.DEFAULT: 4, "'": 5, '\\': 2}, 18.280 + # 4 18.281 + {automata.DEFAULT: 4, "'": 1, '\\': 2}, 18.282 + # 5 18.283 + {automata.DEFAULT: 4, "'": 5, '\\': 2}, 18.284 + ] 18.285 +single3DFA = automata.NonGreedyDFA(states, accepts) 18.286 + 18.287 +accepts = [False, True, False, False] 18.288 +states = [ 18.289 + # 0 18.290 + {automata.DEFAULT: 0, "'": 1, '\\': 2}, 18.291 + # 1 18.292 + {}, 18.293 + # 2 18.294 + {automata.DEFAULT: 3}, 18.295 + # 3 18.296 + {automata.DEFAULT: 3, "'": 1, '\\': 2}, 18.297 + ] 18.298 +singleDFA = automata.DFA(states, accepts) 18.299 + 18.300 +accepts = [False, True, False, False] 18.301 +states = [ 18.302 + # 0 18.303 + {automata.DEFAULT: 0, '"': 1, '\\': 2}, 18.304 + # 1 18.305 + {}, 18.306 + # 2 18.307 + {automata.DEFAULT: 3}, 18.308 + # 3 18.309 + {automata.DEFAULT: 3, '"': 1, '\\': 2}, 18.310 + ] 18.311 +doubleDFA = automata.DFA(states, accepts) 18.312 
+ 18.313 +#_______________________________________________________________________ 18.314 +# End of automatically generated DFA's 18.315 + 18.316 +endDFAs = {"'" : singleDFA, 18.317 + '"' : doubleDFA, 18.318 + 'r' : None, 18.319 + 'R' : None, 18.320 + 'u' : None, 18.321 + 'U' : None, 18.322 + 'b' : None, 18.323 + 'B' : None} 18.324 + 18.325 +for uniPrefix in ("", "u", "U", "b", "B"): 18.326 + for rawPrefix in ("", "r", "R"): 18.327 + prefix = uniPrefix + rawPrefix 18.328 + endDFAs[prefix + "'''"] = single3DFA 18.329 + endDFAs[prefix + '"""'] = double3DFA 18.330 + 18.331 +whiteSpaceStatesAccepts = [True] 18.332 +whiteSpaceStates = [{'\t': 0, ' ': 0, '\x0c': 0}] 18.333 +whiteSpaceDFA = automata.DFA(whiteSpaceStates, whiteSpaceStatesAccepts) 18.334 + 18.335 +# ______________________________________________________________________ 18.336 +# COPIED: 18.337 + 18.338 +triple_quoted = {} 18.339 +for t in ("'''", '"""', 18.340 + "r'''", 'r"""', "R'''", 'R"""', 18.341 + "u'''", 'u"""', "U'''", 'U"""', 18.342 + "b'''", 'b"""', "B'''", 'B"""', 18.343 + "ur'''", 'ur"""', "Ur'''", 'Ur"""', 18.344 + "uR'''", 'uR"""', "UR'''", 'UR"""', 18.345 + "br'''", 'br"""', "Br'''", 'Br"""', 18.346 + "bR'''", 'bR"""', "BR'''", 'BR"""'): 18.347 + triple_quoted[t] = t 18.348 +single_quoted = {} 18.349 +for t in ("'", '"', 18.350 + "r'", 'r"', "R'", 'R"', 18.351 + "u'", 'u"', "U'", 'U"', 18.352 + "b'", 'b"', "B'", 'B"', 18.353 + "ur'", 'ur"', "Ur'", 'Ur"', 18.354 + "uR'", 'uR"', "UR'", 'UR"', 18.355 + "br'", 'br"', "Br'", 'Br"', 18.356 + "bR'", 'bR"', "BR'", 'BR"'): 18.357 + single_quoted[t] = t 18.358 + 18.359 +tabsize = 8 18.360 + 18.361 +# PYPY MODIFICATION: removed TokenError class as it's not needed here 18.362 + 18.363 +# PYPY MODIFICATION: removed StopTokenizing class as it's not needed here 18.364 + 18.365 +# PYPY MODIFICATION: removed printtoken() as it's not needed here 18.366 + 18.367 +# PYPY MODIFICATION: removed tokenize() as it's not needed here 18.368 + 18.369 +# PYPY MODIFICATION: removed tokenize_loop() as it's not needed here 18.370 + 18.371 +# PYPY MODIFICATION: removed generate_tokens() as it was copied / modified 18.372 +# in pythonlexer.py 18.373 + 18.374 +# PYPY MODIFICATION: removed main() as it's not needed here 18.375 + 18.376 +# ______________________________________________________________________ 18.377 +# End of pytokenize.py 18.378 +
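endDFAs maps the opening of a string literal to the machine that finds its end, and every unicode/raw/bytes prefix shares one underlying DFA object. A small sketch of consulting these tables the way pytokenizer.py below does (again assuming recognize() returns the offset just past the match, or -1 when unterminated):

    from pyparser.pytokenize import endDFAs, triple_quoted

    print "uR'''" in triple_quoted             # True
    print endDFAs["b'''"] is endDFAs["'''"]    # True: one shared NonGreedyDFA

    # Scanning the rest of a line after an opening single quote:
    print endDFAs["'"].recognize("abc' + 1\n") # 4: just past the closing quote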
19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
19.2 +++ b/pyparser/pytokenizer.py	Sun Jan 08 20:20:39 2017 +0100
19.3 @@ -0,0 +1,273 @@
19.4 +from pyparser import automata
19.5 +from pyparser.pygram import tokens
19.6 +from pyparser.pytoken import python_opmap
19.7 +from pyparser.error import TokenError, TokenIndentationError
19.8 +from pyparser.pytokenize import tabsize, whiteSpaceDFA, \
19.9 +    triple_quoted, endDFAs, single_quoted, pseudoDFA
19.10 +from pyparser import consts
19.11 +
19.12 +NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
19.13 +NUMCHARS = '0123456789'
19.14 +ALNUMCHARS = NAMECHARS + NUMCHARS
19.15 +EXTENDED_ALNUMCHARS = ALNUMCHARS + '-.'
19.16 +WHITESPACES = ' \t\n\r\v\f'
19.17 +
19.18 +def match_encoding_declaration(comment):
19.19 +    """returns the declared encoding or None
19.20 +
19.21 +    This function is a replacement for:
19.22 +    >>> py_encoding = re.compile(r"coding[:=]\s*([-\w.]+)")
19.23 +    >>> py_encoding.search(comment)
19.24 +    """
19.25 +    index = comment.find('coding')
19.26 +    if index < 0:
19.27 +        return None
19.28 +    next_char = comment[index + 6]
19.29 +    if next_char not in ':=':
19.30 +        return None
19.31 +    end_of_decl = comment[index + 7:]
19.32 +    index = 0
19.33 +    for char in end_of_decl:
19.34 +        if char not in WHITESPACES:
19.35 +            break
19.36 +        index += 1
19.37 +    else:
19.38 +        return None
19.39 +    encoding = ''
19.40 +    for char in end_of_decl[index:]:
19.41 +        if char in EXTENDED_ALNUMCHARS:
19.42 +            encoding += char
19.43 +        else:
19.44 +            break
19.45 +    if encoding != '':
19.46 +        return encoding
19.47 +    return None
19.48 +
19.49 +
19.50 +DUMMY_DFA = automata.DFA([], [])
19.51 +
19.52 +def generate_tokens(lines, flags):
19.53 +    """
19.54 +    This is a rewrite of pypy.module.parser.pytokenize.generate_tokens since
19.55 +    the original function is not RPython (it uses yield). It was also
19.56 +    modified to build and return a list of 5-tuples, each containing:
19.57 +
19.58 +    * the token type
19.59 +    * the token value as a string
19.60 +    * the line number (the real one, counting continuation lines)
19.61 +    * the position of the start of the token on the line
19.62 +    * the whole line as a string.
19.63 +
19.64 +    Original docstring ::
19.65 +
19.66 +        The generate_tokens() generator requires one argument, readline, which
19.67 +        must be a callable object which provides the same interface as the
19.68 +        readline() method of built-in file objects. Each call to the function
19.69 +        should return one line of input as a string.
19.70 +
19.71 +        The generator produces 5-tuples with these members: the token type; the
19.72 +        token string; a 2-tuple (srow, scol) of ints specifying the row and
19.73 +        column where the token begins in the source; a 2-tuple (erow, ecol) of
19.74 +        ints specifying the row and column where the token ends in the source;
19.75 +        and the line on which the token was found. The line passed is the
19.76 +        logical line; continuation lines are included.
19.77 + """ 19.78 + token_list = [] 19.79 + lnum = parenlev = continued = 0 19.80 + namechars = NAMECHARS 19.81 + numchars = NUMCHARS 19.82 + contstr, needcont = '', 0 19.83 + contline = None 19.84 + indents = [0] 19.85 + last_comment = '' 19.86 + parenlevstart = (0, 0, "") 19.87 + 19.88 + # make the annotator happy 19.89 + endDFA = DUMMY_DFA 19.90 + # make the annotator happy 19.91 + line = '' 19.92 + pos = 0 19.93 + lines.append("") 19.94 + strstart = (0, 0, "") 19.95 + for line in lines: 19.96 + lnum = lnum + 1 19.97 + line = universal_newline(line) 19.98 + pos, max = 0, len(line) 19.99 + 19.100 + if contstr: 19.101 + if not line: 19.102 + raise TokenError( 19.103 + "EOF while scanning triple-quoted string literal", 19.104 + strstart[2], strstart[0], strstart[1]+1, 19.105 + token_list, lnum-1) 19.106 + endmatch = endDFA.recognize(line) 19.107 + if endmatch >= 0: 19.108 + pos = end = endmatch 19.109 + tok = (tokens.STRING, contstr + line[:end], strstart[0], 19.110 + strstart[1], line) 19.111 + token_list.append(tok) 19.112 + last_comment = '' 19.113 + contstr, needcont = '', 0 19.114 + contline = None 19.115 + elif (needcont and not line.endswith('\\\n') and 19.116 + not line.endswith('\\\r\n')): 19.117 + tok = (tokens.ERRORTOKEN, contstr + line, strstart[0], 19.118 + strstart[1], line) 19.119 + token_list.append(tok) 19.120 + last_comment = '' 19.121 + contstr = '' 19.122 + contline = None 19.123 + continue 19.124 + else: 19.125 + contstr = contstr + line 19.126 + contline = contline + line 19.127 + continue 19.128 + 19.129 + elif parenlev == 0 and not continued: # new statement 19.130 + if not line: break 19.131 + column = 0 19.132 + while pos < max: # measure leading whitespace 19.133 + if line[pos] == ' ': column = column + 1 19.134 + elif line[pos] == '\t': column = (column/tabsize + 1)*tabsize 19.135 + elif line[pos] == '\f': column = 0 19.136 + else: break 19.137 + pos = pos + 1 19.138 + if pos == max: break 19.139 + 19.140 + if line[pos] in '#\r\n': 19.141 + # skip comments or blank lines 19.142 + continue 19.143 + 19.144 + if column > indents[-1]: # count indents or dedents 19.145 + indents.append(column) 19.146 + token_list.append((tokens.INDENT, line[:pos], lnum, 0, line)) 19.147 + last_comment = '' 19.148 + while column < indents[-1]: 19.149 + indents = indents[:-1] 19.150 + token_list.append((tokens.DEDENT, '', lnum, pos, line)) 19.151 + last_comment = '' 19.152 + if column != indents[-1]: 19.153 + err = "unindent does not match any outer indentation level" 19.154 + raise TokenIndentationError(err, line, lnum, 0, token_list) 19.155 + 19.156 + else: # continued statement 19.157 + if not line: 19.158 + if parenlev > 0: 19.159 + lnum1, start1, line1 = parenlevstart 19.160 + raise TokenError("parenthesis is never closed", line1, 19.161 + lnum1, start1 + 1, token_list, lnum) 19.162 + raise TokenError("EOF in multi-line statement", line, 19.163 + lnum, 0, token_list) 19.164 + continued = 0 19.165 + 19.166 + while pos < max: 19.167 + pseudomatch = pseudoDFA.recognize(line, pos) 19.168 + if pseudomatch >= 0: # scan for tokens 19.169 + # JDR: Modified 19.170 + start = whiteSpaceDFA.recognize(line, pos) 19.171 + if start < 0: 19.172 + start = pos 19.173 + end = pseudomatch 19.174 + 19.175 + if start == end: 19.176 + raise TokenError("Unknown character", line, 19.177 + lnum, start + 1, token_list) 19.178 + 19.179 + pos = end 19.180 + token, initial = line[start:end], line[start] 19.181 + if initial in numchars or \ 19.182 + (initial == '.' 
and token != '.'): # ordinary number 19.183 + token_list.append((tokens.NUMBER, token, lnum, start, line)) 19.184 + last_comment = '' 19.185 + elif initial in '\r\n': 19.186 + if parenlev <= 0: 19.187 + tok = (tokens.NEWLINE, last_comment, lnum, start, line) 19.188 + token_list.append(tok) 19.189 + last_comment = '' 19.190 + elif initial == '#': 19.191 + # skip comment 19.192 + last_comment = token 19.193 + elif token in triple_quoted: 19.194 + endDFA = endDFAs[token] 19.195 + endmatch = endDFA.recognize(line, pos) 19.196 + if endmatch >= 0: # all on one line 19.197 + pos = endmatch 19.198 + token = line[start:pos] 19.199 + tok = (tokens.STRING, token, lnum, start, line) 19.200 + token_list.append(tok) 19.201 + last_comment = '' 19.202 + else: 19.203 + strstart = (lnum, start, line) 19.204 + contstr = line[start:] 19.205 + contline = line 19.206 + break 19.207 + elif initial in single_quoted or \ 19.208 + token[:2] in single_quoted or \ 19.209 + token[:3] in single_quoted: 19.210 + if token[-1] == '\n': # continued string 19.211 + strstart = (lnum, start, line) 19.212 + endDFA = (endDFAs[initial] or endDFAs[token[1]] or 19.213 + endDFAs[token[2]]) 19.214 + contstr, needcont = line[start:], 1 19.215 + contline = line 19.216 + break 19.217 + else: # ordinary string 19.218 + tok = (tokens.STRING, token, lnum, start, line) 19.219 + token_list.append(tok) 19.220 + last_comment = '' 19.221 + elif initial in namechars: # ordinary name 19.222 + token_list.append((tokens.NAME, token, lnum, start, line)) 19.223 + last_comment = '' 19.224 + elif initial == '\\': # continued stmt 19.225 + continued = 1 19.226 + else: 19.227 + if initial in '([{': 19.228 + if parenlev == 0: 19.229 + parenlevstart = (lnum, start, line) 19.230 + parenlev = parenlev + 1 19.231 + elif initial in ')]}': 19.232 + parenlev = parenlev - 1 19.233 + if parenlev < 0: 19.234 + raise TokenError("unmatched '%s'" % initial, line, 19.235 + lnum, start + 1, token_list) 19.236 + if token in python_opmap: 19.237 + punct = python_opmap[token] 19.238 + else: 19.239 + punct = tokens.OP 19.240 + token_list.append((punct, token, lnum, start, line)) 19.241 + last_comment = '' 19.242 + else: 19.243 + start = whiteSpaceDFA.recognize(line, pos) 19.244 + if start < 0: 19.245 + start = pos 19.246 + if start<max and line[start] in single_quoted: 19.247 + raise TokenError("EOL while scanning string literal", 19.248 + line, lnum, start+1, token_list) 19.249 + tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line) 19.250 + token_list.append(tok) 19.251 + last_comment = '' 19.252 + pos = pos + 1 19.253 + 19.254 + lnum -= 1 19.255 + if not (flags & consts.PyCF_DONT_IMPLY_DEDENT): 19.256 + if token_list and token_list[-1][0] != tokens.NEWLINE: 19.257 + tok = (tokens.NEWLINE, '', lnum, 0, '\n') 19.258 + token_list.append(tok) 19.259 + for indent in indents[1:]: # pop remaining indent levels 19.260 + token_list.append((tokens.DEDENT, '', lnum, pos, line)) 19.261 + tok = (tokens.NEWLINE, '', lnum, 0, '\n') 19.262 + token_list.append(tok) 19.263 + 19.264 + token_list.append((tokens.ENDMARKER, '', lnum, pos, line)) 19.265 + return token_list 19.266 + 19.267 + 19.268 +def universal_newline(line): 19.269 + # show annotator that indexes below are non-negative 19.270 + line_len_m2 = len(line) - 2 19.271 + if line_len_m2 >= 0 and line[-2] == '\r' and line[-1] == '\n': 19.272 + return line[:line_len_m2] + '\n' 19.273 + line_len_m1 = len(line) - 1 19.274 + if line_len_m1 >= 0 and line[-1] == '\r': 19.275 + return line[:line_len_m1] + '\n' 19.276 + return line
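The tokenizer above returns the whole token list eagerly (no yield, which RPython disallows), each entry being a (token type, token string, line number, start column, line) tuple, with universal_newline() first folding \r\n and bare \r line endings into \n. A small driving sketch, assuming the package is importable as-is:

    from pyparser.pytokenizer import generate_tokens

    # flags=0 means the trailing NEWLINE and DEDENT tokens are implied at
    # the end of input; consts.PyCF_DONT_IMPLY_DEDENT would suppress that.
    for tp, value, lnum, start, line in generate_tokens(["x = 1\n"], 0):
        print tp, repr(value), lnum, start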
20.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 20.2 +++ b/pyparser/test/__init__.py Sun Jan 08 20:20:39 2017 +0100 20.3 @@ -0,0 +1,1 @@ 20.4 +
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 21.2 +++ b/pyparser/test/expressions.py Sun Jan 08 20:20:39 2017 +0100 21.3 @@ -0,0 +1,510 @@ 21.4 +""" 21.5 +list of tested expressions / suites (used by test_parser and test_astbuilder) 21.6 +""" 21.7 + 21.8 +constants = [ 21.9 + "0", 21.10 + "7", 21.11 + "-3", 21.12 + "053", 21.13 + "0x18", 21.14 + "14L", 21.15 + "1.0", 21.16 + "3.9", 21.17 + "-3.6", 21.18 + "1.8e19", 21.19 + "90000000000000", 21.20 + "90000000000000.", 21.21 + "3j" 21.22 + ] 21.23 + 21.24 +expressions = [ 21.25 + "x = a + 1", 21.26 + "x = 1 - a", 21.27 + "x = a * b", 21.28 + "x = a ** 2", 21.29 + "x = a / b", 21.30 + "x = a & b", 21.31 + "x = a | b", 21.32 + "x = a ^ b", 21.33 + "x = a // b", 21.34 + "x = a * b + 1", 21.35 + "x = a + 1 * b", 21.36 + "x = a * b / c", 21.37 + "x = a * (1 + c)", 21.38 + "x, y, z = 1, 2, 3", 21.39 + "x = 'a' 'b' 'c'", 21.40 + "del foo", 21.41 + "del foo[bar]", 21.42 + "del foo.bar", 21.43 + "l[0]", 21.44 + "k[v,]", 21.45 + "m[a,b]", 21.46 + "a.b.c[d]", 21.47 + "file('some.txt').read()", 21.48 + "a[0].read()", 21.49 + "a[1:1].read()", 21.50 + "f('foo')('bar')('spam')", 21.51 + "f('foo')('bar')('spam').read()[0]", 21.52 + "a.b[0][0]", 21.53 + "a.b[0][:]", 21.54 + "a.b[0][::]", 21.55 + "a.b[0][0].pop()[0].push('bar')('baz').spam", 21.56 + "a.b[0].read()[1][2].foo().spam()[0].bar", 21.57 + "a**2", 21.58 + "a**2**2", 21.59 + "a.b[0]**2", 21.60 + "a.b[0].read()[1][2].foo().spam()[0].bar ** 2", 21.61 + "l[start:end] = l2", 21.62 + "l[::] = l2", 21.63 + "a = `s`", 21.64 + "a = `1 + 2 + f(3, 4)`", 21.65 + "[a, b] = c", 21.66 + "(a, b) = c", 21.67 + "[a, (b,c), d] = e", 21.68 + "a, (b, c), d = e", 21.69 + ] 21.70 + 21.71 +# We do not export the following tests because we would have to implement 2.5 21.72 +# features in the stable compiler (other than just building the AST). 
21.73 +expressions_inbetweenversions = expressions + [ 21.74 + "1 if True else 2", 21.75 + "1 if False else 2", 21.76 + ] 21.77 + 21.78 +funccalls = [ 21.79 + "l = func()", 21.80 + "l = func(10)", 21.81 + "l = func(10, 12, a, b=c, *args)", 21.82 + "l = func(10, 12, a, b=c, **kwargs)", 21.83 + "l = func(10, 12, a, b=c, *args, **kwargs)", 21.84 + "l = func(10, 12, a, b=c)", 21.85 + "e = l.pop(3)", 21.86 + "e = k.l.pop(3)", 21.87 + "simplefilter('ignore', category=PendingDeprecationWarning, append=1)", 21.88 + """methodmap = dict(subdirs=phase4, 21.89 + same_files=phase3, diff_files=phase3, funny_files=phase3, 21.90 + common_dirs = phase2, common_files=phase2, common_funny=phase2, 21.91 + common=phase1, left_only=phase1, right_only=phase1, 21.92 + left_list=phase0, right_list=phase0)""", 21.93 + "odata = b2a_qp(data, quotetabs = quotetabs, header = header)", 21.94 + ] 21.95 + 21.96 +listmakers = [ 21.97 + "l = []", 21.98 + "l = [1, 2, 3]", 21.99 + "l = [i for i in range(10)]", 21.100 + "l = [i for i in range(10) if i%2 == 0]", 21.101 + "l = [i for i in range(10) if i%2 == 0 or i%2 == 1]", # <-- 21.102 + "l = [i for i in range(10) if i%2 == 0 and i%2 == 1]", 21.103 + "l = [i for j in range(10) for i in range(j)]", 21.104 + "l = [i for j in range(10) for i in range(j) if j%2 == 0]", 21.105 + "l = [i for j in range(10) for i in range(j) if j%2 == 0 and i%2 == 0]", 21.106 + "l = [(a, b) for (a,b,c) in l2]", 21.107 + "l = [{a:b} for (a,b,c) in l2]", 21.108 + "l = [i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0]", 21.109 + ] 21.110 + 21.111 +genexps = [ 21.112 + "l = (i for i in j)", 21.113 + "l = (i for i in j if i%2 == 0)", 21.114 + "l = (i for j in k for i in j)", 21.115 + "l = (i for j in k for i in j if j%2==0)", 21.116 + "l = (i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0)", 21.117 + "l = (i for i in [ j*2 for j in range(10) ] )", 21.118 + "l = [i for i in ( j*2 for j in range(10) ) ]", 21.119 + "l = (i for i in [ j*2 for j in ( k*3 for k in range(10) ) ] )", 21.120 + "l = [i for j in ( j*2 for j in [ k*3 for k in range(10) ] ) ]", 21.121 + "l = f(i for i in j)", 21.122 + ] 21.123 + 21.124 + 21.125 +dictmakers = [ 21.126 + "l = {a : b, 'c' : 0}", 21.127 + "l = {}", 21.128 + ] 21.129 + 21.130 +backtrackings = [ 21.131 + "f = lambda x: x+1", 21.132 + "f = lambda x,y: x+y", 21.133 + "f = lambda x,y=1,z=t: x+y", 21.134 + "f = lambda x,y=1,z=t,*args,**kwargs: x+y", 21.135 + "f = lambda x,y=1,z=t,*args: x+y", 21.136 + "f = lambda x,y=1,z=t,**kwargs: x+y", 21.137 + "f = lambda: 1", 21.138 + "f = lambda *args: 1", 21.139 + "f = lambda **kwargs: 1", 21.140 + ] 21.141 + 21.142 +comparisons = [ 21.143 + "a < b", 21.144 + "a > b", 21.145 + "a not in b", 21.146 + "a is not b", 21.147 + "a in b", 21.148 + "a is b", 21.149 + "3 < x < 5", 21.150 + "(3 < x) < 5", 21.151 + "a < b < c < d", 21.152 + "(a < b) < (c < d)", 21.153 + "a < (b < c) < d", 21.154 + ] 21.155 + 21.156 +multiexpr = [ 21.157 + 'a = b; c = d;', 21.158 + 'a = b = c = d', 21.159 + ] 21.160 + 21.161 +attraccess = [ 21.162 + 'a.b = 2', 21.163 + 'x = a.b', 21.164 + ] 21.165 + 21.166 +slices = [ 21.167 + "l[:]", 21.168 + "l[::]", 21.169 + "l[1:2]", 21.170 + "l[1:]", 21.171 + "l[:2]", 21.172 + "l[1::]", 21.173 + "l[:1:]", 21.174 + "l[::1]", 21.175 + "l[1:2:]", 21.176 + "l[:1:2]", 21.177 + "l[1::2]", 21.178 + "l[0:1:2]", 21.179 + "a.b.l[:]", 21.180 + "a.b.l[1:2]", 21.181 + "a.b.l[1:]", 21.182 + "a.b.l[:2]", 21.183 + "a.b.l[0:1:2]", 21.184 + "a[1:2:3, 100]", 21.185 + "a[:2:3, 100]", 21.186 + "a[1::3, 100,]", 21.187 + "a[1:2:, 
100]", 21.188 + "a[1:2, 100]", 21.189 + "a[1:, 100,]", 21.190 + "a[:2, 100]", 21.191 + "a[:, 100]", 21.192 + "a[100, 1:2:3,]", 21.193 + "a[100, :2:3]", 21.194 + "a[100, 1::3]", 21.195 + "a[100, 1:2:,]", 21.196 + "a[100, 1:2]", 21.197 + "a[100, 1:]", 21.198 + "a[100, :2,]", 21.199 + "a[100, :]", 21.200 + ] 21.201 + 21.202 +imports = [ 21.203 + 'import os', 21.204 + 'import sys, os', 21.205 + 'import os.path', 21.206 + 'import os.path, sys', 21.207 + 'import sys, os.path as osp', 21.208 + 'import os.path as osp', 21.209 + 'import os.path as osp, sys as _sys', 21.210 + 'import a.b.c.d', 21.211 + 'import a.b.c.d as abcd', 21.212 + 'from os import path', 21.213 + 'from os import path, system', 21.214 + ] 21.215 + 21.216 +imports_newstyle = [ 21.217 + 'from os import path, system', 21.218 + 'from os import path as P, system as S', 21.219 + 'from os import (path as P, system as S,)', 21.220 + 'from os import *', 21.221 + ] 21.222 + 21.223 +if_stmts = [ 21.224 + "if a == 1: a+= 2", 21.225 + """if a == 1: 21.226 + a += 2 21.227 +elif a == 2: 21.228 + a += 3 21.229 +else: 21.230 + a += 4 21.231 +""", 21.232 + "if a and not b == c: pass", 21.233 + "if a and not not not b == c: pass", 21.234 + "if 0: print 'foo'" 21.235 + ] 21.236 + 21.237 +asserts = [ 21.238 + 'assert False', 21.239 + 'assert a == 1', 21.240 + 'assert a == 1 and b == 2', 21.241 + 'assert a == 1 and b == 2, "assertion failed"', 21.242 + ] 21.243 + 21.244 +execs = [ 21.245 + 'exec a', 21.246 + 'exec "a=b+3"', 21.247 + 'exec a in f()', 21.248 + 'exec a in f(), g()', 21.249 + ] 21.250 + 21.251 +prints = [ 21.252 + 'print', 21.253 + 'print a', 21.254 + 'print a,', 21.255 + 'print a, b', 21.256 + 'print a, "b", c', 21.257 + 'print >> err', 21.258 + 'print >> err, "error"', 21.259 + 'print >> err, "error",', 21.260 + 'print >> err, "error", a', 21.261 + ] 21.262 + 21.263 +globs = [ 21.264 + 'global a', 21.265 + 'global a,b,c', 21.266 + ] 21.267 + 21.268 +raises_ = [ # NB. 
'raises' creates a name conflict with py.test magic 21.269 + 'raise', 21.270 + 'raise ValueError', 21.271 + 'raise ValueError("error")', 21.272 + 'raise ValueError, "error"', 21.273 + 'raise ValueError, "error", foo', 21.274 + ] 21.275 + 21.276 +tryexcepts = [ 21.277 + """try: 21.278 + a 21.279 + b 21.280 +except: 21.281 + pass 21.282 +""", 21.283 + """try: 21.284 + a 21.285 + b 21.286 +except NameError: 21.287 + pass 21.288 +""", 21.289 + """try: 21.290 + a 21.291 + b 21.292 +except NameError, err: 21.293 + pass 21.294 +""", 21.295 + """try: 21.296 + a 21.297 + b 21.298 +except (NameError, ValueError): 21.299 + pass 21.300 +""", 21.301 + """try: 21.302 + a 21.303 + b 21.304 +except (NameError, ValueError), err: 21.305 + pass 21.306 +""", 21.307 + """try: 21.308 + a 21.309 +except NameError, err: 21.310 + pass 21.311 +except ValueError, err: 21.312 + pass 21.313 +""", 21.314 + """def f(): 21.315 + try: 21.316 + a 21.317 + except NameError, err: 21.318 + a = 1 21.319 + b = 2 21.320 + except ValueError, err: 21.321 + a = 2 21.322 + return a 21.323 +""", 21.324 + """try: 21.325 + a 21.326 +except NameError, err: 21.327 + a = 1 21.328 +except ValueError, err: 21.329 + a = 2 21.330 +else: 21.331 + a += 3 21.332 +""", 21.333 + """try: 21.334 + a 21.335 +finally: 21.336 + b 21.337 +""", 21.338 + """def f(): 21.339 + try: 21.340 + return a 21.341 + finally: 21.342 + a = 3 21.343 + return 1 21.344 +""", 21.345 + 21.346 + ] 21.347 + 21.348 +one_stmt_funcdefs = [ 21.349 + "def f(): return 1", 21.350 + "def f(x): return x+1", 21.351 + "def f(x,y): return x+y", 21.352 + "def f(x,y=1,z=t): return x+y", 21.353 + "def f(x,y=1,z=t,*args,**kwargs): return x+y", 21.354 + "def f(x,y=1,z=t,*args): return x+y", 21.355 + "def f(x,y=1,z=t,**kwargs): return x+y", 21.356 + "def f(*args): return 1", 21.357 + "def f(**kwargs): return 1", 21.358 + "def f(t=()): pass", 21.359 + "def f(a, b, (c, d), e): pass", 21.360 + "def f(a, b, (c, (d, e), f, (g, h))): pass", 21.361 + "def f(a, b, (c, (d, e), f, (g, h)), i): pass", 21.362 + "def f((a)): pass", 21.363 + ] 21.364 + 21.365 +one_stmt_classdefs = [ 21.366 + "class Pdb(bdb.Bdb, cmd.Cmd): pass", 21.367 + "class A: pass", 21.368 + ] 21.369 + 21.370 +docstrings = [ 21.371 + '''def foo(): return 1''', 21.372 + '''class Foo: pass''', 21.373 + '''class Foo: "foo"''', 21.374 + '''def foo(): 21.375 + """foo docstring""" 21.376 + return 1 21.377 +''', 21.378 + '''def foo(): 21.379 + """foo docstring""" 21.380 + a = 1 21.381 + """bar""" 21.382 + return a 21.383 +''', 21.384 + '''def foo(): 21.385 + """doc"""; print 1 21.386 + a=1 21.387 +''', 21.388 + '''"""Docstring""";print 1''', 21.389 + ] 21.390 + 21.391 +returns = [ 21.392 + 'def f(): return', 21.393 + 'def f(): return 1', 21.394 + 'def f(): return a.b', 21.395 + 'def f(): return a', 21.396 + 'def f(): return a,b,c,d', 21.397 + #'return (a,b,c,d)', --- this one makes no sense, as far as I can tell 21.398 + ] 21.399 + 21.400 +augassigns = [ 21.401 + 'a=1;a+=2', 21.402 + 'a=1;a-=2', 21.403 + 'a=1;a*=2', 21.404 + 'a=1;a/=2', 21.405 + 'a=1;a//=2', 21.406 + 'a=1;a%=2', 21.407 + 'a=1;a**=2', 21.408 + 'a=1;a>>=2', 21.409 + 'a=1;a<<=2', 21.410 + 'a=1;a&=2', 21.411 + 'a=1;a^=2', 21.412 + 'a=1;a|=2', 21.413 + 21.414 + 'a=A();a.x+=2', 21.415 + 'a=A();a.x-=2', 21.416 + 'a=A();a.x*=2', 21.417 + 'a=A();a.x/=2', 21.418 + 'a=A();a.x//=2', 21.419 + 'a=A();a.x%=2', 21.420 + 'a=A();a.x**=2', 21.421 + 'a=A();a.x>>=2', 21.422 + 'a=A();a.x<<=2', 21.423 + 'a=A();a.x&=2', 21.424 + 'a=A();a.x^=2', 21.425 + 'a=A();a.x|=2', 21.426 + 21.427 +
'a=A();a[0]+=2', 21.428 + 'a=A();a[0]-=2', 21.429 + 'a=A();a[0]*=2', 21.430 + 'a=A();a[0]/=2', 21.431 + 'a=A();a[0]//=2', 21.432 + 'a=A();a[0]%=2', 21.433 + 'a=A();a[0]**=2', 21.434 + 'a=A();a[0]>>=2', 21.435 + 'a=A();a[0]<<=2', 21.436 + 'a=A();a[0]&=2', 21.437 + 'a=A();a[0]^=2', 21.438 + 'a=A();a[0]|=2', 21.439 + 21.440 + 'a=A();a[0:2]+=2', 21.441 + 'a=A();a[0:2]-=2', 21.442 + 'a=A();a[0:2]*=2', 21.443 + 'a=A();a[0:2]/=2', 21.444 + 'a=A();a[0:2]//=2', 21.445 + 'a=A();a[0:2]%=2', 21.446 + 'a=A();a[0:2]**=2', 21.447 + 'a=A();a[0:2]>>=2', 21.448 + 'a=A();a[0:2]<<=2', 21.449 + 'a=A();a[0:2]&=2', 21.450 + 'a=A();a[0:2]^=2', 21.451 + 'a=A();a[0:2]|=2', 21.452 + ] 21.453 + 21.454 +PY23_TESTS = [ 21.455 + constants, 21.456 + expressions, 21.457 + augassigns, 21.458 + comparisons, 21.459 + funccalls, 21.460 + backtrackings, 21.461 + listmakers, # ERRORS 21.462 + dictmakers, 21.463 + multiexpr, 21.464 + attraccess, 21.465 + slices, 21.466 + imports, 21.467 + execs, 21.468 + prints, 21.469 + globs, 21.470 + raises_, 21.471 + 21.472 + ] 21.473 + 21.474 +OPTIONAL_TESTS = [ 21.475 + # expressions_inbetweenversions, 21.476 + genexps, 21.477 + imports_newstyle, 21.478 + asserts, 21.479 + ] 21.480 + 21.481 +TESTS = PY23_TESTS + OPTIONAL_TESTS 21.482 + 21.483 + 21.484 +## TESTS = [ 21.485 +## ["l = [i for i in range(10) if i%2 == 0 or i%2 == 1]"], 21.486 +## ] 21.487 + 21.488 +CHANGES_25_INPUTS = [ 21.489 + ["class A(): pass"], 21.490 + ["def f(): x = yield 3"] 21.491 + ] 21.492 + 21.493 +EXEC_INPUTS = [ 21.494 + one_stmt_classdefs, 21.495 + one_stmt_funcdefs, 21.496 + if_stmts, 21.497 + tryexcepts, 21.498 + docstrings, 21.499 + returns, 21.500 + ] 21.501 + 21.502 +SINGLE_INPUTS = [ 21.503 + one_stmt_funcdefs, 21.504 + ['\t # hello\n', 21.505 + 'print 6*7', 21.506 + 'if 1: x\n', 21.507 + 'x = 5', 21.508 + 'x = 5 ', 21.509 + '''"""Docstring""";print 1''', 21.510 + '''"Docstring"''', 21.511 + '''"Docstring" "\\x00"''', 21.512 + ] 21.513 +]
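These corpora are plain lists of source snippets grouped by language feature, consumed by test_parser and test_astbuilder according to the module docstring. A sketch of pushing them through the high-level parser exercised in test_pyparse.py further below; note that the "# ERRORS" annotation on listmakers above suggests not every group is expected to pass, so a real run would guard against SyntaxError:

    from pyparser import pyparse
    from pyparser.test import expressions

    p = pyparse.PythonParser()
    for group in expressions.TESTS:
        for snippet in group:
            info = pyparse.CompileInfo("<snippet>", "exec")
            p.parse_source(snippet + "\n", info)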
22.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 22.2 +++ b/pyparser/test/test_automata.py Sun Jan 08 20:20:39 2017 +0100 22.3 @@ -0,0 +1,12 @@ 22.4 +from pyparser.automata import DFA, DEFAULT 22.5 + 22.6 +def test_states(): 22.7 + d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True]) 22.8 + assert d.states == "\x01\xff\xff\x00" 22.9 + assert d.defaults == "\xff\xff" 22.10 + assert d.max_char == 2 22.11 + 22.12 + d = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True]) 22.13 + assert d.states == "\x01\x00" 22.14 + assert d.defaults == "\xff\x00" 22.15 + assert d.max_char == 1
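The packed strings asserted here are the DFA's transition tables flattened for RPython: states holds, for each state, one byte per input character up to max_char giving the target state, with \xff meaning "no transition", while defaults records each state's DEFAULT target. A sketch running the same automaton, assuming recognize() reports how far the last accepting state reached (or -1 if none was reached), as its uses in pytokenizer.py suggest:

    from pyparser.automata import DFA

    d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True])

    # 0 --\x00--> 1 (accepting) --\x01--> 0 --\x00--> 1 (accepting),
    # so the furthest accepting position should be 3.
    print d.recognize("\x00\x01\x00")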
23.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 23.2 +++ b/pyparser/test/test_gendfa.py Sun Jan 08 20:20:39 2017 +0100 23.3 @@ -0,0 +1,16 @@ 23.4 +from pyparser.automata import DFA, DEFAULT 23.5 +from pyparser.genpytokenize import output 23.6 + 23.7 +def test_states(): 23.8 + states = [{"\x00": 1}, {"\x01": 0}] 23.9 + d = DFA(states[:], [False, True]) 23.10 + assert output('test', DFA, d, states) == """\ 23.11 +accepts = [False, True] 23.12 +states = [ 23.13 + # 0 23.14 + {'\\x00': 1}, 23.15 + # 1 23.16 + {'\\x01': 0}, 23.17 + ] 23.18 +test = automata.pyparser.automata.DFA(states, accepts) 23.19 +"""
24.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 24.2 +++ b/pyparser/test/test_metaparser.py Sun Jan 08 20:20:39 2017 +0100 24.3 @@ -0,0 +1,112 @@ 24.4 +import py 24.5 +import os 24.6 +import glob 24.7 +import tokenize 24.8 +import token 24.9 +import StringIO 24.10 +from pyparser.metaparser import ParserGenerator, PgenError 24.11 +from pyparser.pygram import PythonGrammar 24.12 +from pyparser import parser 24.13 + 24.14 + 24.15 +class MyGrammar(parser.Grammar): 24.16 + TOKENS = token.__dict__ 24.17 + OPERATOR_MAP = { 24.18 + "+" : token.OP, 24.19 + "-" : token.OP, 24.20 + } 24.21 + KEYWORD_TOKEN = token.NAME 24.22 + 24.23 + 24.24 +class TestParserGenerator: 24.25 + 24.26 + def gram_for(self, grammar_source): 24.27 + p = ParserGenerator(grammar_source + "\n") 24.28 + return p.build_grammar(MyGrammar) 24.29 + 24.30 + def test_multiple_rules(self): 24.31 + g = self.gram_for("foo: NAME bar\nbar: STRING") 24.32 + assert len(g.dfas) == 2 24.33 + assert g.start == g.symbol_ids["foo"] 24.34 + 24.35 + def test_simple(self): 24.36 + g = self.gram_for("eval: NAME\n") 24.37 + assert len(g.dfas) == 1 24.38 + eval_sym = g.symbol_ids["eval"] 24.39 + assert g.start == eval_sym 24.40 + states, first = g.dfas[eval_sym - 256] 24.41 + assert states == [([(1, 1)], False), ([], True)] 24.42 + assert g.labels[0] == 0 24.43 + 24.44 + def test_load_python_grammars(self): 24.45 + gram_pat = os.path.join(os.path.dirname(__file__), "..", "data", 24.46 + "Grammar*") 24.47 + for gram_file in glob.glob(gram_pat): 24.48 + fp = open(gram_file, "r") 24.49 + try: 24.50 + ParserGenerator(fp.read()).build_grammar(PythonGrammar) 24.51 + finally: 24.52 + fp.close() 24.53 + 24.54 + def test_items(self): 24.55 + g = self.gram_for("foo: NAME STRING OP '+'") 24.56 + assert len(g.dfas) == 1 24.57 + states = g.dfas[g.symbol_ids["foo"] - 256][0] 24.58 + last = states[0][0][0][1] 24.59 + for state in states[1:-1]: 24.60 + assert last < state[0][0][1] 24.61 + last = state[0][0][1] 24.62 + 24.63 + def test_alternatives(self): 24.64 + g = self.gram_for("foo: STRING | OP") 24.65 + assert len(g.dfas) == 1 24.66 + 24.67 + def test_optional(self): 24.68 + g = self.gram_for("foo: [NAME]") 24.69 + 24.70 + def test_grouping(self): 24.71 + g = self.gram_for("foo: (NAME | STRING) OP") 24.72 + 24.73 + def test_keyword(self): 24.74 + g = self.gram_for("foo: 'some_keyword' 'for'") 24.75 + assert len(g.keyword_ids) == 2 24.76 + assert len(g.token_ids) == 0 24.77 + 24.78 + def test_token(self): 24.79 + g = self.gram_for("foo: NAME") 24.80 + assert len(g.token_ids) == 1 24.81 + 24.82 + def test_operator(self): 24.83 + g = self.gram_for("add: NUMBER '+' NUMBER") 24.84 + assert len(g.keyword_ids) == 0 24.85 + assert len(g.token_ids) == 2 24.86 + 24.87 + exc = py.test.raises(PgenError, self.gram_for, "add: '/'").value 24.88 + assert str(exc) == "no such operator: '/'" 24.89 + 24.90 + def test_symbol(self): 24.91 + g = self.gram_for("foo: some_other_rule\nsome_other_rule: NAME") 24.92 + assert len(g.dfas) == 2 24.93 + assert len(g.labels) == 3 24.94 + 24.95 + exc = py.test.raises(PgenError, self.gram_for, "foo: no_rule").value 24.96 + assert str(exc) == "no such rule: 'no_rule'" 24.97 + 24.98 + def test_repeaters(self): 24.99 + g1 = self.gram_for("foo: NAME+") 24.100 + g2 = self.gram_for("foo: NAME*") 24.101 + assert g1.dfas != g2.dfas 24.102 + 24.103 + g = self.gram_for("foo: (NAME | STRING)*") 24.104 + g = self.gram_for("foo: (NAME | STRING)+") 24.105 + 24.106 + def test_error(self): 24.107 + exc = py.test.raises(PgenError, self.gram_for, "hi").value 
24.108 + assert str(exc) == "expected token OP but got NEWLINE" 24.109 + assert exc.location == ((1, 2), (1, 3), "hi\n") 24.110 + exc = py.test.raises(PgenError, self.gram_for, "hi+").value 24.111 + assert str(exc) == "expected ':' but got '+'" 24.112 + assert exc.location == ((1, 2), (1, 3), "hi+\n") 24.113 + 24.114 + def test_comments_and_whitespace(self): 24.115 + self.gram_for("\n\n# comment\nrule: NAME # comment")
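MyGrammar above shows the minimum a parser.Grammar subclass must supply before ParserGenerator can compile grammar source: a TOKENS namespace, an OPERATOR_MAP from operator literals to token types, and the token used for keywords. A standalone sketch along the same lines (CalcGrammar and its one-rule grammar are invented for illustration; the grammar text must end in a newline, which gram_for() takes care of in the tests):

    import token
    from pyparser import parser
    from pyparser.metaparser import ParserGenerator

    class CalcGrammar(parser.Grammar):
        TOKENS = token.__dict__
        OPERATOR_MAP = {"+": token.OP, "-": token.OP}
        KEYWORD_TOKEN = token.NAME

    g = ParserGenerator("expr: NUMBER ('+' NUMBER)*\n").build_grammar(CalcGrammar)
    print len(g.dfas), g.symbol_ids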
25.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 25.2 +++ b/pyparser/test/test_parser.py Sun Jan 08 20:20:39 2017 +0100 25.3 @@ -0,0 +1,293 @@ 25.4 +# New parser tests. 25.5 +import py 25.6 +import tokenize 25.7 +import token 25.8 +import StringIO 25.9 +from pyparser import parser, metaparser, pygram 25.10 +from pyparser.test.test_metaparser import MyGrammar 25.11 + 25.12 + 25.13 +class SimpleParser(parser.Parser): 25.14 + 25.15 + def parse(self, input): 25.16 + self.prepare() 25.17 + rl = StringIO.StringIO(input + "\n").readline 25.18 + gen = tokenize.generate_tokens(rl) 25.19 + for tp, value, begin, end, line in gen: 25.20 + if self.add_token(tp, value, begin[0], begin[1], line): 25.21 + py.test.raises(StopIteration, gen.next) 25.22 + return self.root 25.23 + 25.24 + 25.25 +def tree_from_string(expected, gram): 25.26 + def count_indent(s): 25.27 + indent = 0 25.28 + for char in s: 25.29 + if char != " ": 25.30 + break 25.31 + indent += 1 25.32 + return indent 25.33 + last_newline_index = 0 25.34 + for i, char in enumerate(expected): 25.35 + if char == "\n": 25.36 + last_newline_index = i 25.37 + elif char != " ": 25.38 + break 25.39 + if last_newline_index: 25.40 + expected = expected[last_newline_index + 1:] 25.41 + base_indent = count_indent(expected) 25.42 + assert not divmod(base_indent, 4)[1], "not using 4 space indentation" 25.43 + lines = [line[base_indent:] for line in expected.splitlines()] 25.44 + last_indent = 0 25.45 + node_stack = [] 25.46 + for line in lines: 25.47 + if not line.strip(): 25.48 + continue 25.49 + data = line.split() 25.50 + if data[0].isupper(): 25.51 + tp = getattr(token, data[0]) 25.52 + if len(data) == 2: 25.53 + value = data[1].strip("\"") 25.54 + elif tp == token.NEWLINE: 25.55 + value = "\n" 25.56 + else: 25.57 + value = "" 25.58 + n = parser.Terminal(tp, value, 0, 0) 25.59 + else: 25.60 + tp = gram.symbol_ids[data[0]] 25.61 + children = [] 25.62 + n = parser.Nonterminal(tp, children) 25.63 + new_indent = count_indent(line) 25.64 + if new_indent >= last_indent: 25.65 + if new_indent == last_indent and node_stack: 25.66 + node_stack.pop() 25.67 + if node_stack: 25.68 + node_stack[-1].append_child(n) 25.69 + node_stack.append(n) 25.70 + else: 25.71 + diff = last_indent - new_indent 25.72 + pop_nodes = diff // 4 + 1 25.73 + del node_stack[-pop_nodes:] 25.74 + node_stack[-1].append_child(n) 25.75 + node_stack.append(n) 25.76 + last_indent = new_indent 25.77 + return node_stack[0] 25.78 + 25.79 + 25.80 +class TestParser: 25.81 + 25.82 + def parser_for(self, gram, add_endmarker=True): 25.83 + if add_endmarker: 25.84 + gram += " NEWLINE ENDMARKER\n" 25.85 + pgen = metaparser.ParserGenerator(gram) 25.86 + g = pgen.build_grammar(MyGrammar) 25.87 + return SimpleParser(g), g 25.88 + 25.89 + def test_multiple_rules(self): 25.90 + gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER 25.91 +bar: NAME NUMBER\n""" 25.92 + p, gram = self.parser_for(gram, False) 25.93 + expected = """ 25.94 + foo 25.95 + NAME "next_rule" 25.96 + bar 25.97 + NAME "a_name" 25.98 + NUMBER "42" 25.99 + NAME "end" 25.100 + NEWLINE 25.101 + ENDMARKER""" 25.102 + input = "next_rule a_name 42 end" 25.103 + assert tree_from_string(expected, gram) == p.parse(input) 25.104 + 25.105 + def test_recursive_rule(self): 25.106 + gram = """foo: NAME bar STRING NEWLINE ENDMARKER 25.107 +bar: NAME [bar] NUMBER\n""" 25.108 + p, gram = self.parser_for(gram, False) 25.109 + expected = """ 25.110 + foo 25.111 + NAME "hi" 25.112 + bar 25.113 + NAME "hello" 25.114 + bar 25.115 + NAME "a_name" 25.116 + NUMBER 
"32" 25.117 + NUMBER "42" 25.118 + STRING "'string'" 25.119 + NEWLINE 25.120 + ENDMARKER""" 25.121 + input = "hi hello a_name 32 42 'string'" 25.122 + assert tree_from_string(expected, gram) == p.parse(input) 25.123 + 25.124 + def test_symbol(self): 25.125 + gram = """parent: first_child second_child NEWLINE ENDMARKER 25.126 +first_child: NAME age 25.127 +second_child: STRING 25.128 +age: NUMBER\n""" 25.129 + p, gram = self.parser_for(gram, False) 25.130 + expected = """ 25.131 + parent 25.132 + first_child 25.133 + NAME "harry" 25.134 + age 25.135 + NUMBER "13" 25.136 + second_child 25.137 + STRING "'fred'" 25.138 + NEWLINE 25.139 + ENDMARKER""" 25.140 + input = "harry 13 'fred'" 25.141 + assert tree_from_string(expected, gram) == p.parse(input) 25.142 + 25.143 + def test_token(self): 25.144 + p, gram = self.parser_for("foo: NAME") 25.145 + expected = """ 25.146 + foo 25.147 + NAME "hi" 25.148 + NEWLINE 25.149 + ENDMARKER""" 25.150 + assert tree_from_string(expected, gram) == p.parse("hi") 25.151 + py.test.raises(parser.ParseError, p.parse, "567") 25.152 + p, gram = self.parser_for("foo: NUMBER NAME STRING") 25.153 + expected = """ 25.154 + foo 25.155 + NUMBER "42" 25.156 + NAME "hi" 25.157 + STRING "'bar'" 25.158 + NEWLINE 25.159 + ENDMARKER""" 25.160 + assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'") 25.161 + 25.162 + def test_optional(self): 25.163 + p, gram = self.parser_for("foo: [NAME] 'end'") 25.164 + expected = """ 25.165 + foo 25.166 + NAME "hi" 25.167 + NAME "end" 25.168 + NEWLINE 25.169 + ENDMARKER""" 25.170 + assert tree_from_string(expected, gram) == p.parse("hi end") 25.171 + expected = """ 25.172 + foo 25.173 + NAME "end" 25.174 + NEWLINE 25.175 + ENDMARKER""" 25.176 + assert tree_from_string(expected, gram) == p.parse("end") 25.177 + 25.178 + def test_grouping(self): 25.179 + p, gram = self.parser_for( 25.180 + "foo: ((NUMBER NAME | STRING) | 'second_option')") 25.181 + expected = """ 25.182 + foo 25.183 + NUMBER "42" 25.184 + NAME "hi" 25.185 + NEWLINE 25.186 + ENDMARKER""" 25.187 + assert tree_from_string(expected, gram) == p.parse("42 hi") 25.188 + expected = """ 25.189 + foo 25.190 + STRING "'hi'" 25.191 + NEWLINE 25.192 + ENDMARKER""" 25.193 + assert tree_from_string(expected, gram) == p.parse("'hi'") 25.194 + expected = """ 25.195 + foo 25.196 + NAME "second_option" 25.197 + NEWLINE 25.198 + ENDMARKER""" 25.199 + assert tree_from_string(expected, gram) == p.parse("second_option") 25.200 + py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'") 25.201 + py.test.raises(parser.ParseError, p.parse, "42 second_option") 25.202 + 25.203 + def test_alternative(self): 25.204 + p, gram = self.parser_for("foo: (NAME | NUMBER)") 25.205 + expected = """ 25.206 + foo 25.207 + NAME "hi" 25.208 + NEWLINE 25.209 + ENDMARKER""" 25.210 + assert tree_from_string(expected, gram) == p.parse("hi") 25.211 + expected = """ 25.212 + foo 25.213 + NUMBER "42" 25.214 + NEWLINE 25.215 + ENDMARKER""" 25.216 + assert tree_from_string(expected, gram) == p.parse("42") 25.217 + py.test.raises(parser.ParseError, p.parse, "hi 23") 25.218 + py.test.raises(parser.ParseError, p.parse, "23 hi") 25.219 + py.test.raises(parser.ParseError, p.parse, "'some string'") 25.220 + 25.221 + def test_keyword(self): 25.222 + p, gram = self.parser_for("foo: 'key'") 25.223 + expected = """ 25.224 + foo 25.225 + NAME "key" 25.226 + NEWLINE 25.227 + ENDMARKER""" 25.228 + assert tree_from_string(expected, gram) == p.parse("key") 25.229 + py.test.raises(parser.ParseError, p.parse, "") 25.230 + p, gram = 
self.parser_for("foo: NAME 'key'") 25.231 + expected = """ 25.232 + foo 25.233 + NAME "some_name" 25.234 + NAME "key" 25.235 + NEWLINE 25.236 + ENDMARKER""" 25.237 + assert tree_from_string(expected, gram) == p.parse("some_name key") 25.238 + py.test.raises(parser.ParseError, p.parse, "some_name") 25.239 + 25.240 + def test_repeaters(self): 25.241 + p, gram = self.parser_for("foo: NAME+ 'end'") 25.242 + expected = """ 25.243 + foo 25.244 + NAME "hi" 25.245 + NAME "bye" 25.246 + NAME "nothing" 25.247 + NAME "end" 25.248 + NEWLINE 25.249 + ENDMARKER""" 25.250 + assert tree_from_string(expected, gram) == p.parse("hi bye nothing end") 25.251 + py.test.raises(parser.ParseError, p.parse, "end") 25.252 + py.test.raises(parser.ParseError, p.parse, "hi bye") 25.253 + p, gram = self.parser_for("foo: NAME* 'end'") 25.254 + expected = """ 25.255 + foo 25.256 + NAME "hi" 25.257 + NAME "bye" 25.258 + NAME "end" 25.259 + NEWLINE 25.260 + ENDMARKER""" 25.261 + assert tree_from_string(expected, gram) == p.parse("hi bye end") 25.262 + py.test.raises(parser.ParseError, p.parse, "hi bye") 25.263 + expected = """ 25.264 + foo 25.265 + NAME "end" 25.266 + NEWLINE 25.267 + ENDMARKER""" 25.268 + assert tree_from_string(expected, gram) == p.parse("end") 25.269 + 25.270 + p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'") 25.271 + expected = """ 25.272 + foo 25.273 + NAME "a_name" 25.274 + NAME "name_two" 25.275 + NAME "end" 25.276 + NEWLINE 25.277 + ENDMARKER""" 25.278 + assert tree_from_string(expected, gram) == p.parse("a_name name_two end") 25.279 + expected = """ 25.280 + foo 25.281 + NUMBER "42" 25.282 + NAME "name" 25.283 + NAME "end" 25.284 + NEWLINE 25.285 + ENDMARKER""" 25.286 + assert tree_from_string(expected, gram) == p.parse("42 name end") 25.287 + py.test.raises(parser.ParseError, p.parse, "end") 25.288 + p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'") 25.289 + expected = """ 25.290 + foo 25.291 + NAME "hi" 25.292 + NUMBER 42 25.293 + NAME "end" 25.294 + NEWLINE 25.295 + ENDMARKER""" 25.296 + assert tree_from_string(expected, gram) == p.parse("hi 42 end")
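SimpleParser at the top of this file also documents the push-parser protocol: prepare() resets the parser, add_token() consumes one token at a time and returns a true value once the start symbol is complete, after which the finished tree hangs off the root attribute. A sketch of driving it by hand, reusing MyGrammar and assuming parser.Parser accepts the grammar in its constructor, as SimpleParser's construction implies:

    import StringIO
    import tokenize
    from pyparser import metaparser, parser
    from pyparser.test.test_metaparser import MyGrammar

    g = metaparser.ParserGenerator("foo: NAME NEWLINE ENDMARKER\n").build_grammar(MyGrammar)
    p = parser.Parser(g)
    p.prepare()
    rl = StringIO.StringIO("hi\n").readline
    for tp, value, begin, end, line in tokenize.generate_tokens(rl):
        # add_token() returns a true value once the start symbol is done.
        if p.add_token(tp, value, begin[0], begin[1], line):
            break
    print p.root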
26.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 26.2 +++ b/pyparser/test/test_pyparse.py Sun Jan 08 20:20:39 2017 +0100 26.3 @@ -0,0 +1,164 @@ 26.4 +# -*- coding: utf-8 -*- 26.5 +import py 26.6 +from pyparser import pyparse 26.7 +from pyparser.pygram import syms, tokens 26.8 +from pyparser.error import SyntaxError, IndentationError 26.9 +from pyparser import consts 26.10 + 26.11 + 26.12 +class TestPythonParser: 26.13 + 26.14 + def setup_class(self): 26.15 + self.parser = pyparse.PythonParser() 26.16 + 26.17 + def parse(self, source, mode="exec", info=None): 26.18 + if info is None: 26.19 + info = pyparse.CompileInfo("<test>", mode) 26.20 + return self.parser.parse_source(source, info) 26.21 + 26.22 + def test_with_and_as(self): 26.23 + py.test.raises(SyntaxError, self.parse, "with = 23") 26.24 + py.test.raises(SyntaxError, self.parse, "as = 2") 26.25 + 26.26 + def test_dont_imply_dedent(self): 26.27 + info = pyparse.CompileInfo("<test>", "single", 26.28 + consts.PyCF_DONT_IMPLY_DEDENT) 26.29 + self.parse('if 1:\n x\n', info=info) 26.30 + self.parse('x = 5 ', info=info) 26.31 + 26.32 + def test_clear_state(self): 26.33 + assert self.parser.root is None 26.34 + tree = self.parse("name = 32") 26.35 + assert self.parser.root is None 26.36 + 26.37 + def test_encoding(self): 26.38 + info = pyparse.CompileInfo("<test>", "exec") 26.39 + tree = self.parse("""# coding: latin-1 26.40 +stuff = "nothing" 26.41 +""", info=info) 26.42 + assert tree.type == syms.file_input 26.43 + assert info.encoding == "iso-8859-1" 26.44 + sentence = u"u'Die Männer ärgen sich!'" 26.45 + input = (u"# coding: utf-7\nstuff = %s" % (sentence,)).encode("utf-7") 26.46 + tree = self.parse(input, info=info) 26.47 + assert info.encoding == "utf-7" 26.48 + input = "# coding: iso-8859-15\nx" 26.49 + self.parse(input, info=info) 26.50 + assert info.encoding == "iso-8859-15" 26.51 + input = "\xEF\xBB\xBF# coding: utf-8\nx" 26.52 + self.parse(input, info=info) 26.53 + assert info.encoding == "utf-8" 26.54 + input = "# coding: utf-8\nx" 26.55 + info.flags |= consts.PyCF_SOURCE_IS_UTF8 26.56 + exc = py.test.raises(SyntaxError, self.parse, input, info=info).value 26.57 + info.flags &= ~consts.PyCF_SOURCE_IS_UTF8 26.58 + assert exc.msg == "coding declaration in unicode string" 26.59 + input = "\xEF\xBB\xBF# coding: latin-1\nx" 26.60 + exc = py.test.raises(SyntaxError, self.parse, input).value 26.61 + assert exc.msg == "UTF-8 BOM with latin-1 coding cookie" 26.62 + input = "# coding: not-here" 26.63 + exc = py.test.raises(SyntaxError, self.parse, input).value 26.64 + assert exc.msg == "Unknown encoding: not-here" 26.65 + input = u"# coding: ascii\n\xe2".encode('utf-8') 26.66 + exc = py.test.raises(SyntaxError, self.parse, input).value 26.67 + assert exc.msg == ("'ascii' codec can't decode byte 0xc3 " 26.68 + "in position 16: ordinal not in range(128)") 26.69 + 26.70 + def test_non_unicode_codec(self): 26.71 + exc = py.test.raises(SyntaxError, self.parse, """\ 26.72 +# coding: string-escape 26.73 +\x70\x72\x69\x6e\x74\x20\x32\x2b\x32\x0a 26.74 +""").value 26.75 + assert exc.msg == "codec did not return a unicode object" 26.76 + 26.77 + def test_syntax_error(self): 26.78 + parse = self.parse 26.79 + exc = py.test.raises(SyntaxError, parse, "name another for").value 26.80 + assert exc.msg == "invalid syntax" 26.81 + assert exc.lineno == 1 26.82 + assert exc.offset == 5 26.83 + assert exc.text.startswith("name another for") 26.84 + exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value 26.85 + assert exc.msg == "EOL while 
scanning string literal" 26.86 + assert exc.lineno == 1 26.87 + assert exc.offset == 5 26.88 + exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value 26.89 + assert exc.msg == "EOF while scanning triple-quoted string literal" 26.90 + assert exc.lineno == 1 26.91 + assert exc.offset == 5 26.92 + assert exc.lastlineno == 3 26.93 + for input in ("())", "(()", "((", "))"): 26.94 + py.test.raises(SyntaxError, parse, input) 26.95 + exc = py.test.raises(SyntaxError, parse, "x = (\n\n(),\n(),").value 26.96 + assert exc.msg == "parenthesis is never closed" 26.97 + assert exc.lineno == 1 26.98 + assert exc.offset == 5 26.99 + assert exc.lastlineno == 5 26.100 + exc = py.test.raises(SyntaxError, parse, "abc)").value 26.101 + assert exc.msg == "unmatched ')'" 26.102 + assert exc.lineno == 1 26.103 + assert exc.offset == 4 26.104 + 26.105 + def test_is(self): 26.106 + self.parse("x is y") 26.107 + self.parse("x is not y") 26.108 + 26.109 + def test_indentation_error(self): 26.110 + parse = self.parse 26.111 + input = """ 26.112 +def f(): 26.113 +pass""" 26.114 + exc = py.test.raises(IndentationError, parse, input).value 26.115 + assert exc.msg == "expected an indented block" 26.116 + assert exc.lineno == 3 26.117 + assert exc.text.startswith("pass") 26.118 + assert exc.offset == 0 26.119 + input = "hi\n indented" 26.120 + exc = py.test.raises(IndentationError, parse, input).value 26.121 + assert exc.msg == "unexpected indent" 26.122 + input = "def f():\n pass\n next_stmt" 26.123 + exc = py.test.raises(IndentationError, parse, input).value 26.124 + assert exc.msg == "unindent does not match any outer indentation level" 26.125 + assert exc.lineno == 3 26.126 + 26.127 + def test_mac_newline(self): 26.128 + self.parse("this_is\ra_mac\rfile") 26.129 + 26.130 + def test_mode(self): 26.131 + assert self.parse("x = 43*54").type == syms.file_input 26.132 + tree = self.parse("43**54", "eval") 26.133 + assert tree.type == syms.eval_input 26.134 + py.test.raises(SyntaxError, self.parse, "x = 54", "eval") 26.135 + tree = self.parse("x = 43", "single") 26.136 + assert tree.type == syms.single_input 26.137 + 26.138 + def test_multiline_string(self): 26.139 + self.parse("''' \n '''") 26.140 + self.parse("r''' \n '''") 26.141 + 26.142 + def test_bytes_literal(self): 26.143 + self.parse('b" "') 26.144 + self.parse('br" "') 26.145 + self.parse('b""" """') 26.146 + self.parse("b''' '''") 26.147 + self.parse("br'\\\n'") 26.148 + 26.149 + py.test.raises(SyntaxError, self.parse, "b'a\\n") 26.150 + 26.151 + def test_new_octal_literal(self): 26.152 + self.parse('0777') 26.153 + self.parse('0o777') 26.154 + self.parse('0o777L') 26.155 + py.test.raises(SyntaxError, self.parse, "0o778") 26.156 + 26.157 + def test_new_binary_literal(self): 26.158 + self.parse('0b1101') 26.159 + self.parse('0b0l') 26.160 + py.test.raises(SyntaxError, self.parse, "0b112") 26.161 + 26.162 + def test_universal_newlines(self): 26.163 + fmt = 'stuff = """hello%sworld"""' 26.164 + expected_tree = self.parse(fmt % '\n') 26.165 + for linefeed in ["\r\n","\r"]: 26.166 + tree = self.parse(fmt % linefeed) 26.167 + assert expected_tree == tree
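PythonParser.parse_source() is the high-level entry point these tests poke at: it applies the encoding rules (coding cookies, the UTF-8 BOM, PyCF_SOURCE_IS_UTF8), tokenizes, parses in the mode named by the CompileInfo, and records the detected encoding on the info object. A minimal sketch mirroring the tests:

    from pyparser import pyparse

    p = pyparse.PythonParser()
    info = pyparse.CompileInfo("<example>", "exec")
    tree = p.parse_source("# coding: latin-1\nx = 1\n", info)
    print tree.type      # syms.file_input in "exec" mode
    print info.encoding  # the normalized cookie, "iso-8859-1" here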
27.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 27.2 +++ b/pyparser/test/unittest_samples.py Sun Jan 08 20:20:39 2017 +0100 27.3 @@ -0,0 +1,95 @@ 27.4 +"""test module for CPython / PyPy nested tuples comparison""" 27.5 + 27.6 +import os, os.path as osp 27.7 +import sys 27.8 +from pyparser.pythonutil import python_parse, pypy_parse 27.9 +from pprint import pprint 27.10 +from pyparser import grammar 27.11 +grammar.DEBUG = False 27.12 +from symbol import sym_name 27.13 + 27.14 + 27.15 +def name(elt): 27.16 + return "%s[%s]"% (sym_name.get(elt,elt),elt) 27.17 + 27.18 +def read_samples_dir(): 27.19 + return [osp.join('samples', fname) for fname in os.listdir('samples') if fname.endswith('.py')] 27.20 + 27.21 +def print_sym_tuple(nested, level=0, limit=15, names=False, trace=()): 27.22 + buf = [] 27.23 + if level <= limit: 27.24 + buf.append("%s(" % (" "*level)) 27.25 + else: 27.26 + buf.append("(") 27.27 + for index, elt in enumerate(nested): 27.28 + # Test if debugging and if on last element of error path 27.29 + if trace and not trace[1:] and index == trace[0]: 27.30 + buf.append('\n----> ') 27.31 + if type(elt) is int: 27.32 + if names: 27.33 + buf.append(name(elt)) 27.34 + else: 27.35 + buf.append(str(elt)) 27.36 + buf.append(', ') 27.37 + elif type(elt) is str: 27.38 + buf.append(repr(elt)) 27.39 + else: 27.40 + if level < limit: 27.41 + buf.append('\n') 27.42 + buf.extend(print_sym_tuple(elt, level+1, limit, 27.43 + names, trace[1:])) 27.44 + buf.append(')') 27.45 + return buf 27.46 + 27.47 +def assert_tuples_equal(tup1, tup2, curpos = ()): 27.48 + for index, (elt1, elt2) in enumerate(zip(tup1, tup2)): 27.49 + if elt1 != elt2: 27.50 + if type(elt1) is tuple and type(elt2) is tuple: 27.51 + assert_tuples_equal(elt1, elt2, curpos + (index,)) 27.52 + raise AssertionError('Found difference at %s : %s != %s' % 27.53 + (curpos, name(elt1), name(elt2) ), curpos) 27.54 + 27.55 +from time import time, clock 27.56 +def test_samples( samples ): 27.57 + time_reports = {} 27.58 + for sample in samples: 27.59 + print "testing", sample 27.60 + tstart1, cstart1 = time(), clock() 27.61 + pypy_tuples = pypy_parse(sample) 27.62 + tstart2, cstart2 = time(), clock() 27.63 + python_tuples = python_parse(sample) 27.64 + time_reports[sample] = (time() - tstart2, tstart2-tstart1, clock() - cstart2, cstart2-cstart1 ) 27.65 + #print "-"*10, "PyPy parse results", "-"*10 27.66 + #print ''.join(print_sym_tuple(pypy_tuples, names=True)) 27.67 + #print "-"*10, "CPython parse results", "-"*10 27.68 + #print ''.join(print_sym_tuple(python_tuples, names=True)) 27.69 + print 27.70 + try: 27.71 + assert_tuples_equal(pypy_tuples, python_tuples) 27.72 + except AssertionError as e: 27.73 + error_path = e.args[-1] 27.74 + print "ERROR PATH =", error_path 27.75 + print "="*80 27.76 + print file(sample).read() 27.77 + print "="*80 27.78 + print "-"*10, "PyPy parse results", "-"*10 27.79 + print ''.join(print_sym_tuple(pypy_tuples, names=True, trace=error_path)) 27.80 + print "-"*10, "CPython parse results", "-"*10 27.81 + print ''.join(print_sym_tuple(python_tuples, names=True, trace=error_path)) 27.82 + print "Failed on (%s)" % sample 27.83 + # raise 27.84 + pprint(time_reports) 27.85 + 27.86 +if __name__=="__main__": 27.87 + import getopt 27.88 + opts, args = getopt.getopt( sys.argv[1:], "d:", [] ) 27.89 + for opt, val in opts: 27.90 + if opt == "-d": 27.91 + pass 27.92 +# set_debug(int(val)) 27.93 + if args: 27.94 + samples = args 27.95 + else: 27.96 + samples = read_samples_dir() 27.97 + 27.98 + test_samples( samples )
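unittest_samples.py is a standalone comparison harness rather than a py.test module: its __main__ block parses the files named on the command line (or, failing that, every .py file in a samples/ directory relative to the working directory) with both pypy_parse and CPython's own parser, prints a diagnostic trace into the first mismatching nested tuple, and reports timings. Invocation would look like this, with the sample path hypothetical:

    python pyparser/test/unittest_samples.py samples/example.py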