Make error message about uninitialized members more descriptive by adding name, positio...
[hiphop-php.git] / hphp / parser / make-parser.sh
blob3ba5496b4e81db6ac26bb9d682bf52668d138998
#!/bin/bash
#
# Regenerates the PHP5 and PHP7 bison parsers, plus the token header they
# share, from the single hphp.y grammar.
#
# Environment:
#   FBCODE_DIR   If non-empty, the parser directory is taken to be
#                ${FBCODE_DIR}/hphp/parser rather than this script's directory.
#   INSTALL_DIR  Where the generated files are written. Defaults to the
#                parser directory.
#
# Notes:
#   - hphp.y is opened relative to the current working directory.
#   - The sed invocations rely on GNU sed ("-i" without a suffix, "-r").
#   - Exits 1 with a message on stderr when any step fails.

set -euo pipefail

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

unset CDPATH
DIR="$( cd "$( dirname "$0" )" && pwd )" || die "cannot resolve script directory"

# If we're using buck, then we'll be in a sandboxed source directory instead of
# the repo. We want the path to the repo so we can check in the generated
# parser artifacts.
if [[ -n "${FBCODE_DIR:-}" ]]; then
  DIR="${FBCODE_DIR}/hphp/parser"
fi

INFILE=hphp.y
INSTALL_DIR="${INSTALL_DIR:-${DIR}}"

# "command -v" prints nothing and fails when bison is absent; the -x test
# below then reports it.
BISON=$(command -v bison || true)
if [[ ! -x "${BISON}" ]]; then
  die "bison not found"
fi

OUTFILE5=${INSTALL_DIR}/hphp.5.tab.cpp
OUTFILE7=${INSTALL_DIR}/hphp.7.tab.cpp
OUTHEADER5=${INSTALL_DIR}/hphp.5.tab.hpp
OUTHEADER7=${INSTALL_DIR}/hphp.7.tab.hpp
OUTHEADER=${INSTALL_DIR}/hphp.tab.hpp

BISON_OPTS=(--verbose --locations -d)

# Scratch copy of the grammar fed to bison; removed on every exit path.
TMP=$(mktemp) || die "mktemp failed"
trap 'rm -f -- "${TMP}"' EXIT

#######################################
# Generate one parser from hphp.y.
#
# Splits the single grammar with a small awk filter:
# - Anything between a /* !PHP<drop>_ONLY */ line and a /* !END */ line is
#   removed from the parser being generated.
# - This is done just by looking for those tokens and setting the "flag".
# - A placeholder is still printed for each removed line, to keep line numbers
#   consistent and ease debugging.
#
# Globals:   INFILE, TMP, BISON, BISON_OPTS (read)
# Arguments: $1 - PHP major version to generate (5 or 7)
#            $2 - the other version, whose *_ONLY sections are blanked out
#            $3 - path of the .cpp file bison should write
# Returns:   does not return on failure (exits 1 via die)
#######################################
build_parser() {
  local keep=$1 drop=$2 outfile=$3

  awk -v marker="!PHP${drop}_ONLY" '
    /!END/      { flag = 0 }
    flag        { print "/* REMOVED */" }
    $0 ~ marker { print; flag = 1 }
    !flag
  ' "${INFILE}" > "${TMP}" \
    || die "Failed to preprocess ${INFILE} for the PHP${keep} parser!"

  "${BISON}" "${BISON_OPTS[@]}" "-pCompiler${keep}" "-o${outfile}" "${TMP}" \
    || die "Bison failed to compile PHP${keep} parser!"
}

build_parser 5 7 "${OUTFILE5}"
build_parser 7 5 "${OUTFILE7}"

# Remove alpha variance in "#line" directives: the random temp-file name is
# replaced by the grammar's name and, when generating in-tree, the absolute
# directory prefix is dropped as well.
# NOTE: both paths are interpolated into the sed pattern unescaped, so a '#'
# in either of them would break the expression.
LINE_FIXUPS=(-e "s#${TMP}#${INFILE}#g")
if [[ "${INSTALL_DIR}" == "${DIR}" ]]; then
  LINE_FIXUPS+=(-e "s#${DIR}/##g")
fi
sed -i "${LINE_FIXUPS[@]}" "${OUTFILE5}" "${OUTFILE7}"

# Blank out the lines that carry the per-version prefix (parse/debug
# declarations and the include guard) so that the two headers can be compared.
sed -i \
  -e 's@int Compiler[57]parse.*@@' \
  -e 's@.*int Compiler[57]debug.*@@' \
  -e "s@#ifndef YY_COMPILER[57]_.*@@g" \
  -e "s@# define YY_COMPILER[57]_.*@@g" \
  -e "s@#endif /\* !YY_COMPILER[57]_.*@@g" \
  "${OUTHEADER5}" "${OUTHEADER7}"

if ! cmp "${OUTHEADER5}" "${OUTHEADER7}"; then
  die "PHP5 and PHP7 headers differ, must be the same tokens"
fi

cp "${OUTHEADER5}" "${OUTHEADER}"

# Lots of our logic relies on knowing the shape of the token table. Sadly it is
# an enum without introspection, so instead make it macros so we can control its
# shape on re-requires of the .hpp file.
sed -i -r \
  -e 's/(T_\w+)\s+=\s+([0-9]+)\s*,?/YYTOKEN(\2, \1)/g' \
  -e "s/\s+enum\s+yytokentype/#ifndef YYTOKEN_MAP\n#define YYTOKEN_MAP enum yytokentype\n#define YYTOKEN(num, name) name = num,\n#endif\n YYTOKEN_MAP/" \
  "${OUTHEADER}"

# Remove the include guard's #endif (-e doesn't work for this).
sed -i '$ d' "${OUTHEADER}"

# Reads one "  YYTOKEN(<num>, <name>)" line on stdin and writes an
# #ifndef-guarded #define of macro $1 with value <num>.
token_bound_macro() {
  sed -r -e "s/\s+YYTOKEN.([0-9]+).*/#ifndef $1\n#define $1 \1\n#endif/"
}

# We don't want to rely on the grammar to have a fixed start and end token, so
# let's parse the file and make two macros for the min and max.
TOKEN_MIN=$(grep -m 1 '^\s\+YYTOKEN(' "${OUTHEADER}" \
  | token_bound_macro YYTOKEN_MIN) \
  || die "No YYTOKEN entries found in ${OUTHEADER}"
TOKEN_MAX=$(grep '^\s\+YYTOKEN(' "${OUTHEADER}" \
  | tail -n 1 \
  | token_bound_macro YYTOKEN_MAX) \
  || die "No YYTOKEN entries found in ${OUTHEADER}"

printf '%s\n\n%s\n' "${TOKEN_MIN}" "${TOKEN_MAX}" >> "${OUTHEADER}"

# We still want the files in our tree since they are checked in.
if [[ "${INSTALL_DIR}" != "${DIR}" ]]; then
  # The marker is spelled as two adjacent string pieces, presumably so that
  # this script itself is not detected as a generated file.
  sed -i -e "1i// @""generated" "${OUTHEADER}"
  cp "${OUTHEADER}" "${DIR}/hphp.tab.hpp"
fi