| 83 | | |
| 84 | | ######################################################################## |
| 85 | | |
| 86 | | =head1 Title |
| 87 | | |
| 88 | | PGE::Hs - Match and display PGE rules as Haskell expressions |
| 89 | | |
| 90 | | =head1 SYNOPSIS |
| 91 | | |
| 92 | | (You need to run C<make PGE-Hs.pbc> in F<compilers/pge> first.) |
| 93 | | |
| 94 | | .sub _main |
| 95 | | load_bytecode "PGE.pbc" |
| 96 | | load_bytecode "PGE/Hs.pir" |
| 97 | | $P0 = find_global "PGE::Hs", "match" |
| 98 | | $S0 = $P0("Hello", "(...)*$") |
| 99 | | print $S0 # PGE_Match 2 5 [PGE_Array [PGE_Match 2 5 [] []]] [] |
| 100 | | .end |
| 101 | | |
| 102 | | =head1 DESCRIPTION |
| 103 | | |
| 104 | | The Haskell-side data structure is defined thus: |
| 105 | | |
| 106 | | data MatchPGE |
| 107 | | = PGE_Match Int Int [MatchPGE] [(String, MatchPGE)] |
| 108 | | | PGE_Array [MatchPGE] |
| 109 | | | PGE_Fail |
| 110 | | deriving (Show, Eq, Ord, Read) |
| 111 | | |
| 112 | | This F<PGE-Hs.pbc> is built separately (not by default). The reason is |
| 113 | | because it's intended to be bundled with Pugs, so as to make Pugs usable |
| 114 | | with vanilla Parrot from 0.2.0 on, using either an external F<parrot> |
| 115 | | executable, or a linked F<libparrot>. |
| 116 | | |
| 117 | | In external F<parrot> mode, Parrot's include path looks into the |
| 118 | | F<.pbc> files inside the library tree first, then look into the current |
| 119 | | directory, F<.>. Hence this file includes, rather than loads, the |
| 120 | | F<PGE.pbc> library, because if Pugs is shipped with its own copy |
| 121 | | of F<PGE.pbc>, Parrot would ignore that file and prefer to load |
| 122 | | the one in the Parrot tree instead. |
| 123 | | |
| 124 | | Granted, it is possible to pass in Pugs's own library path into an |
| 125 | | environment variable (maybe C<PARROT_LIBS>?), but as this was not in |
| 126 | | the 0.3.0 release, I (autrijus) decided to take the easy route. :-) |
| 127 | | |
| 128 | | =head1 CAVEATS |
| 129 | | |
| 130 | | This is an initial sketch. The dump format may change, and the |
| 131 | | whole thing may be taken out or refactored away at any moment. |
| 132 | | |
| 133 | | =cut |
| 134 | | |
| 135 | | .namespace [ "PGE::Hs" ] |
| 136 | | |
| 137 | | .const string PGE_FAIL = "PGE_Fail" |
| 138 | | .const string PGE_SUB_POS = "@:capt" |
| 139 | | .const string PGE_SUB_NAMED = "%:capt" |
| 140 | | |
| 141 | | .sub "__onload" @LOAD |
| 142 | | .local pmc load |
| 143 | | load_bytecode "Data/Escape.imc" |
| 144 | | .end |
| 145 | | |
| 146 | | .sub "add_rule" |
| 147 | | .param string name |
| 148 | | .param string pattern |
| 149 | | .local pmc p6rule_compile, rulesub |
| 150 | | |
| 151 | | find_global p6rule_compile, "PGE", "p6rule" |
| 152 | | null rulesub |
| 153 | | rulesub = p6rule_compile(pattern) |
| 154 | | store_global name, rulesub |
| 155 | | |
| 156 | | .return (name) |
| 157 | | .end |
| 158 | | |
| 159 | | .sub "match" |
| 160 | | .param string x |
| 161 | | .param string pattern |
| 162 | | .local string out, tmps |
| 163 | | .local pmc rulesub |
| 164 | | .local pmc match |
| 165 | | .local pmc p6rule_compile |
| 166 | | .local pmc capt |
| 167 | | |
| 168 | | find_global p6rule_compile, "PGE", "p6rule" |
| 169 | | null rulesub |
| 170 | | |
| 171 | | push_eh match_error |
| 172 | | rulesub = p6rule_compile(pattern) |
| 173 | | match = rulesub(x) |
| 174 | | |
| 175 | | match_result: |
| 176 | | unless match goto match_fail |
| 177 | | tmps = match."dump_hs"() |
| 178 | | out .= tmps |
| 179 | | goto end_match |
| 180 | | |
| 181 | | match_fail: |
| 182 | | out = PGE_FAIL |
| 183 | | goto end_match |
| 184 | | |
| 185 | | match_error: |
| 186 | | out = P5 |
| 187 | | |
| 188 | | end_match: |
| 189 | | out .= "\n" |
| 190 | | |
| 191 | | .return (out) |
| 192 | | .end |
| 193 | | |
| 194 | | .sub unescape |
| 195 | | .param string str |
| 196 | | .local string ret, tmp |
| 197 | | .local int i, j |
| 198 | | |
| 199 | | ret = "" |
| 200 | | j = length str |
| 201 | | if j == 0 goto END |
| 202 | | i = 0 |
| 203 | | |
| 204 | | LOOP: |
| 205 | | tmp = str[i] |
| 206 | | inc i |
| 207 | | if i >= j goto FIN |
| 208 | | |
| 209 | | eq tmp, "\\", ESC |
| 210 | | concat ret, tmp |
| 211 | | goto LOOP |
| 212 | | |
| 213 | | ESC: |
| 214 | | tmp = str[i] |
| 215 | | inc i |
| 216 | | eq tmp, "n", LF |
| 217 | | concat ret, tmp |
| 218 | | goto UNESC |
| 219 | | LF: |
| 220 | | concat ret, "\n" |
| 221 | | UNESC: |
| 222 | | if i >= j goto END |
| 223 | | goto LOOP |
| 224 | | |
| 225 | | FIN: |
| 226 | | concat ret, tmp |
| 227 | | END: |
| 228 | | .return(ret) |
| 229 | | .end |
| 230 | | |
| 231 | | .namespace [ "PGE::Match" ] |
| 232 | | |
| 233 | | .sub "dump_hs" :method |
| 234 | | .local string out |
| 235 | | .local int spi, spc |
| 236 | | .local int ari, arc |
| 237 | | .local int tmpi, cond |
| 238 | | .local string tmps, key |
| 239 | | .local pmc capt, iter, subelm, elm, escape, is_array |
| 240 | | |
| 241 | | out = "" |
| 242 | | escape = find_global "Data::Escape", "String" |
| 243 | | |
| 244 | | start: |
| 245 | | out .= "PGE_Match " |
| 246 | | tmpi = self."from"() |
| 247 | | tmps = tmpi |
| 248 | | out .= tmps |
| 249 | | out .= " " |
| 250 | | tmpi = self."to"() |
| 251 | | tmps = tmpi |
| 252 | | out .= tmps |
| 253 | | out .= " [" |
| 254 | | |
| 255 | | subpats: |
| 256 | | capt = getattribute self, PGE_SUB_POS |
| 257 | | if_null capt, subrules |
| 258 | | spi = 0 |
| 259 | | spc = elements capt |
| 260 | | goto subpats_body |
| 261 | | subpats_loop: |
| 262 | | unless spi < spc goto subrules |
| 263 | | out .= ", " |
| 264 | | subpats_body: |
| 265 | | cond = defined capt[spi] |
| 266 | | unless cond goto subpats_fail |
| 267 | | elm = capt[spi] |
| 268 | | bsr dumper |
| 269 | | inc spi |
| 270 | | goto subpats_loop |
| 271 | | subpats_fail: |
| 272 | | out .= PGE_FAIL |
| 273 | | inc spi |
| 274 | | goto subpats_loop |
| 275 | | |
| 276 | | subrules: |
| 277 | | out .= "] [" |
| 278 | | capt = getattribute self, PGE_SUB_NAMED |
| 279 | | if_null capt, end |
| 280 | | iter = new Iterator, capt |
| 281 | | iter = 0 |
| 282 | | goto subrules_body |
| 283 | | subrules_loop: |
| 284 | | unless iter goto end |
| 285 | | out .= ", " |
| 286 | | subrules_body: |
| 287 | | key = shift iter |
| 288 | | cond = defined capt[key] |
| 289 | | unless cond goto subrules_fail |
| 290 | | elm = capt[key] |
| 291 | | out .= '("' |
| 292 | | tmps = escape(key) |
| 293 | | out .= tmps |
| 294 | | out .= '", ' |
| 295 | | bsr dumper |
| 296 | | out .= ")" |
| 297 | | goto subrules_loop |
| 298 | | subrules_fail: |
| 299 | | out .= PGE_FAIL |
| 300 | | key = shift iter |
| 301 | | goto subrules_loop |
| 302 | | |
| 303 | | dumper: |
| 304 | | ari = 0 |
| 305 | | arc = elements elm |
| 306 | | is_array = getprop "isarray", elm |
| 307 | | if is_array goto dumper_array |
| 308 | | unless ari < arc goto dumper_fail |
| 309 | | subelm = elm[-1] |
| 310 | | tmps = subelm."dump_hs"() |
| 311 | | out .= tmps |
| 312 | | ret |
| 313 | | dumper_fail: |
| 314 | | out .= PGE_FAIL |
| 315 | | ret |
| 316 | | dumper_done: |
| 317 | | out .= "]" |
| 318 | | ret |
| 319 | | dumper_array: |
| 320 | | out .= "PGE_Array [" |
| 321 | | unless ari < arc goto dumper_done |
| 322 | | goto dumper_array_body |
| 323 | | dumper_array_loop: |
| 324 | | unless ari < arc goto dumper_done |
| 325 | | out .= ", " |
| 326 | | dumper_array_body: |
| 327 | | subelm = elm[ari] |
| 328 | | tmps = subelm."dump_hs"() |
| 329 | | out .= tmps |
| 330 | | inc ari |
| 331 | | goto dumper_array_loop |
| 332 | | |
| 333 | | end: |
| 334 | | out .= "]" |
| 335 | | .return (out) |
| 336 | | .end |