我正在尝试实现用于解析语法的 Earley 算法,但是我一定做错了什么,因为在图表中的第一个条目之后,它没有通过输入字符串的其余部分。
我的测试语法如下:
S -> aXbX | bXaX
X -> aXbX | bXaX | epsilon
S 和 X 是非终结符;a 和 b 是终端。
我想检查它是否被语法接受的字符串是:'abba'。
这是我的代码:
rules = {
"S": [
['aXbX'],
['bXaX'],
],
"X" : [
['aXbX'],
['bXaX'],
['']
]
}
def predictor(rule, state):
if rule["right"][rule["dot"]].isupper(): # NON-TERMINAL
return [{
"left": rule["right"][rule["dot"]],
"right": right,
"dot": 0,
"op": "PREDICTOR",
"completor": []
} for right in rules[rule["right"][rule["dot"]]]]
else:
return []
def scanner(rule, next_input):
# TERMINAL
if rule["right"][rule["dot"]].islower() and next_input in rules[rule["right"][rule["dot"]]]:
print('scanner')
return [{
"left": rule["right"][rule["dot"]],
"right": [next_input],
"dot": 1,
"op": "SCANNER",
"completor": []
}]
else:
return []
def completor(rule, charts):
if rule["dot"] == len(rule["right"]):
print('completor')
return list(map(
lambda filter_rule: {
"left": filter_rule["left"],
"right": filter_rule["right"],
"dot": filter_rule["dot"] + 1,
"op": "COMPLETOR",
"completor": [rule] + filter_rule["completor"]
},
filter(
lambda p_rule: p_rule["dot"] < len(p_rule["right"]) and rule["left"] == p_rule["right"][p_rule["dot"]],
charts[rule["state"]]
)
))
else:
return []
input_string = 'abba'
input_arr = [char for char in input_string] + ['']
charts = [[{
"left": "S'",
"right": ["S"],
"dot": 0,
"op": "-",
"completor": []
}]]
for curr_state in range(len(input_arr)):
curr_chart = charts[curr_state]
next_chart = []
for curr_rule in curr_chart:
if curr_rule["dot"] < len(curr_rule["right"]): # not finished
curr_chart += [i for i in predictor(curr_rule, curr_state) if i not in curr_chart]
next_chart += [i for i in scanner(curr_rule, input_arr[curr_state]) if i not in next_chart]
else:
print('else')
curr_chart += [i for i in completor(curr_rule, charts) if i not in curr_chart]
charts.append(next_chart)
def print_charts(charts, inp):
for chart_no, chart in zip(range(len(charts)), charts):
print("\t{}".format("S" + str(chart_no)))
print("\t\n".join(map(
lambda x: "\t{} --> {}, {} {}".format(
x["left"],
"".join(x["right"][:x["dot"]] + ["."] + x["right"][x["dot"]:]),
str(chart_no) + ',',
x["op"]
),
chart
)))
print()
print_charts(charts[:-1], input_arr)
这是我得到的输出(对于状态 1 到 4,我应该得到 5 到 9 个条目):
S0
S' --> .S, 0, -
S --> .aXbX, 0, PREDICTOR
S --> .bXaX , 0, 预测器
S1
S2
S3
S4