# Grid format:
# 0 = Navigable space
# 1 = Occupied space
grid = [[0, 0, 1, 0, 0, 0],
[0, 0, 1, 0, 0, 0],
[0, 0, 0, 0, 1, 0],
[0, 0, 1, 1, 1, 0],
[0, 0, 0, 0, 1, 0]]
init = [0, 0]
goal = [len(grid)-1, len(grid[0])-1] # Make sure that the goal definition stays in the function.
delta = [[-1, 0 ], # go up
[ 0, -1], # go left
[ 1, 0 ], # go down
[ 0, 1 ]] # go right
delta_name = ['^', '<', 'v', '>']
cost = 1
def print_expand(expand):
for i in range(len(expand)):
print(expand[i])
def print_path(action):
path = [[' ' for col in range(len(grid[0]))] for row in range(len(grid))]
x, y = goal[0], goal[1]
path[goal[0]][goal[1]] = '*'
while x != init[0] or y != init[1]:
# 下面x,y 都要变化,所以这里提取temp,delta_name取下标就不会错
temp = action[x][y]
opposite = (temp - 2) % len(delta)
x += delta[opposite][0]
y += delta[opposite][1]
path[x][y] = delta_name[temp]
for i in range(len(path)):
print(path[i])
# breadth-first search shorest path, and print the track.
def search_breadth_first_1D():
xboundary = len(grid) - 1
yboundary = len(grid[0]) - 1
search_list = [[0, init[0], init[1]]]
result = []
action = [[-1 for col in range(yboundary+1)] for row in range(xboundary+1)]
closed = [[0 for col in range(yboundary+1)] for row in range(xboundary+1)]
closed[init[0]][init[1]] = 1
expand = [[-1 for col in range(yboundary+1)] for row in range(xboundary+1)]
expand[init[0]][init[1]] = 0
expand_count = 1
while search_list:
#search_list.sort() breadth-first 已经是按照g-value(距离Start点的距离) 排序好的。
current = search_list.pop(0)
for i in range(len(delta)):
x1 = current[1] + delta[i][0]
y1 = current[2] + delta[i][1]
if x1 < 0 or x1 > xboundary or y1 <0 or y1 > yboundary or grid[x1][y1] or closed[x1][y1]:
continue
if x1 == goal[0] and y1 == goal[1]:
result.append([current[0]+cost, x1, y1])
# 终点赋值一次,expand和 action 就都不再赋值,这样能得到最短路径
if expand[goal[0]][goal[1]] == -1: # 一般(expand[goal[0]][goal[1]] > expand_count) 条件不成立,因为短路径先到达终点,且expand_count 是递增赋值的
expand[x1][y1] = expand_count
expand_count += 1
action[x1][y1] = i # for back propagation
else:
search_list.append([current[0]+cost, x1, y1])
closed[x1][y1] = 1
expand[x1][y1] = expand_count
expand_count += 1
action[x1][y1] = i # for back propagation
print_expand(expand)
print_path(action)
if result == []:
return 'fail'
else:
result.sort()
return result#.pop(0)
print(search_breadth_first_1D())
print('\n')
# compute the value,like heuristic.Then easily get the policy.
# dynamic programming
def compute_value():
value = [[999 for row in range(len(grid[0]))] for col in range(len(grid))]
closed = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
x, y = goal[0], goal[1]
value[x][y] = 0
closed[x][y] = 1
temp = [[x, y]]
# 从后往前搜索,填写距离终点的距离数
while temp:
x, y = temp.pop(0)
for i in range(len(delta)):
x1 = x + delta[i][0]
y1 = y + delta[i][1]
if x1 < 0 or x1 >= len(grid) or y1 < 0 or y1 >= len(grid[0]) or grid[x1][y1] or closed[x1][y1]:
continue
v2 = value[x][y] + cost
if v2 > value[x1][y1]:
continue
temp.append([x1, y1])
closed[x1][y1] = 1
value[x1][y1] = v2
# 从前往后传播,得到每个点的周围四点中,距终点最近的一个,确认此点的方向
policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(grid)):
for j in range(len(grid[0])):
if grid[i][j]:
continue
temp = []
for k in range(len(delta)):
x1 = i + delta[k][0]
y1 = j + delta[k][1]
if x1 >= 0 and y1 >= 0 and x1 < len(grid) and y1 < len(grid[0]) and grid[x1][y1] == 0:
temp.append([value[x1][y1], k])
if temp == []:
continue
temp.sort()
lowest = temp.pop(0)
if value[i][j] > lowest[0]:
policy[i][j] = delta_name[lowest[1]]
policy[goal[0]][goal[1]] = '*'
print_expand(policy)
return value
a = compute_value()
for i in a:
print(i)
print('\n')
# You are given a car in a grid with initial state
# init = [x-position, y-position, orientation]
# where x/y-position is its position in a given
# grid and orientation is 0-3 corresponding to 'up',
# 'left', 'down' or 'right'.
#
# Your task is to compute and return the car's optimal
# path to the position specified in `goal'; where
# the costs for each motion are as defined in `cost'.
#
# grid format:
# 0 = navigable space
# 1 = occupied space
grid = [[1, 1, 1, 0, 0, 0],
[1, 1, 1, 0, 1, 0],
[0, 0, 0, 0, 0, 0],
[1, 1, 1, 0, 1, 1],
[1, 1, 1, 0, 1, 1]]
goal = [2, 0] # final position
init = [4, 3, 0] # first 2 elements are coordinates, third is direction
cost = [2, 1, 20] # the cost field has 3 values: right turn, no turn, left turn
# there are four motion directions: up/left/down/right
# increasing the index in this array corresponds to
# a left turn. Decreasing is is a right turn.
forward = [[-1, 0], # go up
[ 0, -1], # go left
[ 1, 0], # go down
[ 0, 1]] # do right
forward_name = ['up', 'left', 'down', 'right']
# the cost field has 3 values: right turn, no turn, left turn
action = [-1, 0, 1]
action_name = ['R', '#', 'L']
def optimum_policy2D():
value = [[[999 for row in range(len(grid[0]))] for col in range(len(grid))], # 距终点的各个方向的距离
[[999 for row in range(len(grid[0]))] for col in range(len(grid))],
[[999 for row in range(len(grid[0]))] for col in range(len(grid))],
[[999 for row in range(len(grid[0]))] for col in range(len(grid))]]
policy = [[[' ' for row in range(len(grid[0]))] for col in range(len(grid))], # 距终点各个方向上的走向
[[' ' for row in range(len(grid[0]))] for col in range(len(grid))],
[[' ' for row in range(len(grid[0]))] for col in range(len(grid))],
[[' ' for row in range(len(grid[0]))] for col in range(len(grid))]]
policy2D = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
change = True
while change:
change = False
# go through all grid cells and calculate values
for x in range(len(grid)):
for y in range(len(grid[0])):
for orientation in range(4):
if goal[0] == x and goal[1] == y:
if value[orientation][x][y] > 0:
value[orientation][x][y] = 0
policy[orientation][x][y] = '*'
change = True
elif grid[x][y] == 0:
# calculate the three ways to propagate value
for i in range(3):
o2 = (orientation + action[i]) % 4
x2 = x + forward[o2][0]
y2 = y + forward[o2][1]
if x2 >= 0 and x2 < len(grid) and y2 >= 0 and y2 < len(grid[0]) and grid[x2][y2] == 0:
# 从后往前搜索,填写value的过程。可以把x2,y2,o2看成goal的位置和方向,x,y,orientation走一步后到2,所以1的值=2的值+转向的值
# 相当于从前往后,不断的减去转向的值,到终点时,相减的结果变成了0
v2 = value[o2][x2][y2] + cost[i] # tricky
if v2 < value[orientation][x][y]:
value[orientation][x][y] = v2
policy[orientation][x][y] = action_name[i]
change = True
x, y, orientation = init[0], init[1], init[2]
policy2D[x][y] = policy[orientation][x][y]
while policy[orientation][x][y] != '*':
if policy[orientation][x][y] == '#':
o2 = orientation
elif policy[orientation][x][y] == 'R':
o2 = (orientation - 1) % 4
elif policy[orientation][x][y] == 'L':
o2 = (orientation + 1) % 4
x += forward[o2][0]
y += forward[o2][1]
orientation = o2
policy2D[x][y] = policy[orientation][x][y]
for i in range(len(policy2D)):
print(policy2D[i])
def optimum_policy2D_abandon(): # 结果是一个一维的东西,不是三维的思维
value = [[[999 for row in range(len(grid[0]))] for col in range(len(grid))],
[[999 for row in range(len(grid[0]))] for col in range(len(grid))],
[[999 for row in range(len(grid[0]))] for col in range(len(grid))],
[[999 for row in range(len(grid[0]))] for col in range(len(grid))]]
ori = [[[-1 for row in range(len(grid[0]))] for col in range(len(grid))],
[[-1 for row in range(len(grid[0]))] for col in range(len(grid))],
[[-1 for row in range(len(grid[0]))] for col in range(len(grid))],
[[-1 for row in range(len(grid[0]))] for col in range(len(grid))]]
policy2D = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
for i in range(len(value)):
value[i][goal[0]][goal[1]] = 0
ori[i][goal[0]][goal[1]] = i
back_cost = cost[:]
back_cost.reverse()
# 4种在终点的初始回溯方向
for idx in range(len(value)):
change = True
while change:
change = False
for i in range(len(grid)):
for j in range(len(grid[0])):
if grid[i][j] == 1 or ori[idx][i][j] == -1:
continue
for k in range(len(action)):
direction = (ori[idx][i][j] + action[k]) % len(forward)
x1 = i + forward[direction][0]
y1 = j + forward[direction][1]
if x1 < 0 or x1 >= len(grid) or y1 < 0 or y1 >= len(grid[0]) or grid[x1][y1] == 1:
continue
else:
new_cost = back_cost[k] + value[idx][i][j]
if new_cost < value[idx][x1][y1]:
value[idx][x1][y1] = new_cost
ori[idx][x1][y1] = direction
change = True
for idx in range(len(value)):
# 逆溯时方向相反,需要反向
ori[idx][init[0]][init[1]] = (ori[idx][init[0]][init[1]] + 2) % len(forward)
direction_diff = (ori[idx][init[0]][init[1]] - init[2]) % len(forward)
# 下面是不严格的推导,因为可能转向时遇到障碍物
if direction_diff == 1:
# 开始的方向加一个左转的cost,取代了原来直走一格的cost
value[idx][init[0]][init[1]] += cost[2] - cost[1]
elif direction_diff == 3:
value[idx][init[0]][init[1]] += cost[0] - cost[1]
elif direction_diff == 2:
# 反向要左转三次、右转一次,或者相反,取代原来走的2格cost
if (cost[0]*3 + cost[2]) > (cost[2]*3 + cost[0]):
value[idx][init[0]][init[1]] += (cost[2]*3 + cost[0]) - 2*cost[1]
else:
value[idx][init[0]][init[1]] += (cost[0]*3 + cost[2]) - 2*cost[1]
else:
pass
# 取出最小的一个
temp = value[0][init[0]][init[1]]
index = 0
for idx in range(1, len(value)):
if temp > value[idx][init[0]][init[1]]:
temp = value[idx][init[0]][init[1]]
index = idx
for i in ori[index]:
print(i)
for i in value[index]:
print(i)
policy2D[goal[0]][goal[1]] = '*'
x, y = goal[0], goal[1]
## while x != init[0] or y != init[y]:
## x = x + forward[ori[index][x][y]][0]
## y = y + forward[ori[index][x][y]][1]
## policy2D[x][y] = '#'
return policy2D
success_prob = 0.5
failure_prob = (1.0 - success_prob)/2.0 # Probability(stepping left) = prob(stepping right) = failure_prob
collision_cost = 100
cost_step = 1
def stochastic_value():
value = [[1000 for row in range(len(grid[0]))] for col in range(len(grid))]
policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
value[goal[0]][goal[1]] = 0
policy[goal[0]][goal[1]] = '*'
change = True
while change:
change = False
for x in range(len(grid)):
for y in range(len(grid[0])):
if grid[x][y]: continue
for direct in range(len(delta)):
v2 = cost_step
for turn in range(-1, 2):
i = (direct + turn) % len(delta)
x1 = x + delta[i][0]
y1 = y + delta[i][1]
if turn == 0:
p2 = success_prob
else:
p2 = failure_prob
if x1 < 0 or x1 >= len(grid) or y1 < 0 or y1 >= len(grid[0]) or grid[x1][y1]:
v2 += p2 * collision_cost
else:
v2 += p2 * value[x1][y1]
if v2 < value[x][y]:
value[x][y] = v2
policy[x][y] = delta_name[direct]
change = True
return value, policy
a,b = stochastic_value()
print_expand(a)
print_expand(b)
def stochastic_value_abandon():
value = [[1000 for row in range(len(grid[0]))] for col in range(len(grid))]
policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
value[goal[0]][goal[1]] = 0
policy[goal[0]][goal[1]] = '*'
change = True
while change:
change = False
update = [[goal[0], goal[1]]]
while update:
x, y = update.pop(0)
for i in range(len(delta)):
x1 = x + delta[i][0]
y1 = y + delta[i][1]
if x1 < 0 or x1 >= len(grid) or y1 < 0 or y1 >= len(grid[0]) or grid[x1][y1]:
continue
lvalue = (i - 1) % 4
rvalue = (i + 1) % 4
lx, ly = x1 + delta[lvalue][0], y1 + delta[lvalue][1]
rx, ry = x1 + delta[rvalue][0], y1 + delta[rvalue][1]
if lx < 0 or lx >= len(grid) or ly < 0 or ly >= len(grid[0]) or grid[lx][ly]:
lvalue = collision_cost
else:
lvalue = value[lx][ly]
if rx < 0 or rx >= len(grid) or ry < 0 or ry >= len(grid[0]) or grid[rx][ry]:
rvalue = collision_cost
else:
rvalue = value[rx][ry]
v2 = success_prob * value[x][y] + failure_prob * (lvalue + rvalue) + cost_step
if v2 < value[x1][y1]:
value[x1][y1] = v2
update.append([x1, y1])
change = True
return value, policy
# As you see, dynamic programming for planning is very inefficient.
# It has to go through the entire map to search whether there is a change.
# Google map search is split the entire map into pieces, then calculate
# each piece, this is a litter better.