Averaging GPS segments

In this notebook we take a look at a possible simple solution for task defined in the competition Averaging GPS segments 2019. The task is to estime shapes of the road segmen betwwen crossings.

Data

For each segement we are given a set of gps trajectories. The goal is to estimate the shape of the underlying road segment. Typical strategy to do so is to average the trajectories.

Load data

class Segment:
    
    def __init__(self, id_, trajs, gt):
        self.id = id_
        self.trajs = trajs
        self.gt = gt
        self.avg = None
        self.meta = []
        self.lines = None
    
    
def list_to_np(points):
    i=0
    nppoints = np.empty((len(points),2))
    for point in points:
        nppoints[i,0] = point['x']
        nppoints[i,1] = point['y']
        i += 1
    return nppoints  
def load_from_file(path, file):
    
    trajs_dat = pythonSample.readTrajectoryDataset(os.path.join(path, file))
    traj_set = []
    for traj in trajs_dat:
        traj_np = list_to_np(traj)
        traj_set.append(traj_np)
    segment = Segment(id_=int(file.split('.')[0]),
                      trajs=traj_set,
                      gt=None)
    return segment
    
def load_from_folder(path, gtruth):
    segments = []
    gts = []
    for gt_traj in pythonSample.readTrajectoryDataset(gtruth):
        gt_np = list_to_np(gt_traj)
        gts.append(gt_np)
    for file in os.listdir(path):
        if file.endswith(".txt"):
            segment = load_from_file(path, file)
            segment.gt = gts[segment.id]
            segments.append(segment)
    segments = sorted(segments, key=lambda k: k.id) 
    return segments


Write Data

def np_to_list(traj):
    list_traj = []
    for point in np.split(traj, traj.shape[0]):
        list_traj.append({'x': point[0, 0], 'y': point[0, 1]})
    return list_traj

def segments_to_list(segments):
    list_segments = []
    for seg in segments:
        list_segments.append(np_to_list(seg.avg))
    return list_segments
        

Avereging

Avereging of a sete of time sieries with unknown corespondences among the data points lead to a huge configuration space and to high computational cost. The approach shown here uses the order of the points to determin the corepsondences among the points of the trajectories. The input generilized using Douglas-Peucker. see


def simple_avg(seg, epsilon, is_rdp=True, is_reverse=True, is_align=True, is_median=False):
    trajs = seg.trajs
    if is_rdp:
        seg.rdp_trajs = []
        for traj in trajs:
            seg.rdp_trajs.append(rdp(traj, epsilon))
            
        trajs = seg.rdp_trajs
    
    avg_traj = np.array([[0, 0], [1, 1]])
    
    max_traj = max(trajs,key=attrgetter('size'))
    new_trajs = []
    for traj in trajs:
        if is_reverse:
            d00 = np.absolute(max_traj[0,:] - traj[0, :]).sum()
            d0e = np.absolute(max_traj[0,:] - traj[-1, :]).sum()
            if d0e < d00:
                traj = traj[::-1,:]
        delta = max_traj.shape[0] - traj.shape[0]
        if delta == 0 : 
            new_traj = traj
        else:
            new_traj = np.empty(max_traj.shape)
            new_traj[:,:] = np.nan
            if is_align:
                lower = int(delta/2)
            else:
                lower = 0
            upper = traj.shape[0] + lower
            new_traj[lower : upper ,:] = traj

        new_trajs.append(new_traj)
    seg.rdp_trajs = new_trajs.copy()
    new_trajs = np.stack(new_trajs, axis=2)
    if is_median:
        avg_traj = np.nanmedian(new_trajs, axis=2)
    else:
        avg_traj = np.nanmean(new_trajs, axis=2)
    
    return avg_traj

C-sim

To evaluate the results C-sim and HC-sim can be used.

# reads a set of trajectories from a file
def readTrajectoryDataset(fileName):
    s = open(fileName, 'r').read()
    comp=s.split("\n")
    trajectory=[]
    trajectorySet=[]
    for i in range(0,len(comp)):
        comp[i]=comp[i].split(" ")
        if(len(comp[i])==2):
            # to double??
            point={
                "x":float(comp[i][0]),
                "y":float(comp[i][1])
            }
            trajectory.append(point)
        else:
            trajectorySet.append(trajectory)
            trajectory=[]
    
    return trajectorySet
    
def HC_SIM(t1,t2,MIN_EPS):
    csim=C_SIM(t1,t2,MIN_EPS*1)
    csim2=C_SIM(t1,t2,MIN_EPS*2)
    csim3=C_SIM(t1,t2,MIN_EPS*4)
    csim4=C_SIM(t1,t2,MIN_EPS*8)
    csim5=C_SIM(t1,t2,MIN_EPS*16)
    csim6=C_SIM(t1,t2,MIN_EPS*32)
    
    csim+=csim2
    csim+=csim3
    csim+=csim4
    csim+=csim5
    csim+=csim6
    
    csim/=6
    
    return csim
            
def C_SIM(t1, t2, EPS):
    cells1=getCells(t1,EPS)
    cells2=getCells(t2,EPS)
    
    c1=cells1["cells"]
    dil1=cells1["dil"]
    c2=cells2["cells"]
    dil2=cells2["dil"]
    
    cab=0
    cabd=0
    cadb=0
    ca=0
    cb=0
    
    for key,value in c1.items():
        ca+=1
        if(c2.get(key,-1)==1):
            cab+=1
        elif(dil2.get(key,-1)==1):
            cabd+=1
            
    for key,value in c2.items():
        cb+=1
        if(dil1.get(key,-1)==1 and c1.get(key,-1)!=1):
            cadb+=1
        
    sim=(cab+cabd+cadb)/(ca+cb-cab)
        
    return sim
    
def getCells(traj,SIZE):
    c1={}
    for j in range(1,len(traj)):
        px=int(round(traj[j-1]["x"]/SIZE))
        py=int(round(traj[j-1]["y"]/SIZE))
        x=int(round(traj[j]["x"]/SIZE))
        y=int(round(traj[j]["y"]/SIZE))
        
        
        c1[str(px)+"_"+str(py)]=1
        c1[str(x)+"_"+str(y)]=1
        
        minX=min(px,x)
        maxX=max(px,x)
        minY=min(py,y)
        maxY=max(py,y)
        deltaX=maxX-minX
        deltaY=maxY-minY
        
        #interpolation
        if(deltaX>deltaY):
            for i in range(minX+1,maxX):
                _y=round(py+(y-py)*(i-px)/(x-px))
                _x=i
                c1[str(_x)+"_"+str(_y)]=1
        else:
            for i in range(minY+1,maxY):
                _x=round(px+(x-px)*(i-py)/(y-py))
                _y=i
                c1[str(_x)+"_"+str(_y)]=1

    #dilation
    dil1={}
    for key,value in c1.items():
        comp=key.split("_")
        x=int(comp[0])
        y=int(comp[1])
        
        nx=str(x-1)
        ny=str(y-1)
        dil1[nx+"_"+ny]=1
        
        nx=str(x-1)
        ny=str(y)
        dil1[nx+"_"+ny]=1
        
        nx=str(x-1)
        ny=str(y+1)
        dil1[nx+"_"+ny]=1
        
        
        nx=str(x)
        ny=str(y-1)
        dil1[nx+"_"+ny]=1
        
        nx=str(x)
        ny=str(y+1)
        dil1[nx+"_"+ny]=1
        
        
        nx=str(x+1)
        ny=str(y-1)
        dil1[nx+"_"+ny]=1
        
        nx=str(x+1)
        ny=str(y)
        dil1[nx+"_"+ny]=1
        
        nx=str(x+1)
        ny=str(y+1)
        dil1[nx+"_"+ny]=1
        
    return {"cells":c1, "dil":dil1}

def avg_all(segments, avg_method, epsilon):
    for seg in segments:
        avg_traj = avg_method(seg, epsilon)
        seg.avg = avg_traj
        
    return segments

def traj_man_length(traj):
    return np.absolute(traj[:-1,:] - traj[1:,:]).sum().sum()
idx_min_points = 0
idx_max_points = 1
idx_gt_points = 2
idx_avg_points = 3
idx_gt_man = 4
idx_avg_man = 5
idx_dtw_dist = 6
idx_hausdorf_dist = 7
idx_frechet_dist = 8
idx_sim_dist = 9
def evaluate(segments):
    segments_props = np.empty((len(segments), 10))
    first_id = None
    for seg in segments:
        if not first_id:
            first_id = seg.id
        seg_idx = seg.id - first_id
        min_traj = min(seg.trajs,key=attrgetter('size'))
        max_traj = max(seg.trajs,key=attrgetter('size'))
        segments_props[seg_idx, idx_min_points] = min_traj.shape[0]
        segments_props[seg_idx, idx_max_points] = max_traj.shape[0]                         
        seg.meta.append("Min trajectorie length {}".format(min_traj.shape[0]))
        seg.meta.append("Max trajectorie length {}".format(max_traj.shape[0]))


        segments_props[seg_idx, idx_gt_points] = seg.gt.shape[0]                      
        segments_props[seg_idx, idx_avg_points] = seg.avg.shape[0]
        seg.meta.append("Trajectorie points gt:{} avg:{}".format(seg.gt.shape[0], seg.avg.shape[0]))

        gt_man_length = traj_man_length(seg.gt)
        avg_man_length = traj_man_length(seg.avg)
        segments_props[seg_idx, idx_gt_man] = gt_man_length                      
        segments_props[seg_idx, idx_avg_man] = avg_man_length
        seg.meta.append("Trajectorie manhattan length gt:{} avg:{}".format(gt_man_length, avg_man_length))

        dtw_dist, _ = fastdtw(seg.gt, seg.avg, dist=euclidean)  # second return is path
        segments_props[seg_idx, idx_dtw_dist] = dtw_dist
        seg.meta.append("DTW distance: {}".format(dtw_dist))

        hausdorf_dist = directed_hausdorff(seg.gt, seg.avg)[0]
        segments_props[seg_idx, idx_hausdorf_dist] = hausdorf_dist
        seg.meta.append("Hausdorf distance: {}".format(hausdorf_dist))

        frechet_dist = frechet(seg.gt, seg.avg)
        segments_props[seg_idx, idx_frechet_dist] = frechet_dist
        seg.meta.append("Frechet distance: {}".format(frechet_dist))
        
        sim_dist = HC_SIM(np_to_list(seg.gt), np_to_list(seg.avg), 0.005)
        segments_props[seg_idx, idx_sim_dist] = sim_dist
        seg.meta.append("Sim distance: {}".format(frechet_dist))

    return segments_props

Draw data


def plot_traj(traj, color, label=None, is_endpoints=False, linewidth=1.7, alpha=0.7):
    #for i in range(0,traj.shape[0]-1):
    #    plt.plot(traj[i:i+2,0], traj[i:i+2,1], 'p-', color=color, label=label)
    plt.plot(traj[:,0], traj[:,1], '.-', color=color, label=label, linewidth=linewidth, markersize=linewidth*4, alpha=alpha)
    #for i in range(0,traj.shape[0]):
    #    plt.annotate(i, (traj[i,0], traj[i,1]))
    
    if is_endpoints:
        if label:
            label = 'first point'
        plt.plot(traj[0,0], traj[0,1], '>', color='black', label=label)
    

def plot_seg(seg):
    fig, axes = plt.subplots()
    
    if seg.lines is not None:
        cmp = plt.cm.get_cmap('Purples')
        lines = seg.lines[-100:,:,:]
        norm_w = normalize(seg.lines_w[-100:].reshape(-1, 1), axis=0)[:,0]
        print(norm_w.shape)
        print(lines.shape)
        for i in range(0, lines.shape[0]):
            plot_traj(lines[i,:,:].T, cmp(1-norm_w[i]))
    
    label_done = False
    for traj in seg.trajs:
        if label_done:
            plot_traj(traj, 'grey')
        else:
            plot_traj(traj, 'grey', label='trajectories')
            label_done = True
    
    label_done = False
    for traj in seg.rdp_trajs:
        if label_done:
            plot_traj(traj, 'black', is_endpoints=True)
        else:
            plot_traj(traj, 'black', label='DP trajectories', is_endpoints=True)
            label_done = True
    
    plot_traj(seg.gt, 'red', label='ground truth', alpha=1)
    
    if seg.avg is not None:
        plot_traj(seg.avg, 'limegreen', 'average', linewidth=3.5, alpha=1)
    plt.legend()
    plt.xticks(np.arange(0,1.1,0.1))
    plt.yticks(np.arange(0,1.1,0.1))
    #plt.suptitle("Segment {}".format(seg.id))
    plt.gcf().text(0.15, -0.055*len(seg.meta), "\n".join(seg.meta), fontsize=14)

    

def plot_overview(segments_props):
    fig, axes = plt.subplots(figsize=(20,5), dpi=200)
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_min_points],
             'k.', color='blue', label='min points')
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_max_points],
             'k.', color='red', label='max points')
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_gt_points],
             'k.', color='green', label='ground truth points')
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_avg_points],
             'k.', color='black', label='avg points')
    plt.xlabel("Segmend id")
    plt.ylabel("#Points")
    plt.xticks(range(0,segments_props.shape[0],5))
    plt.yticks(range(0,int(segments_props[:,idx_max_points].max()),5))
    plt.legend()
    plt.grid()
    
    mean_delta_points = np.mean(segments_props[:,idx_gt_points] - segments_props[:,idx_avg_points])
    plt.gcf().text(0.15, -0.055, "Mean delta points {}".format(mean_delta_points), fontsize=14)
    
    
    ### Metrics
    
    ####### Man length
    fig, axes = plt.subplots(figsize=(20,5), dpi=200)
    
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_gt_man],
             'k.', color='green', label='ground truth man')
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_avg_man],
             'k.', color='black', label='avg man')
    plt.xlabel("Segmend id")
    plt.ylabel("Manhattan length")
    plt.xticks(range(0,segments_props.shape[0],5))
    #plt.yticks(range(0,int(segments_props[:,idx_max_points].max()),5))
    plt.legend()
    plt.grid()
    
    mean_delta_man = np.mean(segments_props[:,idx_gt_man] - segments_props[:,idx_avg_man])
    plt.gcf().text(0.15, -0.055, "Mean delta man length {}".format(mean_delta_man), fontsize=14)
    
    ###### DTW distance
    fig, axes = plt.subplots(figsize=(20,5), dpi=200)
    
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_dtw_dist],
             'k.', color='red')
    
    plt.xlabel("Segmend id")
    plt.ylabel("DTW distance")
    plt.xticks(range(0,segments_props.shape[0],5))
    #plt.yticks(range(0,int(segments_props[:,idx_max_points].max()),5))
    #plt.legend()
    plt.grid()
    
    mean_dtw_dist = np.mean(segments_props[:,idx_dtw_dist])
    plt.gcf().text(0.15, -0.055, "Mean DTW distance {}".format(mean_dtw_dist), fontsize=14)
    
    ###### Hausdorf dist
    fig, axes = plt.subplots(figsize=(20,5), dpi=200)
    
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_hausdorf_dist],
             'k.', color='red')
    
    plt.xlabel("Segmend id")
    plt.ylabel("Hausdorf distance")
    plt.xticks(range(0,segments_props.shape[0],5))
    #plt.yticks(range(0,int(segments_props[:,idx_max_points].max()),5))
    #plt.legend()
    plt.grid()
    
    mean_hausdorf_dist = np.mean(segments_props[:,idx_hausdorf_dist])
    plt.gcf().text(0.15, -0.055, "Mean hausdorf distance {}".format(mean_hausdorf_dist), fontsize=14)
    
    
    ###### Frechet dist
    fig, axes = plt.subplots(figsize=(20,5), dpi=200)
    
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_frechet_dist],
             'k.', color='red')
    
    plt.xlabel("Segmend id")
    plt.ylabel("Frechet distance")
    plt.xticks(range(0,segments_props.shape[0],5))
    #plt.yticks(range(0,int(segments_props[:,idx_max_points].max()),5))
    #plt.legend()
    plt.grid()
    
    mean_frechet_dist = np.mean(segments_props[:,idx_frechet_dist])
    plt.gcf().text(0.15, -0.055, "Mean frechet distance {}".format(mean_frechet_dist), fontsize=14)
    
    
    ###### Sim dist
    fig, axes = plt.subplots(figsize=(20,5), dpi=200)
    
    plt.plot(range(0,segments_props.shape[0]),
             segments_props[:,idx_sim_dist],
             'k.', color='red')
    
    plt.xlabel("Segmend id")
    plt.ylabel("Sim distance")
    plt.xticks(range(0,segments_props.shape[0],5))
    plt.grid()
    
    mean_sim_dist = np.mean(segments_props[:,idx_sim_dist])
    plt.gcf().text(0.15, -0.055, "Mean Sim distance {}".format(mean_sim_dist), fontsize=14)
    

def gen_results(segments):
    segments_props = evaluate(segments)
    for seg in segments:
        plot_seg(seg)

    plot_overview(segments_props)
def main(method, epsilon, is_rdp):
    
    segments = load_from_folder(pythonSample.inputDirectory, '../ground_truth.txt')[0:]
    print("avg all")
    avg_all(segments, method, epsilon)
    print("gen results")
    gen_results(segments)
    return segments

segments = main(simple_avg, 0.01, is_rdp=True)
avg all
gen results


/opt/conda/lib/python3.6/site-packages/matplotlib/pyplot.py:522: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)

png

png

png

png

png

png

png

png

Author: Artem Leichter
Last modified: 15.10.2019