Need assistance with syntax errors in two Python scripts:
tester:
cat testdata2.txt | python kmeansMapper.py | sort | python kmeansReducer.py
#kmeansReducer.py
#!/usr/bin/python
import sys
def format_cluster(cluster_id, xs, ys):
    """Return the reducer output record for one finished cluster.

    The record is ``"<centerX> <centerY> <cluster_id> <points>"`` where the
    center is the arithmetic mean of the member coordinates and ``<points>``
    is the list of ``(x, y)`` tuples that belong to the cluster.

    ``xs`` and ``ys`` must be non-empty lists of floats of equal length.
    """
    center_x = sum(xs) / len(xs)
    center_y = sum(ys) / len(ys)
    # list(...) so the points are printed on Python 3, where zip() is lazy.
    return '%s %s %s %s' % (center_x, center_y, cluster_id, list(zip(xs, ys)))


def reduce_clusters(lines):
    """Yield one output record per cluster id found in ``lines``.

    ``lines`` is an iterable of ``"<cluster_id>\\t<x>\\t<y>"`` strings that is
    already grouped by cluster id (the ``sort`` step of the pipeline does
    this).  Blank lines are ignored.  Nothing is yielded for empty input.
    """
    curr_id = None  # key of the cluster currently being accumulated
    curr_xs = []
    curr_ys = []

    for line in lines:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines instead of raising IndexError

        # Fields are TAB separated ('\t'), not separated by the letter 't'.
        fields = line.split('\t')
        cluster_id = fields[0].strip()
        x = float(fields[1])
        y = float(fields[2])

        if cluster_id != curr_id:
            # Key changed: flush the previous cluster (if any) and start anew.
            if curr_id is not None:
                yield format_cluster(curr_id, curr_xs, curr_ys)
            curr_id = cluster_id
            curr_xs = []
            curr_ys = []

        curr_xs.append(x)
        curr_ys.append(y)

    # Output the last key; guarded so empty input does not divide by zero.
    if curr_id is not None:
        yield format_cluster(curr_id, curr_xs, curr_ys)


def run_reducer(stream=None, out=None):
    """Read mapper output from ``stream`` and write cluster records to ``out``.

    Defaults to standard input / standard output so the script can be used
    as ``... | sort | python kmeansReducer.py``.
    """
    stream = sys.stdin if stream is None else stream
    out = sys.stdout if out is None else out
    for record in reduce_clusters(stream):
        # write() instead of print so the script runs on Python 2 and 3.
        out.write(record + '\n')


if __name__ == '__main__':
    run_reducer()
and
cat testdata2.txt | python kmeansMapper.py | sort | python kmeansReducer.py
kmeansMapper.py
#!/usr/bin/python
import sys
import math
def load_centers(path='centers.txt'):
    """Read cluster centers from ``path`` and return them as (x, y) floats.

    Each non-blank line of the file holds one center as two
    whitespace-separated numbers, e.g. ``"4 32"``.
    """
    centers = []
    # 'with' guarantees the file is closed even if parsing fails.
    with open(path, 'r') as fd:
        for line in fd:
            # split() with no argument splits on whitespace; split('') is
            # invalid and raises "ValueError: empty separator".
            vals = line.split()
            if vals:
                # Convert once here rather than for every input point.
                centers.append((float(vals[0]), float(vals[1])))
    return centers


def nearest_cluster(point, centers):
    """Return ``(cluster_number, distance)`` of the center closest to ``point``.

    ``point`` is an ``(x, y)`` pair of floats and ``centers`` a sequence of
    such pairs.  Cluster numbers are 1-based positions in ``centers``.  When
    two centers are equally close the earlier one wins.

    Raises ``ValueError`` if ``centers`` is empty.
    """
    if not centers:
        raise ValueError('no cluster centers given')

    cluster_num = None
    distance = None
    # Compare to each center and keep the smallest distance.
    for i, center in enumerate(centers):
        euclid_dist = math.sqrt((point[0] - center[0]) ** 2 +
                                (point[1] - center[1]) ** 2)
        # The first center is always recorded; later ones only if closer.
        if cluster_num is None or euclid_dist < distance:
            cluster_num = i + 1
            distance = euclid_dist
    return cluster_num, distance


def map_points(lines, centers):
    """Yield ``"<cluster>\\t<x>\\t<y>"`` for every point in ``lines``.

    ``lines`` is an iterable of ``"<x> <y>"`` strings; blank lines are
    skipped.  The x and y text is emitted exactly as it was read.
    """
    for line in lines:
        vals = line.split()
        if not vals:
            continue
        point = (float(vals[0]), float(vals[1]))
        cluster_num, _ = nearest_cluster(point, centers)
        # Real TAB characters so the reducer can split key and values.
        yield '%s\t%s\t%s' % (cluster_num, vals[0], vals[1])


def run_mapper(stream=None, out=None, centers_path='centers.txt'):
    """Assign each point read from ``stream`` to its nearest center.

    Defaults to standard input / standard output so the script can be used
    as ``cat testdata2.txt | python kmeansMapper.py``.
    """
    stream = sys.stdin if stream is None else stream
    out = sys.stdout if out is None else out
    centers = load_centers(centers_path)
    for record in map_points(stream, centers):
        # write() instead of print so the script runs on Python 2 and 3.
        out.write(record + '\n')


if __name__ == '__main__':
    run_mapper()
Dataset = testdata2.txt = {32 45, 23 67, 98 09, 56 87, 13 65, 87 67, 90 78,…}
Dataset = centers.txt = {4 32,55 20, 39 8,17 11 }
What's your question? Yes, these are a mapper script and a reducer script.