# find and unify duplicate files
# -- drt@un.bewaff.net - http://c0re.jp/
#
# Checksums every regular file below a fixed directory with ``gmd5sum``
# (driven by ``find``), groups files whose MD5 digests match and, when
# ``doreplace`` is set, replaces each duplicate with a hard link to the
# first copy that was seen.

import os
from stat import *

# Set to 1 to replace duplicates with hard links to the first copy found.
doreplace = 0
# Set to 1 to print every group of duplicate file names.
doprint = 1

# MD5 digest (as an integer) -> first file name seen with that digest.
dupecheck = {}
# MD5 digest (as an integer) -> every file name sharing that digest
# (only digests that occurred at least twice appear here).
dupelist = {}

# Raw string so the backslash reaches the shell unchanged.
FIND_COMMAND = r'find -x /usr/local/xitami/webpages/aldi/ -type f -exec gmd5sum {} \;'

# Suffix of the temporary name used while swapping a duplicate for a link.
TEMP_SUFFIX = '.dupelink-tmp'


def group_duplicates(lines):
    """Group file names by MD5 digest.

    Each entry of *lines* is expected to start with 32 hexadecimal digest
    characters; the remainder of the line, stripped of surrounding
    whitespace, is taken as the file name. Blank lines are skipped.

    Returns a ``(first_seen, groups)`` tuple: ``first_seen`` maps every
    digest to the first name carrying it, ``groups`` maps each digest that
    occurred more than once to the list of all names carrying it, in input
    order.

    Raises ``ValueError`` when a non-blank line does not start with a
    hexadecimal digest.
    """
    first_seen = {}
    groups = {}
    for line in lines:
        if not line.strip():
            continue
        digest = int(line[:32], 16)
        name = line[32:].strip()
        if digest not in first_seen:
            first_seen[digest] = name
        else:
            # Seed the group with the first holder of this digest.
            groups.setdefault(digest, [first_seen[digest]]).append(name)
    return first_seen, groups


def link_duplicates(names):
    """Replace every file in *names* after the first with a hard link to it.

    A duplicate is left untouched (and reported on stdout) when its owner,
    group or mode differs from the first file's, and silently skipped when
    it already shares the first file's device and inode.

    Returns the summed size in bytes of the duplicates that were replaced.
    Raises ``OSError`` when a file cannot be stat'ed, linked or renamed.
    """
    stats = [(name, os.stat(name)) for name in names]
    if len(stats) < 2:
        return 0
    base, basestat = stats[0]
    reclaimed = 0
    for name, filestat in stats[1:]:
        if (filestat[ST_DEV] == basestat[ST_DEV] and
                filestat[ST_INO] == basestat[ST_INO]):
            # Already a hard link to the base file; nothing to gain.
            continue
        if (basestat[ST_UID] != filestat[ST_UID] or
                basestat[ST_GID] != filestat[ST_GID] or
                basestat[ST_MODE] != filestat[ST_MODE]):
            print("permissions differ: %r" % name)
            continue
        # Create the link under a temporary name and rename it over the
        # duplicate, so the duplicate is never removed before its
        # replacement exists.
        temp_name = name + TEMP_SUFFIX
        os.link(base, temp_name)
        try:
            os.rename(temp_name, name)
        except OSError:
            os.unlink(temp_name)
            raise
        reclaimed += filestat[ST_SIZE]
    return reclaimed


def main():
    """Scan the tree, report duplicate groups and optionally link them."""
    pipe = os.popen(FIND_COMMAND, 'r')
    try:
        found, groups = group_duplicates(pipe)
    finally:
        pipe.close()
    dupecheck.update(found)
    dupelist.update(groups)

    if doprint:
        for names in dupelist.values():
            print(names)

    if doreplace:
        wastesize = 0
        for names in dupelist.values():
            wastesize += link_duplicates(names)
        print(wastesize)


if __name__ == '__main__':
    main()