在一篇很牛的介紹 Python 中 list 去重的部落格(http://www.peterbe.com/plog/uniqifiers-benchmark)中收集到的:
"""Benchmark of several ways to remove duplicates from a sequence.

Each ``fN`` function takes a sequence and returns a list of its distinct
items.  Running the listing as a script times every function on the same
random data and prints one line per function; a leading ``*`` marks the
functions that keep the first-seen order of the items.
"""
from collections import deque
from itertools import repeat
from random import shuffle, randint
import re
import time

try:
    from sets import Set
except ImportError:
    # The ``sets`` module no longer exists in Python 3; the builtin ``set``
    # is its replacement, so f6 stays runnable (and then equals f7).
    Set = set


def f1(seq):  # Raymond Hettinger
    """Return the distinct items of *seq*; order is not guaranteed.

    Items are stored as dict keys by mapping the dict's bound
    ``__setitem__`` over the input, which keeps the loop out of Python code.
    """
    seen = {}
    # ``map`` is lazy in Python 3, so it has to be drained explicitly;
    # a zero-length deque consumes it without storing the ``None`` results.
    deque(map(seen.__setitem__, seq, repeat(None)), maxlen=0)
    return list(seen)


def f2(seq):
    """Return the distinct items of *seq* in first-seen order.

    Membership is tested against the result list itself, so the items only
    need to support ``==`` (no hashing), at the price of quadratic time.
    """
    checked = []
    for e in seq:
        if e not in checked:
            checked.append(e)
    return checked


def f3(seq):
    """Return the distinct items of *seq*; order is not guaranteed."""
    keys = {}
    for e in seq:
        keys[e] = 1
    return list(keys)


def f4(seq):
    """Return the distinct items of *seq* in first-seen order.

    Uses ``list.count`` on the result as the membership test (quadratic).
    """
    no_dupes = []
    for i in seq:
        if not no_dupes.count(i):
            no_dupes.append(i)
    return no_dupes


def f5(seq, idfun=None):  # Alex Martelli
    """Return the distinct items of *seq* in first-seen order.

    *idfun*, when given, maps each item to the key used for duplicate
    detection; the first item seen for every key is kept unchanged.
    """
    if idfun is None:
        def idfun(x): return x
    seen = {}
    result = []
    for item in seq:
        marker = idfun(item)
        if marker in seen:
            continue
        seen[marker] = 1
        result.append(item)
    return result


def f5b(seq, idfun=None):  # Alex Martelli
    """Same contract as :func:`f5`, written without ``continue``."""
    if idfun is None:
        def idfun(x): return x
    seen = {}
    result = []
    for item in seq:
        marker = idfun(item)
        if marker not in seen:
            seen[marker] = 1
            result.append(item)
    return result


def f6(seq):
    """Return the distinct items of *seq* via ``Set``; order not guaranteed."""
    return list(Set(seq))


def f7(seq):
    """Return the distinct items of *seq* via ``set``; order not guaranteed."""
    return list(set(seq))


def f8(seq):  # Dave Kirby
    """Return the distinct items of *seq* in first-seen order."""
    seen = set()
    # ``seen.add`` returns None, so ``not seen.add(x)`` is always true and
    # only serves to record x as a side effect of the filter.
    return [x for x in seq if x not in seen and not seen.add(x)]


def f9(seq):
    """Return the distinct items of *seq*; order is not guaranteed."""
    return list(dict.fromkeys(seq))


def f10(seq, idfun=None):  # Andrew Dalke
    """Return the distinct items of *seq* in first-seen order.

    *idfun*, when given, maps each item to the key used for duplicate
    detection.  The original item is returned (not the key), which matches
    the behaviour of :func:`f5`.
    """
    return list(_f10(seq, idfun))


def _f10(seq, idfun=None):
    """Generator behind :func:`f10`; yields each first-seen item."""
    seen = set()
    if idfun is None:
        for x in seq:
            if x in seen:
                continue
            seen.add(x)
            yield x
    else:
        for x in seq:
            marker = idfun(x)
            if marker in seen:
                continue
            seen.add(marker)
            # Yield the item itself; yielding the marker would return
            # values that never appeared in the input.
            yield x


def f11(seq):  # f10 but simpler
    """Return the distinct items of *seq* in first-seen order."""
    return list(_f11(seq))


def _f11(seq):
    """Generator behind :func:`f11`; yields each first-seen item."""
    seen = set()
    for x in seq:
        if x in seen:
            continue
        seen.add(x)
        yield x


def timing(f, n, a):
    """Print the name of *f* and the seconds taken by ``10 * n`` calls ``f(a)``.

    The ten calls per iteration are written out by hand so that the loop
    overhead is small compared with the work being measured.
    """
    print(f.__name__, end=" ")
    t1 = time.perf_counter()
    for _ in range(n):
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
        f(a)
    t2 = time.perf_counter()
    print(round(t2 - t1, 3))


def getRandomString(length=10, loweronly=1, numbersonly=0,
                    lettersonly=0):
    """Return a random string of exactly *length* characters.

    numbersonly -- draw from the digits only.
    lettersonly -- draw from upper- and lower-case ASCII letters only
                   (ignored when *numbersonly* is set).
    loweronly   -- lower-case the result before returning it (the default).

    With neither ``*only`` flag the pool is letters plus digits, the digits
    being listed twice.  A *length* of zero or less gives an empty string.
    """
    _letters = 'abcdefghijklmnopqrstuvwxyz'
    if numbersonly:
        pool = list('0123456789')
    elif lettersonly:
        pool = list(_letters + _letters.upper())
    else:
        lowercase = _letters + '0123456789' * 2
        pool = list(lowercase + lowercase.upper())

    # Append freshly shuffled copies of the pool until there is enough
    # material, so long strings keep both the requested length and the
    # requested character class.
    chars = []
    while len(chars) < length:
        shuffle(pool)
        chars.extend(pool)
    s = ''.join(chars[:length])

    if loweronly:
        return s.lower()
    return s


class X:
    """Value that compares by ``foo`` and is not hashable.

    Not used by the benchmark itself; it can serve as input for the
    functions that only need ``==`` (f2 and f4).
    """

    def __init__(self, n):
        self.foo = n

    def __repr__(self):
        return "<foo %r>" % self.foo

    def __cmp__(self, e):
        # Three-way comparison: negative, zero or positive.
        return (self.foo > e.foo) - (self.foo < e.foo)

    # Python 3 never calls __cmp__, so the rich comparisons delegate to it.
    def __eq__(self, e):
        return self.__cmp__(e) == 0

    def __lt__(self, e):
        return self.__cmp__(e) < 0


# Small mixed int/str sample, handy for trying the functions by hand.
SMALL_SAMPLE = [int(x) for x in '21354612'] + list('abcceeaa5efm')

# f2 and f4 also keep the order but are left out of the timed run.
order_preserving = f5, f5b, f8, f10, f11
not_order_preserving = f1, f3, f6, f7, f9
testfuncs = order_preserving + not_order_preserving


def main():
    """Time every function in ``testfuncs`` on 10000 random 3-char strings."""
    testdata = [getRandomString(3, loweronly=True) for _ in range(10000)]
    for f in testfuncs:
        if f in order_preserving:
            print("*", end=" ")
        timing(f, 100, testdata)


if __name__ == "__main__":
    main()
作者的實驗結果為(單位:秒;`*` 表示保持原有順序的方法):
- * f5 10.1
- * f5b 9.99
- * f8 6.49
- * f10 6.57
- * f11 6.6
- f1 4.28
- f3 3.55
- f6 4.03
- f7 2.59
- f9 2.58
* f5 10.1<br /> * f5b 9.99<br /> * f8 6.49<br /> * f10 6.57<br /> * f11 6.6<br /> f1 4.28<br /> f3 3.55<br /> f6 4.03<br /> f7 2.59<br /> f9 2.58
看起來在保持順序的方法中 f8 最快(6.49 秒);如果不需要保持順序,f7 和 f9 更快(約 2.6 秒)。