No Description

letter_coverage.py 5.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. """
  2. word_coverage.py
  3. Compute the minimum number of words from five-letter-words
  4. needed to cover N letters from the alphabet.
  5. This can be done in O(N^2) time with a dynamic program.
  6. For each word that we choose, we have to look at all other words
  7. to see how many letters those two cover, combined.
  8. https://charlesreid1.com/wiki/Five_Letter_Words
  9. https://charlesreid1.com/wiki/Letter_Coverage
  10. """
  11. from get_words import get_words
  12. import numpy as np
  13. from pprint import pprint
  14. def word2bitvector(word,N):
  15. """
  16. Turns a five-letter word into a bit vector representing character coverage.
  17. Uses 26 letters by default.
  18. """
  19. bit_vector = [False,]*N
  20. for c in word:
  21. i = ord(c)-ord('a')
  22. try:
  23. bit_vector[i] = True
  24. except IndexError:
  25. pass
  26. return np.array(bit_vector)
  27. def printbv(bv):
  28. """
  29. Pretty printing for boolean bit vector
  30. """
  31. result = ""
  32. for bit in bv:
  33. if bit:
  34. result += "1"
  35. else:
  36. result += "0"
  37. return result
  38. def btsolution(min_key, min_val, words, bt):
  39. """
  40. Reconstruct the sequence of words that gives maximum coverage and minimum word count.
  41. Input: minimum word key (last word), minimum value (number of words), backtrack (prior word)
  42. Output: list of words
  43. """
  44. solution = []
  45. solution.append(words[min_key])
  46. prior_key = bt[min_key]
  47. while prior_key != -1:
  48. solution.append(words[prior_key])
  49. prior_key = bt[prior_key]
  50. return reversed(solution)
  51. def get_dummy_words():
  52. return ["aa","ab","bc","aa","dd","de","bb"]
  53. if __name__=="__main__":
  54. # Searching for words covering first N letters
  55. N = 15
  56. words = get_words()
  57. words = words[:1000]
  58. # Initialization:
  59. # ----------------
  60. # Store best coverage bitvectors for each word
  61. bestcoverage_bv = [np.array([False]*N) for k in range(len(words))]
  62. # Store number of 1s for best coverage vector for each word
  63. ones_bv = [-1]*len(words)
  64. # Store number of words in best solution for each word
  65. ws = [0]*len(words)
  66. # Store prior word for backtracking
  67. bt = [-1]*len(words)
  68. # Fencepost: Initial Step
  69. # Word 0
  70. # ----------------
  71. i = 0
  72. # Start with word 0
  73. wi = words[i]
  74. # Best letter coverage bit vector
  75. bestcoverage_bv[i] = word2bitvector(words[i],N)
  76. # Length of 1s
  77. ones_bv[i] = sum(bestcoverage_bv[i])
  78. # Number of words in best solution:
  79. ws[i] = 1
  80. # Backtracking: first word has no prior word
  81. bt[i] = -1
  82. # Start by assuming the word by itself,
  83. # and then examine each possible pairing
  84. for i in range(1,len(words)):
  85. wi = words[i]
  86. # Start with bitvector of word i's coverage
  87. wi_bv = word2bitvector(wi,N)
  88. # Fencepost: initial step
  89. # Word i by itself
  90. # Assume word i is the first word in the solution,
  91. # and if we find a better combination with prior word,
  92. # overwrite this solution.
  93. # ------------------------
  94. # Best coverage so far (first guess) is word i by itself
  95. bestcoverage_bv[i] = wi_bv
  96. # Count ones in (first guess) best bitvector
  97. ones_bv[i] = sum(bestcoverage_bv[i])
  98. # Number of words in new best solution:
  99. ws[i] = 1
  100. # Backtracking
  101. bt[i] = -1
  102. # Boolean: is this the first word in the sequence of solutions?
  103. first = True
  104. # Now loop over the rest of the words,
  105. # and look for a better solution.
  106. for j in reversed(range(0,i)):
  107. # Get the prior word
  108. wj = words[j]
  109. # Get best coverage bitvector
  110. wj_bv = bestcoverage_bv[j]
  111. # (potential) new combined coverage vector
  112. bestcoverage_bv_i = np.logical_or(wi_bv, wj_bv)
  113. # Number of ones in (potential) new combined coverage vector
  114. ones_bv_i = sum(bestcoverage_bv_i)
  115. # Number of words in (potential) new best solution
  116. ws_i = ws[j]+1
  117. # If this solution is better than our current one,
  118. # overwrite the current solution.
  119. # (Better means, "more ones", or "same ones and fewer words".)
  120. #import pdb; pdb.set_trace();
  121. if( (ones_bv_i > ones_bv[i]) or (ones_bv_i==ones_bv[i] and ws_i < ws[i]) ):
  122. bestcoverage_bv[i] = bestcoverage_bv_i
  123. ones_bv[i] = ones_bv_i
  124. ws[i] = ws_i
  125. bt[i] = j
  126. # This word now follows another word in the sequence of solutions
  127. first = False
  128. # It's tempting to stop early,
  129. # but what if we find the perfect
  130. # solution right at the end?!?
  131. # Okay, now actually get the solution.
  132. # The solution is the maximum of ones_bv and the minimum of ws
  133. #
  134. # Start by finding the maximum(s) of ones_bv
  135. # Then check each corresponding index of ws
  136. ones_bv_indices = [k for k,v in enumerate(ones_bv) if v==max(ones_bv)]
  137. min_key = ones_bv_indices[0]
  138. min_val = ones_bv[ones_bv_indices[0]]
  139. for ix in reversed(ones_bv_indices[1:]):
  140. if(ones_bv[ix] < min_key):
  141. min_key = ix
  142. min_val = ones_bv[ix]
  143. solution = list(btsolution(min_key, min_val, words, bt))
  144. print("Takes "+str(len(solution))+" words to cover "+str(N)+" letters")
  145. pprint(solution)