diff --git a/sorter.rb b/sorter.rb index 4cf8991..b642075 100755 --- a/sorter.rb +++ b/sorter.rb @@ -13,6 +13,9 @@ # The output file is single-line JSON, use jq to format for reading. # # # #To Do: # +# * Fix word count # +# * add frequency # +# * split by dates for progress notes # ############################################################################ require 'json' require 'pp' @@ -55,6 +58,7 @@ def write_output_csv (output, filename) CSV.open(filename, 'wb') do |csv| csv << ["bin", "words", "total"] output.delete(:filename) + output.delete(:total_words) output.each_key do |key| line = [] line.push(key) @@ -129,6 +133,8 @@ def process_file (file_name, binfile, type) outfile.slice!('.txt') #puts outfile output[:filename] = outfile + output[:total_words] = text.split.size + puts output csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils bins.each_key do |bin_number| key = bin_number.to_sym @@ -175,14 +181,13 @@ def generate_master_output(dir_name, binfile) json_file = File.read(file_name) data_hash = JSON.parse(json_file) csv_row.push(data_hash["filename"]) + csv_row.push(data_hash["total_words"]) data_hash.delete("filename") - word_total = 0 + data_hash.delete("total_words") data_hash.each_key do |key| csv_row.push(data_hash[key]["words"]) csv_row.push(data_hash[key]["total"]) - word_total += data_hash[key]["total"] end - csv_row = csv_row.insert(1, word_total) csv << csv_row end end