diff --git a/sorter.rb b/sorter.rb index ad0f22b..09f9826 100755 --- a/sorter.rb +++ b/sorter.rb @@ -13,9 +13,7 @@ # The output file is single-line JSON, use jq to format for reading. # # # #To Do: # -# * Fix word count # # * add frequency # -# * split by dates for progress notes # ############################################################################ require 'json' require 'pp' @@ -141,6 +139,7 @@ def process_file (file_name, binfile, type) output[key] = Hash.new output[key][:words] = bin_counter(bins[bin_number], text) output[key][:total] = count_total(output[key]) + output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f end write_output_json(output,outfile + '-out.json') write_output_csv(output,outfile + '-out.csv') @@ -162,6 +161,7 @@ def process_file (file_name, binfile, type) output[key] = Hash.new output[key][:words] = bin_counter(bins[bin_number], text) output[key][:total] = count_total(output[key]) + output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f end write_output_json(output,outfile + '-out.json') write_output_csv(output,outfile + '-out.csv') @@ -191,8 +191,10 @@ def generate_master_output(dir_name, binfile) num += 1 words_head = "Bin " + num.to_s + " words" total_head = "Bin " + num.to_s + " total" + freq_head = "Bin " + num.to_s + " frequency" bin_header.push(words_head) bin_header.push(total_head) + bin_header.push(freq_head) end CSV.open('master.csv', 'wb') do |csv| header = ["File", "Total Words" ] + bin_header @@ -209,6 +211,7 @@ def generate_master_output(dir_name, binfile) data_hash.each_key do |key| csv_row.push(data_hash[key]["words"]) csv_row.push(data_hash[key]["total"]) + csv_row.push(data_hash[key]["frequency"]) end csv << csv_row end