added frequency to output

threading
Jeff Yates 2020-11-21 17:32:04 -05:00
parent b640e273ba
commit 2466a6b28b
1 changed file with 5 additions and 2 deletions

View File

@ -13,9 +13,7 @@
# The output file is single-line JSON, use jq to format for reading. # # The output file is single-line JSON, use jq to format for reading. #
# # # #
#To Do: # #To Do: #
# * Fix word count #
# * add frequency # # * add frequency #
# * split by dates for progress notes #
############################################################################ ############################################################################
require 'json' require 'json'
require 'pp' require 'pp'
@ -141,6 +139,7 @@ def process_file (file_name, binfile, type)
output[key] = Hash.new output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text) output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key]) output[key][:total] = count_total(output[key])
output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
end end
write_output_json(output,outfile + '-out.json') write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv') write_output_csv(output,outfile + '-out.csv')
@ -162,6 +161,7 @@ def process_file (file_name, binfile, type)
output[key] = Hash.new output[key] = Hash.new
output[key][:words] = bin_counter(bins[bin_number], text) output[key][:words] = bin_counter(bins[bin_number], text)
output[key][:total] = count_total(output[key]) output[key][:total] = count_total(output[key])
output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
end end
write_output_json(output,outfile + '-out.json') write_output_json(output,outfile + '-out.json')
write_output_csv(output,outfile + '-out.csv') write_output_csv(output,outfile + '-out.csv')
@ -191,8 +191,10 @@ def generate_master_output(dir_name, binfile)
num += 1 num += 1
words_head = "Bin " + num.to_s + " words" words_head = "Bin " + num.to_s + " words"
total_head = "Bin " + num.to_s + " total" total_head = "Bin " + num.to_s + " total"
freq_head = "Bin " + num.to_s + " frequency"
bin_header.push(words_head) bin_header.push(words_head)
bin_header.push(total_head) bin_header.push(total_head)
bin_header.push(freq_head)
end end
CSV.open('master.csv', 'wb') do |csv| CSV.open('master.csv', 'wb') do |csv|
header = ["File", "Total Words" ] + bin_header header = ["File", "Total Words" ] + bin_header
@ -209,6 +211,7 @@ def generate_master_output(dir_name, binfile)
data_hash.each_key do |key| data_hash.each_key do |key|
csv_row.push(data_hash[key]["words"]) csv_row.push(data_hash[key]["words"])
csv_row.push(data_hash[key]["total"]) csv_row.push(data_hash[key]["total"])
csv_row.push(data_hash[key]["frequency"])
end end
csv << csv_row csv << csv_row
end end