added frequency to output
parent
b640e273ba
commit
2466a6b28b
|
@ -13,9 +13,7 @@
|
||||||
# The output file is single-line JSON, use jq to format for reading. #
|
# The output file is single-line JSON, use jq to format for reading. #
|
||||||
# #
|
# #
|
||||||
#To Do: #
|
#To Do: #
|
||||||
# * Fix word count #
|
|
||||||
# * add frequency #
|
# * add frequency #
|
||||||
# * split by dates for progress notes #
|
|
||||||
############################################################################
|
############################################################################
|
||||||
require 'json'
|
require 'json'
|
||||||
require 'pp'
|
require 'pp'
|
||||||
|
@ -141,6 +139,7 @@ def process_file (file_name, binfile, type)
|
||||||
output[key] = Hash.new
|
output[key] = Hash.new
|
||||||
output[key][:words] = bin_counter(bins[bin_number], text)
|
output[key][:words] = bin_counter(bins[bin_number], text)
|
||||||
output[key][:total] = count_total(output[key])
|
output[key][:total] = count_total(output[key])
|
||||||
|
output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
|
||||||
end
|
end
|
||||||
write_output_json(output,outfile + '-out.json')
|
write_output_json(output,outfile + '-out.json')
|
||||||
write_output_csv(output,outfile + '-out.csv')
|
write_output_csv(output,outfile + '-out.csv')
|
||||||
|
@ -162,6 +161,7 @@ def process_file (file_name, binfile, type)
|
||||||
output[key] = Hash.new
|
output[key] = Hash.new
|
||||||
output[key][:words] = bin_counter(bins[bin_number], text)
|
output[key][:words] = bin_counter(bins[bin_number], text)
|
||||||
output[key][:total] = count_total(output[key])
|
output[key][:total] = count_total(output[key])
|
||||||
|
output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
|
||||||
end
|
end
|
||||||
write_output_json(output,outfile + '-out.json')
|
write_output_json(output,outfile + '-out.json')
|
||||||
write_output_csv(output,outfile + '-out.csv')
|
write_output_csv(output,outfile + '-out.csv')
|
||||||
|
@ -191,8 +191,10 @@ def generate_master_output(dir_name, binfile)
|
||||||
num += 1
|
num += 1
|
||||||
words_head = "Bin " + num.to_s + " words"
|
words_head = "Bin " + num.to_s + " words"
|
||||||
total_head = "Bin " + num.to_s + " total"
|
total_head = "Bin " + num.to_s + " total"
|
||||||
|
freq_head = "Bin " + num.to_s + " frequency"
|
||||||
bin_header.push(words_head)
|
bin_header.push(words_head)
|
||||||
bin_header.push(total_head)
|
bin_header.push(total_head)
|
||||||
|
bin_header.push(freq_head)
|
||||||
end
|
end
|
||||||
CSV.open('master.csv', 'wb') do |csv|
|
CSV.open('master.csv', 'wb') do |csv|
|
||||||
header = ["File", "Total Words" ] + bin_header
|
header = ["File", "Total Words" ] + bin_header
|
||||||
|
@ -209,6 +211,7 @@ def generate_master_output(dir_name, binfile)
|
||||||
data_hash.each_key do |key|
|
data_hash.each_key do |key|
|
||||||
csv_row.push(data_hash[key]["words"])
|
csv_row.push(data_hash[key]["words"])
|
||||||
csv_row.push(data_hash[key]["total"])
|
csv_row.push(data_hash[key]["total"])
|
||||||
|
csv_row.push(data_hash[key]["frequency"])
|
||||||
end
|
end
|
||||||
csv << csv_row
|
csv << csv_row
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue