changed total word count behaviour

threading
Jeff Yates 2020-11-21 13:08:58 -05:00
parent 4e1b862586
commit 174effc0fb
1 changed files with 8 additions and 3 deletions

View File

@ -13,6 +13,9 @@
# The output file is single-line JSON, use jq to format for reading. # # The output file is single-line JSON, use jq to format for reading. #
# # # #
#To Do: # #To Do: #
# * Fix word count #
# * Add frequency #
# * Split by dates for progress notes #
############################################################################ ############################################################################
require 'json' require 'json'
require 'pp' require 'pp'
@ -55,6 +58,7 @@ def write_output_csv (output, filename)
CSV.open(filename, 'wb') do |csv| CSV.open(filename, 'wb') do |csv|
csv << ["bin", "words", "total"] csv << ["bin", "words", "total"]
output.delete(:filename) output.delete(:filename)
output.delete(:total_words)
output.each_key do |key| output.each_key do |key|
line = [] line = []
line.push(key) line.push(key)
@ -129,6 +133,8 @@ def process_file (file_name, binfile, type)
outfile.slice!('.txt') outfile.slice!('.txt')
#puts outfile #puts outfile
output[:filename] = outfile output[:filename] = outfile
output[:total_words] = text.split.size
puts output
csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils csv.each { |bin| bins[bin[0]] = bin[1..].compact } #turn the csv array into a hash, remove nils
bins.each_key do |bin_number| bins.each_key do |bin_number|
key = bin_number.to_sym key = bin_number.to_sym
@ -175,14 +181,13 @@ def generate_master_output(dir_name, binfile)
json_file = File.read(file_name) json_file = File.read(file_name)
data_hash = JSON.parse(json_file) data_hash = JSON.parse(json_file)
csv_row.push(data_hash["filename"]) csv_row.push(data_hash["filename"])
csv_row.push(data_hash["total_words"])
data_hash.delete("filename") data_hash.delete("filename")
word_total = 0 data_hash.delete("total_words")
data_hash.each_key do |key| data_hash.each_key do |key|
csv_row.push(data_hash[key]["words"]) csv_row.push(data_hash[key]["words"])
csv_row.push(data_hash[key]["total"]) csv_row.push(data_hash[key]["total"])
word_total += data_hash[key]["total"]
end end
csv_row = csv_row.insert(1, word_total)
csv << csv_row csv << csv_row
end end
end end