2020-07-15 17:28:27 +00:00
|
|
|
#!/bin/env ruby
|
|
|
|
############################################################################
|
|
|
|
#Word sorter for Pat's dissertation #
|
|
|
|
#More documentation TBD #
|
|
|
|
#Current Usage: #
|
|
|
|
# Scans a file for groups of words and counts the totals #
|
|
|
|
# Input file: ./tester.txt #
|
|
|
|
# Word group file: ./bins.csv #
|
|
|
|
# Output file: ./output.json #
|
|
|
|
# #
|
|
|
|
# The input file is human readable, easy to edit. #
|
|
|
|
# The output file is single-line JSON, use jq to format for reading. #
|
|
|
|
# #
|
|
|
|
#To Do: #
|
|
|
|
# * use STDIN to accept file to scan #
|
|
|
|
# * Support scanning multiple files #
|
|
|
|
# * Output to csv #
|
|
|
|
# * Strip out header and footer text #
|
|
|
|
############################################################################
|
|
|
|
require 'json'
|
|
|
|
require 'pp'
|
2020-07-15 17:41:06 +00:00
|
|
|
require 'csv'
|
2020-07-15 17:28:27 +00:00
|
|
|
|
2020-07-15 17:41:06 +00:00
|
|
|
# Earlier JSON-based loader, kept for reference; the CSV loader below is the
# one in use.
#json = File.read('./bins.json')
#bins = JSON.parse(json) #Turn bins.json into a hash

# Each CSV row describes one bin: the first cell is the bin label, the
# remaining cells are the strings to search for.
csv = CSV.read('./bins.csv')

# The document that will be searched.
text = File.read('tester.txt')

# Creating the output storage object
output = {}

# Build bins as { label_symbol => [search strings] }.
bins = {}
csv.each do |label, *words|
  # CSV.read yields [] for a blank line (and nil for an empty first cell);
  # calling to_sym on that nil would abort the whole run, so skip such rows.
  next if label.nil?

  # Rows shorter than the widest row are padded with nil cells; drop them.
  bins[label.to_sym] = words.compact
end
|
|
|
|
|
2020-07-15 17:28:27 +00:00
|
|
|
# bin_counter expects:
#   bin  - an enumerable of strings to search for
#   text - the document text to search through
# Returns a hash mapping each search string (as a Symbol) to the number of
# times it occurs in text. Matching is plain substring matching, so "cat"
# is also counted inside "category".
# Counts are stored as Strings (not Integers); count_total converts them
# back with to_i, and the JSON output carries them as strings.
def bin_counter(bin, text)
  bin.each_with_object({}) do |word, counts|
    counts[word.to_sym] = text.scan(word).count.to_s
  end
end
|
|
|
|
|
|
|
|
# count_total expects:
#   bin - a hash whose :words entry is a hash of per-word counts, as
#         produced by bin_counter
# Returns an Integer: the sum of all the counts under bin[:words].
# Each count is converted with to_i, so the String counts written by
# bin_counter are accepted.
def count_total(bin)
  bin[:words].each_value.sum(&:to_i)
end
|
|
|
|
|
|
|
|
# write_output expects:
#   output - a hash containing all of our output
#   path   - (optional) file to write; defaults to ./output.json
# Converts the output hash to single-line JSON and writes it to path,
# replacing any existing file. Returns nil.
def write_output(output, path = './output.json')
  # The block form closes the file even if to_json or the write raises.
  File.open(path, 'w') { |outfile| outfile.write(output.to_json) }
  nil
end
|
|
|
|
|
|
|
|
# For every bin, record the per-word counts and their total under the bin's
# label, then write the whole result out as JSON.
bins.each do |bin_number, search_terms|
  entry = { words: bin_counter(search_terms, text) }
  entry[:total] = count_total(entry)
  output[bin_number.to_sym] = entry
end

write_output(output)
|
|
|
|
|