#!/usr/bin/env ruby
# frozen_string_literal: true

############################################################################
# Word sorter for Pat's dissertation
# More documentation TBD
# Current Usage:
#   Scans a file for groups of words and counts the totals
#     Input file:      ./tester.txt
#     Word group file: ./bins.json
#     Output file:     ./output.json
#
#   The input file is human readable, easy to edit.
#   The output file is single-line JSON, use jq to format for reading.
#
# To Do:
#   * use STDIN to accept file to scan
#   * Support scanning multiple files
#   * Output to csv
#   * Strip out header and footer text
############################################################################
require 'json'
require 'pp'

json = File.read('./bins.json')
text = File.read('tester.txt')
bins = JSON.parse(json) # Turn bins.json into a hash
output = {}             # Creating the output storage object

# bin_counter expects:
#   bin  - a list of strings to search for (one entry of bins.json)
#   text - the document text to search through
# This method returns a hash mapping each search string (as a symbol) to the
# number of times it occurs in text.
#
# Matching is done with String#scan on the literal string, so it is
# case-sensitive and also counts occurrences inside longer words.
# Counts are stored as strings; that is the format written to output.json.
def bin_counter(bin, text)
  bin.each_with_object({}) do |word, counts|
    counts[word.to_sym] = text.scan(word).count.to_s
  end
end

# count_total expects:
#   bin - a hash whose :words entry was created by bin_counter
# This method returns the integer sum of all word counts in the bin.
def count_total(bin)
  # bin_counter stores counts as strings, so convert before summing.
  bin[:words].each_value.sum(&:to_i)
end

# write_output expects:
#   output - a hash containing all of our output
# This method converts the output hash to JSON and writes it to output.json.
def write_output(output)
  # File.write opens, writes and closes in one call, so the file handle is
  # released even if serialisation or the write itself raises.
  File.write('./output.json', output.to_json)
end

bins.each do |bin_number, words|
  entry = { words: bin_counter(words, text) }
  entry[:total] = count_total(entry)
  output[bin_number.to_sym] = entry
end
write_output(output)