#!/bin/env ruby
# frozen_string_literal: true

############################################################################
# Word sorter for Pat's dissertation                                       #
# More documentation TBD                                                   #
# Current Usage:                                                           #
#   Scans a file for groups of words and counts the totals                 #
#     Input file: ./tester.txt                                             #
#     Word group file: ./bins.csv                                          #
#     Output file: ./output.json                                           #
#                                                                          #
#   The input file is human readable, easy to edit.                        #
#   The output file is single-line JSON, use jq to format for reading.     #
#                                                                          #
# To Do:                                                                   #
#   * Use STDIN to accept file to scan                                     #
#   * Support scanning multiple files                                      #
#   * Output to csv                                                        #
#   * Strip out header and footer text                                     #
############################################################################

require 'json'
require 'pp'
require 'csv'

# bin_counter expects:
#   bin  - an array of strings to search for
#   text - the document text to search through
# This method returns a hash mapping each search string (as a symbol) to the
# number of times it occurs in the text. Counts are stored as strings, which
# is the format that ends up in output.json.
# NOTE(review): String#scan with a string pattern counts plain substring
# occurrences (case-sensitive, also inside longer words) -- confirm that this
# is the intended matching rule.
def bin_counter(bin, text)
  bin.each_with_object({}) do |word, counts|
    counts[word.to_sym] = text.scan(word).count.to_s
  end
end

# count_total expects:
#   bin - a hash whose :words entry is a hash created by bin_counter
# This method returns the total count (an Integer) of all words in the bin.
def count_total(bin)
  # The per-word counts are stored as strings, so convert while summing.
  bin[:words].each_value.sum(&:to_i)
end

# write_output expects:
#   output - a hash containing all of our output
# This method converts the output hash to JSON and writes it to
# ./output.json, replacing any existing file.
def write_output(output)
  # File.write opens, writes and closes in one call, so the file handle is
  # released even if writing raises.
  File.write('./output.json', output.to_json)
end

# Legacy JSON-based loading of the word groups, kept for reference:
# json = File.read('./bins.json')
# bins = JSON.parse(json) # Turn bins.json into a hash
csv = CSV.read('./bins.csv')
text = File.read('tester.txt')

# Each CSV row is "<bin name>,<word>,<word>,...". Rows may have different
# lengths, so nil padding cells are dropped with compact.
bins = {}
csv.each do |bin|
  # Blank rows (or rows without a bin name) cannot be keyed; skip them
  # instead of crashing on nil.to_sym.
  next if bin.first.nil?

  bins[bin.first.to_sym] = bin[1..].compact
end

# Build the output storage object: one entry per bin holding the per-word
# counts and the bin total.
output = {}
bins.each do |bin_number, words|
  entry = { words: bin_counter(words, text) }
  entry[:total] = count_total(entry)
  output[bin_number] = entry
end

write_output(output)