added split_text() and --type option
parent
96740f161a
commit
89bb9cf027
27
sorter.rb
27
sorter.rb
|
@ -5,7 +5,7 @@
|
||||||
#Current Usage: #
|
#Current Usage: #
|
||||||
# Scans a file for groups of words and counts the totals #
|
# Scans a file for groups of words and counts the totals #
|
||||||
# Input file: ./tester.txt #
|
# Input file: ./tester.txt #
|
||||||
# Wroud group file: ./bins.json #
|
# Word group file: ./bins.json #
|
||||||
# Output file: ./output.json #
|
# Output file: ./output.json #
|
||||||
# #
|
# #
|
||||||
# The input file is human readable, easy to edit. #
|
# The input file is human readable, easy to edit. #
|
||||||
|
@ -13,7 +13,7 @@
|
||||||
# #
|
# #
|
||||||
#To Do: #
|
#To Do: #
|
||||||
# * use STDIN to accept file to scan #
|
# * use STDIN to accept file to scan #
|
||||||
# * Support scaning multiple files #
|
# * Support scanning multiple files #
|
||||||
# * Output to csv #
|
# * Output to csv #
|
||||||
# * Strip out header and footer text #
|
# * Strip out header and footer text #
|
||||||
############################################################################
|
############################################################################
|
||||||
|
@ -25,7 +25,7 @@ require 'optparse'
|
||||||
#bin_counter expects:
|
#bin_counter expects:
|
||||||
# bin - a hash with strings to search for
|
# bin - a hash with strings to search for
|
||||||
# test - the document text to search through
|
# test - the document text to search through
|
||||||
#This method returns a hash containg the strings and their frequency
|
#This method returns a hash containing the strings and their frequency
|
||||||
def bin_counter (bin, text)
|
def bin_counter (bin, text)
|
||||||
ret = Hash.new
|
ret = Hash.new
|
||||||
bin.each do |word|
|
bin.each do |word|
|
||||||
|
@ -63,18 +63,36 @@ def strip_text (text, start, fin)
|
||||||
text.lines(start,chomp: true)[1].lines(fin,chomp: true)[0]
|
text.lines(start,chomp: true)[1].lines(fin,chomp: true)[0]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
#split_text expects:
#  text  - the document text to search through
#  start - marker string that opens each section to keep
#  fin   - marker string that closes each section to keep
#This method returns a single string made of the text found between each
#start marker and the fin marker that follows it, joined in document order.
#Text ahead of the first start marker, and text from a fin marker up to the
#next start marker, is dropped. Returns "" when text holds no start marker.
def split_text(text, start, fin)
  # drop(1) discards whatever precedes the first start marker and, unlike
  # [1..-1], gives [] instead of nil when text is empty.
  sections = text.lines(start, chomp: true).drop(1)

  # Keep only the piece ahead of fin, the same way strip_text does for a
  # single section; to_s covers an empty section, where lines returns [].
  sections.map { |section| section.lines(fin, chomp: true).first.to_s }.join
end
|
||||||
|
|
||||||
options = Hash.new
|
options = Hash.new
|
||||||
OptionParser.new do |opts|
|
OptionParser.new do |opts|
|
||||||
opts.banner = 'sorter.rb --options'
|
opts.banner = 'sorter.rb --options'
|
||||||
opts.on("-f", "--file file", "Name of the file to process") do |file|
|
opts.on("-f", "--file file", "Name of the file to process") do |file|
|
||||||
options[:file] = file
|
options[:file] = file
|
||||||
end
|
end
|
||||||
|
opts.on("-t", "--type type", "Type of file. Must be \"iat\" or \"pn\"") do |type|
|
||||||
|
options[:type] = type
|
||||||
|
end
|
||||||
end.parse!
|
end.parse!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
csv = CSV.read('./bins.csv')
|
csv = CSV.read('./bins.csv')
|
||||||
text = File.read(options[:file])
|
text = File.read(options[:file])
|
||||||
text = strip_text(text,'PLOVEINTAKE','PLOVECLOSING')
|
text = strip_text(text, 'PLOVEINTAKE', 'PLOVECLOSING') if options[:type] == 'iat'
|
||||||
|
text = split_text(text, 'Narrative:', 'Signatures:') if options[:type] == 'pn'
|
||||||
output = Hash.new #Creating the output storage object
|
output = Hash.new #Creating the output storage object
|
||||||
bins = Hash.new #This hash stores the bins
|
bins = Hash.new #This hash stores the bins
|
||||||
outfile = options[:file] + '-out.json'
|
outfile = options[:file] + '-out.json'
|
||||||
|
@ -90,3 +108,4 @@ bins.each_key do |bin_number|
|
||||||
end
|
end
|
||||||
write_output(output,outfile)
|
write_output(output,outfile)
|
||||||
|
|
||||||
|
puts text
|
||||||
|
|
Loading…
Reference in New Issue