Compare commits
3 Commits
Author | SHA1 | Date |
---|---|---|
Jeff Yates | e2aaa7a2b5 | |
Jeff Yates | fa39e4d040 | |
Jeff Yates | ddb4003e66 |
|
@ -1,3 +0,0 @@
|
||||||
test-data/*
|
|
||||||
master.csv
|
|
||||||
bins.csv
|
|
33
sorter.rb
33
sorter.rb
|
@ -47,14 +47,9 @@ end
|
||||||
# output - a hash containing all of our output
|
# output - a hash containing all of our output
|
||||||
#This method converts the output hash to JSON and writes it to "output.json"
|
#This method converts the output hash to JSON and writes it to "output.json"
|
||||||
def write_output_json (output, filename)
|
def write_output_json (output, filename)
|
||||||
begin
|
outfile = File.open(filename,'w')
|
||||||
outfile = File.open(filename,'w')
|
outfile.write(output.to_json)
|
||||||
outfile.write(output.to_json)
|
outfile.close
|
||||||
outfile.close
|
|
||||||
rescue
|
|
||||||
pp output
|
|
||||||
abort
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def write_output_csv (output, filename)
|
def write_output_csv (output, filename)
|
||||||
|
@ -152,13 +147,8 @@ def process_file (file_name, binfile, type)
|
||||||
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
|
sections = text.lines("Date and time:", chomp: true) #sections is an array of each date section from the text
|
||||||
sections.delete_at(0) #we can ignore the first chunk of text
|
sections.delete_at(0) #we can ignore the first chunk of text
|
||||||
sections.each do |chunk|
|
sections.each do |chunk|
|
||||||
begin
|
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
|
||||||
timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
|
timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
|
||||||
timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
|
|
||||||
rescue
|
|
||||||
pp timestamp
|
|
||||||
abort
|
|
||||||
end
|
|
||||||
timestamp.strip!
|
timestamp.strip!
|
||||||
output = Hash.new #Creating the output storage object
|
output = Hash.new #Creating the output storage object
|
||||||
outfile = file_name + '_' + timestamp
|
outfile = file_name + '_' + timestamp
|
||||||
|
@ -171,7 +161,10 @@ def process_file (file_name, binfile, type)
|
||||||
output[key] = Hash.new
|
output[key] = Hash.new
|
||||||
output[key][:words] = bin_counter(bins[bin_number], text)
|
output[key][:words] = bin_counter(bins[bin_number], text)
|
||||||
output[key][:total] = count_total(output[key])
|
output[key][:total] = count_total(output[key])
|
||||||
output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
|
output[key][:frequency] = 0
|
||||||
|
#output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f if output[[:total_words] != 0
|
||||||
|
freq = output[key][:total].to_f / output[:total_words].to_f
|
||||||
|
output[key][:frequency] = freq.to_s
|
||||||
end
|
end
|
||||||
write_output_json(output,outfile + '-out.json')
|
write_output_json(output,outfile + '-out.json')
|
||||||
write_output_csv(output,outfile + '-out.csv')
|
write_output_csv(output,outfile + '-out.csv')
|
||||||
|
@ -186,10 +179,14 @@ end
|
||||||
#
|
#
|
||||||
#This method will process all .txt files in the supplied directory
|
#This method will process all .txt files in the supplied directory
|
||||||
def process_dir(dir_name, binfile, type)
|
def process_dir(dir_name, binfile, type)
|
||||||
|
threads = []
|
||||||
Dir.glob(dir_name + '*.txt') do |file_name|
|
Dir.glob(dir_name + '*.txt') do |file_name|
|
||||||
puts "Processing " + file_name
|
threads << Thread.new do
|
||||||
process_file(file_name, binfile, type)
|
puts "Processing " + file_name
|
||||||
|
process_file(file_name, binfile, type)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
threads.each { |thr| thr.join }
|
||||||
end
|
end
|
||||||
|
|
||||||
def generate_master_output(dir_name, binfile)
|
def generate_master_output(dir_name, binfile)
|
||||||
|
|
Loading…
Reference in New Issue