added .gitignore

added code to catch error in timestamp
fixed missing end
2020-11-27 09:36:08 -05:00 · 2020-11-22 15:58:47 -05:00 · 2020-11-22 15:37:46 -05:00 · 2020-11-22 15:37:03 -05:00 · 2020-11-22 15:29:25 -05:00
2 changed files with 21 additions and 15 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 test-data/*
 master.csv
 bins.csv
--- a/sorter.rb
+++ b/sorter.rb
@@ -47,9 +47,14 @@ end
 # output - a hash containing all of our output
 #This method converts the output hash to JSON and writes it to "output.json"
 def write_output_json (output, filename)
-  outfile = File.open(filename,'w')
+  begin
-  outfile.write(output.to_json)
+    outfile = File.open(filename,'w')
-  outfile.close
+    outfile.write(output.to_json)
    outfile.close
  rescue
    pp output
    abort
  end
 end
 def write_output_csv (output, filename)
@@ -147,8 +152,13 @@ def process_file (file_name, binfile, type)
    sections = text.lines("Date and time:", chomp: true) #sections is an arrary of each date section from the text
    sections.delete_at(0) #we can ignore the first chunk of text
    sections.each do |chunk|
-      timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
+      begin
-      timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
+        timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
        timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with unserscores
      rescue
        pp timestamp
        abort
      end
      timestamp.strip!
      output = Hash.new #Creating the output storage object
      outfile = file_name + '_' + timestamp
@@ -161,10 +171,7 @@ def process_file (file_name, binfile, type)
        output[key] = Hash.new
        output[key][:words] = bin_counter(bins[bin_number], text)
        output[key][:total] = count_total(output[key])
-        output[key][:frequency] = 0
+        output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
        #output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f if output[[:total_words] != 0
        freq = output[key][:total].to_f / output[:total_words].to_f
        output[key][:frequency] = freq.to_s
      end
      write_output_json(output,outfile + '-out.json')
      write_output_csv(output,outfile + '-out.csv')
@@ -179,14 +186,10 @@ end
 #
 #This method will process all .txt files in the supplied directory
 def process_dir(dir_name, binfile, type)
  threads = []
  Dir.glob(dir_name + '*.txt') do |file_name|
-    threads << Thread.new do
+    puts "Processing " + file_name
-      puts "Processing " + file_name
+    process_file(file_name, binfile, type)
      process_file(file_name, binfile, type)
    end
  end
  threads.each { |thr| thr.join }
 end
 def generate_master_output(dir_name, binfile)
Author	SHA1	Message	Date
Jeff Yates	ecd02eea4f	added .gitignore	2020-11-27 09:36:08 -05:00
Jeff Yates	1654510a0f	added code to catch error in timestamp	2020-11-22 15:58:47 -05:00
Jeff Yates	f02c088201	fixed missing end	2020-11-22 15:37:46 -05:00
Jeff Yates	a0abcac6e0	error handling for writing json	2020-11-22 15:37:03 -05:00
Jeff Yates	cd7adff9cf	print output hash for debugging	2020-11-22 15:29:25 -05:00