turned frequency into a string for json

fixed NaN error when dividing by 0
added threading to file processing
2020-11-22 15:24:29 -05:00 · 2020-11-22 15:18:37 -05:00 · 2020-11-21 19:11:29 -05:00
2 changed files with 15 additions and 21 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +0,0 @@
 test-data/*
 master.csv
 bins.csv
--- a/sorter.rb
+++ b/sorter.rb
@ -47,14 +47,9 @@ end
 # output - a hash containing all of our output
 #This method converts the output hash to JSON and writes it to the file named by filename
 # The file is opened in block form so the handle is closed even if
 # serialising or writing raises. On any StandardError the hash is dumped
 # with pp and the process aborts (exit status 1) with the reason on stderr.
 def write_output_json(output, filename)
   File.open(filename, 'w') { |outfile| outfile.write(output.to_json) }
 rescue StandardError => e
   pp output # keep the data visible on the console before exiting
   abort "write_output_json: could not write #{filename}: #{e.message}"
 end
 def write_output_csv (output, filename)
@ -152,13 +147,8 @@ def process_file (file_name, binfile, type)
    sections = text.lines("Date and time:", chomp: true) #sections is an arrary of each date section from the text
    sections.delete_at(0) #we can ignore the first chunk of text
    sections.each do |chunk|
      begin
      timestamp = chunk.lines.delete_if {|line| line == "\r\n"}[0] #pulling out the timestamp
-        timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with unserscores
+      timestamp.tr!('/','-').tr!(':','').tr!(' ','_') #remove slashes and colons from timestamp, replaces spaces with underscores
      rescue
        pp timestamp
        abort
      end
      timestamp.strip!
      output = Hash.new #Creating the output storage object
      outfile = file_name + '_' + timestamp
@ -171,7 +161,10 @@ def process_file (file_name, binfile, type)
        output[key] = Hash.new
        output[key][:words] = bin_counter(bins[bin_number], text)
        output[key][:total] = count_total(output[key])
-        output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f
+        output[key][:frequency] = 0
        #output[key][:frequency] = output[key][:total].to_f / output[:total_words].to_f if output[[:total_words] != 0
        freq = output[key][:total].to_f / output[:total_words].to_f
        output[key][:frequency] = freq.to_s
      end
      write_output_json(output,outfile + '-out.json')
      write_output_csv(output,outfile + '-out.csv')
@ -186,11 +179,15 @@ end
 #
 #This method will process all .txt files in the supplied directory
 # Each matching file is handed to process_file on its own thread, and the
 # method returns only after every thread has finished.
 def process_dir(dir_name, binfile, type)
   # File.join supplies the separator, so dir_name works with or without a
   # trailing slash; an empty dir_name still means the current directory.
   dir = dir_name.to_s
   pattern = dir.empty? ? '*.txt' : File.join(dir, '*.txt')
   threads = Dir.glob(pattern).map do |file_name|
     Thread.new do
       puts "Processing " + file_name
       process_file(file_name, binfile, type)
     end
   end
   threads.each(&:join) # join also re-raises any exception from a worker
 end
 def generate_master_output(dir_name, binfile)
  file=File.open(binfile,"r")
Author	SHA1	Message	Date
Jeff Yates	e2aaa7a2b5	turned frequency into a string for json	2020-11-22 15:24:29 -05:00
Jeff Yates	fa39e4d040	fixed NaN error when dividing by 0	2020-11-22 15:18:37 -05:00
Jeff Yates	ddb4003e66	added threading to file processing	2020-11-21 19:11:29 -05:00