#!/usr/bin/python3
#Words Of Wisdom
# output some random text from some given collection of files, 
#   - primarily used to grab some random "words of wisdom" from my journals and writings

import os
import sys
import time
import filetype #for filetype (extension and mime type)
import chardet #for getting encoding
import subprocess
import random
import getopt

total_text = ''


#can we parse text out of this file or is it a libreoffice file? if so, return the contents
def attemptReadSampleFile(filepath):
    if v: print('checking {}'.format(filepath))
    if os.path.isfile(filepath):
        f = open(filepath, 'rb')
        ftype = filetype.guess(filepath)
        if v: print('filetype: {}'.format(str(ftype)))
        if ftype != None:
            if ftype.extension in ['py', 'c', 'cc', 'h', 'hh', 'java', 'rst', 'css', 'html', 'htm', 'js', 'php', 'sh']: #don't want code in the sample data
                if v: print('this file is code')
                return None
            if ftype.extension == 'odt' and filepath[len(filepath)-1] != '#': #openoffice doc and not a lock file
                subproc = subprocess.run(['odt2txt', filepath], encoding='utf-8', stdout=subprocess.PIPE)
                return subproc.stdout
            if ftype.extension == 'txt':
                print('AYO!')
                return str(f.read(), encoding='utf-8')
        else:
            fb = f.read() #file data (bytes) to detect encoding
            enc = str(chardet.detect(fb)['encoding'])
            if v: print('encoding: {}'.format(enc))
            if enc in ['ascii','utf-8']:
                return str(fb, encoding=enc)
            else:
                return None
    else: 
            if v: print('not file')
            return None


#paths=['/home/thomas/doc/fiction']
paths=['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019'] #the paths to scan recursively for files from which to grab text
samplefiles=[] #the paths of the individual files from which we want to grab text
matchpattern='' #if we want to filter the files by some text pattern
time_min = -1 #threshold time. dont use files that are older

ags,vals = getopt.getopt(sys.argv[1:], 'h:e:')
for a,v in ags:
    if a == '-h':
        print('TODO usage info')
    if a == '-e': #selected file must match some text pattern
        matchpattern = v  #TODO      
    if a == '-t':
        time_min = int(v) #TODO

if len(sys.argv) > 1:
    paths = sys.argv[1:]


v=False #verbose 
tStart = time.time()
for p in paths:
        if v: print('path {}'.format(p))
        if os.path.isdir(p):
                for root,dirs,files in os.walk(p):
                        if v: print('walk {}: {} files, {} dirs'.format(root, len(files), len(dirs)))
                        for f in files:
                            samplefiles.append(root + '/' + f)
        else:
               samplefiles.append(p)

tEnd = time.time()
tDuration = tEnd - tStart
print('gathered {} candidate files in {} seconds, from paths {}'.format(len(samplefiles), tDuration, str(paths)))

#pick random file until we get an acceptable one
fi = random.randint(0, len(samplefiles))
t = attemptReadSampleFile(samplefiles[fi])
while t == None:
    del samplefiles[fi]
    fi = random.randint(0, len(samplefiles))
    t = attemptReadSampleFile(samplefiles[fi])

print()
mt = time.ctime(os.path.getmtime(samplefiles[fi]))
print('{} ;\n       last modified {} :\n  {}'.format(samplefiles[fi], mt, t))

lines = t.splitlines()
li = random.randint(0, len(t)) #line index
#ci = random.randint(0, len(t)) #character index
res = '\n'.join(lines[li: li+7])
print(res)