# Path of the clippings file that the script section at the bottom of this
# file reads (relative to the current working directory).
file_path = 'My Clippings.txt'
 
# FUNCTION #
def parse_clippings(file_path):
    """Parse a clippings text file into a list of per-clipping dicts.

    Lines are grouped into one dict per clipping; a line containing
    ``=====`` closes the clipping being built.  Every other line is
    classified by the first rule that matches:

    * contains ``Your Highlight`` or ``Your Bookmark``: stored under
      ``'loc'`` with a ``DATA:`` marker block appended;
    * contains ``Location``: stored under ``'location'``;
    * contains ``Added on``: stored under ``'date'``;
    * anything else: appended to ``'content'``.

    Stored lines keep their trailing newline.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.

    Returns:
        A list of dicts in file order.  Each dict only has the keys for
        which at least one line was seen.  A final clipping that is not
        followed by a separator line is included as well, unless it
        consists of whitespace only (e.g. blank lines at the end of file).
    """
    clippings = []
    current_clip = {}
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate lazily; lines always carry at least their newline, so no
        # emptiness check is needed.
        for line in file:
            if '=====' in line:  # separator: end of the current clipping
                if current_clip:
                    clippings.append(current_clip)
                    current_clip = {}
            elif 'Your Highlight' in line or 'Your Bookmark' in line:
                # Header line of a highlight/bookmark.
                current_clip['loc'] = line + '\nDATA:\n'
            elif 'Location' in line:  # position inside the book
                current_clip['location'] = line
            elif 'Added on' in line:  # date the clipping was added
                current_clip['date'] = line
            else:  # everything else is accumulated as clipping content
                current_clip['content'] = current_clip.get('content', '') + line

    # Keep a last clipping that has no closing separator, but do not turn
    # trailing blank lines into a junk entry.
    if any(value.strip() for value in current_clip.values()):
        clippings.append(current_clip)
    return clippings
 
def get_content(file_path, target_word):
    """Return the lines of a file that are neither separators nor headers.

    A line is dropped when it contains ``===`` or ``Your``; every other
    line is returned unchanged (trailing newline included), in file order.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.
        target_word: Accepted but not used by this function.

    Returns:
        A list of the kept lines.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        return [
            row
            for row in source
            if not ('===' in row or 'Your' in row)
        ]
 
def remove_duplicates(arr):
    """Return the distinct items of ``arr`` as a list, in first-seen order.

    Args:
        arr: Iterable of hashable items.

    Returns:
        A new list containing each distinct item once, ordered by first
        occurrence in ``arr``.
    """
    # dict keys are unique and keep insertion order, so the result is
    # deterministic (a plain set would return the items in arbitrary order).
    return list(dict.fromkeys(arr))
 
def count_lines(file_path):
    """Return the number of lines in the UTF-8 text file at ``file_path``."""
    total = 0
    with open(file_path, 'r', encoding='utf-8') as handle:
        for _ in handle:
            total += 1
    return total
 
def count_lines_with_string(file_path, search_string):
    """Count the lines of a file that contain ``search_string``.

    The match is a plain, case-sensitive substring test on each line.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.
        search_string: Substring to look for.

    Returns:
        The number of matching lines as an int.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return sum(1 for row in handle if search_string in row)
 
def count_duplicates(arr):
    """Return a dict mapping each item of ``arr`` to its occurrence count.

    Keys appear in the order the items are first encountered.
    """
    tally = {}
    for entry in arr:
        tally[entry] = tally.get(entry, 0) + 1
    return tally
 
def remove_empty_items(duplicates_dict):
    """Delete, in place, every entry of ``duplicates_dict`` whose value is falsy.

    The dict is modified directly; nothing is returned.
    """
    # Collect the keys first: a dict must not change size while iterated.
    falsy_keys = []
    for name in duplicates_dict:
        if not duplicates_dict[name]:
            falsy_keys.append(name)
    for name in falsy_keys:
        del duplicates_dict[name]
 
 
def _clipping_location(loc_line):
    """Return the text that follows the word "location" in a header line.

    Only the first line of ``loc_line`` is inspected.  Its ``|``-separated
    segments are searched in order for ``location`` or ``Location``; the
    remainder of the first segment that contains one is returned, stripped.
    An empty string is returned when no segment contains the word.
    """
    header = loc_line.split('\n', 1)[0]
    for segment in header.split('|'):
        for marker in ('location', 'Location'):
            _, found, rest = segment.partition(marker)
            if found:
                return rest.strip()
    return ''


def print_clippings_for_book(file_path, book_title, num_clippings=10):
    """Print the clippings of one book from a clippings file.

    First prints the number of lines in the file that contain
    ``book_title`` (case-sensitive, via ``count_lines_with_string``).
    Then every clipping returned by ``parse_clippings`` is examined in
    file order and printed when

    * its ``'content'`` contains ``book_title`` (case-insensitive), and
    * it has a ``'loc'`` header (i.e. it was recognised as a highlight or
      bookmark by ``parse_clippings``).

    Each printed entry has the form ``[index] text *LOC: location`` followed
    by an empty line, where ``index`` is zero-based and ``text`` is the third
    line of the clipping content.

    Args:
        file_path: Path of the clippings file.
        book_title: Text to look for in the clipping content.
        num_clippings: Maximum number of clippings to print; ``0`` means
            no limit.

    Returns:
        None.  All output goes to stdout.
    """
    #book_title = 'at location' #for debug only
    total_lines = count_lines_with_string(file_path, book_title)
    print("[*] Tổng số note:" + str(total_lines)+'\n')

    # 0 requests every matching clipping, so no cap is applied at all
    # (a cap derived from the case-sensitive line count could be too low).
    limit = None if num_clippings == 0 else num_clippings
    wanted = book_title.lower()
    count = 0

    for clipping in parse_clippings(file_path):
        if limit is not None and count >= limit:
            break

        # Entries lacking content or a highlight/bookmark header are not
        # printable; skip them instead of raising KeyError.
        data = clipping.get('content', '')
        loc = clipping.get('loc', '')
        if wanted not in data.lower() or 'DATA' not in loc:
            continue
        if not data.strip():
            continue

        try:
            # Presumably the content is: title line, blank line, text.
            text = data.split('\n')[2].strip()
        except IndexError as e:
            # Content too short to hold a text line; report it and move on.
            print("Đã xảy ra một ngoại lệ:", e)
            continue

        location = _clipping_location(loc)
        print('[' + str(count) + '] ' + text + ' *LOC: ' + str(location))
        print()
        count += 1
 
## BEGIN ##
# Overview: count how often each non-header, non-separator line occurs in
# the clippings file and list the lines that occur more than twice
# (presumably the book titles, which repeat once per clipping).
ct = get_content(file_path, 'at location')
duplicates_dict = count_duplicates(ct)
remove_empty_items(duplicates_dict)
print('--------------------------------------------')
print("[*] Thông tin về Sách và số lượng note:\n")
for i, v in duplicates_dict.items():
    # len(i) > 1 skips lines that consist of a newline only.
    if v > 2 and len(i) > 1:
        print(f"{i.strip()}: {v} notes")
print('--------------------------------------------\n')

# Ask which book to look up and how many notes to show.
book_title = input("[*] Nhập tựa đề sách cần tra cứu: ") or 'nha_gia_kim'
try:
    num_clippings = int(input("[*] Nhập số lượng note: ")) or 10
except ValueError:
    # Empty or non-numeric answer: fall back to the default instead of
    # aborting with a traceback.
    num_clippings = 10
print()
print("--- Đang tra cứu thông tin ---\n")
print('[*] Danh sách note từ cuốn ' + book_title)
print_clippings_for_book(file_path, book_title, num_clippings)
print("--- Hoàn tất tra cứu ---\n")
print('** Done **')

# Final prompt before the script ends.  input() returns a string, so the
# answer is compared with '0' rather than the integer 0.
thoat = input("Nhập 0 để thoát ==> ")
if thoat.strip() == '0':
    raise SystemExit