This commit is contained in:
Ignacio Serantes
2026-05-09 10:26:57 +02:00
parent 3fb55ee4f3
commit 6207cab27a
7 changed files with 622 additions and 177 deletions

View File

@@ -10,9 +10,136 @@ import sys
from pathlib import Path
from typing import Dict, Any, Iterator, Optional, Union
from baloo_tools import get_resolution
from baloo_tools import (get_info, get_tags)
from bagheera_query_parser_lib import parse_date
from pyparsing import (
alphanums, one_of, infix_notation,
Group, opAssoc, ParserElement, QuotedString, Word
)
ParserElement.enable_packrat()
def expression_contains_property(text):
    """Return True if *text* holds a ``key <operator>`` condition on a non-tags key.

    A match is any word other than ``tags`` (case-insensitive) that is
    followed by one of the comparison operators ``>= <= != = > < :``.

    The key may be followed by a closing quote and by any whitespace,
    including newlines, before the operator. This mirrors the expression
    grammar, which accepts quoted operands and skips all whitespace between
    tokens; the previous ``[ \\t]*`` pattern missed conditions such as
    ``width\\n> 100`` or ``"width" = 5``.

    The check is purely textual: operators that appear inside a quoted value
    can produce a false positive, which only costs an unneeded lookup.
    """
    pattern = r"\b(?!tags\b)\w+[\"']?\s*(?:>=|<=|!=|=|>|<|:)"
    return bool(re.search(pattern, text, re.IGNORECASE))
def expression_contains_tags(text):
    """Return True if *text* holds a condition whose key is ``tags``.

    A match is the whole word ``tags`` (case-insensitive) followed by one of
    the comparison operators ``>= <= != = > < :``.

    The key may be followed by a closing quote and by any whitespace,
    including newlines, before the operator. This mirrors the expression
    grammar, which accepts quoted operands and skips all whitespace between
    tokens; the previous ``[ \\t]*`` pattern missed ``tags\\n: x`` and
    ``'tags' = x``.
    """
    pattern = r"\btags\b[\"']?\s*(?:>=|<=|!=|=|>|<|:)"
    return bool(re.search(pattern, text, re.IGNORECASE))
class EvaluateExpression:
    """Compile boolean filter expressions into reusable predicate functions.

    An expression is built from conditions combined with ``NOT``, ``AND``
    and ``OR`` (upper-case keywords) and parentheses. A condition is either
    ``key <op> value`` with ``<op>`` one of ``>= <= != = > < :``, or a bare
    term, which is treated as "the ``path`` field contains this term".

    ``compile()`` returns a callable that takes a data dictionary and
    returns a bool.
    """

    def __init__(self):
        # Build the grammar once per instance; compile() reuses it.
        self.grammar = self._build_grammar()

    @staticmethod
    def _as_float_pair(l_val, r_val):
        """Return ``(float(l_val), float(r_val))``, or None if either fails."""
        try:
            return float(l_val), float(r_val)
        except (ValueError, TypeError):
            return None

    def _compare_single(self, l_val, op, r_val):
        """
        Compare two individual values with the given operator.

        * ``:``  -> case-insensitive substring test (``r_val`` in ``l_val``).
        * ``=`` / ``!=`` -> equal when the string forms match, or when both
          sides convert to floats that are equal. Values coming from the
          parser are always strings, so a raw ``==`` against a numeric field
          could never succeed.
        * ``> < >= <=`` -> numeric comparison when both sides convert to
          float, otherwise a string comparison.

        Any other operator returns False.
        """
        if op == ":":
            return str(r_val).lower() in str(l_val).lower()

        if op in ("=", "!="):
            equal = str(l_val) == str(r_val)
            if not equal:
                # e.g. 1920.0 vs "1920": different text, same number.
                pair = self._as_float_pair(l_val, r_val)
                equal = pair is not None and pair[0] == pair[1]
            return equal if op == "=" else not equal

        if op not in (">", "<", ">=", "<="):
            return False

        pair = self._as_float_pair(l_val, r_val)
        if pair is None:
            # Fallback to string comparison if numeric conversion fails.
            pair = (str(l_val), str(r_val))
        curr_l, curr_r = pair

        if op == ">":
            return curr_l > curr_r
        if op == "<":
            return curr_l < curr_r
        if op == ">=":
            return curr_l >= curr_r
        return curr_l <= curr_r

    def _compare(self, data, left_key, op, right_val):
        """
        Resolve both operands against *data* and compare them.

        Keys of *data* are matched case-insensitively. If the left-hand
        value is a list, the condition holds when ANY item satisfies it.
        """
        # Normalize data keys to lowercase for case-insensitive lookup.
        normalized_data = {k.lower(): v for k, v in data.items()}

        # Left-hand value: the field named by left_key.
        # NOTE(review): when the key is absent, the key name itself is used
        # as a literal, so ``width > 100`` on data lacking ``width`` compares
        # the strings "width" and "100" -- confirm this is intended.
        l_val = normalized_data.get(left_key.lower(), left_key)

        # Right-hand value: another field if the name matches one, else the
        # literal as written in the expression.
        r_val = normalized_data.get(str(right_val).lower(), right_val)

        if isinstance(l_val, list):
            return any(self._compare_single(item, op, r_val) for item in l_val)

        return self._compare_single(l_val, op, r_val)

    def _build_grammar(self):
        """
        Define the pyparsing grammar for the expression engine.

        The logical operators are matched as whole keywords. Passing them to
        ``infix_notation`` as plain strings makes them ``Literal``s, which
        match prefixes, so a term such as ``NOTES`` was parsed as ``NOT ES``.
        """
        # Local import: Keyword is not in the module-level pyparsing import.
        from pyparsing import Keyword

        ident_chars = alphanums + "_./\\"

        operators = one_of(">= <= != = > < :")
        identifier = Word(ident_chars)
        quoted_string = QuotedString("'") | QuotedString('"')
        operand = quoted_string | identifier

        # Use the identifier alphabet so a keyword is only recognised when it
        # is not part of a longer identifier (e.g. "NOT.txt", "ORANGE").
        not_kw = Keyword("NOT", ident_chars)
        and_kw = Keyword("AND", ident_chars)
        or_kw = Keyword("OR", ident_chars)

        # Basic condition (e.g., "width > 100" or "word").
        condition = Group((operand + operators + operand) | operand)

        # Convert each parsed condition into an executable function.
        condition.set_parse_action(lambda t: self._create_evaluator_func(t[0]))

        return infix_notation(
            condition,
            [
                # t[0] is [keyword, operand_func]
                (not_kw, 1, opAssoc.RIGHT, lambda t: (
                    lambda data: not t[0][1](data))),
                # t[0] interleaves operand functions with keyword strings;
                # only the callables are evaluated.
                (and_kw, 2, opAssoc.LEFT, lambda t: (
                    lambda data: all(f(data) for f in t[0] if callable(f)))),
                (or_kw, 2, opAssoc.LEFT, lambda t: (
                    lambda data: any(f(data) for f in t[0] if callable(f)))),
            ],
        )

    def _create_evaluator_func(self, tokens):
        """
        Create a closure that captures tokens and waits for the data dictionary.
        """
        if len(tokens) == 1:
            # Rule: single term -> path CONTAINS term
            return lambda data: self._compare(data, 'path', ':', tokens[0])
        # Rule: explicit triplet (key, operator, value)
        return lambda data: self._compare(data, tokens[0], tokens[1], tokens[2])

    def compile(self, expression):
        """
        Parse the expression once and return a reusable function.

        On any parse failure the error is printed and a function that
        always returns False is returned instead of raising.
        """
        try:
            return self.grammar.parse_string(expression, parse_all=True)[0]
        except Exception as e:
            print(f"Compilation Error: {e}")
            # Fallback: return a function that always fails gracefully
            return lambda data: False
class BagheeraSearcher:
"""Class to handle Baloo searches and interact with the C wrapper."""
@@ -69,84 +196,8 @@ class BagheeraSearcher:
return lib
def check_keywords(
    self, text: str, query: str, file_path: str = "", file_id: int = 0
) -> bool:
    """
    Evaluate whether *text* satisfies a logical keyword query.

    Supports ``AND``, ``OR`` (upper-case), parentheses, the shape words
    PORTRAIT / LANDSCAPE / SQUARE and ``width <op> height`` comparisons.
    Words separated by whitespace are implicitly AND-ed. Keyword matching
    against *text* is a case-insensitive substring test.

    Shape and dimension terms are only resolved when *file_path* is
    non-empty; the resolution is looked up with ``get_resolution(file_id)``
    and any failure there makes those terms evaluate to false.

    Returns False if the generated regular expression is invalid.
    """
    if file_path:
        try:
            w, h = get_resolution(file_id)
        except Exception:
            # Best effort: an unknown resolution makes dimension terms false.
            w, h = -1, -1

        def replace_dim(match: re.Match) -> str:
            """Replace a shape/dimension term with a __true__/__false__ marker."""
            if w <= 0 or h <= 0:
                return "__false__"

            s = match.group(0).upper()
            if "PORTRAIT" in s:
                return "__true__" if w < h else "__false__"
            if "LANDSCAPE" in s:
                return "__true__" if w > h else "__false__"
            if "SQUARE" in s:
                return "__true__" if w == h else "__false__"

            # Otherwise this is a "width <op> height" match.
            op = match.group(1)
            ops_map = {
                "=": w == h,
                ">": w > h,
                "<": w < h,
                ">=": w >= h,
                "<=": w <= h,
                "!=": w != h,
            }
            return "__true__" if ops_map.get(op, False) else "__false__"

        query = re.sub(
            r"\b(PORTRAIT|LANDSCAPE|SQUARE)\b",
            replace_dim,
            query,
            flags=re.IGNORECASE,
        )
        query = re.sub(
            r"\bwidth\s*(<=|>=|!=|<|>|=)\s*height\b",
            replace_dim,
            query,
            flags=re.IGNORECASE,
        )

    text = text.lower()

    # Adjacent words are implicitly AND-ed.
    query = re.sub(r"(?<=\w)\s+(?=\w)", " AND ", query)

    # OR / AND must be whole words. Without the boundaries a keyword such as
    # "ORANGE" was split into "OR" + "ANGE", producing an empty alternative
    # that matched every text.
    tokens = re.findall(r"\(|\)|\bOR\b|\bAND\b|[^\s()]+", query)

    regex_parts = []
    for t in tokens:
        if t in ("(", ")"):
            regex_parts.append(t)
        elif t == "OR":
            regex_parts.append("|")
        elif t == "AND":
            # AND is expressed by concatenating lookaheads.
            continue
        elif t == "__true__":
            regex_parts.append("(?=.*)")
        elif t == "__false__":
            regex_parts.append("(?!)")
        else:
            regex_parts.append(rf"(?=.*{re.escape(t)})")

    final_regex = "".join(regex_parts).lower()

    try:
        return bool(re.search(f"^{final_regex}.*", text, re.DOTALL))
    except re.error:
        return False
def get_baloo_info(self, file_path: str) -> Dict[str, str]:
"""Retrieves properties for a specific file from Baloo."""
"""Extract properties for a specific file directly from file."""
result = self.baloo_lib.get_file_properties(file_path.encode("utf-8"))
if not result:
return {}
@@ -181,6 +232,8 @@ class BagheeraSearcher:
options: Dict[str, Any],
search_opts: Dict[str, Any],
files_count: int,
exclude_evaluator: Any,
exclude_sources: Dict[str, bool]
) -> Iterator[Dict[str, Any]]:
"""Executes a recursive search yielded item by item."""
options["query"] = query_text
@@ -195,15 +248,20 @@ class BagheeraSearcher:
continue
self.ids_processed.add(file_id)
rec_exclude = search_opts.get("recursive_exclude")
if not rec_exclude or not self.check_keywords(
item["path"], rec_exclude, item["path"], file_id
):
if exclude_evaluator:
file_info = {'path': item["path"]}
if exclude_sources.get('properties'):
file_info = file_info | get_info(file_id)
if exclude_sources.get('tags'):
file_info = file_info | get_tags(file_id)
else:
file_info = None
if not file_info or not exclude_evaluator(file_info):
if files_count >= search_opts.get("offset", 0):
search_opts["limit"] -= 1
yield item
files_count += 1
def search(
@@ -215,6 +273,30 @@ class BagheeraSearcher:
"""
Main search generator. Yields file dictionaries.
"""
if search_opts['exclude']:
ee = EvaluateExpression()
exclude_evaluator = ee.compile(search_opts['exclude'])
exclude_sources = {}
if expression_contains_property(search_opts['exclude']):
exclude_sources['properties'] = True
if expression_contains_tags(search_opts['exclude']):
exclude_sources['tags'] = True
else:
exclude_evaluator = None
exclude_sources = {}
if search_opts['recursive_exclude']:
ee = EvaluateExpression()
recurse_exclude_evaluator = ee.compile(search_opts['recursive_exclude'])
recurse_exclude_sources = {}
if expression_contains_property(search_opts['recursive_exclude']):
recurse_exclude_sources['properties'] = True
if expression_contains_tags(search_opts['recursive_exclude']):
recurse_exclude_sources['tags'] = True
else:
recurse_exclude_evaluator = None
recurse_exclude_sources = {}
main_options["query"] = parse_date(query_text)
files = self._execute_query(main_options)
@@ -241,15 +323,22 @@ class BagheeraSearcher:
continue
self.ids_processed.add(file_id)
exclude_pattern = search_opts.get("exclude")
if not exclude_pattern or not self.check_keywords(
item["path"], exclude_pattern, item["path"], file_id
):
if exclude_evaluator:
file_info = {'path': item["path"]}
if exclude_sources.get('properties'):
file_info = file_info | get_info(file_id)
if exclude_sources.get('tags'):
file_info = file_info | get_tags(file_id)
else:
file_info = None
if not file_info or not exclude_evaluator(file_info):
if is_recursive:
main_options["directory"] = item["path"]
yield from self.search_recursive(
query_text, main_options, search_opts, files_count
query_text, main_options, search_opts, files_count,
recurse_exclude_evaluator, recurse_exclude_sources
)
else:
yield item