First commit

This commit is contained in:
Ignacio Serantes
2026-03-22 18:13:22 +01:00
commit 3fb55ee4f3
19 changed files with 1928 additions and 0 deletions

88
README.md Normal file
View File

@@ -0,0 +1,88 @@
# Bagheera Search Tool
Bagheera is an advanced search utility and library for the **KDE Baloo** indexing service. It provides a flexible Python interface to perform logical queries, recursive searches, and metadata retrieval, specifically optimized for **KDE Frameworks 6 (KF6)**.
## Features
* **Modular Architecture**: Use it as a standalone CLI tool or as a Python library (`BagheeraSearcher`).
* **KF6 Native**: Compiled against `KF6Baloo` and `KF6CoreAddons` using `pkg-config` for robust path detection.
* **Natural Language Dates**: Supports queries like `MODIFIED YESTERDAY` or `MODIFIED LAST THREE WEEKS`.
* **Advanced Logic**: Complex filtering with `AND`, `OR`, and parenthesis, plus image dimension checks (`PORTRAIT`, `LANDSCAPE`, `SQUARE`).
* **Persistence**: Automatically remembers the last used `--sort` order in user configuration.
## Prerequisites
Before installing, ensure your system has the following KF6 development libraries and tools:
### Arch Linux
```bash
sudo pacman -S baloo6 kcoreaddons6 pkgconf gcc
```
### Fedora
```bash
sudo dnf install kf6-baloo-devel kf6-kcoreaddons-devel pkgconf-pkg-config gcc
```
### openSUSE
```bash
sudo zypper install baloo6-6 kcoreaddons6-dev pkgconf gcc
```
### Ubuntu / Debian (Testing/Unstable)
```bash
sudo apt install libkf6baloo-dev libkf6coreaddons-dev pkg-config gcc
```
## Installation
1. Clone the repository and ensure your C wrapper source (baloo_wrapper.c) is present in the root directory.
2. Install via pip:
This will automatically trigger the compilation of the C wrapper and install Python dependencies like lmdb.
```bash
pip install .
```
## CLI Usage
The `bagheerasearch` command is available after installation.
```bash
# Search for images modified this week
bagheerasearch --type image "MODIFIED THIS WEEK"
# Recursive search in a specific directory excluding portraits
bagheerasearch -d ~/Pictures -r "nature" --exclude "PORTRAIT"
# Show file IDs and use Konsole-friendly output
bagheerasearch "vacation" -i -k
```
## Library Usage
You can integrate Bagheera into your own projects:
```Python
from bagheera_search_lib import BagheeraSearcher
searcher = BagheeraSearcher()
results = searcher.search("MODIFIED TODAY", {"limit": 10}, {"limit": 10, "recursive": None})
for item in results:
    print(f"Found: {item['path']}")
```
## Configuration
User settings (like the last used sort order) are stored in:
~/.config/bagheerasearch/config.json
## License
Copyright (C) 2026 by Ignacio Serantes.

View File

@@ -0,0 +1,6 @@
from .bagheera_query_parser import BagheeraQueryParser
def parse_date(query):
    """Translate the natural-language date phrases in *query*.

    Convenience wrapper: builds a throw-away ``BagheeraQueryParser`` and
    returns whatever its ``parse_date`` method returns for *query*.
    """
    return BagheeraQueryParser().parse_date(query)

View File

@@ -0,0 +1,224 @@
#!/usr/bin/env python
"""
Bagheera Query Parser
Converts natural language English date expressions into Baloo-compatible queries.
"""
import calendar
import re
from datetime import datetime, timedelta
from typing import Dict, Optional
class BagheeraQueryParser:
    """Rewrite English ``MODIFIED ...`` date phrases as Baloo ``modified`` filters.

    Recognised phrases (case-insensitive, anywhere inside the query):

    * ``MODIFIED TODAY`` / ``MODIFIED YESTERDAY``
    * ``MODIFIED (THIS|LAST) (DAY|WEEK|MONTH|YEAR)``
    * ``MODIFIED LAST <N> (DAY|WEEK|MONTH|YEAR)[S]``
    * ``MODIFIED <N> (DAY|WEEK|MONTH|YEAR)[S] AGO``

    ``<N>`` is either digits or a written number from ONE to TWENTY.  Text
    outside these phrases is returned untouched.
    """

    # Written numbers accepted wherever a date phrase expects <N>.
    NUMBER_MAP: Dict[str, int] = {
        'ONE': 1, 'TWO': 2, 'THREE': 3, 'FOUR': 4, 'FIVE': 5,
        'SIX': 6, 'SEVEN': 7, 'EIGHT': 8, 'NINE': 9, 'TEN': 10,
        'ELEVEN': 11, 'TWELVE': 12, 'THIRTEEN': 13, 'FOURTEEN': 14,
        'FIFTEEN': 15, 'SIXTEEN': 16, 'SEVENTEEN': 17, 'EIGHTEEN': 18,
        'NINETEEN': 19, 'TWENTY': 20
    }

    # All patterns are compiled once, when the class body is executed.
    _NUMBER_ALTERNATION = '|'.join(NUMBER_MAP)
    _NUMBER_RE = re.compile(r'\b(' + _NUMBER_ALTERNATION + r')\b', re.IGNORECASE)
    _COUNT = r'(\d+|' + _NUMBER_ALTERNATION + r')'
    _UNITS = r'(YEAR|MONTH|WEEK|DAY)'
    _TODAY_RE = re.compile(r'\bMODIFIED\s+TODAY\b', re.IGNORECASE)
    _YESTERDAY_RE = re.compile(r'\bMODIFIED\s+YESTERDAY\b', re.IGNORECASE)
    _SIMPLE_RE = re.compile(
        r'\bMODIFIED\s+(LAST|THIS)\s+' + _UNITS + r'\b', re.IGNORECASE)
    _LAST_N_RE = re.compile(
        r'\bMODIFIED\s+LAST\s+' + _COUNT + r'\s+' + _UNITS + r'S?\b',
        re.IGNORECASE)
    _AGO_RE = re.compile(
        r'\bMODIFIED\s+' + _COUNT + r'\s+' + _UNITS + r'S?\s+AGO\b',
        re.IGNORECASE)

    def __init__(self):
        # Midnight of the current day; refreshed at the start of every
        # parse_date() call so a long-lived instance never goes stale.
        self.today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    def _convert_numbers(self, query: str) -> str:
        """Replace every written number (ONE..TWENTY, any case) with digits.

        Kept for callers that want the blanket conversion.  ``parse_date``
        does not use it, because applying it to a whole query would also
        rewrite ordinary search words such as "one".
        """
        def to_digits(match):
            return str(self.NUMBER_MAP[match.group(0).upper()])

        return self._NUMBER_RE.sub(to_digits, query)

    def _parse_count(self, token: str) -> int:
        """Return the integer value of a ``<N>`` token (digits or number word)."""
        word = token.upper()
        if word in self.NUMBER_MAP:
            return self.NUMBER_MAP[word]
        return int(token)

    def _safe_replace_date(self, dt: datetime, year: Optional[int] = None,
                           month: Optional[int] = None,
                           day: Optional[int] = None) -> datetime:
        """Return *dt* with the given fields replaced, clamping the day.

        If the requested day does not exist in the target month (for example
        Feb 29 moved to a non-leap year) the last valid day of that month is
        used instead of raising.
        """
        target_year = dt.year if year is None else year
        target_month = dt.month if month is None else month
        target_day = dt.day if day is None else day
        try:
            return dt.replace(year=target_year, month=target_month, day=target_day)
        except ValueError:
            # Raises again if year/month themselves are out of range.
            last_day = calendar.monthrange(target_year, target_month)[1]
            return dt.replace(year=target_year, month=target_month,
                              day=min(target_day, last_day))

    def _add_months(self, dt: datetime, months: int) -> datetime:
        """Add (or, for negative values, subtract) whole months, clamping the day."""
        month_index = dt.month - 1 + months
        year = dt.year + month_index // 12
        month = month_index % 12 + 1
        day = min(dt.day, calendar.monthrange(year, month)[1])
        return dt.replace(year=year, month=month, day=day)

    def _get_start_of_unit(self, dt: datetime, unit: str, offset: int = 0) -> datetime:
        """Return the first day of the *unit* containing *dt*, *offset* units back.

        Weeks start on Monday.  An unknown unit returns *dt* unchanged.
        """
        if unit == 'YEAR':
            return dt.replace(year=dt.year - offset, month=1, day=1)
        if unit == 'MONTH':
            # Step back first, then snap to the first of that month.
            return self._add_months(dt, -offset).replace(day=1)
        if unit == 'WEEK':
            # datetime.weekday(): Monday is 0.
            return dt - timedelta(days=dt.weekday() + (offset * 7))
        if unit == 'DAY':
            return dt - timedelta(days=offset)
        return dt

    def _subtract_units(self, dt: datetime, unit: str, n: int) -> datetime:
        """Return *dt* moved back by *n* units (forward when *n* is negative).

        Raises:
            ValueError: if *unit* is not YEAR, MONTH, WEEK or DAY.
        """
        if unit == 'YEAR':
            return self._safe_replace_date(dt, year=dt.year - n)
        if unit == 'MONTH':
            return self._add_months(dt, -n)
        if unit == 'WEEK':
            return dt - timedelta(weeks=n)
        if unit == 'DAY':
            return dt - timedelta(days=n)
        raise ValueError(f"Unsupported date unit: {unit!r}")

    def parse_date(self, query):
        """Return *query* with every recognised date phrase rewritten.

        ``TODAY``/``YESTERDAY`` become ``modified=YYYY-MM-DD``; every other
        phrase becomes ``(modified>=START AND modified<END)`` with an
        exclusive end date.  A phrase whose dates cannot be represented
        (for example "MODIFIED 50000 YEARS AGO") is left as written.
        """
        self.today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        today = self.today
        tomorrow = today + timedelta(days=1)
        date_format = '%Y-%m-%d'

        def as_range(compute_bounds):
            """Wrap a (match -> start, end) function as an re.sub callback."""
            def callback(match):
                try:
                    start, end = compute_bounds(match)
                except (ValueError, OverflowError):
                    # Date outside datetime's range: keep the original text.
                    return match.group(0)
                return (f"(modified>={start.strftime(date_format)}"
                        f" AND modified<{end.strftime(date_format)})")
            return callback

        # 1. TODAY / YESTERDAY
        q = self._TODAY_RE.sub(f"modified={today.strftime(date_format)}", query)
        yesterday = today - timedelta(days=1)
        q = self._YESTERDAY_RE.sub(f"modified={yesterday.strftime(date_format)}", q)

        # 2. (LAST|THIS) (YEAR|MONTH|WEEK|DAY)
        def simple_bounds(match):
            modifier = match.group(1).upper()
            unit = match.group(2).upper()
            current_start = self._get_start_of_unit(today, unit)
            if modifier == "THIS":
                # From the start of the current unit up to and including today.
                return current_start, tomorrow
            # LAST: the whole previous unit, ending where the current one starts.
            return self._get_start_of_unit(today, unit, offset=1), current_start

        q = self._SIMPLE_RE.sub(as_range(simple_bounds), q)

        # 3. LAST <N> (YEAR|MONTH|WEEK|DAY)[S]: rolling window ending today.
        def last_n_bounds(match):
            count = self._parse_count(match.group(1))
            unit = match.group(2).upper()
            if unit == 'DAY':
                # Today counts as the first day of the window.
                start = today - timedelta(days=max(0, count - 1))
            elif unit == 'WEEK':
                start = today - timedelta(days=max(0, (count * 7) - 1))
            else:
                start = self._subtract_units(today, unit, count)
            return start, tomorrow

        q = self._LAST_N_RE.sub(as_range(last_n_bounds), q)

        # 4. <N> (YEAR|MONTH|WEEK|DAY)[S] AGO: the whole unit N units back.
        def ago_bounds(match):
            count = self._parse_count(match.group(1))
            unit = match.group(2).upper()
            base = self._get_start_of_unit(today, unit, offset=0)
            start = self._subtract_units(base, unit, count)
            end = self._subtract_units(base, unit, count - 1)
            return start, end

        q = self._AGO_RE.sub(as_range(ago_bounds), q)
        return q
if __name__ == '__main__':
    # Basic smoke tests for date parsing: print each input next to the
    # query it is rewritten to.  The second batch probes phrases glued
    # together without whitespace and mixed-case spellings.
    query_batches = (
        [
            "MODIFIED TODAY",
            "first MODIFIED YESTERDAY last",
            "MODIFIED ONE DAY AGO",
            "MODIFIED TWO DAYS AGO",
            "MODIFIED THREE DAYS AGO",
            "MODIFIED LAST TWO DAYS",
            "MODIFIED THIS WEEK",
            "MODIFIED LAST WEEK",
            "MODIFIED LAST TWO WEEKS",
            "MODIFIED ONE WEEK AGO",
            "MODIFIED TWO WEEKS AGO",
            "MODIFIED THREE WEEKS AGO",
            "MODIFIED THIS MONTH",
            "MODIFIED LAST MONTH",
            "MODIFIED LAST TWO MONTHS",
            "MODIFIED ONE MONTH AGO",
            "MODIFIED TWO MONTHS AGO",
            "MODIFIED THREE MONTHS AGO",
            "MODIFIED THIS YEAR",
            "MODIFIED LAST YEAR",
            "MODIFIED LAST TWO YEARS",
            "MODIFIED ONE YEAR AGO",
            "MODIFIED TWO YEARS AGO",
            "MODIFIED THREE YEARS AGO",
            "foto MODIFIED LAST 2 YEARS"
        ],
        [
            "MODIFIED TODAYMODIFIED TODAY",
            "MODIFIED yesterday",
            "MODIFIED THIS MONTHMODIFIED THIS WEEK",
            "MODIFIED LAST YEARMODIFIED YESTERDAY",
            "modified TODAY",
            "modified today"
        ],
    )
    for test_queries in query_batches:
        parser = BagheeraQueryParser()
        print(f"Testing {__file__}:")
        for q in test_queries:
            print(f" Input: '{q}'")
            print(f" Output: '{parser.parse_date(q)}'")
            print("-" * 20)

View File

@@ -0,0 +1,170 @@
#!/usr/bin/env python
"""
Bagheera Query Parser
Converts natural language English date expressions into Baloo-compatible queries.
"""
import re
from datetime import datetime, timedelta
from typing import Dict
class BagheeraQueryParser:
    """Rewrite upper-case ``MODIFIED ...`` date phrases as Baloo filters.

    Only exact upper-case phrases separated by single spaces are recognised.
    """

    def __init__(self):
        # 'today' is refreshed on every parse_date() call so that a process
        # left running for days does not keep using a stale date.
        self.today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)

    def _convert_numbers(self, query: str) -> str:
        """
        Replaces written numbers (ONE to TWENTY) with their numeric string equivalent.
        Only processes uppercase words.

        The query is split on whitespace and re-joined with single spaces,
        so runs of whitespace are collapsed as a side effect.
        """
        number_map: Dict[str, int] = {
            'ONE': 1, 'TWO': 2, 'THREE': 3, 'FOUR': 4, 'FIVE': 5,
            'SIX': 6, 'SEVEN': 7, 'EIGHT': 8, 'NINE': 9, 'TEN': 10,
            'ELEVEN': 11, 'TWELVE': 12, 'THIRTEEN': 13, 'FOURTEEN': 14,
            'FIFTEEN': 15, 'SIXTEEN': 16, 'SEVENTEEN': 17, 'EIGHTEEN': 18,
            'NINETEEN': 19, 'TWENTY': 20
        }
        # The map keys are upper-case, so only upper-case words can match.
        return " ".join(
            str(number_map[word]) if word in number_map else word
            for word in query.split()
        )

    def _get_start_of_unit(self, dt, unit, offset=0):
        """Return the first day of the *unit* containing *dt*, *offset* units back.

        Weeks start on Monday.  A negative *offset* moves forward.  Returns
        None for an unknown unit.
        """
        if unit == 'YEAR':
            return dt.replace(year=dt.year - offset, month=1, day=1)
        if unit == 'MONTH':
            # Work on a zero-based month count so that both underflow
            # (month <= 0) and overflow (month > 12) wrap into the right year.
            year, month_index = divmod(dt.year * 12 + (dt.month - 1) - offset, 12)
            return dt.replace(year=year, month=month_index + 1, day=1)
        if unit == 'WEEK':
            return dt - timedelta(days=dt.weekday() + (offset * 7))
        if unit == 'DAY':
            return dt - timedelta(days=offset)

    def _subtract_units(self, dt, unit, n):
        """Return *dt* moved back by *n* units.

        MONTH results are snapped to the first day of the target month.
        Returns None for an unknown unit.
        """
        if unit == 'YEAR':
            try:
                return dt.replace(year=dt.year - n)
            except ValueError:
                # Feb 29 does not exist in the target year: use Feb 28.
                return dt.replace(year=dt.year - n, day=28)
        if unit == 'MONTH':
            return self._get_start_of_unit(dt, 'MONTH', offset=n)
        if unit == 'WEEK':
            return dt - timedelta(weeks=n)
        if unit == 'DAY':
            return dt - timedelta(days=n)

    def parse_date(self, query):
        """Return *query* with every recognised date phrase rewritten.

        ``TODAY``/``YESTERDAY`` become ``modified=YYYY-MM-DD``; the other
        phrases become ``(modified>=START AND modified<END)``.
        """
        self.today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        q = self._convert_numbers(query)

        # 1. TODAY / YESTERDAY
        q = re.sub(r'\bMODIFIED TODAY\b', f"modified={self.today.strftime('%Y-%m-%d')}",
                   q)
        yest = self.today - timedelta(days=1)
        q = re.sub(r'\bMODIFIED YESTERDAY\b', f"modified={yest.strftime('%Y-%m-%d')}",
                   q)

        # 2. (LAST|THIS) (YEAR|MONTH|WEEK), matched anywhere in the query.
        def replace_simple(m):
            mod, unit = m.groups()
            if mod == "THIS":
                start = self._get_start_of_unit(self.today, unit).strftime('%Y-%m-%d')
                end = (self.today + timedelta(days=1)).strftime('%Y-%m-%d')
            else:
                start = self._get_start_of_unit(self.today, unit,
                                                offset=1).strftime('%Y-%m-%d')
                end = (self._get_start_of_unit(self.today, unit)).strftime('%Y-%m-%d')
            return f"(modified>={start} AND modified<{end})"

        q = re.sub(r"\bMODIFIED (LAST|THIS) (YEAR|MONTH|WEEK)\b", replace_simple, q)

        # 3. LAST <N> (YEAR|MONTH|WEEK|DAY)[S]
        def replace_last_n(m):
            n, unit = m.groups()
            start = self._subtract_units(self.today, unit, int(n)).strftime('%Y-%m-%d')
            end = (self.today + timedelta(days=1)).strftime('%Y-%m-%d')
            return f"(modified>={start} AND modified<{end})"

        q = re.sub(r"\bMODIFIED LAST (\d+) (YEAR|MONTH|WEEK|DAY)S?\b",
                   replace_last_n, q)

        # 4. <N> (YEAR|MONTH|WEEK|DAY)[S] AGO
        def replace_ago(m):
            n, unit = m.groups()
            count = int(n)
            # NOTE(review): the window is anchored at the start of the
            # *previous* unit, so "1 DAY AGO" covers the day before
            # yesterday - confirm this is the intended meaning.
            anchor = self._get_start_of_unit(self.today, unit, offset=1)
            end = self._subtract_units(anchor, unit, count - 1).strftime('%Y-%m-%d')
            start = self._subtract_units(anchor, unit, count)
            return f"(modified>={start.strftime('%Y-%m-%d')} AND modified<{end})"

        q = re.sub(r"\bMODIFIED (\d+) (YEAR|MONTH|WEEK|DAY)S? AGO\b", replace_ago, q)
        return q
if __name__ == '__main__':
    # Basic smoke tests for date parsing: print each input next to the
    # query it is rewritten to.  The second batch probes phrases glued
    # together without whitespace and mixed-case spellings.
    query_batches = (
        [
            "MODIFIED TODAY",
            "first MODIFIED YESTERDAY last",
            "MODIFIED ONE DAY AGO",
            "MODIFIED TWO DAYS AGO",
            "MODIFIED THREE DAYS AGO",
            "MODIFIED LAST TWO DAYS",
            "MODIFIED THIS WEEK",
            "MODIFIED LAST WEEK",
            "MODIFIED LAST TWO WEEKS",
            "MODIFIED ONE WEEK AGO",
            "MODIFIED TWO WEEKS AGO",
            "MODIFIED THREE WEEKS AGO",
            "MODIFIED THIS MONTH",
            "MODIFIED LAST MONTH",
            "MODIFIED LAST TWO MONTHS",
            "MODIFIED ONE MONTH AGO",
            "MODIFIED TWO MONTHS AGO",
            "MODIFIED THREE MONTHS AGO",
            "MODIFIED THIS YEAR",
            "MODIFIED LAST YEAR",
            "MODIFIED LAST TWO YEARS",
            "MODIFIED ONE YEAR AGO",
            "MODIFIED TWO YEARS AGO",
            "MODIFIED THREE YEARS AGO",
            "foto MODIFIED LAST 2 YEARS"
        ],
        [
            "MODIFIED TODAYMODIFIED TODAY",
            "MODIFIED yesterday",
            "MODIFIED THIS MONTHMODIFIED THIS WEEK",
            "MODIFIED LAST YEARMODIFIED YESTERDAY",
            "modified TODAY",
            "modified today"
        ],
    )
    for test_queries in query_batches:
        parser = BagheeraQueryParser()
        print(f"Testing {__file__}:")
        for q in test_queries:
            print(f" Input: '{q}'")
            print(f" Output: '{parser.parse_date(q)}'")
            print("-" * 20)

View File

@@ -0,0 +1,7 @@
from .bagheera_search import BagheeraSearcher
def search(query):
    """Simplified library entry point: search for *query* with default options.

    Returns the generator produced by ``BagheeraSearcher.search``; it yields
    one result dictionary per matching file.

    Raises:
        FileNotFoundError: if the Baloo C wrapper library cannot be located.
    """
    import sys

    bs = BagheeraSearcher()
    # search() takes the two option dicts as arguments and stops as soon as
    # search_opts["limit"] is <= 0, so pass an effectively unbounded budget.
    return bs.search(query, {}, {"limit": sys.maxsize})

View File

@@ -0,0 +1,313 @@
"""
Bagheera Search Library
A Python interface for the Baloo search wrapper.
"""
import ctypes
import json
import re
import sys
from pathlib import Path
from typing import Dict, Any, Iterator, Optional, Union
from baloo_tools import get_resolution
from bagheera_query_parser_lib import parse_date
class BagheeraSearcher:
    """Class to handle Baloo searches and interact with the C wrapper.

    The instance remembers every file id it has already seen in
    ``ids_processed`` so that no file is yielded twice; call
    ``reset_state()`` before starting an unrelated search.
    """

    def __init__(self, lib_path: Optional[Union[str, Path]] = None) -> None:
        """Load the C wrapper, from *lib_path* if given, else from default locations.

        Raises:
            FileNotFoundError: if the wrapper library cannot be found.
        """
        self.ids_processed: set[int] = set()
        self.baloo_lib = self._load_baloo_wrapper(lib_path)

    def _load_baloo_wrapper(self, custom_path: Optional[Union[str, Path]]) \
            -> ctypes.CDLL:
        """Loads and configures the Baloo C wrapper library.

        Without *custom_path* the library is looked up next to this file (or
        in the PyInstaller bundle), then in the active virtual environment,
        then in the usual system library directories.

        Raises:
            FileNotFoundError: if no candidate path exists.
        """
        if custom_path:
            lib_path = Path(custom_path)
            if not lib_path.exists():
                raise FileNotFoundError(
                    f"ERROR: Baloo wrapper '{lib_path.name}' not found at {lib_path}"
                )
        else:
            lib_name = "libbaloo_wrapper.so"
            if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
                base_dir = Path(getattr(sys, '_MEIPASS')) / 'lib'
            else:
                base_dir = Path(__file__).parent.absolute()

            search_paths = [base_dir]
            # sys.prefix differs from sys.base_prefix inside a virtualenv.
            if sys.prefix != sys.base_prefix:
                venv_base = Path(sys.prefix)
                search_paths.append(venv_base / "lib64")
                search_paths.append(venv_base / "lib")
            search_paths.extend([
                Path("/lib64"),
                Path("/lib"),
                Path("/usr/lib64"),
                Path("/usr/lib"),
                Path("/usr/local/lib64"),
                Path("/usr/local/lib")
            ])

            lib_path = next(
                (directory / lib_name for directory in search_paths
                 if (directory / lib_name).exists()),
                None,
            )
            if lib_path is None:
                raise FileNotFoundError(
                    f"ERROR: Baloo wrapper '{lib_name}' not found at {search_paths}"
                )

        lib = ctypes.CDLL(str(lib_path))
        lib.execute_baloo_query.argtypes = [ctypes.c_char_p]
        lib.execute_baloo_query.restype = ctypes.c_char_p
        lib.get_file_properties.argtypes = [ctypes.c_char_p]
        lib.get_file_properties.restype = ctypes.c_char_p
        return lib

    @staticmethod
    def _limit_reached(search_opts: Dict[str, Any]) -> bool:
        """Return True when search_opts carries a 'limit' that is used up.

        A missing or None limit means "no limit".
        """
        limit = search_opts.get("limit")
        return limit is not None and limit <= 0

    def check_keywords(
        self, text: str, query: str, file_path: str = "", file_id: int = 0
    ) -> bool:
        """
        Evaluates if a text meets a logical query.
        Supports: AND, OR, ( ), dimensions (width=height, etc.), and shapes.

        Keywords are matched case-insensitively as substrings of *text*;
        adjacent keywords are implicitly AND-ed.  Shape and dimension
        expressions are only evaluated when *file_path* is given; they are
        false when the resolution of *file_id* cannot be determined.
        An unparsable query (e.g. unbalanced parentheses) returns False.
        """
        if file_path:
            w, h = -1, -1
            try:
                w, h = get_resolution(file_id)
            except Exception:
                # Best effort: any failure means "resolution unknown".
                w, h = -1, -1

            def verdict(flag: bool) -> str:
                return "__true__" if flag else "__false__"

            def replace_shape(match: re.Match) -> str:
                if w <= 0 or h <= 0:
                    return "__false__"
                shape = match.group(1).upper()
                if shape == "PORTRAIT":
                    return verdict(w < h)
                if shape == "LANDSCAPE":
                    return verdict(w > h)
                return verdict(w == h)

            def replace_comparison(match: re.Match) -> str:
                if w <= 0 or h <= 0:
                    return "__false__"
                ops_map = {
                    "=": w == h,
                    ">": w > h,
                    "<": w < h,
                    ">=": w >= h,
                    "<=": w <= h,
                    "!=": w != h,
                }
                return verdict(ops_map.get(match.group(1), False))

            query = re.sub(
                r"\b(PORTRAIT|LANDSCAPE|SQUARE)\b",
                replace_shape,
                query,
                flags=re.IGNORECASE,
            )
            query = re.sub(
                r"\bwidth\s*(<=|>=|!=|<|>|=)\s*height\b",
                replace_comparison,
                query,
                flags=re.IGNORECASE,
            )

        text = text.lower()
        query = re.sub(r"(?<=\w)\s+(?=\w)", " AND ", query)
        # Tokens are parentheses or whole whitespace-delimited words, so a
        # keyword such as "ORANGE" is never split into "OR" + "ANGE".
        tokens = re.findall(r"\(|\)|[^\s()]+", query)

        regex_parts = []
        for t in tokens:
            if t in ("(", ")"):
                regex_parts.append(t)
            elif t == "OR":
                regex_parts.append("|")
            elif t == "AND":
                # AND is implicit: consecutive lookaheads must all succeed.
                continue
            elif t == "__true__":
                regex_parts.append("(?=.*)")
            elif t == "__false__":
                regex_parts.append("(?!)")
            else:
                regex_parts.append(rf"(?=.*{re.escape(t.lower())})")

        final_regex = "".join(regex_parts)
        try:
            return bool(re.search(f"^{final_regex}.*", text, re.DOTALL))
        except re.error:
            return False

    def get_baloo_info(self, file_path: str) -> Dict[str, str]:
        """Retrieves properties for a specific file from Baloo.

        The wrapper answers with ``key:value`` entries separated by ``|``;
        entries without a colon are ignored.  Returns an empty dict when the
        wrapper returns nothing.
        """
        result = self.baloo_lib.get_file_properties(file_path.encode("utf-8"))
        if not result:
            return {}
        data_raw = result.decode("utf-8")
        properties = {}
        for entry in data_raw.split("|"):
            if ":" in entry:
                k, v = entry.split(":", 1)
                properties[k] = v
        return properties

    def _execute_query(self, options: Dict[str, Any]) -> list:
        """Helper method to execute the query against the C wrapper.

        *options* is sent as JSON; the JSON answer is decoded and returned.
        An empty answer or invalid JSON yields an empty list.
        """
        query_json = json.dumps(options).encode("utf-8")
        result_ptr = self.baloo_lib.execute_baloo_query(query_json)
        if not result_ptr:
            return []
        try:
            raw_results = result_ptr.decode("utf-8")
            return json.loads(raw_results)
        except json.JSONDecodeError as e:
            print(f"JSON decode error from Baloo wrapper: {e}")
            return []

    def search_recursive(
        self,
        query_text: str,
        options: Dict[str, Any],
        search_opts: Dict[str, Any],
        files_count: int,
    ) -> Iterator[Dict[str, Any]]:
        """Executes a recursive search yielded item by item.

        Sets ``options["query"]`` to *query_text*, runs the query and yields
        every new item not matched by ``search_opts["recursive_exclude"]``,
        skipping accepted items until ``search_opts["offset"]`` is reached.
        ``search_opts["limit"]`` (if present) is decremented in place for
        each yielded item.
        """
        options["query"] = query_text
        files = self._execute_query(options)
        for item in files:
            if self._limit_reached(search_opts):
                break
            file_id = int(item["id"], 16)
            if file_id in self.ids_processed:
                continue
            self.ids_processed.add(file_id)

            rec_exclude = search_opts.get("recursive_exclude")
            if not rec_exclude or not self.check_keywords(
                item["path"], rec_exclude, item["path"], file_id
            ):
                if files_count >= search_opts.get("offset", 0):
                    if search_opts.get("limit") is not None:
                        search_opts["limit"] -= 1
                    yield item
                files_count += 1

    def search(
        self,
        query_text: str,
        main_options: Optional[Dict[str, Any]] = None,
        search_opts: Optional[Dict[str, Any]] = None,
    ) -> Iterator[Dict[str, Any]]:
        """
        Main search generator. Yields file dictionaries.

        Args:
            query_text: user query; date phrases are translated by parse_date.
            main_options: options sent to the C wrapper; its "query" (and, in
                recursive mode, "type" and "directory") entries are
                overwritten in place.  Defaults to an empty dict.
            search_opts: client-side options read here: "limit", "offset",
                "exclude", "recursive", "recursive_exclude", "type".  A
                missing or None "limit" means unlimited.  Defaults to an
                empty dict.
        """
        main_options = {} if main_options is None else main_options
        search_opts = {} if search_opts is None else search_opts

        main_options["query"] = parse_date(query_text)
        files = self._execute_query(main_options)
        if not files:
            return

        # An empty string is a valid "recurse without a query" request.
        is_recursive = search_opts.get("recursive") is not None
        if is_recursive:
            if search_opts.get("type"):
                main_options["type"] = search_opts["type"]
            elif "type" in main_options:
                main_options.pop("type")
            rec_query = search_opts.get("recursive")
            query_text = parse_date(rec_query) if rec_query else ""

        files_count = 0
        for item in files:
            if self._limit_reached(search_opts):
                break
            file_id = int(item["id"], 16)
            if file_id in self.ids_processed:
                continue
            self.ids_processed.add(file_id)

            exclude_pattern = search_opts.get("exclude")
            if not exclude_pattern or not self.check_keywords(
                item["path"], exclude_pattern, item["path"], file_id
            ):
                if is_recursive:
                    # Re-run the recursive query restricted to this result.
                    main_options["directory"] = item["path"]
                    yield from self.search_recursive(
                        query_text, main_options, search_opts, files_count
                    )
                else:
                    yield item
                files_count += 1

    def reset_state(self) -> None:
        """Clears the processed IDs to allow for fresh consecutive searches."""
        self.ids_processed.clear()
if __name__ == "__main__":
    # Quick integration smoke test: load the wrapper and run one small
    # search.  (This block used to appear twice, running everything twice.)
    print(f"Testing {__file__} integration:")
    try:
        searcher = BagheeraSearcher()
        print("✔ Library and wrapper loaded successfully.")
        # Test search (limited to 1 result for today)
        test_main_opts = {"limit": 1}
        test_search_opts = {"limit": 1}
        print("Searching for recent files...")
        results = list(searcher.search(
            "MODIFIED TODAY", test_main_opts, test_search_opts
        ))
        if results:
            print(f"✔ Found: {results[0].get('path')}")
        else:
            print("? No files found for today, but search executed correctly.")
    except FileNotFoundError as e:
        print(f"✘ Setup error: {e}")
    except Exception as e:
        # Top-level boundary of a diagnostic script: report and finish.
        print(f"✘ Unexpected error: {e}")

View File

@@ -0,0 +1,284 @@
"""
Bagheera Search Library
A Python interface for the Baloo search wrapper.
"""
import ctypes
import json
import re
import sys
from pathlib import Path
from typing import Dict, Any, Iterator, Optional, Union
from baloo_tools import get_resolution
from date_query_parser import parse_date
class BagheeraSearcher:
    """Class to handle Baloo searches and interact with the C wrapper.

    The instance remembers every file id it has already seen in
    ``ids_processed`` so that no file is yielded twice; call
    ``reset_state()`` before starting an unrelated search.
    """

    def __init__(self, lib_path: Optional[Union[str, Path]] = None) -> None:
        """Load the C wrapper, from *lib_path* if given, else from next to this file.

        Raises:
            FileNotFoundError: if the wrapper library cannot be found.
        """
        self.ids_processed: set[int] = set()
        self.baloo_lib = self._load_baloo_wrapper(lib_path)

    def _load_baloo_wrapper(self, custom_path: Optional[Union[str, Path]]) \
            -> ctypes.CDLL:
        """Loads and configures the Baloo C wrapper library.

        Without *custom_path* the library is expected next to this file, or
        in the ``lib`` directory of a PyInstaller bundle.

        Raises:
            FileNotFoundError: if the resolved path does not exist.
        """
        if custom_path:
            lib_path = Path(custom_path)
        else:
            if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
                current_dir = Path(getattr(sys, '_MEIPASS')) / 'lib'
            else:
                current_dir = Path(__file__).parent.absolute()
            lib_name = "libbaloo_wrapper.so"
            lib_path = current_dir / lib_name

        if not lib_path.exists():
            raise FileNotFoundError(
                f"ERROR: Baloo wrapper '{lib_path.name}' not found at {lib_path}"
            )

        lib = ctypes.CDLL(str(lib_path))
        lib.execute_baloo_query.argtypes = [ctypes.c_char_p]
        lib.execute_baloo_query.restype = ctypes.c_char_p
        lib.get_file_properties.argtypes = [ctypes.c_char_p]
        lib.get_file_properties.restype = ctypes.c_char_p
        return lib

    @staticmethod
    def _limit_reached(search_opts: Dict[str, Any]) -> bool:
        """Return True when search_opts carries a 'limit' that is used up.

        A missing or None limit means "no limit".
        """
        limit = search_opts.get("limit")
        return limit is not None and limit <= 0

    def check_keywords(
        self, text: str, query: str, file_path: str = "", file_id: int = 0
    ) -> bool:
        """
        Evaluates if a text meets a logical query.
        Supports: AND, OR, ( ), dimensions (width=height, etc.), and shapes.

        Keywords are matched case-insensitively as substrings of *text*;
        adjacent keywords are implicitly AND-ed.  Shape and dimension
        expressions are only evaluated when *file_path* is given; they are
        false when the resolution of *file_id* cannot be determined.
        An unparsable query (e.g. unbalanced parentheses) returns False.
        """
        if file_path:
            w, h = -1, -1
            try:
                w, h = get_resolution(file_id)
            except Exception:
                # Best effort: any failure means "resolution unknown".
                w, h = -1, -1

            def verdict(flag: bool) -> str:
                return "__true__" if flag else "__false__"

            def replace_shape(match: re.Match) -> str:
                if w <= 0 or h <= 0:
                    return "__false__"
                shape = match.group(1).upper()
                if shape == "PORTRAIT":
                    return verdict(w < h)
                if shape == "LANDSCAPE":
                    return verdict(w > h)
                return verdict(w == h)

            def replace_comparison(match: re.Match) -> str:
                if w <= 0 or h <= 0:
                    return "__false__"
                ops_map = {
                    "=": w == h,
                    ">": w > h,
                    "<": w < h,
                    ">=": w >= h,
                    "<=": w <= h,
                    "!=": w != h,
                }
                return verdict(ops_map.get(match.group(1), False))

            query = re.sub(
                r"\b(PORTRAIT|LANDSCAPE|SQUARE)\b",
                replace_shape,
                query,
                flags=re.IGNORECASE,
            )
            query = re.sub(
                r"\bwidth\s*(<=|>=|!=|<|>|=)\s*height\b",
                replace_comparison,
                query,
                flags=re.IGNORECASE,
            )

        text = text.lower()
        query = re.sub(r"(?<=\w)\s+(?=\w)", " AND ", query)
        # Tokens are parentheses or whole whitespace-delimited words, so a
        # keyword such as "ORANGE" is never split into "OR" + "ANGE".
        tokens = re.findall(r"\(|\)|[^\s()]+", query)

        regex_parts = []
        for t in tokens:
            if t in ("(", ")"):
                regex_parts.append(t)
            elif t == "OR":
                regex_parts.append("|")
            elif t == "AND":
                # AND is implicit: consecutive lookaheads must all succeed.
                continue
            elif t == "__true__":
                regex_parts.append("(?=.*)")
            elif t == "__false__":
                regex_parts.append("(?!)")
            else:
                # The haystack was lower-cased above, so the keyword must be too.
                regex_parts.append(rf"(?=.*{re.escape(t.lower())})")

        final_regex = "".join(regex_parts)
        try:
            return bool(re.search(f"^{final_regex}.*", text, re.DOTALL))
        except re.error:
            return False

    def get_baloo_info(self, file_path: str) -> Dict[str, str]:
        """Retrieves properties for a specific file from Baloo.

        The wrapper answers with ``key:value`` entries separated by ``|``;
        entries without a colon are ignored.  Returns an empty dict when the
        wrapper returns nothing.
        """
        result = self.baloo_lib.get_file_properties(file_path.encode("utf-8"))
        if not result:
            return {}
        data_raw = result.decode("utf-8")
        properties = {}
        for entry in data_raw.split("|"):
            if ":" in entry:
                k, v = entry.split(":", 1)
                properties[k] = v
        return properties

    def _execute_query(self, options: Dict[str, Any]) -> list:
        """Helper method to execute the query against the C wrapper.

        *options* is sent as JSON; the JSON answer is decoded and returned.
        An empty answer or invalid JSON yields an empty list.
        """
        query_json = json.dumps(options).encode("utf-8")
        result_ptr = self.baloo_lib.execute_baloo_query(query_json)
        if not result_ptr:
            return []
        try:
            raw_results = result_ptr.decode("utf-8")
            return json.loads(raw_results)
        except json.JSONDecodeError as e:
            print(f"JSON decode error from Baloo wrapper: {e}")
            return []

    def search_recursive(
        self,
        query_text: str,
        options: Dict[str, Any],
        search_opts: Dict[str, Any],
        files_count: int,
    ) -> Iterator[Dict[str, Any]]:
        """Executes a recursive search yielded item by item.

        Sets ``options["query"]`` to *query_text*, runs the query and yields
        every new item not matched by ``search_opts["recursive_exclude"]``,
        skipping accepted items until ``search_opts["offset"]`` is reached.
        ``search_opts["limit"]`` (if present) is decremented in place for
        each yielded item.
        """
        options["query"] = query_text
        files = self._execute_query(options)
        for item in files:
            if self._limit_reached(search_opts):
                break
            file_id = int(item["id"], 16)
            if file_id in self.ids_processed:
                continue
            self.ids_processed.add(file_id)

            rec_exclude = search_opts.get("recursive_exclude")
            if not rec_exclude or not self.check_keywords(
                item["path"], rec_exclude, item["path"], file_id
            ):
                if files_count >= search_opts.get("offset", 0):
                    if search_opts.get("limit") is not None:
                        search_opts["limit"] -= 1
                    yield item
                files_count += 1

    def search(
        self,
        query_text: str,
        main_options: Optional[Dict[str, Any]] = None,
        search_opts: Optional[Dict[str, Any]] = None,
    ) -> Iterator[Dict[str, Any]]:
        """
        Main search generator. Yields file dictionaries.

        Args:
            query_text: user query; date phrases are translated by parse_date.
            main_options: options sent to the C wrapper; its "query" (and, in
                recursive mode, "type" and "directory") entries are
                overwritten in place.  Defaults to an empty dict.
            search_opts: client-side options read here: "limit", "offset",
                "exclude", "recursive", "recursive_exclude", "type".  A
                missing or None "limit" means unlimited.  Defaults to an
                empty dict.
        """
        main_options = {} if main_options is None else main_options
        search_opts = {} if search_opts is None else search_opts

        main_options["query"] = parse_date(query_text)
        files = self._execute_query(main_options)
        if not files:
            return

        # An empty string is a valid "recurse without a query" request.
        is_recursive = search_opts.get("recursive") is not None
        if is_recursive:
            if search_opts.get("type"):
                main_options["type"] = search_opts["type"]
            elif "type" in main_options:
                main_options.pop("type")
            rec_query = search_opts.get("recursive")
            query_text = parse_date(rec_query) if rec_query else ""

        files_count = 0
        for item in files:
            if self._limit_reached(search_opts):
                break
            file_id = int(item["id"], 16)
            if file_id in self.ids_processed:
                continue
            self.ids_processed.add(file_id)

            exclude_pattern = search_opts.get("exclude")
            if not exclude_pattern or not self.check_keywords(
                item["path"], exclude_pattern, item["path"], file_id
            ):
                if is_recursive:
                    # Re-run the recursive query restricted to this result.
                    main_options["directory"] = item["path"]
                    yield from self.search_recursive(
                        query_text, main_options, search_opts, files_count
                    )
                else:
                    yield item
                files_count += 1

    def reset_state(self) -> None:
        """Clears the processed IDs to allow for fresh consecutive searches."""
        self.ids_processed.clear()
# from bagheera_search_lib import BagheeraSearcher
#
# def main():
# # ... tu lógica de argparse existente ...
#
# try:
# # Inicializamos la librería
# searcher = BagheeraSearcher()
#
# # Consumimos el generador
# for file_info in searcher.search(query_text, main_options, other_options):
# output = file_info['path']
# if other_options.get('konsole'):
# output = f"file:/'{output}'"
# if other_options.get('id'):
# output += f" [ID: {file_info['id']}]"
#
# print(output)
#
# except FileNotFoundError as e:
# print(e)
# sys.exit(1)
#
# if __name__ == "__main__":
# try:
# # Inicializamos la librería
# searcher = BagheeraSearcher()
# # Consumimos el generador
# for file_info in searcher.search(query_text, main_options, other_options):
# output = file_info['path']
# if other_options.get('konsole'):
# output = f"file:/'{output}'"
# if other_options.get('id'):
# output += f" [ID: {file_info['id']}]"
# print(output)
# except FileNotFoundError as e:
# print(e)
# sys.exit(1)

25
bagheerasearch Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Launcher for bagheerasearch.py: resolves the real directory of this script
# (following symlinks), activates a local .venv if one exists, runs the Python
# program with the caller's arguments and exits with the program's status.

# From https://stackoverflow.com/questions/59895/getting-the-source-directory-of-a-bash-script-from-within
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
    DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
    SOURCE="$(readlink "$SOURCE")"
    [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
done
APPPATH="$( cd -P "$( dirname "$SOURCE" )" && pwd )"

pushd . >/dev/null
cd "$APPPATH" || exit

# Remember whether *this script* activated the venv: `deactivate` is a shell
# function defined by the activate script, so it does not exist when
# VIRTUAL_ENV was merely inherited from the calling shell.
ACTIVATED_VENV=0
if [ -d "$APPPATH/.venv" ]; then
    # shellcheck disable=SC1091
    source "$APPPATH/.venv/bin/activate" && ACTIVATED_VENV=1
fi

"$APPPATH/bagheerasearch.py" "$@"
STATUS=$?

if [ "$ACTIVATED_VENV" -eq 1 ]; then
    deactivate
fi

popd >/dev/null || exit
# Propagate the exit status of the Python program, not that of popd.
exit "$STATUS"

269
bagheerasearch.py Executable file
View File

@@ -0,0 +1,269 @@
#!/usr/bin/env python3
# flake8: noqa: E501
"""
Bagheera Search Tool - CLI Client
"""
__appname__ = "BagheeraSearch"
__version__ = "1.0"
__author__ = "Ignacio Serantes"
__email__ = "kde@aynoa.net"
__license__ = "LGPL"
__status__ = "Production"
# "Prototype, Development, Alpha, Beta, Production, Stable, Deprecated"
import argparse
import json
import signal
import sys
from pathlib import Path
# from baloo_tools import get_resolution
# from date_query_parser import parse_date
from bagheera_search_lib import BagheeraSearcher
# --- CONFIGURATION ---
# Human-readable program name, interpolated into the help and version texts.
PROG_NAME = "Bagheera Search Tool"
# Short identifier; used below as the name of the per-user config directory.
PROG_ID = "bagheerasearch"
# NOTE(review): duplicates __version__ above - keep the two in sync.
PROG_VERSION = "1.0"
PROG_BY = "Ignacio Serantes"
# Release date; print_version() uses its first four characters as the
# copyright year.
PROG_DATE = "2026-03-19"
# Per-user configuration directory (~/.config/<PROG_ID>) and the JSON file
# read by load_config() and written by save_config().
CONFIG_DIR = Path.home() / ".config" / PROG_ID
CONFIG_FILE = CONFIG_DIR / "config.json"
def load_config() -> dict:
    """Loads user configuration from disk.

    Returns:
        The JSON object stored in CONFIG_FILE, or an empty dict when the
        file is missing, unreadable, not valid UTF-8/JSON, or does not
        contain a JSON object.  Problems other than a missing file are
        reported with a printed warning; no exception is propagated.
    """
    try:
        with open(CONFIG_FILE, "r", encoding="utf-8") as f:
            config = json.load(f)
    except FileNotFoundError:
        # No configuration saved yet: not worth a warning.
        return {}
    except (ValueError, OSError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"Warning: Could not load config file: {e}")
        return {}

    if not isinstance(config, dict):
        # Valid JSON, but e.g. a list or a number: callers expect a dict.
        print("Warning: Could not load config file: "
              f"expected a JSON object in {CONFIG_FILE}")
        return {}
    return config
def save_config(config: dict) -> None:
    """Write *config* to CONFIG_FILE as indented JSON.

    The configuration directory is created on demand.  An OSError while
    creating the directory or writing the file is reported with a printed
    warning instead of being raised.
    """
    try:
        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
        with CONFIG_FILE.open("w", encoding="utf-8") as handle:
            json.dump(config, handle, indent=4)
    except OSError as err:
        print(f"Warning: Could not save config file: {err}")
def print_help_query() -> None:
    """Prints the detailed help for query syntax.

    The text is an f-string only so that PROG_NAME can be interpolated; it
    is printed to stdout exactly as written below.
    """
    # NOTE(review): the line starting with "[... omitted for brevity" is a
    # leftover placeholder and is printed verbatim to the user - restore the
    # full list of searchable properties it refers to.
    # NOTE(review): the text lists "height<CMP_OP>width" and "MODIFIED THIS
    # DAY" / "LAST DAY" - confirm that the parser and the exclude filter
    # actually accept them.
    help_query = f"""Help updated to 2025-01-01.
Baloo offers a rich syntax for searching through your files. Certain attributes of a file can be searched through.
For example 'type' can be used to filter for files based on their general type:
type:Audio or type:Document
The following comparison operators are supported, but note that 'not equal' operator is not available.
· : - contains (only for text comparison)
· = - equal
· > - greater than
· >= - greater than or equal to
· < - less than
· <= - less than or equal to
Currently the following types are supported:
· Archive
· Folder
· Audio
· Video
· Image
· Document
· Spreadsheet
· Presentation
· Text
These expressions can be combined using AND or OR and additional parenthesis, but note that 'NOT' logical operator is not available.
[... omitted for brevity, but includes the full list of searchable properties as in your original script ...]
{PROG_NAME} recognizes some natural language sentences in English, as long as they are capitalized, and transforms them into queries that can be interpreted by the search engine.
Supported natural language sentences and patterns for queries are:
· MODIFIED TODAY
· MODIFIED YESTERDAY
· MODIFIED THIS [ DAY | WEEK | MONTH | YEAR ]
· LAST <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ]
· <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ] AGO
<NUMBER> can be any number or a number text from ONE to TWENTY.
Remarks: LAST DAY, if used, is interpreted as YESTERDAY.
Supported expressions for --exclude and --recursive-exclude are:
· width<CMP_OP>height - only if file has width and height properties
· height<CMP_OP>width - only if file has width and height properties
· PORTRAIT - only if file width is greater or equal to height
· LANDSCAPE - only if file height is greater or equal to width
· SQUARE - only if file width equals to height
<CMP_OP> can be: != | >= | <= | = | > | <"""
    print(help_query)
def print_version() -> None:
    """Write the program banner and the copyright notice to standard output."""
    banner = f"{PROG_NAME} v{PROG_VERSION} - {PROG_DATE}"
    print(banner)

    # The copyright year is the first four characters of PROG_DATE.
    year = PROG_DATE[:4]
    notice = f"Copyright (C) {year} by {PROG_BY} and, mostly, the good people at KDE"
    print(notice)
def signal_handler(sig, frame) -> None:
    """Report a user-requested cancellation and exit with status 0.

    The two parameters follow the handler signature expected by
    ``signal.signal``; neither is used.
    """
    message = "\nSearch canceled at user request."
    print(message)
    # Equivalent to sys.exit(0): unwinds the interpreter with a success status.
    raise SystemExit(0)
def main() -> None:
    """Command-line entry point: parse arguments, run the search, print results.

    Positional words and any arguments argparse does not recognise are joined
    into a single query string. ``--help-query`` and ``--version`` print their
    text and return. Inconsistent date filters (``--day`` without ``--month``,
    ``--month`` without ``--year``) are reported through ``parser.error``,
    which prints the usage line to stderr and exits with status 2.

    The last ``--sort`` value is persisted through ``save_config`` and reused
    on later runs when ``--sort`` is omitted.

    Exits with status 1 when the search itself fails.
    """
    parser = argparse.ArgumentParser(
        description="An improved search tool for Baloo"
    )
    parser.add_argument("query", nargs="?", help="list of words to query for")
    parser.add_argument("-d", "--directory", help="limit search to specified directory")
    parser.add_argument("-e", "--exclude", help="Search exclude pattern")
    parser.add_argument("-i", "--id", action="store_true", help="show document IDs")
    parser.add_argument("-k", "--konsole", action="store_true", help="show files using file:/ and quotes")
    parser.add_argument("-l", "--limit", type=int, help="the maximum number of results")
    parser.add_argument("-o", "--offset", type=int, help="offset from which to start the search")
    parser.add_argument("-r", "--recursive", nargs="?", const="", default=None, help="enable recurse with or without a query")
    parser.add_argument("-n", "--recursive-indent", help="recursive indent character")
    parser.add_argument("-x", "--recursive-exclude", help="recursion exclude pattern")
    parser.add_argument("-s", "--sort", help="sorting criteria <auto|none>")
    parser.add_argument("-t", "--type", help="type of Baloo data to be searched")
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
    parser.add_argument("--day", type=int, help="day fixed filter, --month is required")
    parser.add_argument("--month", type=int, help="month fixed filter, --year is required")
    parser.add_argument("--year", type=int, help="year filter fixed filter")
    parser.add_argument("--help-query", action="store_true", help="show query syntax help")
    parser.add_argument("--version", action="store_true", help="show version information")

    # Unknown arguments are deliberately kept: they become part of the query.
    args, unknown_args = parser.parse_known_args()
    query_parts = [args.query] if args.query else []
    if unknown_args:
        query_parts.extend(unknown_args)
    query_text = " ".join(query_parts)

    # Informational switches are honoured before any validation so they work
    # regardless of what else is on the command line.
    if args.help_query:
        print_help_query()
        return
    if args.version:
        print_version()
        return

    # Report bad argument combinations the argparse way (usage + exit code 2)
    # instead of raising, which would surface as a traceback when main() is
    # invoked through the console_scripts entry point.
    if args.day is not None and args.month is None:
        parser.error("Missing --month (required when --day is used)")
    if args.month is not None and args.year is None:
        parser.error("Missing --year (required when --month is used)")

    if not query_text and not args.recursive and not args.type and not args.directory:
        parser.print_help()
        return

    # Configuration and sort restoring
    user_config = load_config()
    if args.sort:
        user_config["last_sort_order"] = args.sort
        save_config(user_config)
    elif "last_sort_order" in user_config:
        args.sort = user_config["last_sort_order"]

    # Options for the main query. In recursive mode the main query only looks
    # for folders; limit/offset/type are then carried in other_options instead.
    main_options = {}
    if args.recursive is not None:
        main_options["type"] = "folder"
    else:
        if args.limit is not None:
            main_options["limit"] = args.limit
        if args.offset is not None:
            main_options["offset"] = args.offset
        if args.type:
            main_options["type"] = args.type
    if args.directory:
        main_options["directory"] = args.directory
    if args.year is not None:
        main_options["year"] = args.year
    if args.month is not None:
        main_options["month"] = args.month
    if args.day is not None:
        main_options["day"] = args.day
    if args.sort:
        main_options["sort"] = args.sort

    other_options = {
        "exclude": args.exclude,
        "id": args.id,
        "konsole": args.konsole,
        "limit": args.limit if args.limit and args.recursive is not None else 99999999999,
        "offset": args.offset if args.offset and args.recursive is not None else 0,
        "recursive": args.recursive,
        "recursive_indent": args.recursive_indent or "",
        "recursive_exclude": args.recursive_exclude,
        "sort": args.sort,
        "type": args.type if args.recursive is not None else None,
        "verbose": args.verbose,
    }

    if other_options["verbose"]:
        print(f"Query: '{query_text}'")
        print(f"Main Options: {main_options}")
        print(f"Other Options: {other_options}")
        print("-" * 30)

    try:
        searcher = BagheeraSearcher()
        files_count = 0
        # Consume the generator returned by the library
        for item in searcher.search(query_text, main_options, other_options):
            if other_options["konsole"]:
                output = f"file:/'{item['path']}'"
            else:
                output = item["path"]
            if other_options["id"]:
                output += f" [ID: {item['id']}]"
            print(output)
            files_count += 1
        if other_options["verbose"]:
            if files_count == 0:
                print("No results found.")
            else:
                print(f"Total: {files_count} files found.")
    except FileNotFoundError as e:
        # Diagnostics go to stderr so they never mix with the result list.
        print(e, file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error executing search: {e}", file=sys.stderr)
        sys.exit(1)
if __name__ == "__main__":
    # Register the Ctrl+C handler before any work starts.
    signal.signal(signal.SIGINT, signal_handler)
    try:
        main()
    except Exception as exc:
        # Last-resort boundary: report the failure and exit with status 1.
        print(f"Critical error: {exc}")
        raise SystemExit(1)

7
baloo_tools/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
from .baloo_tools import BalooTools
def get_resolution(id):
    """Simplified library entry point for a resolution lookup.

    Creates a fresh ``BalooTools`` instance and returns whatever its
    ``get_resolution`` method returns for *id*.
    """
    return BalooTools().get_resolution(id)

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
Baloo Tools Library
Helper functions to interact directly with the Baloo LMDB index.
"""
import json
import lmdb
import os
import sys
from typing import Tuple
class BalooTools:
    """Class to interact directly with the Baloo LMDB index."""

    def __init__(self) -> None:
        """Store the path of the current user's Baloo index file."""
        self.baloo_db_path = os.path.join(
            os.path.expanduser("~"), ".local/share/baloo/index"
        )

    def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
        """
        Retrieve the width and height of an image/video from the Baloo index.

        Args:
            file_id: The integer ID of the file.
            sep: Separator used (unused currently, kept for compatibility).

        Returns:
            A tuple of (width, height). (-1, -1) is returned whenever the
            values cannot be obtained: the ID does not fit in an unsigned
            64-bit key, the index cannot be opened, the ID has no entry, or
            the stored entry is not a JSON object. A missing individual
            property is reported as -1 in its position.
        """
        not_found = (-1, -1)

        # Keys are the ID as 8 bytes, little-endian, unsigned. Negative or
        # oversized IDs cannot be encoded, so they cannot exist in the index.
        try:
            file_id_bytes = int.to_bytes(
                file_id, length=8, byteorder='little', signed=False
            )
        except OverflowError:
            return not_found

        try:
            # Using context managers ensures the environment and the
            # transaction are closed properly.
            with lmdb.Environment(
                self.baloo_db_path,
                subdir=False,
                readonly=True,
                lock=False,
                max_dbs=20
            ) as env:
                document_data_db = env.open_db(b'documentdatadb')
                with env.begin() as txn:
                    # Exact-key lookup; returns None when the ID has no entry.
                    value = txn.get(file_id_bytes, db=document_data_db)
        except lmdb.Error as e:
            print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
            return not_found

        if value is None:
            return not_found

        try:
            # ValueError covers both invalid UTF-8 and invalid JSON.
            jvalue = json.loads(bytes(value).decode())
        except ValueError:
            return not_found
        if not isinstance(jvalue, dict):
            return not_found

        # Baloo stores width in '26' and height in '27'
        return jvalue.get('26', -1), jvalue.get('27', -1)
# Module-level convenience wrapper. bagheera_search_lib.py imports
# `get_resolution` directly, so this name has to stay available here.
def get_resolution(file_id: int, sep: str = 'x') -> Tuple[int, int]:
    """Look up a file's resolution through a throwaway BalooTools instance."""
    return BalooTools().get_resolution(file_id, sep)
if __name__ == '__main__':
    # Manual testing aid: the first argument is a file ID written in
    # hexadecimal; the script prints "<width> <height>" for it.
    cli_args = sys.argv[1:]
    if cli_args:
        try:
            target_id = int(cli_args[0], 16)
            width, height = get_resolution(target_id)
            print(f"{width} {height}")
        except ValueError:
            print("Error: Please provide a valid hexadecimal file ID.", file=sys.stderr)
            sys.exit(1)

View File

@@ -0,0 +1,28 @@
cmake_minimum_required(VERSION 3.16)
project(baloo_query_wrapper)

# Extra CMake Modules (ECM) must be located first; its module path is used
# for the KF6 lookup below.
find_package(ECM REQUIRED NO_MODULE)
set(CMAKE_MODULE_PATH ${ECM_MODULE_PATH})

find_package(Qt6 REQUIRED COMPONENTS Core)
# CoreAddons is requested explicitly because KF6::CoreAddons is linked below;
# relying on it being pulled in as a side effect of another component is fragile.
find_package(KF6 REQUIRED COMPONENTS Baloo FileMetaData CoreAddons)

add_library(baloo_wrapper SHARED baloo_wrapper.cpp)

target_link_libraries(baloo_wrapper
    Qt6::Core
    KF6::Baloo
    KF6::BalooEngine
    KF6::FileMetaData
    KF6::CoreAddons
)

# Important: C++17 is required for the u"|" literal
set_target_properties(baloo_wrapper PROPERTIES
    CXX_STANDARD 17
    CXX_STANDARD_REQUIRED ON
    # Make sure symbols marked with default visibility are exported
    C_VISIBILITY_PRESET default
    CXX_VISIBILITY_PRESET default
    VISIBILITY_INLINES_HIDDEN OFF
)

View File

@@ -0,0 +1,9 @@
import ctypes
import glob
import os
# Locate a compiled wrapper next to this file and load it with ctypes.
_current_dir = os.path.dirname(__file__)
_so_pattern = os.path.join(_current_dir, "baloo_wrapper*.so")
_so_files = glob.glob(_so_pattern)
# When nothing matches, `baloo_lib` is simply not defined by this module.
if len(_so_files) > 0:
    baloo_lib = ctypes.CDLL(_so_files[0])

View File

@@ -0,0 +1,114 @@
#include <Baloo/Query>
#include <Baloo/ResultIterator>
#include <KFileMetaData/ExtractorCollection>
#include <KFileMetaData/SimpleExtractionResult>
#include <KFileMetaData/PropertyInfo>
#include <QMimeDatabase>
#include <QString>
#include <QStringList>
#include <QJsonDocument>
#include <QJsonObject>
#include <QJsonArray>
#include <vector>
#include <string>
// Usamos extern "C" para evitar el "name mangling" de C++
extern "C" {
// Añadimos el atributo para forzar la visibilidad pública del símbolo
__attribute__((visibility("default")))
const char* execute_baloo_query(const char* options_json) {
// Parse JSON options
QJsonDocument doc = QJsonDocument::fromJson(options_json);
QJsonObject options = doc.object();
Baloo::Query q;
if (options.contains("query")) {
q.setSearchString(options["query"].toString());
}
if (options.contains("limit")) {
q.setLimit(options["limit"].toInt());
}
if (options.contains("offset")) {
q.setOffset(options["offset"].toInt());
}
if (options.contains("type")) {
q.addType(options["type"].toString());
}
if (options.contains("directory")) {
q.setIncludeFolder(options["directory"].toString());
}
if (options.contains("year")) {
if (options.contains("month")) {
if (options.contains("day")) {
q.setDateFilter(options["year"].toInt(), options["month"].toInt(), options["day"].toInt());
} else {
q.setDateFilter(options["year"].toInt(), options["month"].toInt(), 0);
}
} else {
q.setDateFilter(options["year"].toInt(), 0, 0);
}
}
if (options.contains("sort")) {
if (options["sort"].toString() == QStringLiteral("auto")) {
q.setSortingOption(Baloo::Query::SortNone);
} else if (options["sort"].toString() == QStringLiteral("none")) {
q.setSortingOption(Baloo::Query::SortAuto);
}
}
Baloo::ResultIterator it = q.exec();
QJsonArray results;
while (it.next()) {
QJsonObject result;
result["path"] = it.filePath();
result["id"] = QString::fromUtf8(it.documentId());
results.append(result);
}
QJsonDocument responseDoc(results);
static std::string output;
output = responseDoc.toJson(QJsonDocument::Compact).toStdString();
return output.c_str();
}
}
extern "C" {
// Forzamos visibilidad para que ctypes lo vea
__attribute__((visibility("default")))
const char* get_file_properties(const char* path) {
QString filePath = QString::fromUtf8(path);
// Detectar tipo MIME
QMimeDatabase mimeDb;
QString mimeType = mimeDb.mimeTypeForFile(filePath).name();
// Obtener extractores para ese tipo
KFileMetaData::ExtractorCollection extractors;
QList<KFileMetaData::Extractor*> exList = extractors.fetchExtractors(mimeType);
// Extraer metadatos
KFileMetaData::SimpleExtractionResult result(filePath, mimeType);
for (KFileMetaData::Extractor* ex : exList) {
ex->extract(&result);
}
const auto props = result.properties();
if (props.isEmpty()) return "";
static std::string output;
output = "";
// Formateamos las propiedades como un string simple: "Clave:Valor|Clave:Valor"
for (auto it = props.constBegin(); it != props.constEnd(); ++it) {
KFileMetaData::PropertyInfo pi(it.key());
output += pi.name().toStdString() + ":" + it.value().toString().toStdString() + "|";
}
return output.c_str();
}
}

48
build.sh Executable file
View File

@@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Bundle bagheerasearch.py into a single executable with PyInstaller.
# Usage: build.sh [-v <python-version> | --version=<python-version>]

source .venv/bin/activate

# Pick up the requested Python version, if any (empty when none was given).
case "$1" in
    -v)          PY_VERSION="$2" ;;
    --version=*) PY_VERSION="${1#--version=}" ;;
    *)           PY_VERSION="" ;;
esac

# Only the versions listed here have a dedicated launcher; anything else
# falls back to the plain `pyinstaller` command.
case "$PY_VERSION" in
    3.8|3.9|3.10|3.11|3.12|3.13|3.14) PYINSTALLER="pyinstaller-$PY_VERSION" ;;
    *)                                PYINSTALLER=pyinstaller ;;
esac

# $PYINSTALLER \
#     --add-binary 'desktop/Desktogram.png:desktop' \
#     --add-binary 'locale/en/LC_MESSAGES/messages.mo:locale/en/LC_MESSAGES' \
#     --add-binary 'locale/es/LC_MESSAGES/messages.mo:locale/es/LC_MESSAGES' \
#     --add-binary 'locale/gl/LC_MESSAGES/messages.mo:locale/gl/LC_MESSAGES' \
#     --add-data 'js/downloader.js:js' \
#     --noconsole \
#     -F tagmanager.py

# Windows only.
# --icon=desktop/TagsManager.png \
# --hidden-import=imagesize \
# --hidden-import=word2number \

$PYINSTALLER \
    --add-binary="baloo_wrapper/build/libbaloo_wrapper.so:lib" \
    --onefile \
    -F bagheerasearch.py

deactivate

29
build_baloo_wrappers.sh Executable file
View File

@@ -0,0 +1,29 @@
#!/usr/bin/env bash
# Build libbaloo_wrapper.so with CMake and copy it into bagheera_search_lib/.
# Usage: build_baloo_wrappers.sh [-o|--overwrite]
#   -o, --overwrite   rebuild even when the library is already present

OVERWRITE=0
while [ -n "$1" ]; do
    case "$1" in
        -o|--overwrite)
            OVERWRITE=1
            ;;
    esac
    shift
done

MAIN_PATH="$PWD"
DEST_PATH="$MAIN_PATH/bagheera_search_lib"
LIB_NAME="libbaloo_wrapper.so"
WORK_PATH="$MAIN_PATH/baloo_wrapper"
FUNC_NAME1="execute_baloo_query"
FUNC_NAME2="get_file_properties"

# Build when forced or when the library is missing, and only if the sources
# exist. The braces make the intended grouping explicit (|| and && have equal
# precedence in the shell).
if { (( OVERWRITE )) || ! [ -f "$DEST_PATH/$LIB_NAME" ]; } && [ -d "$WORK_PATH" ]; then
    BUILD_PATH="$WORK_PATH/build"

    # The build runs in a subshell so the caller's working directory is kept.
    # DEST_PATH is created up front so `cp` can never produce a plain file
    # with the directory's name.
    if ! (
        rm -Rf "$BUILD_PATH" \
            && mkdir -p "$BUILD_PATH" "$DEST_PATH" \
            && cd "$BUILD_PATH" \
            && cmake .. \
            && make \
            && cp "$LIB_NAME" "$DEST_PATH/"
    ); then
        echo "Error: failed to build $LIB_NAME" >&2
        exit 1
    fi

    # Sanity checks on the fresh library: Baloo linkage and exported symbols.
    ldd "$DEST_PATH/$LIB_NAME" | grep Baloo
    nm -D "$DEST_PATH/$LIB_NAME" | grep "$FUNC_NAME1"
    nm -D "$DEST_PATH/$LIB_NAME" | grep "$FUNC_NAME2"
fi

1
requirements.txt Normal file
View File

@@ -0,0 +1 @@
lmdb

127
setup.py Normal file
View File

@@ -0,0 +1,127 @@
import os
import subprocess
import sys
from setuptools import setup
from setuptools.command.install import install
from setuptools.command.develop import develop
from setuptools.command.build_ext import build_ext
def compile_wrapper():
    """
    Compile libbaloo_wrapper.so from baloo_wrapper/baloo_wrapper.cpp.

    Compiler and linker flags are collected from pkg-config for each KF6/Qt6
    package; packages pkg-config does not know are skipped with a warning.
    A fixed set of include directories is appended so that the nested
    KFileMetaData headers resolve. The library is written next to this file.

    Exits the process with status 1 when the source file is missing, when
    pkg-config or g++ cannot be executed, or when the compiler fails.

    Raises:
        FileNotFoundError: the compiler reported success but the output
            library does not exist.
    """
    base_path = os.path.abspath(os.path.dirname(__file__))
    source_file = os.path.join(base_path, 'baloo_wrapper', 'baloo_wrapper.cpp')
    output_lib = os.path.join(base_path, 'libbaloo_wrapper.so')

    if not os.path.exists(source_file):
        print(f"✘ Error: Source file not found at {source_file}")
        sys.exit(1)

    # pkg-config package names (common names in KF6)
    packages = [
        'KF6Baloo',
        'KF6BalooEngine',
        'KF6FileMetadata',
        'KF6CoreAddons',
        'Qt6Core'
    ]

    cflags = []
    libs = []

    print("Detecting KF6 dependencies...")
    for pkg in packages:
        try:
            cf = subprocess.check_output(['pkg-config', '--cflags', pkg],
                                         text=True).split()
            lb = subprocess.check_output(['pkg-config', '--libs', pkg],
                                         text=True).split()
        except FileNotFoundError:
            # The pkg-config executable itself is missing; nothing can be
            # detected, so stop with a clear message instead of a traceback.
            print("✘ Error: pkg-config is not installed or not in PATH")
            sys.exit(1)
        except subprocess.CalledProcessError:
            print(f" [!] Warning: pkg-config could not find {pkg}")
        else:
            cflags.extend(cf)
            libs.extend(lb)
            print(f" [OK] {pkg}")

    # Extra include directories: the intermediate level is added so that
    # <KFileMetaData/ExtractorCollection> is found under
    # /usr/include/KF6/KFileMetaData/KFileMetaData/
    extra_includes = [
        '-I/usr/include/KF6',
        '-I/usr/include/KF6/KFileMetaData',  # Lets KFileMetaData/ resolve
        '-I/usr/include/qt6',
        '-I/usr/include/qt6/QtCore'
    ]

    # De-duplicate while keeping the original order: a set would shuffle the
    # flags, and the order of -I/-L/-l options is significant to the toolchain.
    cflags = list(dict.fromkeys(cflags + extra_includes))
    libs = list(dict.fromkeys(libs))

    # C++17 build command mirroring CMakeLists.txt
    compile_cmd = [
        'g++', '-shared', '-o', output_lib,
        '-fPIC', '-std=c++17',
        source_file
    ] + cflags + libs

    print(f"Executing compilation:\n{' '.join(compile_cmd)}")
    try:
        subprocess.check_call(compile_cmd)
    except FileNotFoundError:
        print("\n✘ Compilation failed: g++ is not installed or not in PATH.")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"\n✘ Compilation failed (Exit code {e.returncode}).")
        sys.exit(1)

    if os.path.exists(output_lib):
        print(f"✔ Successfully compiled: {output_lib}")
    else:
        raise FileNotFoundError("Compilation finished but .so file is missing.")
class CustomInstall(install):
    """``install`` command that compiles the Baloo wrapper first."""

    def run(self):
        """Call compile_wrapper(), then run the standard install step."""
        compile_wrapper()
        super().run()
class CustomDevelop(develop):
    """``develop`` command that compiles the Baloo wrapper first."""

    def run(self):
        """Call compile_wrapper(), then run the standard develop step."""
        compile_wrapper()
        super().run()
class CustomBuildExt(build_ext):
    """``build_ext`` command that compiles the Baloo wrapper first."""

    def run(self):
        """Call compile_wrapper(), then run the standard build_ext step."""
        compile_wrapper()
        super().run()
# Package metadata and build hooks for the bagheerasearch distribution.
setup(
    name="bagheerasearch",
    version="1.0.0",
    author="Ignacio Serantes",
    description="Bagheera Search Tool & Lib (KF6/C++17)",
    # The CLI lives in the top-level module bagheerasearch.py.
    py_modules=["bagheerasearch"],
    package_dir={
        "": ".",
        "bagheera_query_parser_lib": "bagheera_query_parser_lib",
        "bagheera_search_lib": "bagheera_search_lib",
        "baloo_tools": "baloo_tools",
    },
    packages=[
        "bagheera_query_parser_lib",
        "bagheera_search_lib",
        "baloo_tools"
    ],
    install_requires=["lmdb"],
    # Installs a `bagheerasearch` command that calls bagheerasearch.main().
    entry_points={'console_scripts': ['bagheerasearch=bagheerasearch:main']},
    # Each of these commands runs compile_wrapper() before its normal work.
    cmdclass={
        'install': CustomInstall,
        'develop': CustomDevelop,
        'build_ext': CustomBuildExt,
    },
    # libbaloo_wrapper.so is the file compile_wrapper() writes next to setup.py.
    data_files=[('lib', ['libbaloo_wrapper.so'])],
    include_package_data=True,
    zip_safe=False,
)

90
syntax.txt Normal file
View File

@@ -0,0 +1,90 @@
bagheera [options] query [--recursive [query]] [options]
options:
-a | --date Date filter. Format year[-month[-day]]
-d | --directory Base directory to execute main query.
-h | --help [help] Print help. Optional help parameter can be 'attributes', 'dates', 'examples', 'having', 'query', 'recursive' or 'types'.
-g | --having <expression> Results not matching expression are ignored.
-i | --ids Add file ID prior to file name.
-l | --limit Maximum number of results to print.
-o | --offset Offset to first result to print.
-r | --recursive [query] Enable directory results recursion. An optional query for recursive results could be used.
-e | --recursive-depth Maximum directory recursion depth.
-c | --recursive-having <expression> Recursive results not matching expression are ignored.
-y | --recursive-type <type> File type filter for recursive results, use --help types to obtain information about available types.
-t | --silent Print basic info only, aka only print summary.
-s | --sort < sort [ < ASC | DES > ] > Sort obtained results, by default results are not sorted. Sort types available are: automatic, date, default and name.
-t | --type <type> File type filter for results, use --help types to obtain information about available types.
-v | --verbose More verbosity.
--version Print version information.
Remarks:
· query searches only for independent words with size greater than 3, even if the phrase is quoted or double quoted.
· having filters only in results obtained by queries, but supports quoted or double quoted phrases and is not limited by word size.
· the NOT logical operator has a high processing and memory cost, so it is preferable to limit its use.
· comparing two attributes is possible but is more expensive than comparing against a constant value. The same remark as for NOT applies here.
· recursive mode recurses over all directories found; without a recursive query or the recursive-depth parameter it can return many results if the directory tree has many branches and leaves.
· when there are duplicate parameters only last parameter value is used and previous parameter values are ignored.
· sort parameter must process all results first, so there is a performance impact that depends on the size of the result set.
Syntax reference:
<query> ::= <expression> | [ NOT <sep> ] ( <expression> )
<expression> ::= [ NOT <sep> ] <term> [ <sep> [ < AND | OR > <sep> ] [ NOT <sep> ] <term> ]
<sep> ::= <space> | <tab>
<term> ::= <simple_term> | <quoted_term> | <double_quoted_term>
<simple_term> ::= <value> | [ <attribute> <logop> [ <comp_value> ] ] | <date_expr> | <image_orientation_expr>
<quoted_term> ::= <quote> <simple_term> <quote>
<double_quoted_term> ::= <double_quote> <simple_term> <double_quote>
<comp_value> ::= <attribute> | <value>
<value> ::= <date> | <integer> | <number> | <rating> | <string>
<attribute> ::= album | albumartist | artist | author | composer | lyricist | height | person | title | width | ...
<date> ::= yyyymmdd | yyyy-mm-dd | dd-mm-yyyy | dd/mm/yyyy | [ + | - ] <number> [ d | m | w | y ]
<integer> ::= <unsigned integer>
<number> ::= <integer> [ . <integer> ]
<rating> ::= <1..10>
<string> ::= < <char> | <quote> [ <char_without_quote> ] <quote> | <double_quote> [ <char_without_double_quote> ] <double_quote> >...
<char> ::= < any utf-8 character except <quote> or <double_quote> >
<char_without_quote> ::= < any utf-8 character except <quote> >
<char_without_double_quote> ::= < any utf-8 character except <double_quote> >
<space> ::= < >
<tab> ::= < >
<quote> ::= <'>
<double_quote> ::= <">
<logop> ::= = | : | != | <> | >= | <= | > | <
<date_expr> ::= MODIFIED < TODAY | YESTERDAY | LAST [ <date_length> ] <date_period> | [ <date_length> ] <date_period> AGO >
<date_length> ::= <number> | ONE | TWO | THREE | ...
<date_period> ::= DAYS | DAY | MONTHS | MONTH | WEEKS | WEEK | YEARS | YEAR
<image_orientation_expr> ::= LANDSCAPE | PORTRAIT | SQUARE
<having_expr> ::= <having_term> | [ NOT <sep> ] ( <having_term> )
<having_term> ::= [ NOT <sep> ] <having_simple_term> [ <sep> [ < AND | OR > <sep> ] [ NOT <sep> ] <having_simple_term> ]
<having_simple_term> ::= <string> | <quoted_string> | <double_quoted_string>