This commit is contained in:
Ignacio Serantes
2026-05-09 10:26:57 +02:00
parent 3fb55ee4f3
commit 6207cab27a
7 changed files with 622 additions and 177 deletions

View File

@@ -1,7 +1,14 @@
from .bagheera_search import BagheeraSearcher from .bagheera_search import (
BagheeraSearcher, EvaluateExpression
)
def search(query): def search(query):
"""Interfaz simplificada para la librería.""" """Interfaz simplificada para la librería."""
bs = BagheeraSearcher() bs = BagheeraSearcher()
return bs.search(query) return bs.search(query)
def create_evaluator(expression):
    """Compile a filter expression into a reusable evaluator.

    Builds a fresh ``EvaluateExpression`` and returns whatever its
    ``compile`` method produces for *expression*.
    """
    return EvaluateExpression().compile(expression)

View File

@@ -10,9 +10,136 @@ import sys
from pathlib import Path from pathlib import Path
from typing import Dict, Any, Iterator, Optional, Union from typing import Dict, Any, Iterator, Optional, Union
from baloo_tools import get_resolution from baloo_tools import (get_info, get_tags)
from bagheera_query_parser_lib import parse_date from bagheera_query_parser_lib import parse_date
from pyparsing import (
alphanums, one_of, infix_notation,
Group, opAssoc, ParserElement, QuotedString, Word
)
ParserElement.enable_packrat()
def expression_contains_property(text):
    """Tell whether *text* compares any field other than ``tags``.

    A comparison is a word followed (optionally after spaces or tabs) by one
    of the operators ``>= <= != = > < :``.  Words equal to ``tags`` are
    skipped; matching is case-insensitive.
    """
    comparison = re.compile(
        r"\b(?!tags\b)\w+[ \t]*(?:>=|<=|!=|=|>|<|:)", re.IGNORECASE
    )
    return comparison.search(text) is not None
def expression_contains_tags(text):
    """Tell whether *text* contains a comparison on the ``tags`` field.

    Looks for the whole word ``tags`` followed (optionally after spaces or
    tabs) by one of the operators ``>= <= != = > < :``; matching is
    case-insensitive.
    """
    comparison = re.compile(
        r"\btags\b[ \t]*(?:>=|<=|!=|=|>|<|:)", re.IGNORECASE
    )
    return comparison.search(text) is not None
class EvaluateExpression:
    """Compile boolean filter expressions into reusable predicates.

    An expression is made of conditions of the form ``operand <op> operand``
    (operators ``>= <= != = > < :``) or bare terms, combined with ``NOT``,
    ``AND``, ``OR`` and parentheses.  A bare term is evaluated as
    "the ``path`` field contains this term".  The compiled result is a
    callable that takes a dictionary of file data and returns a bool.
    """

    def __init__(self):
        # Pre-define the grammar structure during initialization
        self.grammar = self._build_grammar()

    @staticmethod
    def _values_equal(l_val, r_val):
        """
        Equality used by the '=' and '!=' operators.

        Values parsed from an expression are always strings while the data
        being filtered may hold numbers, so raw ``==`` would never match a
        numeric field.  Two values are equal when their text forms match
        case-insensitively, or when both convert to the same float.
        """
        if str(l_val).lower() == str(r_val).lower():
            return True
        try:
            return float(l_val) == float(r_val)
        except (ValueError, TypeError):
            return False

    def _compare_single(self, l_val, op, r_val):
        """
        Atomic comparison logic for individual values.

        Args:
            l_val: Left-hand value (usually the field value).
            op: One of '=', '!=', '>', '<', '>=', '<=', ':'.
            r_val: Right-hand value (usually the literal from the expression).

        Returns:
            The boolean outcome of the comparison; False for unknown operators.
        """
        if op == "=":
            return self._values_equal(l_val, r_val)
        if op == "!=":
            return not self._values_equal(l_val, r_val)
        if op == ":":
            # "contains": case-insensitive substring test
            return str(r_val).lower() in str(l_val).lower()

        if op in (">", "<", ">=", "<="):
            try:
                # Attempt to treat both sides as floats
                curr_l, curr_r = float(l_val), float(r_val)
            except (ValueError, TypeError):
                # Fallback to string comparison if conversion fails
                curr_l, curr_r = str(l_val), str(r_val)
            if op == ">":
                return curr_l > curr_r
            if op == "<":
                return curr_l < curr_r
            if op == ">=":
                return curr_l >= curr_r
            return curr_l <= curr_r

        return False

    def _compare(self, data, left_key, op, right_val):
        """
        Main comparison router. Checks if the field is a list or a single value.

        Args:
            data: Dictionary with the file data; keys are matched
                case-insensitively.
            left_key: Field name for the left-hand side.
            op: Comparison operator.
            right_val: Literal, or the name of another field in ``data``.

        Returns:
            True when the comparison holds; for list-valued fields, True when
            any element satisfies it.
        """
        # Normalize data keys to lowercase for case-insensitive lookup
        normalized_data = {k.lower(): v for k, v in data.items()}

        # Extract the left-hand value (the field from the JSON).
        # NOTE(review): when the field is absent the key name itself is used
        # as the value, so e.g. "width > 100" is evaluated as a string
        # comparison for files without a width - confirm this is intended.
        l_val = normalized_data.get(left_key.lower(), left_key)

        # Extract the right-hand value (check if it's a literal or another field)
        r_val = normalized_data.get(str(right_val).lower(), right_val)

        # IF THE FIELD VALUE IS A LIST
        if isinstance(l_val, list):
            # Return True if ANY item in the list satisfies the condition
            return any(self._compare_single(item, op, r_val) for item in l_val)

        # IF THE FIELD VALUE IS A SINGLE DATA POINT
        return self._compare_single(l_val, op, r_val)

    def _build_grammar(self):
        """
        Defines the pyparsing grammar for the expression engine.

        Returns:
            The parser element produced by ``infix_notation``; parsing an
            expression with it yields a callable taking the data dictionary.
        """
        operators = one_of(">= <= != = > < :")
        identifier = Word(alphanums + "_./\\")
        quoted_string = QuotedString("'") | QuotedString('"')
        operand = quoted_string | identifier

        # Define basic condition (e.g., "width > 100" or "word")
        condition = Group((operand + operators + operand) | operand)

        # Attach the parse action to convert tokens into executable functions (lambdas)
        condition.set_parse_action(lambda t: self._create_evaluator_func(t[0]))

        return infix_notation(
            condition,
            [
                # t[0] is ["NOT", <callable>]; negate the operand
                ("NOT", 1, opAssoc.RIGHT, lambda t: (
                    lambda data: not t[0][1](data))),
                # t[0] mixes callables with the operator strings; keep callables only
                ("AND", 2, opAssoc.LEFT, lambda t: (
                    lambda data: all(f(data) for f in t[0] if callable(f)))),
                ("OR", 2, opAssoc.LEFT, lambda t: (
                    lambda data: any(f(data) for f in t[0] if callable(f)))),
            ],
        )

    def _create_evaluator_func(self, tokens):
        """
        Creates a closure that captures tokens and waits for the data dictionary.
        """
        if len(tokens) == 1:
            # Rule: Single term -> path CONTAINS term
            return lambda data: self._compare(data, 'path', ':', tokens[0])

        # Rule: Explicit triplet (key, operator, value)
        return lambda data: self._compare(data, tokens[0], tokens[1], tokens[2])

    def compile(self, expression):
        """
        Parses the expression once and returns a reusable function.

        Args:
            expression: The filter expression text.

        Returns:
            A callable taking the data dictionary and returning a bool.  If
            the expression cannot be parsed, the error is reported on stderr
            and a callable that always returns False is returned.
        """
        try:
            return self.grammar.parse_string(expression, parse_all=True)[0]
        except Exception as e:
            # Report on stderr so the message never mixes with search output
            print(f"Compilation Error: {e}", file=sys.stderr)
            # Fallback: return a function that always fails gracefully
            return lambda data: False
class BagheeraSearcher: class BagheeraSearcher:
"""Class to handle Baloo searches and interact with the C wrapper.""" """Class to handle Baloo searches and interact with the C wrapper."""
@@ -69,84 +196,8 @@ class BagheeraSearcher:
return lib return lib
def check_keywords(
    self, text: str, query: str, file_path: str = "", file_id: int = 0
) -> bool:
    """
    Evaluates if a text meets a logical query.

    Supports: AND, OR, ( ), dimensions (width=height, etc.), and shapes.

    The query is translated into a single regular expression made of
    lookaheads and matched against the lowercased text.

    Args:
        text: Text to test; it is lowercased before matching.
        query: Logical query string.
        file_path: When non-empty, shape keywords and width/height
            comparisons in the query are resolved using the file resolution.
        file_id: ID passed to get_resolution() to obtain width and height.

    Returns:
        True if the text satisfies the query, False otherwise (including
        when the generated regular expression is invalid).
    """
    # NOTE(review): block nesting reconstructed from a flattened listing;
    # the two re.sub() calls are assumed to sit inside this `if` because
    # replace_dim only exists there - confirm against the original file.
    if file_path:
        try:
            w, h = get_resolution(file_id)
        except Exception:
            # Any failure is treated as "resolution unknown"
            w, h = -1, -1

        def replace_dim(match: re.Match) -> str:
            # Without a valid resolution every dimension test is false
            if w <= 0 or h <= 0:
                return "__false__"

            s = match.group(0).upper()
            if "PORTRAIT" in s:
                return "__true__" if w < h else "__false__"
            if "LANDSCAPE" in s:
                return "__true__" if w > h else "__false__"
            if "SQUARE" in s:
                return "__true__" if w == h else "__false__"

            # Otherwise the match is "width <op> height"; group 1 is the operator
            op = match.group(1)
            ops_map = {
                "=": w == h,
                ">": w > h,
                "<": w < h,
                ">=": w >= h,
                "<=": w <= h,
                "!=": w != h,
            }
            return "__true__" if ops_map.get(op, False) else "__false__"

        # Replace shape keywords with the __true__/__false__ placeholders
        query = re.sub(
            r"\b(PORTRAIT|LANDSCAPE|SQUARE)\b",
            replace_dim,
            query,
            flags=re.IGNORECASE,
        )
        # Replace width/height comparisons with the same placeholders
        query = re.sub(
            r"\bwidth\s*(<=|>=|!=|<|>|=)\s*height\b",
            replace_dim,
            query,
            flags=re.IGNORECASE,
        )

    text = text.lower()
    # Whitespace between two word characters becomes an explicit AND
    query = re.sub(r"(?<=\w)\s+(?=\w)", " AND ", query)

    tokens = re.findall(r"\(|\)|OR|AND|[^\s()]+", query)
    regex_parts = []

    for t in tokens:
        if t in ("(", ")"):
            regex_parts.append(t)
        elif t == "OR":
            regex_parts.append("|")
        elif t == "AND":
            # Consecutive lookaheads already act as a conjunction
            continue
        elif t == "__true__":
            # Lookahead that always succeeds
            regex_parts.append("(?=.*)")
        elif t == "__false__":
            # Empty negative lookahead: never matches
            regex_parts.append("(?!)")
        else:
            # Plain term: the text must contain it somewhere
            regex_parts.append(rf"(?=.*{re.escape(t)})")

    # Lowercased so the terms compare against the lowercased text
    final_regex = "".join(regex_parts).lower()

    try:
        return bool(re.search(f"^{final_regex}.*", text, re.DOTALL))
    except re.error:
        return False
def get_baloo_info(self, file_path: str) -> Dict[str, str]: def get_baloo_info(self, file_path: str) -> Dict[str, str]:
"""Retrieves properties for a specific file from Baloo.""" """Extract properties for a specific file directly from file."""
result = self.baloo_lib.get_file_properties(file_path.encode("utf-8")) result = self.baloo_lib.get_file_properties(file_path.encode("utf-8"))
if not result: if not result:
return {} return {}
@@ -181,6 +232,8 @@ class BagheeraSearcher:
options: Dict[str, Any], options: Dict[str, Any],
search_opts: Dict[str, Any], search_opts: Dict[str, Any],
files_count: int, files_count: int,
exclude_evaluator: Any,
exclude_sources: Dict[str, bool]
) -> Iterator[Dict[str, Any]]: ) -> Iterator[Dict[str, Any]]:
"""Executes a recursive search yielded item by item.""" """Executes a recursive search yielded item by item."""
options["query"] = query_text options["query"] = query_text
@@ -195,15 +248,20 @@ class BagheeraSearcher:
continue continue
self.ids_processed.add(file_id) self.ids_processed.add(file_id)
rec_exclude = search_opts.get("recursive_exclude")
if not rec_exclude or not self.check_keywords( if exclude_evaluator:
item["path"], rec_exclude, item["path"], file_id file_info = {'path': item["path"]}
): if exclude_sources.get('properties'):
file_info = file_info | get_info(file_id)
if exclude_sources.get('tags'):
file_info = file_info | get_tags(file_id)
else:
file_info = None
if not file_info or not exclude_evaluator(file_info):
if files_count >= search_opts.get("offset", 0): if files_count >= search_opts.get("offset", 0):
search_opts["limit"] -= 1 search_opts["limit"] -= 1
yield item yield item
files_count += 1 files_count += 1
def search( def search(
@@ -215,6 +273,30 @@ class BagheeraSearcher:
""" """
Main search generator. Yields file dictionaries. Main search generator. Yields file dictionaries.
""" """
if search_opts['exclude']:
ee = EvaluateExpression()
exclude_evaluator = ee.compile(search_opts['exclude'])
exclude_sources = {}
if expression_contains_property(search_opts['exclude']):
exclude_sources['properties'] = True
if expression_contains_tags(search_opts['exclude']):
exclude_sources['tags'] = True
else:
exclude_evaluator = None
exclude_sources = {}
if search_opts['recursive_exclude']:
ee = EvaluateExpression()
recurse_exclude_evaluator = ee.compile(search_opts['recursive_exclude'])
recurse_exclude_sources = {}
if expression_contains_property(search_opts['recursive_exclude']):
recurse_exclude_sources['properties'] = True
if expression_contains_tags(search_opts['recursive_exclude']):
recurse_exclude_sources['tags'] = True
else:
recurse_exclude_evaluator = None
recurse_exclude_sources = {}
main_options["query"] = parse_date(query_text) main_options["query"] = parse_date(query_text)
files = self._execute_query(main_options) files = self._execute_query(main_options)
@@ -241,15 +323,22 @@ class BagheeraSearcher:
continue continue
self.ids_processed.add(file_id) self.ids_processed.add(file_id)
exclude_pattern = search_opts.get("exclude")
if not exclude_pattern or not self.check_keywords( if exclude_evaluator:
item["path"], exclude_pattern, item["path"], file_id file_info = {'path': item["path"]}
): if exclude_sources.get('properties'):
file_info = file_info | get_info(file_id)
if exclude_sources.get('tags'):
file_info = file_info | get_tags(file_id)
else:
file_info = None
if not file_info or not exclude_evaluator(file_info):
if is_recursive: if is_recursive:
main_options["directory"] = item["path"] main_options["directory"] = item["path"]
yield from self.search_recursive( yield from self.search_recursive(
query_text, main_options, search_opts, files_count query_text, main_options, search_opts, files_count,
recurse_exclude_evaluator, recurse_exclude_sources
) )
else: else:
yield item yield item

View File

@@ -5,7 +5,7 @@ Bagheera Search Tool - CLI Client
""" """
__appname__ = "BagheeraSearch" __appname__ = "BagheeraSearch"
__version__ = "1.0" __version__ = "1.1"
__author__ = "Ignacio Serantes" __author__ = "Ignacio Serantes"
__email__ = "kde@aynoa.net" __email__ = "kde@aynoa.net"
__license__ = "LGPL" __license__ = "LGPL"
@@ -24,9 +24,9 @@ from bagheera_search_lib import BagheeraSearcher
# --- CONFIGURATION --- # --- CONFIGURATION ---
PROG_NAME = "Bagheera Search Tool" PROG_NAME = "Bagheera Search Tool"
PROG_ID = "bagheerasearch" PROG_ID = "bagheerasearch"
PROG_VERSION = "1.0" PROG_VERSION = __version__
PROG_BY = "Ignacio Serantes" PROG_BY = __author__
PROG_DATE = "2026-03-19" PROG_DATE = "2026-05-09"
CONFIG_DIR = Path.home() / ".config" / PROG_ID CONFIG_DIR = Path.home() / ".config" / PROG_ID
CONFIG_FILE = CONFIG_DIR / "config.json" CONFIG_FILE = CONFIG_DIR / "config.json"
@@ -61,9 +61,9 @@ Baloo offers a rich syntax for searching through your files. Certain attributes
For example 'type' can be used to filter for files based on their general type: For example 'type' can be used to filter for files based on their general type:
type:Audio or type:Document type:Audio OR type:Document
The following comparison operators are supported, but note that 'not equal' operator is not available. The following comparison operators are supported, but note that 'not equal' (!=) operator is not available.
· : - contains (only for text comparison) · : - contains (only for text comparison)
· = - equal · = - equal
· > - greater than · > - greater than
@@ -72,7 +72,6 @@ The following comparison operators are supported, but note that 'not equal' oper
· <= - less than or equal to · <= - less than or equal to
Currently the following types are supported: Currently the following types are supported:
· Archive · Archive
· Folder · Folder
· Audio · Audio
@@ -83,9 +82,77 @@ Currently the following types are supported:
· Presentation · Presentation
· Text · Text
These expressions can be combined using AND or OR and additional parenthesis, but note that 'NOT' logical operator is not available. These expressions can be combined using logical operators 'AND' or 'OR' and additional parenthesis, but note that 'NOT' logical operator is not available.
The full list of properties which can be searched is listed below. They are grouped by file types.
All Files
· filename
· mimetype
· modified
· rating
· tags
· userComment
Audio
· Album
· AlbumArtist
· Artist
· BitRate
· Channels
· Comment
· Composer
· Duration
· Genre
· Lyricist
· ReleaseYear
· SampleRate
· TrackNumber
Documents
· Author
· Copyright
· CreationDate
· Generator
· Keywords
· Language
· LineCount
· PageCount
· Publisher
· Subject
· Title
· WordCount
Media
· AspectRatio
· FrameRate
· Height
· ImageDateTime
· ImageMake
· ImageModel
· ImageOrientation
· Images
· PhotoApertureValue
· PhotoDateTimeOriginal
· PhotoExposureBiasValue
· PhotoExposureTime
· PhotoFlash
· PhotoFNumber
· PhotoFocalLength
· PhotoFocalLengthIn35mmFilm
· PhotoGpsAltitude
· PhotoGpsLatitude
· PhotoGpsLongitude
· PhotoISOSpeedRatings
· PhotoMeteringMode
· PhotoPixelXDimension
· PhotoPixelYDimension
· PhotoSaturation
· PhotoSharpness
· PhotoWhiteBalance
· Width
[... omitted for brevity, but includes the full list of searchable properties as in your original script ...]
{PROG_NAME} recognizes some natural language sentences in English, as long as they are capitalized, and transforms them into queries that can be interpreted by the search engine. {PROG_NAME} recognizes some natural language sentences in English, as long as they are capitalized, and transforms them into queries that can be interpreted by the search engine.
@@ -93,21 +160,18 @@ Supported natural language sentences and patterns for queries are:
· MODIFIED TODAY · MODIFIED TODAY
· MODIFIED YESTERDAY · MODIFIED YESTERDAY
· MODIFIED THIS [ DAY | WEEK | MONTH | YEAR ] · MODIFIED THIS [ DAY | WEEK | MONTH | YEAR ]
· LAST <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ] · MODIFIED LAST <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ]
· <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ] AGO · MODIFIED <NUMBER> [ DAYS | WEEKS | MONTHS | YEARS ] AGO
<NUMBER> can be any number or a number text from ONE to TWENTY. <NUMBER> can be any number or a number text from ONE to TWENTY.
Remarks: LAST DAY, if used, is interpreted as YESTERDAY.
Supported expressions for --exclude and --recursive-exclude are: The --exclude and --recursive-exclude options allow you to filter files out of the results. The syntax for both options supports parentheses and logical operators (AND, OR, and NOT) to combine multiple patterns.
· width<CMP_OP>height - only if file has width and height properties In addition to standard query comparison operators, the not equal (!=) operator is available for comparing properties against specific values. Furthermore, you can compare two properties directly; for example, 'width > height' is a valid expression.
· height<CMP_OP>width - only if file has width and height properties Remarks:
· PORTRAIT - only if file width is greater or equal to height · All text comparison are case insensitive.
· LANDSCAPE - only if file height is greater or equal to width · Tags comparisons are performed against both individual full tag string (using the '/' character as a level separator) and each individual level. All individual level values are normalized to lowercase and stripped of accents or diacritics. For example, a file tagged as 'Opera,Person/María Callas,Singer' would match any of the following elements: ['Opera', 'Person/María Callas', 'Singer', 'callas', 'maria', 'opera', 'person', 'singer']."
· SQUARE - only if file width equals to height · Only text and numeric data are supported."""
<CMP_OP> can be: != | >= | <= | = | > | <"""
print(help_query) print(help_query)
@@ -146,7 +210,7 @@ def main():
parser.add_argument("--day", type=int, help="day fixed filter, --month is required") parser.add_argument("--day", type=int, help="day fixed filter, --month is required")
parser.add_argument("--month", type=int, help="month fixed filter, --year is required") parser.add_argument("--month", type=int, help="month fixed filter, --year is required")
parser.add_argument("--year", type=int, help="year filter fixed filter") parser.add_argument("--year", type=int, help="year fixed filter")
parser.add_argument("--help-query", action="store_true", help="show query syntax help") parser.add_argument("--help-query", action="store_true", help="show query syntax help")
parser.add_argument("--version", action="store_true", help="show version information") parser.add_argument("--version", action="store_true", help="show version information")
@@ -163,7 +227,7 @@ def main():
raise ValueError("Missing --month (required when --day is used)") raise ValueError("Missing --month (required when --day is used)")
if args.month is not None and args.year is None: if args.month is not None and args.year is None:
raise ValueError("Missing --year (requered when --month is used)") raise ValueError("Missing --year (required when --month is used)")
if args.help_query: if args.help_query:
print_help_query() print_help_query()

View File

@@ -1,7 +1,19 @@
from .baloo_tools import BalooTools from .baloo_tools import BalooTools
def get_info(id):
    """Simplified interface to the library.

    Creates a ``BalooTools`` instance and returns the result of its
    ``get_info`` method for *id*.
    """
    return BalooTools().get_info(id)
def get_resolution(id): def get_resolution(id):
"""Interfaz simplificada para la librería.""" """Interfaz simplificada para la librería."""
tools = BalooTools() tools = BalooTools()
return tools.get_resolution(id) return tools.get_resolution(id)
def get_tags(id):
    """Simplified interface to the library.

    Creates a ``BalooTools`` instance and returns the result of its
    ``get_tags`` method for *id*.
    """
    return BalooTools().get_tags(id)

View File

@@ -8,9 +8,97 @@ Helper functions to interact directly with the Baloo LMDB index.
import json import json
import lmdb import lmdb
import os import os
import re
import sys import sys
from typing import Tuple from typing import Tuple
# Maps the numeric property ids used as keys in the index records (as
# strings, '0'..'83') to readable property names. The position of each name
# in the tuple below is its id.
PROPERTIES_ID_MAP = {
    str(prop_id): prop_name
    for prop_id, prop_name in enumerate((
        'Empty',
        'BitRate',
        'Channels',
        'Duration',
        'Genre',
        'SampleRate',
        'TrackNumber',
        'ReleaseYear',
        'Comment',
        'Artist',
        'Album',
        'AlbumArtist',
        'Composer',
        'Lyricist',
        'Author',
        'Title',
        'Subject',
        'Generator',
        'PageCount',
        'WordCount',
        'LineCount',
        'Language',
        'Copyright',
        'Publisher',
        'CreationDate',
        'Keywords',
        'Width',
        'Height',
        'AspectRatio',
        'FrameRate',
        'Manufacturer',
        'Model',
        'ImageDateTime',
        'ImageOrientation',
        'PhotoFlash',
        'PhotoPixelXDimension',
        'PhotoPixelYDimension',
        'PhotoDateTimeOriginal',
        'PhotoFocalLength',
        'PhotoFocalLengthIn35mmFilm',
        'PhotoExposureTime',
        'PhotoFNumber',
        'PhotoApertureValue',
        'PhotoExposureBiasValue',
        'PhotoWhiteBalance',
        'PhotoMeteringMode',
        'PhotoISOSpeedRatings',
        'PhotoSaturation',
        'PhotoSharpness',
        'PhotoGpsLatitude',
        'PhotoGpsLongitude',
        'PhotoGpsAltitude',
        'TranslationUnitsTotal',
        'TranslationUnitsWithTranslation',
        'TranslationUnitsWithDraftTranslation',
        'TranslationLastAuthor',
        'TranslationLastUpDate',
        'TranslationTemplateDate',
        'OriginUrl',
        'OriginEmailSubject',
        'OriginEmailSender',
        'OriginEmailMessageId',
        'DiscNumber',
        'Location',
        'Performer',
        'Ensemble',
        'Arranger',
        'Conductor',
        'Opus',
        'Label',
        'Compilation',
        'License',
        'Rating',
        'Lyrics',
        'ReplayGainAlbumPeak',
        'ReplayGainAlbumGain',
        'ReplayGainTrackPeak',
        'ReplayGainTrackGain',
        'Description',
        'VideoCodec',
        'AudioCodec',
        'PixelFormat',
        'ColorSpace',
        'AssistiveAlternateDescription',
    ))
}
class BalooTools: class BalooTools:
"""Class to interact directly with the Baloo LMDB index.""" """Class to interact directly with the Baloo LMDB index."""
@@ -21,16 +109,15 @@ class BalooTools:
os.path.expanduser("~"), ".local/share/baloo/index" os.path.expanduser("~"), ".local/share/baloo/index"
) )
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]: def get_info(self, file_id: int) -> json:
""" """
Retrieves the width and height of an image/video from the Baloo index. Retrieves file metadata from the Baloo index.
Args: Args:
file_id: The integer ID of the file. file_id: The integer ID of the file.
sep: Separator used (unused currently, kept for compatibility).
Returns: Returns:
A tuple of (width, height) integers. Returns (-1, -1) if not found. A json with all file metadata fields.
""" """
try: try:
# Using context manager ensures the environment is closed properly # Using context manager ensures the environment is closed properly
@@ -58,16 +145,86 @@ class BalooTools:
try: try:
jvalue = json.loads(value.decode()) jvalue = json.loads(value.decode())
# Baloo stores width in '26' and height in '27' return {PROPERTIES_ID_MAP.get(k, k):
return jvalue.get('26', -1), jvalue.get('27', -1) v for k, v in jvalue.items()}
except (json.JSONDecodeError, KeyError): except (json.JSONDecodeError, KeyError):
return -1, -1 return {}
except lmdb.Error as e: except lmdb.Error as e:
print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr) print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
return {}
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
    """
    Retrieves the width and height of an image/video from the Baloo index.

    Args:
        file_id: The integer ID of the file.
        sep: Separator used (unused currently, kept for compatibility).

    Returns:
        A tuple of (width, height) integers. Returns (-1, -1) if not found.
    """
    file_info = self.get_info(file_id)

    # Defensive: only a dict of properties can be queried below.
    if not isinstance(file_info, dict):
        return -1, -1

    # get_info() renames the numeric property ids through PROPERTIES_ID_MAP,
    # so the values live under 'Width'/'Height'; the raw ids '26'/'27' are
    # kept as a fallback for records that were not renamed.
    width = file_info.get('Width', file_info.get('26', -1))
    height = file_info.get('Height', file_info.get('27', -1))
    return width, height
def get_tags(self, file_id: int) -> dict:
    """
    Retrieves the tag entries stored for a file in the Baloo index.

    Args:
        file_id: The integer ID of the file.

    Returns:
        A dict with a single 'tags' key holding a list of strings, or an
        empty dict when no record matches the ID or the index cannot be read.
    """
    # NOTE(review): block nesting reconstructed from a flattened listing;
    # the return is assumed to sit inside the cursor loop - confirm.
    try:
        # Using context manager ensures the environment is closed properly
        with lmdb.Environment(
            self.baloo_db_path,
            subdir=False,
            readonly=True,
            lock=False,
            max_dbs=20
        ) as env:
            document_data_db = env.open_db(b'docxatrrterms')

            with env.begin() as txn:
                cursor = txn.cursor(document_data_db)

                # Convert ID to 8-byte little-endian format
                file_id_bytes = int.to_bytes(
                    file_id, length=8, byteorder='little', signed=False
                )

                # set_range() positions the cursor at the first key >= the ID
                if cursor.set_range(file_id_bytes):
                    for key, value in cursor:
                        # Stop when the record belongs to a different ID
                        if key != file_id_bytes:
                            break

                        text = value.decode('utf-8', errors='replace')
                        # Drop NUL characters except those followed by 'T'
                        text = re.sub(r'\x00(?![T])', '', text)
                        # Split on the remaining NUL / SOH separators
                        parts = re.split(r'[\x00\x01]', text)

                        tags = []
                        for p in parts:
                            p = p.strip()
                            if p:
                                # Strip the 'TAG-' prefix, then a leading 'TA'
                                # (presumably term prefixes - TODO confirm)
                                tag = p.removeprefix('TAG-').removeprefix('TA')
                                tags.append(tag)

                        # Returns on the first record matching the ID
                        return {'tags': tags}
                        # Alternative kept by the author: comma-joined string
                        # return {'tags': ",".join(tags)}

    except lmdb.Error as e:
        print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)

    return {}
# Helper function to maintain compatibility with bagheera_search_lib.py # Helper function to maintain compatibility with bagheera_search_lib.py
# since it imports `get_resolution` directly. # since it imports `get_resolution` directly.

39
pyproject.toml Normal file
View File

@@ -0,0 +1,39 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "bagheerasearch"
version = "1.1.0"
authors = [
    { name="Ignacio Serantes" },
]
description = "Bagheera Search Tool"
readme = "README.md"
# The sources use str.removeprefix() and the dict "|" operator, both of which
# need Python 3.9 or later.
requires-python = ">=3.9"
dependencies = [
    "lmdb",
    # one_of / infix_notation / set_parse_action are the snake_case names
    # introduced in pyparsing 3.0.
    "pyparsing>=3.0",
]
[project.scripts]
bagheerasearch = "bagheerasearch:main"
[tool.setuptools]
py-modules = ["bagheerasearch"]
packages = [
"bagheera_query_parser_lib",
"bagheera_search_lib",
"baloo_tools"
]
include-package-data = true
zip-safe = false
[tool.setuptools.package-dir]
"" = "."
"bagheera_query_parser_lib" = "bagheera_query_parser_lib"
"bagheera_search_lib" = "bagheera_search_lib"
"baloo_tools" = "baloo_tools"
[tool.setuptools.package-data]
"*" = ["libbaloo_wrapper.so"]

193
setup.py
View File

@@ -6,12 +6,7 @@ from setuptools.command.install import install
from setuptools.command.develop import develop from setuptools.command.develop import develop
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
def compile_wrapper(): def compile_wrapper():
"""
Compila libbaloo_wrapper.so forzando la ruta de inclusión profunda
detectada para KFileMetaData en KF6.
"""
base_path = os.path.abspath(os.path.dirname(__file__)) base_path = os.path.abspath(os.path.dirname(__file__))
source_file = os.path.join(base_path, 'baloo_wrapper', 'baloo_wrapper.cpp') source_file = os.path.join(base_path, 'baloo_wrapper', 'baloo_wrapper.cpp')
output_lib = os.path.join(base_path, 'libbaloo_wrapper.so') output_lib = os.path.join(base_path, 'libbaloo_wrapper.so')
@@ -20,37 +15,22 @@ def compile_wrapper():
print(f"✘ Error: Source file not found at {source_file}") print(f"✘ Error: Source file not found at {source_file}")
sys.exit(1) sys.exit(1)
# Paquetes para pkg-config (nombres comunes en KF6) packages = ['KF6Baloo', 'KF6BalooEngine', 'KF6FileMetadata', 'KF6CoreAddons', 'Qt6Core']
packages = [
'KF6Baloo',
'KF6BalooEngine',
'KF6FileMetadata',
'KF6CoreAddons',
'Qt6Core'
]
cflags = [] cflags = []
libs = [] libs = []
print("Detecting KF6 dependencies...")
for pkg in packages: for pkg in packages:
try: try:
cf = subprocess.check_output(['pkg-config', '--cflags', pkg], cf = subprocess.check_output(['pkg-config', '--cflags', pkg], text=True).split()
text=True).split() lb = subprocess.check_output(['pkg-config', '--libs', pkg], text=True).split()
lb = subprocess.check_output(['pkg-config', '--libs', pkg],
text=True).split()
cflags.extend(cf) cflags.extend(cf)
libs.extend(lb) libs.extend(lb)
print(f" [OK] {pkg}")
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
print(f" [!] Warning: pkg-config could not find {pkg}") print(f" [!] Warning: pkg-config could not find {pkg}")
# CONFIGURACIÓN DE RUTAS SEGÚN TU SISTEMA:
# Añadimos el nivel intermedio para que <KFileMetaData/ExtractorCollection>
# se encuentre en /usr/include/KF6/KFileMetaData/KFileMetaData/
extra_includes = [ extra_includes = [
'-I/usr/include/KF6', '-I/usr/include/KF6',
'-I/usr/include/KF6/KFileMetaData', # Permite resolver KFileMetaData/ '-I/usr/include/KF6/KFileMetaData',
'-I/usr/include/qt6', '-I/usr/include/qt6',
'-I/usr/include/qt6/QtCore' '-I/usr/include/qt6/QtCore'
] ]
@@ -58,7 +38,6 @@ def compile_wrapper():
cflags = list(set(cflags + extra_includes)) cflags = list(set(cflags + extra_includes))
libs = list(set(libs)) libs = list(set(libs))
# Comando de compilación C++17 replicando tu CMakeLists.txt [cite: 1, 2]
compile_cmd = [ compile_cmd = [
'g++', '-shared', '-o', output_lib, 'g++', '-shared', '-o', output_lib,
'-fPIC', '-std=c++17', '-fPIC', '-std=c++17',
@@ -68,60 +47,158 @@ def compile_wrapper():
try: try:
print(f"Executing compilation:\n{' '.join(compile_cmd)}") print(f"Executing compilation:\n{' '.join(compile_cmd)}")
subprocess.check_call(compile_cmd) subprocess.check_call(compile_cmd)
if os.path.exists(output_lib):
print(f"✔ Successfully compiled: {output_lib}")
else:
raise FileNotFoundError("Compilation finished but .so file is missing.")
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print(f"\n✘ Compilation failed (Exit code {e.returncode}).") print(f"\n✘ Compilation failed.")
sys.exit(1) sys.exit(1)
class CustomInstall(install): class CustomInstall(install):
def run(self): def run(self):
compile_wrapper() compile_wrapper()
super().run() super().run()
class CustomDevelop(develop): class CustomDevelop(develop):
def run(self): def run(self):
compile_wrapper() compile_wrapper()
super().run() super().run()
class CustomBuildExt(build_ext): class CustomBuildExt(build_ext):
def run(self): def run(self):
compile_wrapper() compile_wrapper()
super().run() super().run()
# Solo dejamos cmdclass, el resto se lee de pyproject.toml
setup( setup(
name="bagheerasearch",
version="1.0.0",
author="Ignacio Serantes",
description="Bagheera Search Tool & Lib (KF6/C++17)",
py_modules=["bagheerasearch"],
package_dir={
"": ".",
"bagheera_query_parser_lib": "bagheera_query_parser_lib",
"bagheera_search_lib": "bagheera_search_lib",
"baloo_tools": "baloo_tools",
},
packages=[
"bagheera_query_parser_lib",
"bagheera_search_lib",
"baloo_tools"
],
install_requires=["lmdb"],
entry_points={'console_scripts': ['bagheerasearch=bagheerasearch:main']},
cmdclass={ cmdclass={
'install': CustomInstall, 'install': CustomInstall,
'develop': CustomDevelop, 'develop': CustomDevelop,
'build_ext': CustomBuildExt, 'build_ext': CustomBuildExt,
}, }
data_files=[('lib', ['libbaloo_wrapper.so'])],
include_package_data=True,
zip_safe=False,
) )
# import os
# import subprocess
# import sys
# from setuptools import setup
# from setuptools.command.install import install
# from setuptools.command.develop import develop
# from setuptools.command.build_ext import build_ext
#
#
# def compile_wrapper():
# """
# Compila libbaloo_wrapper.so forzando la ruta de inclusión profunda
# detectada para KFileMetaData en KF6.
# """
# base_path = os.path.abspath(os.path.dirname(__file__))
# source_file = os.path.join(base_path, 'baloo_wrapper', 'baloo_wrapper.cpp')
# output_lib = os.path.join(base_path, 'libbaloo_wrapper.so')
#
# if not os.path.exists(source_file):
# print(f"✘ Error: Source file not found at {source_file}")
# sys.exit(1)
#
# # Paquetes para pkg-config (nombres comunes en KF6)
# packages = [
# 'KF6Baloo',
# 'KF6BalooEngine',
# 'KF6FileMetadata',
# 'KF6CoreAddons',
# 'Qt6Core'
# ]
#
# cflags = []
# libs = []
#
# print("Detecting KF6 dependencies...")
# for pkg in packages:
# try:
# cf = subprocess.check_output(['pkg-config', '--cflags', pkg],
# text=True).split()
# lb = subprocess.check_output(['pkg-config', '--libs', pkg],
# text=True).split()
# cflags.extend(cf)
# libs.extend(lb)
# print(f" [OK] {pkg}")
# except subprocess.CalledProcessError:
# print(f" [!] Warning: pkg-config could not find {pkg}")
#
# # CONFIGURACIÓN DE RUTAS SEGÚN TU SISTEMA:
# # Añadimos el nivel intermedio para que <KFileMetaData/ExtractorCollection>
# # se encuentre en /usr/include/KF6/KFileMetaData/KFileMetaData/
# extra_includes = [
# '-I/usr/include/KF6',
# '-I/usr/include/KF6/KFileMetaData', # Permite resolver KFileMetaData/
# '-I/usr/include/qt6',
# '-I/usr/include/qt6/QtCore'
# ]
#
# cflags = list(set(cflags + extra_includes))
# libs = list(set(libs))
#
# # Comando de compilación C++17 replicando tu CMakeLists.txt [cite: 1, 2]
# compile_cmd = [
# 'g++', '-shared', '-o', output_lib,
# '-fPIC', '-std=c++17',
# source_file
# ] + cflags + libs
#
# try:
# print(f"Executing compilation:\n{' '.join(compile_cmd)}")
# subprocess.check_call(compile_cmd)
#
# if os.path.exists(output_lib):
# print(f"✔ Successfully compiled: {output_lib}")
# else:
# raise FileNotFoundError("Compilation finished but .so file is missing.")
#
# except subprocess.CalledProcessError as e:
# print(f"\n✘ Compilation failed (Exit code {e.returncode}).")
# sys.exit(1)
#
#
# class CustomInstall(install):
# def run(self):
# compile_wrapper()
# super().run()
#
#
# class CustomDevelop(develop):
# def run(self):
# compile_wrapper()
# super().run()
#
#
# class CustomBuildExt(build_ext):
# def run(self):
# compile_wrapper()
# super().run()
#
#
# setup(
# name="bagheerasearch",
# version="1.0.0",
# author="Ignacio Serantes",
# description="Bagheera Search Tool & Lib (KF6/C++17)",
# py_modules=["bagheerasearch"],
# package_dir={
# "": ".",
# "bagheera_query_parser_lib": "bagheera_query_parser_lib",
# "bagheera_search_lib": "bagheera_search_lib",
# "baloo_tools": "baloo_tools",
# },
# packages=[
# "bagheera_query_parser_lib",
# "bagheera_search_lib",
# "baloo_tools"
# ],
# install_requires=["lmdb"],
# entry_points={'console_scripts': ['bagheerasearch=bagheerasearch:main']},
# cmdclass={
# 'install': CustomInstall,
# 'develop': CustomDevelop,
# 'build_ext': CustomBuildExt,
# },
# data_files=[('lib', ['libbaloo_wrapper.so'])],
# include_package_data=True,
# zip_safe=False,
# )