v1.1.0
This commit is contained in:
@@ -1,7 +1,19 @@
|
||||
from .baloo_tools import BalooTools
|
||||
|
||||
|
||||
def get_info(id):
    """Simplified library interface: fetch the metadata of a file.

    Args:
        id: Identifier of the file, forwarded unchanged to
            ``BalooTools.get_info``.

    Returns:
        Whatever ``BalooTools.get_info`` returns for that identifier.
    """
    # NOTE: the parameter shadows the builtin ``id``; the name is kept so
    # callers passing it by keyword keep working.
    return BalooTools().get_info(id)
|
||||
|
||||
|
||||
def get_resolution(id):
    """Simplified library interface: fetch the resolution of a file.

    Args:
        id: Identifier of the file, forwarded unchanged to
            ``BalooTools.get_resolution``.

    Returns:
        Whatever ``BalooTools.get_resolution`` returns for that identifier.
    """
    # NOTE: the parameter shadows the builtin ``id``; the name is kept so
    # callers passing it by keyword keep working.
    return BalooTools().get_resolution(id)
|
||||
|
||||
|
||||
def get_tags(id):
    """Simplified library interface: fetch the tags of a file.

    Args:
        id: Identifier of the file, forwarded unchanged to
            ``BalooTools.get_tags``.

    Returns:
        Whatever ``BalooTools.get_tags`` returns for that identifier.
    """
    # NOTE: the parameter shadows the builtin ``id``; the name is kept so
    # callers passing it by keyword keep working.
    return BalooTools().get_tags(id)
|
||||
|
||||
@@ -8,9 +8,97 @@ Helper functions to interact directly with the Baloo LMDB index.
|
||||
import json
|
||||
import lmdb
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Tuple
|
||||
|
||||
# Maps a property id, written as a decimal string (e.g. '26'), to its
# property name (e.g. 'Width'). The ids are simply the positions of the names
# in the tuple below, so the order of the entries must not change.
# NOTE(review): the names presumably mirror KFileMetaData's property enum --
# confirm against the KFileMetaData version in use.
PROPERTIES_ID_MAP = {
    str(index): name
    for index, name in enumerate((
        'Empty',  # 0
        'BitRate',  # 1
        'Channels',  # 2
        'Duration',  # 3
        'Genre',  # 4
        'SampleRate',  # 5
        'TrackNumber',  # 6
        'ReleaseYear',  # 7
        'Comment',  # 8
        'Artist',  # 9
        'Album',  # 10
        'AlbumArtist',  # 11
        'Composer',  # 12
        'Lyricist',  # 13
        'Author',  # 14
        'Title',  # 15
        'Subject',  # 16
        'Generator',  # 17
        'PageCount',  # 18
        'WordCount',  # 19
        'LineCount',  # 20
        'Language',  # 21
        'Copyright',  # 22
        'Publisher',  # 23
        'CreationDate',  # 24
        'Keywords',  # 25
        'Width',  # 26
        'Height',  # 27
        'AspectRatio',  # 28
        'FrameRate',  # 29
        'Manufacturer',  # 30
        'Model',  # 31
        'ImageDateTime',  # 32
        'ImageOrientation',  # 33
        'PhotoFlash',  # 34
        'PhotoPixelXDimension',  # 35
        'PhotoPixelYDimension',  # 36
        'PhotoDateTimeOriginal',  # 37
        'PhotoFocalLength',  # 38
        'PhotoFocalLengthIn35mmFilm',  # 39
        'PhotoExposureTime',  # 40
        'PhotoFNumber',  # 41
        'PhotoApertureValue',  # 42
        'PhotoExposureBiasValue',  # 43
        'PhotoWhiteBalance',  # 44
        'PhotoMeteringMode',  # 45
        'PhotoISOSpeedRatings',  # 46
        'PhotoSaturation',  # 47
        'PhotoSharpness',  # 48
        'PhotoGpsLatitude',  # 49
        'PhotoGpsLongitude',  # 50
        'PhotoGpsAltitude',  # 51
        'TranslationUnitsTotal',  # 52
        'TranslationUnitsWithTranslation',  # 53
        'TranslationUnitsWithDraftTranslation',  # 54
        'TranslationLastAuthor',  # 55
        'TranslationLastUpDate',  # 56
        'TranslationTemplateDate',  # 57
        'OriginUrl',  # 58
        'OriginEmailSubject',  # 59
        'OriginEmailSender',  # 60
        'OriginEmailMessageId',  # 61
        'DiscNumber',  # 62
        'Location',  # 63
        'Performer',  # 64
        'Ensemble',  # 65
        'Arranger',  # 66
        'Conductor',  # 67
        'Opus',  # 68
        'Label',  # 69
        'Compilation',  # 70
        'License',  # 71
        'Rating',  # 72
        'Lyrics',  # 73
        'ReplayGainAlbumPeak',  # 74
        'ReplayGainAlbumGain',  # 75
        'ReplayGainTrackPeak',  # 76
        'ReplayGainTrackGain',  # 77
        'Description',  # 78
        'VideoCodec',  # 79
        'AudioCodec',  # 80
        'PixelFormat',  # 81
        'ColorSpace',  # 82
        'AssistiveAlternateDescription',  # 83
    ))
}
|
||||
|
||||
|
||||
class BalooTools:
|
||||
"""Class to interact directly with the Baloo LMDB index."""
|
||||
@@ -21,16 +109,15 @@ class BalooTools:
|
||||
os.path.expanduser("~"), ".local/share/baloo/index"
|
||||
)
|
||||
|
||||
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
|
||||
def get_info(self, file_id: int) -> json:
|
||||
"""
|
||||
Retrieves the width and height of an image/video from the Baloo index.
|
||||
Retrieves file metadata from the Baloo index.
|
||||
|
||||
Args:
|
||||
file_id: The integer ID of the file.
|
||||
sep: Separator used (unused currently, kept for compatibility).
|
||||
|
||||
Returns:
|
||||
A tuple of (width, height) integers. Returns (-1, -1) if not found.
|
||||
A json with all file metadata fields.
|
||||
"""
|
||||
try:
|
||||
# Using context manager ensures the environment is closed properly
|
||||
@@ -58,15 +145,85 @@ class BalooTools:
|
||||
|
||||
try:
|
||||
jvalue = json.loads(value.decode())
|
||||
# Baloo stores width in '26' and height in '27'
|
||||
return jvalue.get('26', -1), jvalue.get('27', -1)
|
||||
return {PROPERTIES_ID_MAP.get(k, k):
|
||||
v for k, v in jvalue.items()}
|
||||
except (json.JSONDecodeError, KeyError):
|
||||
return -1, -1
|
||||
return {}
|
||||
|
||||
except lmdb.Error as e:
|
||||
print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
|
||||
|
||||
return -1, -1
|
||||
return {}
|
||||
|
||||
def get_resolution(self, file_id: int, sep: str = 'x') -> Tuple[int, int]:
    """
    Retrieve the width and height of an image/video from the Baloo index.

    The dimensions are taken from the metadata returned by ``get_info``.
    ``get_info`` translates Baloo's numeric property ids into property
    names, so the values are looked up as 'Width' and 'Height'. The raw
    ids '26' and '27' are still accepted as a fallback in case the
    metadata was not translated.

    Args:
        file_id: The integer ID of the file.
        sep: Separator used (unused currently, kept for compatibility).

    Returns:
        A tuple of (width, height). Each element is -1 when the
        corresponding property is not available.
    """
    file_info = self.get_info(file_id)

    # Defensive: anything that is not a mapping carries no dimensions.
    if not isinstance(file_info, dict):
        return -1, -1

    width = file_info.get('Width', file_info.get('26', -1))
    height = file_info.get('Height', file_info.get('27', -1))
    return width, height
|
||||
|
||||
def get_tags(self, file_id: int) -> dict:
    """
    Retrieve the tags of a file from the Baloo index.

    Args:
        file_id: The integer ID of the file.

    Returns:
        A dict with a single 'tags' key holding the list of tag strings
        extracted from the file's entry. An empty dict is returned when
        the file has no entry or the index cannot be read (a warning is
        printed to stderr in the latter case).
    """
    try:
        # Using a context manager ensures the environment is closed properly
        with lmdb.Environment(
            self.baloo_db_path,
            subdir=False,
            readonly=True,
            lock=False,
            max_dbs=20,
        ) as env:
            # Baloo's extended-attribute terms database is named
            # 'docxattrterms'. A misspelled name cannot be opened in a
            # read-only environment, so the lookup would always fail.
            xattr_terms_db = env.open_db(b'docxattrterms')

            # Keys are the file ID in 8-byte little-endian format
            file_id_bytes = file_id.to_bytes(
                8, byteorder='little', signed=False
            )

            with env.begin() as txn:
                # Exact-key lookup; yields None when the file has no entry
                value = txn.get(file_id_bytes, db=xattr_terms_db)

    except lmdb.Error as e:
        print(f"Warning: Failed to access Baloo LMDB index: {e}", file=sys.stderr)
        return {}

    if value is None:
        return {}

    text = value.decode('utf-8', errors='replace')
    # Drop every NUL that is not directly followed by 'T', then split on
    # the remaining NUL / \x01 separators.
    # NOTE(review): presumably NULs followed by 'T' delimit the tag terms
    # -- confirm against Baloo's on-disk term format.
    text = re.sub(r'\x00(?![T])', '', text)
    terms = re.split(r'[\x00\x01]', text)

    tags = []
    for term in terms:
        term = term.strip()
        if not term:
            continue
        # Remove exactly one marker prefix. Chaining both removals would
        # also eat a leading 'TA' that belongs to the tag itself
        # (e.g. 'TAG-TAX' must yield 'TAX', not 'X').
        if term.startswith('TAG-'):
            tags.append(term[len('TAG-'):])
        else:
            tags.append(term.removeprefix('TA'))

    return {'tags': tags}
|
||||
|
||||
|
||||
# Helper function to maintain compatibility with bagheera_search_lib.py
|
||||
|
||||
Reference in New Issue
Block a user