python_arango_ogm.db.pao_migration_builder
```python
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Sequence

from python_arango_ogm.db import pao_model
from python_arango_ogm.db.pao_indexes import Index, IndexTypeEnum
from python_arango_ogm.db.pao_migration_model import PAOMigrationModel
from python_arango_ogm.db.pao_model_discovery import PAOModelDiscovery
from python_arango_ogm.utils import str_util

MIGRATION_FILE_TEMPLATE = """
def up(db):
    {migration_up}

def down(db):
    {migration_down}
"""

INDENT = ' ' * 4


class PAOMigrationBuilder:
    ADD_HASH_INDEX_STR = "{indent}{coll_var}.add_hash_index(name='{idx_name}', fields={fields}, unique={unique}, deduplicate=True)"
    ADD_TTL_INDEX_STR = "{indent}{coll_var}.add_ttl_index({fields}, name='{idx_name}', expiry_time={expiry_time})"

    def __init__(self, target_path: str = '.', overwrite: bool = False):
        self.target_path = target_path
        self.overwrite = overwrite
        app_package = os.getenv('PAO_APP_PACKAGE')
        if not app_package:
            raise RuntimeError("PAO_APP_PACKAGE must be defined in the environment (or a .env.test file)")
        self.models_module_name = f"{app_package}.models"
        app_root = app_package.replace('.', '/')
        p = Path(self.target_path).joinpath(app_root)
        self.migration_pathname = p.joinpath("migrations")

        print("MIGRATION_PATHNAME:", self.migration_pathname)
        if not self.migration_pathname.exists(follow_symlinks=False):
            self.migration_pathname.mkdir()

        self._sync_existing_migrations()

    def create_blank_migration(self, name):
        self._sync_existing_migrations()
        mig_filename = self.new_migration_filename(name)
        with open(self.migration_pathname.joinpath(mig_filename), 'w') as f:
            f.write(MIGRATION_FILE_TEMPLATE.format(migration_up='pass', migration_down='pass'))

    def create_model_migrations(self):
        discovery = PAOModelDiscovery()
        model_hash: Dict[str, type[pao_model.PAOModel]] = discovery.discover()

        graph_edges = []

        mig = self.build_migration(PAOMigrationModel, model_hash)

        # Special collection to track migrations; should always be first migration:
        if "pao_migrations" not in self.existing_migrations.values():
            self.create_model_migration(PAOMigrationModel, mig['mod_schema'], mig['hash_indexes'], mig['other_indexes'])

        for mod in model_hash.values():
            mig = self.build_migration(mod, model_hash)
            self.create_model_migration(mod, mig['mod_schema'], mig['hash_indexes'], mig['other_indexes'])
            graph_edges.extend(mig['graph_edges'])

        self.create_graph_migration(graph_edges)

    def create_model_migration(
            self,
            mod: type[pao_model.PAOModel],
            mod_schema: dict,
            hash_indexes: Sequence,
            other_indexes: Sequence
    ):
        """
        Create migration for given model, schema, hash_indexes and other_indexes
        """
        coll_name = mod.collection_name()
        coll_var = f"{coll_name}_collection"

        mig_filename, updating, existing_mig_filename = self._determine_filename_updating(coll_name)
        mig_text, schema_var = self._build_migration_file_text(
            coll_name=coll_name,
            coll_var=coll_var,
            mod_schema=mod_schema,
            hash_indexes=hash_indexes,
            other_indexes=other_indexes
        )

        noop = False
        if updating:
            with open(self.migration_pathname.joinpath(existing_mig_filename)) as f:
                existing_text = f.read()

            if existing_text == mig_text:
                # Don't save file:
                noop = True
                print("Migration is the same; skipping.")
            else:
                # Re-use the existing collection: reconfigure its schema and drop
                # its indexes so the new migration can re-add them from the model:
                mig = []
                mig.append(f"\n{INDENT}{coll_var}=db.collection('{coll_name}')")
                mig.append(f"{INDENT}{coll_var}.configure(schema={schema_var})")
                mig.append(f"{INDENT}{coll_var}_indexes={coll_var}.indexes()")
                mig.append(f"{INDENT}[{coll_var}.delete_index(idx['id'], ignore_missing=True) for idx in {coll_var}_indexes]")
                prepend_text = "\n".join(mig)
                mig_text, _ = self._build_migration_file_text(
                    coll_name=coll_name,
                    coll_var=coll_var,
                    mod_schema=mod_schema,
                    hash_indexes=hash_indexes,
                    other_indexes=other_indexes,
                    prepare_collection_txt=prepend_text
                )

        # Write to file:
        if not noop:
            pathname = self.migration_pathname.joinpath(mig_filename)
            with open(pathname, mode="w") as file:
                file.write(mig_text)
            self._sync_existing_migrations()

    def create_graph_migration(self, graph_edges: List[Dict]):
        """ Create migration for graph_edges """
        graph_name = os.getenv('PAO_GRAPH_NAME')
        if graph_name is None:
            raise ValueError("PAO_GRAPH_NAME must be defined in the environment (or a .env file)")

        print("Creating migration for graph: ", graph_edges)
        mig_text = self._build_graph_migration_text(graph_edges, graph_name)
        graph_mig_filename = self._find_existing_migration(graph_name, suffix=".py")
        noop = False
        if graph_mig_filename:
            with open(self.migration_pathname.joinpath(graph_mig_filename)) as f:
                existing_text = f.read()

            if existing_text == mig_text:
                # Don't save file:
                noop = True
                print("Graph Migration is the same; skipping.")

        if not noop:
            mig_filename = self.new_migration_filename(graph_name)
            mig_pathname = self.migration_pathname.joinpath(mig_filename)
            with open(mig_pathname, mode="w") as file:
                file.write(mig_text)

            self._sync_existing_migrations()

    def new_migration_filename(self, name: str):
        """ Return filename for a new migration using given name as suffix """
        print(f"NEW_MIGRATION: [{name}]", len(self.existing_migrations))
        print(self.existing_migrations)
        mig_num = len(self.existing_migrations) + 1
        mig_filename = f"{mig_num:04}_{name}.py"
        return mig_filename

    def build_index_migrations(self, coll_var: str, hash_indexes: Sequence, other_indexes: Sequence):
        """ Build migrations for indexes """
        up_migration = []
        down_migration = []

        # Build indexes defined on Field objects:
        self._build_field_index_migrations(coll_var, hash_indexes, up_migration, down_migration)

        # Build indexes defined as Index objects:
        self._build_other_indexes(coll_var, other_indexes, up_migration, down_migration)

        return up_migration, down_migration

    def build_migration(self, mod: type[pao_model.PAOModel], model_hash: Dict[str, type[pao_model.PAOModel]]) -> Dict[str, Any]:
        indexes = [e for e in dir(mod) if isinstance(getattr(mod, e), Index)]

        mod_schema, hash_indexes = self.build_schema(mod)
        graph_edges = self.build_model_edges(mod, model_hash)
        other_indexes = []
        for oi in indexes:
            index: Index = getattr(mod, oi)
            other_indexes.append({
                'fields': index.fields,
                'index_type': index.index_type,
                'name': index.name,
                'expiry_seconds': index.expiry_seconds,
                'unique': index.unique,
            })

        return {
            'mod_schema': mod_schema,
            'hash_indexes': hash_indexes,
            'graph_edges': graph_edges,
            'other_indexes': other_indexes
        }

    def build_schema(self, mod: type[pao_model.PAOModel]) -> tuple[Dict, Sequence]:
        required = []
        hash_indexes = []
        properties = {}

        fields = [f for f in dir(mod) if isinstance(getattr(mod, f), pao_model.Field)]

        for f in fields:
            field: pao_model.Field = getattr(mod, f)
            properties[f] = field.build_schema_properties()
            if field.required:
                required.append(f)
            if field.index_name or field.unique:
                hash_indexes.append({'name': field.index_name, 'fields': [f], 'unique': field.unique})

        print("MOD:", mod.__name__)
        mod_schema = dict(
            rule=dict(
                properties=properties,
                additionalProperties=mod.ADDITIONAL_PROPERTIES,
            ),
            level=mod.LEVEL,
        )
        if required:
            mod_schema['rule']['required'] = required

        return mod_schema, hash_indexes

    def build_model_edges(self, mod: type[pao_model.PAOModel], model_hash: Dict[str, type[pao_model.PAOModel]]) -> Sequence[Dict]:
        """ Build model edges and return as a list of dictionaries """
        graph_edges = []
        edges = [e for e in dir(mod) if isinstance(getattr(mod, e), pao_model.PAOEdgeDef)]
        for e in edges:
            edge: pao_model.PAOEdgeDef = getattr(mod, e)
            # to_model may be given as a model class or as its name:
            to_model: type[pao_model.PAOModel] = model_hash[edge.to_model] if isinstance(edge.to_model, str) else edge.to_model
            from_name = mod.collection_name()
            to_name = to_model.collection_name()
            edge_name = f"{from_name}__{to_name}"
            graph_edges.append({
                'edge_collection': edge_name,
                'from_vertex_collections': [from_name],
                'to_vertex_collections': [to_name]
            })
        return graph_edges

    def _build_migration_file_text(
            self,
            coll_name: str,
            coll_var: str,
            mod_schema: dict,
            hash_indexes: Sequence,
            other_indexes: Sequence,
            prepare_collection_txt: str = None
    ) -> tuple[str, str]:
        up_migration = []
        down_migration = []
        schema_var = self._add_migration_schema_up(coll_name, mod_schema, up_migration)

        if prepare_collection_txt:
            up_migration.append(prepare_collection_txt)
        else:
            up_migration.append(f"\n{INDENT}{coll_var}=db.create_collection('{coll_name}', schema={schema_var})")
            up_migration.append(f"{INDENT}{coll_var}.configure(schema={schema_var})")
        down_migration.append(f"{coll_var}=db.collection('{coll_name}')")

        # Index migrations:
        idx_up, idx_down = self.build_index_migrations(coll_var, hash_indexes, other_indexes)
        up_migration.extend(idx_up)
        down_migration.extend(idx_down)

        # Down-migration - Delete whole collection:
        down_migration.append(f"{INDENT}db.delete_collection('{coll_name}', ignore_missing=True)")

        # Format migration stuff into text for file:
        mig_up = "\n".join(up_migration)
        mig_down = "\n".join(down_migration)
        mig_text = MIGRATION_FILE_TEMPLATE.format(migration_up=mig_up, migration_down=mig_down)
        return mig_text, schema_var

    def _build_other_indexes(
            self,
            coll_var: str,
            other_indexes: Sequence,
            up_migration: List,
            down_migration: List
    ):
        """
        Build indexes defined as Index objects
        """
        for idx in other_indexes:
            idx_type: IndexTypeEnum = idx['index_type']
            idx_name = idx['name']
            if idx_type == IndexTypeEnum.INVERTED:
                up_migration.append(f"{INDENT}{coll_var}.add_inverted_index(name='{idx_name}', fields={idx['fields']})")
            elif idx_type == IndexTypeEnum.GEO:
                up_migration.append(f"{INDENT}{coll_var}.add_geo_index(name='{idx_name}', fields={idx['fields']})")
            elif idx_type == IndexTypeEnum.TTL:
                up_migration.append(self.ADD_TTL_INDEX_STR.format(
                    indent=INDENT,
                    coll_var=coll_var,
                    fields=idx['fields'],
                    idx_name=idx_name,
                    expiry_time=idx['expiry_seconds']
                ))
            elif idx_type == IndexTypeEnum.HASH:
                up_migration.append(self.ADD_HASH_INDEX_STR.format(
                    indent=INDENT,
                    coll_var=coll_var,
                    idx_name=idx_name,
                    fields=idx['fields'],
                    unique=idx['unique'],
                ))
            # Add down migration for index:
            down_migration.append(f"{INDENT}{coll_var}.delete_index('{idx_name}')")

    def _build_graph_migration_text(self, graph_edges, graph_name):
        delete_graph_str = f"db.delete_graph('{graph_name}', ignore_missing=True)"
        graph_json = self._fix_python_json(json.dumps(graph_edges, indent=4)[2:-2])
        graph_mig = []
        graph_mig.append(delete_graph_str)
        graph_mig.append(f"{INDENT}db.create_graph('{graph_name}', [")
        graph_mig.append(str_util.indent(graph_json, 2))
        graph_mig.append(f"{INDENT}])")
        mig_up = "\n".join(graph_mig)
        mig_down = delete_graph_str
        mig_text = MIGRATION_FILE_TEMPLATE.format(migration_up=mig_up, migration_down=mig_down)
        return mig_text

    def _build_field_index_migrations(
            self,
            coll_var: str,
            hash_indexes: Sequence,
            up_migration: List,
            down_migration: List
    ):
        """ Build indexes defined on Field objects """
        for hash_index in hash_indexes:
            idx_name = hash_index['name']
            up_migration.append(
                self.ADD_HASH_INDEX_STR.format(
                    indent=INDENT,
                    coll_var=coll_var,
                    idx_name=idx_name,
                    fields=hash_index['fields'],
                    unique=hash_index['unique'],
                )
            )
            down_migration.append(f"{INDENT}{coll_var}.delete_index('{idx_name}')")

    def _determine_filename_updating(self, coll_name: str) -> tuple[str, bool, str]:
        # Determine whether a migration already exists and whether we are
        # overwriting. If not, we need to replace the schema in a new migration.
        # Likewise, if there are existing indexes that differ from those
        # specified in the model, they should be removed and re-added.

        existing_mig_filename = self._find_existing_migration(coll_name, suffix=".py")
        updating = False
        if self.overwrite:
            mig_filename = existing_mig_filename or self.new_migration_filename(coll_name)
        else:
            # Don't overwrite the migration; add a new one that updates the schema:
            print(f"NOT OVERWRITE: [{coll_name}]")
            mig_filename = self.new_migration_filename(coll_name)
            updating = existing_mig_filename is not None

        return mig_filename, updating, existing_mig_filename

    def _add_migration_schema_up(self, coll_name: str, mod_schema: dict, up_migration: List) -> str:
        schema_json = self._fix_python_json(json.dumps(mod_schema, indent=4)[2:-2])
        print("SCHEMA_JSON:", schema_json)
        schema_var = f"{coll_name.upper()}_SCHEMA"
        up_migration.append(f"{schema_var}={{")
        up_migration.append(str_util.indent(schema_json, 2))
        up_migration.append(f"{INDENT}}}")
        return schema_var

    def _fix_python_json(self, json_str: str):
        # json.dumps() emits JSON literals; the migration files are Python source:
        json_str = json_str.replace(': true', ': True')
        return json_str.replace(': false', ': False')

    def _sync_existing_migrations(self):
        """ Synchronize self.existing_migrations with migrations on disk """

        def get_migration_name(migration_filename: str) -> str:
            # '0002_users' -> 'users'
            return migration_filename.split('_', 1)[-1]

        migs = [c.stem for c in self.migration_pathname.iterdir() if not c.is_dir() and c.suffix == '.py']
        self.existing_migrations = {m: get_migration_name(m) for m in sorted(migs)}
        print("self.existing_migrations:", self.existing_migrations)

    def _find_existing_migration(self, collection_name: str, suffix: str = None):
        """ Find existing migration based on collection name """

        try:
            values = list(self.existing_migrations.values())
            idx = values.index(collection_name)
        except ValueError:
            mig_name = None
        else:
            keys = list(self.existing_migrations.keys())
            print(f"Looking for {idx} in {len(keys)}", keys, keys[idx])
            mig_name = keys[idx] + str(suffix)
        return mig_name
```
MIGRATION_FILE_TEMPLATE = '\ndef up(db):\n    {migration_up}\n\ndef down(db):\n    {migration_down}\n'
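Every migration file the builder writes is this template with the two placeholders filled in. For example, create_blank_migration() fills both with pass, producing:

```python
def up(db):
    pass

def down(db):
    pass
```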
INDENT = '    '
class PAOMigrationBuilder:
PAOMigrationBuilder(target_path: str = '.', overwrite: bool = False)
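The constructor derives the migrations directory from the PAO_APP_PACKAGE environment variable, creates it if needed, and loads any existing migrations. A minimal usage sketch (the package name my_app is hypothetical; the variable would normally be set outside the program or via a .env file):

```python
import os

from python_arango_ogm.db.pao_migration_builder import PAOMigrationBuilder

os.environ['PAO_APP_PACKAGE'] = 'my_app'  # hypothetical package name

builder = PAOMigrationBuilder(target_path='.', overwrite=False)
# Migration files are read from and written to ./my_app/migrations/
```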
ADD_HASH_INDEX_STR = "{indent}{coll_var}.add_hash_index(name='{idx_name}', fields={fields}, unique={unique}, deduplicate=True)"
ADD_TTL_INDEX_STR = "{indent}{coll_var}.add_ttl_index({fields}, name='{idx_name}', expiry_time={expiry_time})"
def create_model_migrations(self):
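This is the main entry point: it discovers models, writes one numbered migration per model (ensuring the pao_migrations bookkeeping collection comes first), then writes a graph migration for all collected edges. A sketch, assuming PAO_APP_PACKAGE and PAO_GRAPH_NAME are set and the app's models module is importable:

```python
builder = PAOMigrationBuilder(overwrite=True)
builder.create_model_migrations()
# Typical result on a fresh project (model names hypothetical):
#   0001_pao_migrations.py
#   0002_users.py
#   0003_orders.py
#   0004_<PAO_GRAPH_NAME>.py
```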
def create_model_migration(self, mod: type[python_arango_ogm.db.pao_model.PAOModel], mod_schema: dict, hash_indexes: Sequence, other_indexes: Sequence):
Create migration for given model, schema, hash_indexes and other_indexes
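A sketch of how create_model_migrations() drives this method for a single model (User is a hypothetical PAOModel subclass):

```python
model_hash = {'User': User}  # as returned by PAOModelDiscovery.discover()
mig = builder.build_migration(User, model_hash)
builder.create_model_migration(
    User,
    mig['mod_schema'],
    mig['hash_indexes'],
    mig['other_indexes'],
)
# Writes NNNN_users.py, or skips the write if an identical migration exists.
```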
def create_graph_migration(self, graph_edges: List[Dict]):
Create migration for graph_edges
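The generated file drops and recreates the graph named by PAO_GRAPH_NAME. For a single users-to-orders edge (all names hypothetical), the written migration would look roughly like:

```python
def up(db):
    db.delete_graph('my_graph', ignore_missing=True)
    db.create_graph('my_graph', [
        {
            "edge_collection": "users__orders",
            "from_vertex_collections": ["users"],
            "to_vertex_collections": ["orders"]
        }
    ])

def down(db):
    db.delete_graph('my_graph', ignore_missing=True)
```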
def new_migration_filename(self, name: str):
Return filename for a new migration using given name as suffix
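Numbering is one-based and derived from the count of migrations already on disk, so with two existing migrations:

```python
builder.new_migration_filename('users')  # -> '0003_users.py'
```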
def build_index_migrations(self, coll_var: str, hash_indexes: Sequence, other_indexes: Sequence):
Build migrations for indexes
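A sketch of the paired up/down lines this returns (the index spec below is hypothetical):

```python
up_lines, down_lines = builder.build_index_migrations(
    coll_var='users_collection',
    hash_indexes=[{'name': 'users_email_idx', 'fields': ['email'], 'unique': True}],
    other_indexes=[],
)
# up_lines[0]:
#     users_collection.add_hash_index(name='users_email_idx', fields=['email'], unique=True, deduplicate=True)
# down_lines[0]:
#     users_collection.delete_index('users_email_idx')
```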
def build_migration(self, mod: type[python_arango_ogm.db.pao_model.PAOModel], model_hash: Dict[str, type[python_arango_ogm.db.pao_model.PAOModel]]) -> Dict[str, Any]:
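The returned dictionary bundles everything create_model_migration() needs (User is hypothetical):

```python
mig = builder.build_migration(User, model_hash)
mig['mod_schema']     # schema dict from build_schema()
mig['hash_indexes']   # hash indexes implied by Field(index_name=..., unique=...)
mig['other_indexes']  # Index attributes on the model, flattened to plain dicts
mig['graph_edges']    # edge definitions from the model's PAOEdgeDef attributes
```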
def build_schema(self, mod: type[python_arango_ogm.db.pao_model.PAOModel]) -> tuple[typing.Dict, typing.Sequence]:
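For a model with a single required, uniquely indexed email field, the returned pair would look roughly like this sketch (the property body and the values shown for additionalProperties and level are illustrative assumptions; they come from Field.build_schema_properties(), mod.ADDITIONAL_PROPERTIES, and mod.LEVEL):

```python
mod_schema = {
    'rule': {
        'properties': {
            'email': {'type': 'string'},  # illustrative build_schema_properties() output
        },
        'additionalProperties': False,    # mod.ADDITIONAL_PROPERTIES
        'required': ['email'],            # only present when some field is required
    },
    'level': 'strict',                    # mod.LEVEL
}
hash_indexes = [{'name': None, 'fields': ['email'], 'unique': True}]
```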
def build_model_edges(self, mod: type[python_arango_ogm.db.pao_model.PAOModel], model_hash: Dict[str, type[python_arango_ogm.db.pao_model.PAOModel]]) -> Sequence[Dict]:
Build model edges and return as a list of dictionaries
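Given a hypothetical User model with a PAOEdgeDef attribute pointing at an Order model (to_model may be the model class itself or its name as a string; the edge collection name is always from__to), the result would be:

```python
builder.build_model_edges(User, model_hash)
# [{'edge_collection': 'users__orders',
#   'from_vertex_collections': ['users'],
#   'to_vertex_collections': ['orders']}]
```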